moving predictions out of base class

CDonnerer committed Mar 5, 2023
1 parent 160a852 commit f86a70a
Showing 7 changed files with 36 additions and 17 deletions.
6 changes: 0 additions & 6 deletions src/xgboost_distribution/distributions/base.py
@@ -1,7 +1,6 @@
 """Distribution base class
 """
 from abc import ABC, abstractmethod
-from collections import namedtuple
 
 
 class BaseDistribution(ABC):
@@ -11,11 +10,6 @@ class BaseDistribution(ABC):
     functions that operate on the data (`y`) and the outputs of the xgboost (`params`).
     """
 
-    def __init__(self):
-        self.Predictions = namedtuple("Predictions", (p for p in self.params))
-        # attach to globals to make pickling of namedtuple work
-        globals()[self.Predictions.__name__] = self.Predictions
-
     def check_target(self, y):
         """Ensure that the target is compatible with the chosen distribution"""
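Background on the deleted `__init__`: pickle serializes a namedtuple instance by reference to its class's module and name, so a class created dynamically inside `__init__` is only picklable if it is also bound to a module attribute — which is what the `globals()` workaround did. Defining each `Predictions` namedtuple at module level, as the files below now do, removes the need for it. A minimal self-contained sketch of the underlying behavior (illustrative only, not code from this repository):

import pickle
from collections import namedtuple

# Module-level class: pickle records a reference to
# "<module>.Predictions" and resolves it again on load.
Predictions = namedtuple("Predictions", ("scale",))


def make_runtime_class():
    # Built at runtime and never bound to a module attribute,
    # so pickle's attribute lookup for this class fails on dump.
    return namedtuple("RuntimePredictions", ("scale",))


print(pickle.loads(pickle.dumps(Predictions(scale=2.0))))  # round-trips fine

cls = make_runtime_class()
try:
    pickle.dumps(cls(scale=2.0))
except pickle.PicklingError as exc:
    print(f"pickling failed: {exc}")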
10 changes: 8 additions & 2 deletions src/xgboost_distribution/distributions/exponential.py
@@ -1,11 +1,15 @@
 """Exponential distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import expon
 
 from xgboost_distribution.distributions.base import BaseDistribution
 from xgboost_distribution.distributions.utils import check_all_ge_zero
 
+Predictions = namedtuple("Predictions", ("scale"))
+
 
 class Exponential(BaseDistribution):
     """Exponential distribution with log score
@@ -60,7 +64,9 @@ def loss(self, y, params):
     def predict(self, params):
         log_scale = params  # params are shape (n,)
         scale = np.exp(log_scale)
-        return self.Predictions(scale=scale)
+        return Predictions(scale=scale)
 
     def starting_params(self, y):
-        return (np.log(np.mean(y)),)
+        return Predictions(
+            scale=np.log(np.mean(y)),
+        )
6 changes: 5 additions & 1 deletion src/xgboost_distribution/distributions/laplace.py
@@ -1,10 +1,14 @@
 """Laplace distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import cauchy, laplace
 
 from xgboost_distribution.distributions.base import BaseDistribution
 
+Predictions = namedtuple("Predictions", ("loc", "scale"))
+
 
 class Laplace(BaseDistribution):
     """Laplace distribution with log scoring
@@ -87,7 +91,7 @@ def loss(self, y, params):
     def predict(self, params):
         loc, log_scale = self._split_params(params)
         scale = np.exp(log_scale)
-        return self.Predictions(loc=loc, scale=scale)
+        return Predictions(loc=loc, scale=scale)
 
     def starting_params(self, y):
         return np.mean(y), np.log(np.std(y))
8 changes: 6 additions & 2 deletions src/xgboost_distribution/distributions/log_normal.py
@@ -1,11 +1,15 @@
 """LogNormal distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import lognorm
 
 from xgboost_distribution.distributions.base import BaseDistribution
 from xgboost_distribution.distributions.utils import check_all_gt_zero
 
+Predictions = namedtuple("Predictions", ("scale", "s"))
+
 
 class LogNormal(BaseDistribution):
     """LogNormal distribution with log scoring.
@@ -78,11 +82,11 @@ def predict(self, params):
         log_scale, log_s = self._split_params(params)
         scale, s = np.exp(log_scale), np.exp(log_s)
 
-        return self.Predictions(scale=scale, s=s)
+        return Predictions(scale=scale, s=s)
 
     def starting_params(self, y):
         log_y = np.log(y)
-        return np.mean(log_y), np.log(np.std(log_y))
+        return Predictions(scale=np.mean(log_y), s=np.log(np.std(log_y)))
 
     def _split_params(self, params):
         """Return log_scale (loc) and log_s from params"""
8 changes: 6 additions & 2 deletions src/xgboost_distribution/distributions/negative_binomial.py
@@ -1,5 +1,7 @@
 """Negative binomial distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.special import digamma, expit
 from scipy.stats import nbinom
@@ -10,6 +12,8 @@
     check_all_integer,
 )
 
+Predictions = namedtuple("Predictions", ("n", "p"))
+
 
 class NegativeBinomial(BaseDistribution):
     """Negative binomial distribution with log score
@@ -111,8 +115,8 @@ def predict(self, params):
         log_n, raw_p = params[:, 0], params[:, 1]
         n = np.exp(log_n)
         p = expit(raw_p)
-        return self.Predictions(n=n, p=p)
+        return Predictions(n=n, p=p)
 
     def starting_params(self, y):
         # TODO: starting params can matter a lot?
-        return (np.log(np.mean(y)), 0)  # expit(0) = 0.5
+        return Predictions(n=np.log(np.mean(y)), p=0)  # expit(0) = 0.5
9 changes: 6 additions & 3 deletions src/xgboost_distribution/distributions/normal.py
@@ -1,10 +1,14 @@
 """Normal distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import norm
 
 from xgboost_distribution.distributions.base import BaseDistribution
 
+Predictions = namedtuple("Predictions", ("loc", "scale"))
+
 
 class Normal(BaseDistribution):
     """Normal distribution with log scoring
@@ -88,11 +92,10 @@ def predict(self, params):
         # TODO: do we need clipping for safety?
         # log_scale = np.clip(log_scale, -100, 100)
         scale = np.exp(log_scale)
-
-        return self.Predictions(loc=loc, scale=scale)
+        return Predictions(loc=loc, scale=scale)
 
     def starting_params(self, y):
-        return np.mean(y), np.log(np.std(y))
+        return Predictions(loc=np.mean(y), scale=np.log(np.std(y)))
 
     def _split_params(self, params):
         """Return loc and log_scale from params"""
6 changes: 5 additions & 1 deletion src/xgboost_distribution/distributions/poisson.py
@@ -1,5 +1,7 @@
 """Poisson distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import poisson
 
@@ -9,6 +11,8 @@
     check_all_integer,
 )
 
+Predictions = namedtuple("Predictions", ("mu"))
+
 
 class Poisson(BaseDistribution):
     """Poisson distribution with log score
@@ -65,7 +69,7 @@ def loss(self, y, params):
     def predict(self, params):
         log_mu = params  # params are shape (n,)
         mu = np.exp(log_mu)
-        return self.Predictions(mu=mu)
+        return Predictions(mu=mu)
 
     def starting_params(self, y):
         return (np.log(np.mean(y)),)
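Net effect of the change: every distribution now exposes a module-level `Predictions` namedtuple, so prediction outputs (and models that reference them) pickle without the removed `globals()` hack. A hedged usage sketch, assuming the `XGBDistribution` estimator API from the project README (exact constructor arguments may differ between versions):

import pickle

import numpy as np
from sklearn.datasets import make_regression
from xgboost_distribution import XGBDistribution

X, y = make_regression(n_samples=200, n_features=5, random_state=0)

model = XGBDistribution(distribution="normal", n_estimators=10)
model.fit(X, y)

preds = model.predict(X)  # Predictions(loc=..., scale=...) arrays
print(preds.loc[:3], preds.scale[:3])

# Model and namedtuple output round-trip through pickle directly.
model2, preds2 = pickle.loads(pickle.dumps((model, preds)))
assert np.allclose(preds.loc, preds2.loc)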
