moving predictions out of base class

CDonnerer committed Mar 5, 2023
1 parent 160a852 commit f86a70a
Showing 7 changed files with 36 additions and 17 deletions.
6 changes: 0 additions & 6 deletions src/xgboost_distribution/distributions/base.py
@@ -1,7 +1,6 @@
 """Distribution base class
 """
 from abc import ABC, abstractmethod
-from collections import namedtuple
 
 
 class BaseDistribution(ABC):
@@ -11,11 +10,6 @@ class BaseDistribution(ABC):
     functions that operate on the data (`y`) and the outputs of the xgboost (`params`).
     """
 
-    def __init__(self):
-        self.Predictions = namedtuple("Predictions", (p for p in self.params))
-        # attach to globals to make pickling of namedtuple work
-        globals()[self.Predictions.__name__] = self.Predictions
-
     def check_target(self, y):
         """Ensure that the target is compatible with the chosen distribution"""
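Background on the deleted `__init__`: pickle serializes a namedtuple instance by reference to its class's module and name, so a class created dynamically inside `__init__` is only picklable if it is also bound to a module attribute — which is what the `globals()` workaround did. Defining each `Predictions` namedtuple at module level, as the files below now do, removes the need for it. A minimal self-contained sketch of the underlying behavior (illustrative only, not code from this repository):

import pickle
from collections import namedtuple

# Module-level class: pickle records a reference to
# "<module>.Predictions" and resolves it again on load.
Predictions = namedtuple("Predictions", ("scale",))


def make_runtime_class():
    # Built at runtime and never bound to a module attribute,
    # so pickle's attribute lookup for this class fails on dump.
    return namedtuple("RuntimePredictions", ("scale",))


print(pickle.loads(pickle.dumps(Predictions(scale=2.0))))  # round-trips fine

cls = make_runtime_class()
try:
    pickle.dumps(cls(scale=2.0))
except pickle.PicklingError as exc:
    print(f"pickling failed: {exc}")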
10 changes: 8 additions & 2 deletions src/xgboost_distribution/distributions/exponential.py
@@ -1,11 +1,15 @@
 """Exponential distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import expon
 
 from xgboost_distribution.distributions.base import BaseDistribution
 from xgboost_distribution.distributions.utils import check_all_ge_zero
 
+Predictions = namedtuple("Predictions", ("scale"))
+
 
 class Exponential(BaseDistribution):
     """Exponential distribution with log score
@@ -60,7 +64,9 @@ def loss(self, y, params):
     def predict(self, params):
         log_scale = params  # params are shape (n,)
         scale = np.exp(log_scale)
-        return self.Predictions(scale=scale)
+        return Predictions(scale=scale)
 
     def starting_params(self, y):
-        return (np.log(np.mean(y)),)
+        return Predictions(
+            scale=np.log(np.mean(y)),
+        )
6 changes: 5 additions & 1 deletion src/xgboost_distribution/distributions/laplace.py
@@ -1,10 +1,14 @@
 """Laplace distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import cauchy, laplace
 
 from xgboost_distribution.distributions.base import BaseDistribution
 
+Predictions = namedtuple("Predictions", ("loc", "scale"))
+
 
 class Laplace(BaseDistribution):
     """Laplace distribution with log scoring
@@ -87,7 +91,7 @@ def loss(self, y, params):
     def predict(self, params):
         loc, log_scale = self._split_params(params)
         scale = np.exp(log_scale)
-        return self.Predictions(loc=loc, scale=scale)
+        return Predictions(loc=loc, scale=scale)
 
     def starting_params(self, y):
         return np.mean(y), np.log(np.std(y))
8 changes: 6 additions & 2 deletions src/xgboost_distribution/distributions/log_normal.py
@@ -1,11 +1,15 @@
 """LogNormal distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import lognorm
 
 from xgboost_distribution.distributions.base import BaseDistribution
 from xgboost_distribution.distributions.utils import check_all_gt_zero
 
+Predictions = namedtuple("Predictions", ("scale", "s"))
+
 
 class LogNormal(BaseDistribution):
     """LogNormal distribution with log scoring.
@@ -78,11 +82,11 @@ def predict(self, params):
         log_scale, log_s = self._split_params(params)
         scale, s = np.exp(log_scale), np.exp(log_s)
 
-        return self.Predictions(scale=scale, s=s)
+        return Predictions(scale=scale, s=s)
 
     def starting_params(self, y):
         log_y = np.log(y)
-        return np.mean(log_y), np.log(np.std(log_y))
+        return Predictions(scale=np.mean(log_y), s=np.log(np.std(log_y)))
 
     def _split_params(self, params):
         """Return log_scale (loc) and log_s from params"""
8 changes: 6 additions & 2 deletions src/xgboost_distribution/distributions/negative_binomial.py
@@ -1,5 +1,7 @@
 """Negative binomial distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.special import digamma, expit
 from scipy.stats import nbinom
@@ -10,6 +12,8 @@
     check_all_integer,
 )
 
+Predictions = namedtuple("Predictions", ("n", "p"))
+
 
 class NegativeBinomial(BaseDistribution):
     """Negative binomial distribution with log score
@@ -111,8 +115,8 @@ def predict(self, params):
         log_n, raw_p = params[:, 0], params[:, 1]
         n = np.exp(log_n)
         p = expit(raw_p)
-        return self.Predictions(n=n, p=p)
+        return Predictions(n=n, p=p)
 
     def starting_params(self, y):
         # TODO: starting params can matter a lot?
-        return (np.log(np.mean(y)), 0)  # expit(0) = 0.5
+        return Predictions(n=np.log(np.mean(y)), p=0)  # expit(0) = 0.5
9 changes: 6 additions & 3 deletions src/xgboost_distribution/distributions/normal.py
@@ -1,10 +1,14 @@
 """Normal distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import norm
 
 from xgboost_distribution.distributions.base import BaseDistribution
 
+Predictions = namedtuple("Predictions", ("loc", "scale"))
+
 
 class Normal(BaseDistribution):
     """Normal distribution with log scoring
@@ -88,11 +92,10 @@ def predict(self, params):
         # TODO: do we need clipping for safety?
         # log_scale = np.clip(log_scale, -100, 100)
         scale = np.exp(log_scale)
-
-        return self.Predictions(loc=loc, scale=scale)
+        return Predictions(loc=loc, scale=scale)
 
     def starting_params(self, y):
-        return np.mean(y), np.log(np.std(y))
+        return Predictions(loc=np.mean(y), scale=np.log(np.std(y)))
 
     def _split_params(self, params):
         """Return loc and log_scale from params"""
6 changes: 5 additions & 1 deletion src/xgboost_distribution/distributions/poisson.py
@@ -1,5 +1,7 @@
 """Poisson distribution
 """
+from collections import namedtuple
+
 import numpy as np
 from scipy.stats import poisson
 
@@ -9,6 +11,8 @@
     check_all_integer,
 )
 
+Predictions = namedtuple("Predictions", ("mu"))
+
 
 class Poisson(BaseDistribution):
     """Poisson distribution with log score
@@ -65,7 +69,7 @@ def loss(self, y, params):
     def predict(self, params):
         log_mu = params  # params are shape (n,)
         mu = np.exp(log_mu)
-        return self.Predictions(mu=mu)
+        return Predictions(mu=mu)
 
     def starting_params(self, y):
         return (np.log(np.mean(y)),)
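Net effect of the change: every distribution now exposes a module-level `Predictions` namedtuple, so prediction outputs (and models that reference them) pickle without the removed `globals()` hack. A hedged usage sketch, assuming the `XGBDistribution` estimator API from the project README (exact constructor arguments may differ between versions):

import pickle

import numpy as np
from sklearn.datasets import make_regression
from xgboost_distribution import XGBDistribution

X, y = make_regression(n_samples=200, n_features=5, random_state=0)

model = XGBDistribution(distribution="normal", n_estimators=10)
model.fit(X, y)

preds = model.predict(X)  # Predictions(loc=..., scale=...) arrays
print(preds.loc[:3], preds.scale[:3])

# Model and namedtuple output round-trip through pickle directly.
model2, preds2 = pickle.loads(pickle.dumps((model, preds)))
assert np.allclose(preds.loc, preds2.loc)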
