In [44]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist
from scipy.stats import multivariate_hypergeom
import statsmodels.api as sm
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from scipy import special
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from scipy.special import gammaln

# data preparation 

In [45]:
(train_img, train_lab), (test_img, test_lab) = mnist.load_data()

In [46]:
data = train_img.reshape(60000, 784, 1).reshape(60000, 784)
# convert vectors to binary
data[data>1]=1

# image labels
label = train_lab
# use number of black pixels as labels
num_black = np.sum(data == 0, axis = 1)

# thinning functions

In [47]:
def poisson_thin(data, epsilon):
    if np.any(data < 0):
        print("Possion data must be non-negative")
        return
    if np.any(data != np.floor(data)):
        print("Possion data must be integer valued")
        return
    
#     Thinning
    thinned_data = np.array([np.random.multinomial(x, epsilon) for x in data])
    return thinned_data

In [48]:
def binom_thin(data, epsilon, pop):
    if np.min(data) < 0:
        print("Binomial data must be non-negative.")
        return
    
    if np.any((data - np.floor(data)) != 0):
        print("Binomial data must be integer valued.")
        return
    
    test = np.outer(epsilon, pop)
    tolerance = 1e-10
    if not np.all(np.isclose(test - np.round(test), 0, atol=tolerance)):
        print("Epsilon implies non-integer thinned population parameters.")
        return
    
    X = [multivariate_hypergeom.rvs(np.array(y * epsilon, dtype=int), x, size=1) for x, y in zip(data, pop)]
    X = np.array(X).transpose(1, 2, 0).squeeze()

    return X

# $-log L(\lambda; y) = \sum(y_{pred} - y_{true}log(y_{pred})+log(y_{true}!))$

In [49]:
# Compute the negative log-likelihood for the test set
def negative_log_likelihood(y_true, y_pred):
    
    if np.any(y_pred <= 0):
        raise ValueError("Predicted values must be greater than 0")

    # Calculate the negative log-likelihood for Poisson distribution
    nll = np.sum(y_pred - y_true * np.log(y_pred) + gammaln(y_true + 1))
    return nll

# linear model with different thinning methods

### traditional data splitting (70%)

In [50]:
X_train, X_test, y_train, y_test = train_test_split(data, num_black, test_size = 0.3, random_state=42)

In [51]:
X_train_cons = sm.add_constant(X_train)
model_trad_70 = sm.GLM(y_train, X_train_cons, family=sm.families.Poisson())
result_trad_70 = model_trad.fit()
print(result_trad.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      y   No. Observations:                29400
Model:                            GLM   Df Residuals:                    28701
Model Family:                 Poisson   Df Model:                          698
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -1.2189e+05
Date:                Sun, 14 Apr 2024   Deviance:                       106.48
Time:                        17:34:57   Pearson chi2:                     106.
No. Iterations:                   100   Pseudo R-squ. (CS):             0.9338
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.6888      0.001   4939.393      0.0

In [52]:
y_pred_train = result_trad_70.predict(X_train_cons)
X_test_cons = sm.add_constant(X_test)
y_pred_test = result_trad_70.predict(X_test_cons)

neg_ll_train = negative_log_likelihood(y_train, y_pred_train)
print(f"Negative Log-Likelihood for the trainnig set: {neg_ll_train:.4f}")
avg_neg_ll_train = neg_ll_train / len(y_train)
print(f"Average Negative Log-Likelihood per data point (Traditional Splitting): {avg_neg_ll_train:.4f}")

neg_ll_test = negative_log_likelihood(y_test, y_pred_test)
print(f"Negative Log-Likelihood for the test set: {neg_ll_test:.4f}")
avg_neg_ll_test = neg_ll_test / len(y_test)
print(f"Average Negative Log-Likelihood per data point (Traditional Splitting): {avg_neg_ll_test:.4f}")

Negative Log-Likelihood for the trainnig set: 174128.3302
Average Negative Log-Likelihood per data point (Traditional Splitting): 4.1459
Negative Log-Likelihood for the test set: 74628.8663
Average Negative Log-Likelihood per data point (Traditional Splitting): 4.1460


### traiditional data splitting(70%*70%)

In [53]:
X_trad_train, X_trad_test, y_trad_train, y_trad_test = train_test_split(X_train, y_train, test_size = 0.3, random_state=42)

In [54]:
X_train_cons = sm.add_constant(X_trad_train)

model_trad = sm.GLM(y_trad_train, X_train_cons, family=sm.families.Poisson())
result_trad = model_trad.fit()
print(result_trad.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      y   No. Observations:                29400
Model:                            GLM   Df Residuals:                    28701
Model Family:                 Poisson   Df Model:                          698
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -1.2189e+05
Date:                Sun, 14 Apr 2024   Deviance:                       106.48
Time:                        17:36:27   Pearson chi2:                     106.
No. Iterations:                   100   Pseudo R-squ. (CS):             0.9338
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.6888      0.001   4939.393      0.0

### Poisson thinning (70%)

In [55]:
num_black = np.sum(X_train == 0, axis = 1)
eps = np.array([0.7, 0.3])
poisson_thinning = poisson_thin(num_black, eps)

y_pois_train = poisson_thinning[:,0]

In [56]:
X_train_cons = sm.add_constant(X_train)

model = sm.GLM(y_pois_train, X_train_cons, family=sm.families.Poisson())
result_poisson = model.fit()
print(result_poisson.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      y   No. Observations:                42000
Model:                            GLM   Df Residuals:                    41296
Model Family:                 Poisson   Df Model:                          703
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -1.7280e+05
Date:                Sun, 14 Apr 2024   Deviance:                       12489.
Time:                        17:38:34   Pearson chi2:                 1.25e+04
No. Iterations:                   100   Pseudo R-squ. (CS):             0.8495
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.3321      0.001   4665.476      0.0

### binomial thinning (70%)

In [57]:
pop =np.array([num_black.shape[0]]*num_black.shape[0])

binom_thinning = binom_thin(num_black, eps, pop)
y_binom_train = binom_thinning[0]

In [58]:
model = sm.GLM(y_binom_train, X_train_cons, family=sm.families.Poisson())
result_binom = model.fit()
print(result_binom.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      y   No. Observations:                42000
Model:                            GLM   Df Residuals:                    41296
Model Family:                 Poisson   Df Model:                          703
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -1.7271e+05
Date:                Sun, 14 Apr 2024   Deviance:                       12300.
Time:                        17:40:36   Pearson chi2:                 1.23e+04
No. Iterations:                   100   Pseudo R-squ. (CS):             0.8518
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.3325      0.001   4664.906      0.0

### training loss

In [61]:
X_train_cons = sm.add_constant(X_trad_train)
y_pred_train = result_trad.predict(X_train_cons)
neg_ll_train = negative_log_likelihood(y_trad_train, y_pred_train)
print(f"Negative Log-Likelihood for the training set(70%*70%): {neg_ll_train:.4f}")
avg_neg_ll_train = neg_ll_train / len(y_trad_train)
print(f"Average Negative Log-Likelihood per data point (Traditional Splitting): {avg_neg_ll_train:.4f}")

X_train_cons = sm.add_constant(X_train)
y_pred_train = result_poisson.predict(X_train_cons)
neg_ll_train = negative_log_likelihood(y_pois_train, y_pred_train)
print(f"Negative Log-Likelihood (Training Set - 70% Poisson Thinning): {neg_ll_train:.4f}")
avg_neg_ll_train = neg_ll_train / len(y_pois_train)
print(f"Average Negative Log-Likelihood per data point (Poisson Thinning): {avg_neg_ll_train:.4f}")

y_pred_train = result_binom.predict(X_train_cons)
neg_ll_train = negative_log_likelihood(y_binom_train, y_pred_train)
print(f"Negative Log-Likelihood (Training Set - 70% Binomial Thinning): {neg_ll_train:.4f}")
avg_neg_ll_train = neg_ll_train / len(y_binom_train)
print(f"Average Negative Log-Likelihood per data point (Binomial Thinning): {avg_neg_ll_train:.4f}")

Negative Log-Likelihood for the training set(70%*70%): 121890.0290
Average Negative Log-Likelihood per data point (Traditional Splitting): 4.1459
Negative Log-Likelihood (Training Set - 70% Poisson Thinning): 172803.5508
Average Negative Log-Likelihood per data point (Poisson Thinning): 4.1144
Negative Log-Likelihood (Training Set - 70% Binomial Thinning): 172709.0203
Average Negative Log-Likelihood per data point (Binomial Thinning): 4.1121


### test

In [65]:
X_test_cons = sm.add_constant(X_test)
y_pred_trad = result_trad.predict(X_test_cons)
y_pred_pois = result_poisson.predict(X_test_cons)
y_pred_binom = result_binom.predict(X_test_cons)

In [66]:
print("Traditional data splitting 70%*70%")
neg_ll_test = negative_log_likelihood(y_test, y_pred_trad)
print(f"Negative Log-Likelihood for the test set: {neg_ll_test:.4f}")
avg_neg_ll_test = neg_ll_test / len(y_test)
print(f"Average Negative Log-Likelihood per data point: {avg_neg_ll_test:.4f}\n")

print("Poisson thinning 70%")
neg_ll_test = negative_log_likelihood(y_test, y_pred_pois/0.7)
print(f"Negative Log-Likelihood for the test set: {neg_ll_test:.4f}")
avg_neg_ll_test = neg_ll_test / len(y_test)
print(f"Average Negative Log-Likelihood per data point: {avg_neg_ll_test:.4f}\n")

print("Binomial thinning 70%")
neg_ll_test = negative_log_likelihood(y_test, y_pred_binom/0.7)
print(f"Negative Log-Likelihood for the test set: {neg_ll_test:.4f}")
avg_neg_ll_test = neg_ll_test / len(y_test)
print(f"Average Negative Log-Likelihood per data point: {avg_neg_ll_test:.4f}")

Traditional data splitting 70%*70%
Negative Log-Likelihood for the test set: 74629.2304
Average Negative Log-Likelihood per data point: 4.1461

Poisson thinning 70%
Negative Log-Likelihood for the test set: 74694.0369
Average Negative Log-Likelihood per data point: 4.1497

Binomial thinning 70%
Negative Log-Likelihood for the test set: 74696.0740
Average Negative Log-Likelihood per data point: 4.1498


In [67]:
print("Poisson thinning 70%")
neg_ll_test = negative_log_likelihood(y_test*0.7, y_pred_pois)
print(f"Negative Log-Likelihood for the test set: {neg_ll_test:.4f}")
avg_neg_ll_test = neg_ll_test / len(y_test)
print(f"Average Negative Log-Likelihood per data point: {avg_neg_ll_test:.4f}\n")

Poisson thinning 70%
Negative Log-Likelihood for the test set: 71454.8437
Average Negative Log-Likelihood per data point: 3.9697


In [68]:
test_thinning = poisson_thin(y_test, eps)
y_test_thin = test_thinning[:,0]

neg_ll_test = negative_log_likelihood(y_test_thin, y_pred_pois)
print(f"Negative Log-Likelihood for the test set: {neg_ll_test:.4f}")
avg_neg_ll_test = neg_ll_test / len(y_test)
print(f"Average Negative Log-Likelihood per data point: {avg_neg_ll_test:.4f}\n")

Negative Log-Likelihood for the test set: 74161.7971
Average Negative Log-Likelihood per data point: 4.1201


### poisson thinning (entire set)

In [47]:
eps = np.array([0.7, 0.3])
poisson_thinning = poisson_thin(num_black, eps)

y_train = poisson_thinning[:,0]
y_test = poisson_thinning[:,1]

In [49]:
X_cons = sm.add_constant(data)
y_var = np.var(y_train)
y_mean = np.mean(y_train)
alpha = (y_var-y_mean)/(y_mean **2)

model = sm.GLM(y_train, X_cons, family=sm.families.NegativeBinomial(alpha=alpha))
result_poisson = model.fit()
print(result_poisson.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      y   No. Observations:                60000
Model:                            GLM   Df Residuals:                    59287
Model Family:        NegativeBinomial   Df Model:                          712
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -2.6561e+05
Date:                Fri, 22 Mar 2024   Deviance:                       8286.1
Time:                        00:27:29   Pearson chi2:                 8.28e+03
No. Iterations:                   100   Pseudo R-squ. (CS):             0.5805
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.3323      0.002   3777.204      0.0

In [51]:
y_pred_train = result_poisson.predict(X_cons)
neg_ll_train = negative_log_likelihood(y_train, y_pred_train, alpha)
print(f"Negative Log-Likelihood (Training Set - 70% Poisson Thinning): {neg_ll_train:.4f}")
avg_neg_ll_train = neg_ll_train / len(y_train)
print(f"Average Negative Log-Likelihood per data point (Poisson Thinning): {avg_neg_ll_train:.4f}")

y_pred_test = result_poisson.predict(X_cons) /0.7 *0.3
neg_ll_test = negative_log_likelihood(y_test, y_pred_test, alpha)
print(f"Negative Log-Likelihood (Test Set - 30% Poisson Thinning): {neg_ll_test:.4f}")
avg_neg_ll_test = neg_ll_test / len(y_test)
print(f"Average Negative Log-Likelihood per data point (Poisson Thinning): {avg_neg_ll_test:.4f}")

Negative Log-Likelihood (Training Set - 70% Poisson Thinning): 265608.2777
Average Negative Log-Likelihood per data point (Poisson Thinning): 4.4268
Negative Log-Likelihood (Test Set - 30% Poisson Thinning): 239126.0349
Average Negative Log-Likelihood per data point (Poisson Thinning): 3.9854


### binomial thinning

In [67]:
pop =np.array([num_black.shape[0]]*num_black.shape[0])

binom_thinning = binom_thin(num_black, eps, pop)
y_train = binom_thinning[0]
y_test = binom_thinning[1]

In [68]:
y_var = np.var(y_train)
y_mean = np.mean(y_train)
alpha = (y_var-y_mean)/(y_mean **2)

model = sm.GLM(y_train, X_cons, family=sm.families.NegativeBinomial(alpha=alpha))
result_binom = model.fit()
print(result_binom.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                      y   No. Observations:                60000
Model:                            GLM   Df Residuals:                    59287
Model Family:        NegativeBinomial   Df Model:                          712
Link Function:                    Log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -2.6532e+05
Date:                Fri, 22 Mar 2024   Deviance:                       8065.8
Time:                        01:16:11   Pearson chi2:                 8.06e+03
No. Iterations:                   100   Pseudo R-squ. (CS):             0.5820
Covariance Type:            nonrobust                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.3318      0.002   3788.087      0.0

In [69]:
y_pred_train = result_binom.predict(X_cons)
neg_ll_train = negative_log_likelihood(y_train, y_pred_train, alpha)
print(f"Negative Log-Likelihood (Training Set - 70% Binomial Thinning): {neg_ll_train:.4f}")
avg_neg_ll_train = neg_ll_train / len(y_train)
print(f"Average Negative Log-Likelihood per data point (Binomial Thinning): {avg_neg_ll_train:.4f}")

y_pred_test = result_binom.predict(X_cons) /0.7 *0.3
neg_ll_test = negative_log_likelihood(y_test, y_pred_test, alpha)
print(f"Negative Log-Likelihood (Test Set - 30% Binomial Thinning): {neg_ll_test:.4f}")
avg_neg_ll_test = neg_ll_test / len(y_test)
print(f"Average Negative Log-Likelihood per data point (Binomial Thinning): {avg_neg_ll_test:.4f}")

Negative Log-Likelihood (Training Set - 70% Binomial Thinning): 265324.4895
Average Negative Log-Likelihood per data point (Binomial Thinning): 4.4221
Negative Log-Likelihood (Test Set - 30% Binomial Thinning): 238636.5829
Average Negative Log-Likelihood per data point (Binomial Thinning): 3.9773


# thinning after clustering

In [18]:
def train_NB(X, y):
    """
    training model using Negative Binomial model
    """
    y_var = np.var(y)
    y_mean = np.mean(y)
    alpha = np.abs((y_var-y_mean)/(y_mean **2))
    model = sm.GLM(y, X, family=sm.families.NegativeBinomial(alpha=alpha))
    results = model.fit()
    
    return results, alpha

### traditional data splitting

In [20]:
for i in range(2, 6):
    print('number of clusters: ', i)
    kmeans = KMeans(n_clusters = i, random_state = 0)
    kmeans.fit(data)

    ll_train = []
    size_train = []
    ll_test = []
    size_test = []
    for k in range(i):
        # data within cluster
        data_const = sm.add_constant(data[kmeans.labels_ == k])
        black_cluster =  num_black[np.where(kmeans.labels_ == k)[0]]

        # traditional data splitting
        X_train, X_test, y_train, y_test = train_test_split(data_const, black_cluster, test_size = 0.3, random_state=42)
        
        # train model
        model, alpha = train_NB(X_train, y_train)

        # predict
        y_pred_train = model.predict(X_train)
        neg_ll_train = negative_log_likelihood(y_train, y_pred_train, alpha)
        ll_train.append(neg_ll_train)
        size_train.append(len(y_train))

        y_pred_test = model.predict(X_test)
        neg_ll_test = negative_log_likelihood(y_test, y_pred_test, alpha)
        ll_test.append(neg_ll_test)
        size_test.append(len(y_test))

    avg_neg_ll_train = np.sum(ll_train) / np.sum(size_train)
    print(f"Average Negative Log-Likelihood per data point (Traditional Splitting training): {avg_neg_ll_train:.4f}")

    avg_neg_ll_test = np.sum(ll_test) / np.sum(size_test)
    print(f"Average Negative Log-Likelihood per data point (Traditional Splitting testing): {avg_neg_ll_test:.4f}")

number of clusters:  2
Average Negative Log-Likelihood per data point (Traditional Splitting training): 4.3872
Average Negative Log-Likelihood per data point (Traditional Splitting testing): 4.3874
number of clusters:  3
Average Negative Log-Likelihood per data point (Traditional Splitting training): 4.3435
Average Negative Log-Likelihood per data point (Traditional Splitting testing): 4.3434
number of clusters:  4
Average Negative Log-Likelihood per data point (Traditional Splitting training): 4.3573
Average Negative Log-Likelihood per data point (Traditional Splitting testing): 4.3573
number of clusters:  5
Average Negative Log-Likelihood per data point (Traditional Splitting training): 4.3058
Average Negative Log-Likelihood per data point (Traditional Splitting testing): 4.3060


### poisson thinning

In [22]:
eps = np.array([0.7, 0.3])

for i in range(2, 6):
    print('number of clusters: ', i)
    kmeans = KMeans(n_clusters = i, random_state = 0)
    kmeans.fit(data)

    ll_train = []
    size_train = []
    ll_test = []
    size_test = []
    for k in range(i):
        # data within cluster
        data_cluster = data[kmeans.labels_ == k]
        X_const = sm.add_constant(data_cluster)
        black_cluster =  num_black[np.where(kmeans.labels_ == k)[0]]

        # poisson splitting
        poisson_thinning = poisson_thin(black_cluster, eps)

        y_train = poisson_thinning[:,0]
        y_test = poisson_thinning[:,1]
        
        # train model
        model, alpha = train_NB(X_const, y_train)

        # predict
        y_pred_train = model.predict(X_const)
        neg_ll_train = negative_log_likelihood(y_train, y_pred_train, alpha)
        ll_train.append(neg_ll_train)
        size_train.append(len(y_train))

        y_pred_test = model.predict(X_const)/0.7 * 0.3
        neg_ll_test = negative_log_likelihood(y_test, y_pred_test, alpha)
        ll_test.append(neg_ll_test)
        size_test.append(len(y_test))

    avg_neg_ll_train = np.sum(ll_train) / np.sum(size_train)
    print(f"Average Negative Log-Likelihood per data point (Poisson training): {avg_neg_ll_train:.4f}")

    avg_neg_ll_test = np.sum(ll_test) / np.sum(size_test)
    print(f"Average Negative Log-Likelihood per data point (Poisson testing): {avg_neg_ll_test:.4f}")

number of clusters:  2
Average Negative Log-Likelihood per data point (Poisson training): 4.2478
Average Negative Log-Likelihood per data point (Poisson testing): 3.9300
number of clusters:  3
Average Negative Log-Likelihood per data point (Poisson training): 4.2260
Average Negative Log-Likelihood per data point (Poisson testing): 3.9326
number of clusters:  4
Average Negative Log-Likelihood per data point (Poisson training): 4.2263
Average Negative Log-Likelihood per data point (Poisson testing): 3.9288
number of clusters:  5
Average Negative Log-Likelihood per data point (Poisson training): 4.2016
Average Negative Log-Likelihood per data point (Poisson testing): 3.9297


### binomial thinning

In [21]:
eps = np.array([0.7, 0.3])

for i in range(2, 6):
    print('number of clusters: ', i)
    kmeans = KMeans(n_clusters = i, random_state = 0)
    kmeans.fit(data)

    ll_train = []
    size_train = []
    ll_test = []
    size_test = []
    for k in range(i):
        # data within cluster
        data_cluster = data[kmeans.labels_ == k]
        X_const = sm.add_constant(data_cluster)
        black_cluster =  num_black[np.where(kmeans.labels_ == k)[0]]

        # binomial splitting
        pop =[round(black_cluster.shape[0], -1)]*black_cluster.shape[0]
        
        binom_thinning = binom_thin(black_cluster, eps, pop)
        y_train = binom_thinning[0]
        y_test = binom_thinning[1]
                
        # train model
        model, alpha = train_NB(X_const, y_train)

        # predict
        y_pred_train = model.predict(X_const)
        neg_ll_train = negative_log_likelihood(y_train, y_pred_train, alpha)
        ll_train.append(neg_ll_train)
        size_train.append(len(y_train))

        y_pred_test = model.predict(X_const)/0.7 * 0.3
        neg_ll_test = negative_log_likelihood(y_test, y_pred_test, alpha)
        ll_test.append(neg_ll_test)
        size_test.append(len(y_test))

    avg_neg_ll_train = np.sum(ll_train) / np.sum(size_train)
    print(f"Average Negative Log-Likelihood per data point (Poisson training): {avg_neg_ll_train:.4f}")

    avg_neg_ll_test = np.sum(ll_test) / np.sum(size_test)
    print(f"Average Negative Log-Likelihood per data point (Poisson testing): {avg_neg_ll_test:.4f}")

number of clusters:  2
Average Negative Log-Likelihood per data point (Poisson training): 4.2458
Average Negative Log-Likelihood per data point (Poisson testing): 3.9249
number of clusters:  3
Average Negative Log-Likelihood per data point (Poisson training): 4.2121
Average Negative Log-Likelihood per data point (Poisson testing): 3.9128
number of clusters:  4
Average Negative Log-Likelihood per data point (Poisson training): 4.2218
Average Negative Log-Likelihood per data point (Poisson testing): 3.9187
number of clusters:  5
Average Negative Log-Likelihood per data point (Poisson training): 4.1911
Average Negative Log-Likelihood per data point (Poisson testing): 3.9113
