In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from statsmodels.genmod.generalized_linear_model import SET_USE_BIC_LLF
from scipy.stats import nbinom

# Import custom modules and functions
from generate_hnb import generate_hnb
from generate_ZI import generate_ZI
from AIC_BIC import calculate_aic_bic

# Import model classes
from models.HurdlePoisson import ZeroKInflatedPoisson as ZKIHurdle
from models.ZINB import ZINB_EM, predict_mean as ZINB_pred_mean
from models.ZIP import ZIP_EM, predict_mean as ZIP_pred_mean
from models.ZKINB import ZkINB_EM
from models.ZKIP import ZKIP_EM
from models.ZkICMP import ZkICMP


class ModelEvaluator:
    """Class to evaluate and compare different count data models."""
    
    def __init__(self, X_train, X_test, y_train, y_test):
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.n_train = X_train.shape[0]
        self.results = {}
    
    def evaluate_model(self, model_name, y_pred, llf, k_params):
        """Calculate evaluation metrics for a model."""
        metrics = {
            'mse': mean_squared_error(self.y_test, y_pred),
            'mae': mean_absolute_error(self.y_test, y_pred),
            'r2': r2_score(self.y_test, y_pred),
            'llf': llf,
            'aic': calculate_aic_bic(self.n_train, llf, k_params)[0],
            'bic': calculate_aic_bic(self.n_train, llf, k_params)[1]
        }
        self.results[model_name] = metrics
        return metrics


def define_model_parameters():
    """Define parameter counts for different models."""
    return {
        'poisson': 2,
        'nb': 3,  # if r known (2)
        'zip': 4,  # 2+2
        'zinb': 5,  # if r known (2+2+1)
        'zkhurdle_poi': 4,  # 2+2
        'zkip': 6,  # 2+2+2
        'zkinb': 7,  # if r known (2+2+2+1)
        'zkicmp': 7  # 2+2+2+1
    }


def fit_models(X_train, X_test, y_train, y_test, k):
    # Configuration
    SET_USE_BIC_LLF(True)
    
    # Get parameter counts
    param_counts = define_model_parameters()
    
    # Initialize evaluator
    evaluator = ModelEvaluator(X_train, X_test, y_train, y_test)
    
    # 1. Poisson Model
    print("Fitting Poisson model...")
    poisson_model = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
    poisson_pred = poisson_model.predict(X_test)
    evaluator.evaluate_model(
        'poisson', poisson_pred, poisson_model.llf, param_counts['poisson']
    )
    
    # 2. Negative Binomial Model
    print("Fitting Negative Binomial model...")
    nb_model = sm.NegativeBinomial(y_train, X_train).fit()
    nb_pred = nb_model.predict(X_test)
    evaluator.evaluate_model(
        'negative_binomial', nb_pred, nb_model.llf, param_counts['nb']
    )
    
    # 3. Zero-K Inflated Poisson Hurdle Model
    print("Fitting Zero-K Inflated Poisson Hurdle model...")
    zkihurdle_model = ZKIHurdle(k=3)
    zkihurdle_res = zkihurdle_model.fit(X_train, y_train)
    zkihurdle_ll = zkihurdle_model.loglikelihood(X_train, y_train)
    zkihurdle_pred = zkihurdle_model.predict_mean(X_test)
    evaluator.evaluate_model(
        'zk_hurdle_poisson', zkihurdle_pred, zkihurdle_ll, param_counts['zkhurdle_poi']
    )
    
    # 4. ZINB Model
    print("Fitting ZINB model...")
    alpha = 1/10
    beta, gamma, zinb_ll = ZINB_EM(y_train.values, X_train.values, X_train.values, alpha)
    zinb_pred = ZINB_pred_mean(X_test.values, X_test.values, beta, gamma)
    evaluator.evaluate_model(
        'zinb', zinb_pred, zinb_ll, param_counts['zinb']
    )
    
    # 5. ZIP Model
    print("Fitting ZIP model...")
    beta, gamma, zip_ll = ZIP_EM(y_train.values, X_train.values, X_train.values)
    zip_pred = ZIP_pred_mean(X_test.values, X_test.values, beta, gamma)
    evaluator.evaluate_model(
        'zip', zip_pred, zip_ll, param_counts['zip']
    )
    
    # 6. ZKINB Model
    print("Fitting ZKINB model...")
    zkinb_model = ZkINB_EM()
    zkinb_res = zkinb_model.fit_em(y_train, X_train.values, X_train.values, k)
    zkinb_pred = zkinb_model.predict(X_test.values, X_test.values)
    evaluator.evaluate_model(
        'zkinb', zkinb_pred, zkinb_res['final_loglik'], param_counts['zkinb']
    )
    
    # 7. ZKIP Model
    print("Fitting ZKIP model...")
    zkip_model = ZKIP_EM(k_inflated=k)
    zkip_res = zkip_model.fit(X_train.values, y_train.values)
    zkip_pred = zkip_model.predict_expected(X_test.values)
    evaluator.evaluate_model(
        'zkip', zkip_pred, zkip_res.final_loglik, param_counts['zkip']
    )
    
    # 8. ZkICMP Model
    print("Fitting ZkICMP model...")
    zkicmp_model = ZkICMP(k=k)
    zkicmp_res = zkicmp_model.fit(X_train.values, y_train.values)
    pred_results = zkicmp_model.predict(X_test.values)
    _, _, zkicmp_pred, _ = pred_results
    evaluator.evaluate_model(
        'zkicmp', zkicmp_pred, -zkicmp_res.final_loglik, param_counts['zkicmp']
    )
    
    # Print results
    print("\n" + "="*80)
    print("MODEL COMPARISON RESULTS")
    print("="*80)
    
    results={}
    for model_name, metrics in evaluator.results.items():
        results[f'{model_name.upper()}_MSE']= metrics['mse']
        results[f'{model_name.upper()}_MAE']= metrics['mae']
        results[f'{model_name.upper()}_R2']= metrics['r2']
        results[f'{model_name.upper()}_LLF']= metrics['llf']
        results[f'{model_name.upper()}_AIC']= metrics['aic']
        results[f'{model_name.upper()}_BIC']= metrics['bic']
    return results

def main():
    k = 3
    n = 300
    
    # Split data indexes
    train_ind, test_ind = train_test_split(
        np.arange(0,n), 
        test_size=0.3, random_state=42
    )

    results=[]
    for beta in np.linspace(-2, 2, 9):
        for gamma in np.linspace(-2, 2, 9):
            for alpha in np.linspace(-2, 2, 9):
                k+=1
                """Generate and prepare the dataset for modeling."""
                # Generate data
                df = generate_ZI(
                    n=n, k=k, beta0=-1, beta1=1, gamma0=-2, gamma1=0.3,
                    alpha0=0.5, alpha1=1, r=10, cov_type="nbinary"
                )
                
                # Prepare features and target
                X = df.loc[:, 'x'].values
                y_target = df.loc[:, 'y'].values
                
                # Add intercept
                X = sm.add_constant(X)
                X = pd.DataFrame(X, columns=['intercept', 'x'])
                y_target = pd.DataFrame(y_target, columns=['y'])
                
                X_train = X.iloc[train_ind, :]
                X_test = X.iloc[test_ind, :]
                y_train = y_target.loc[train_ind, 'y']
                y_test = y_target.loc[test_ind, 'y']
                print(type(y_train))
                # outputs
                p_0 = (y_train==0).mean()
                p_k = (y_train==k).mean()
                p_p = max(1 - p_0 - p_k, 0)
                y_mean = y_train.mean()
                y_std = y_train.std()
                n_unique = len(y_train.value_counts())

                r_hat = y_mean**2 / (y_std**2 - y_mean)
                p0_nb = nbinom.pmf(0, r_hat, r_hat/(r_hat+y_mean))
                pk_nb = nbinom.pmf(k, r_hat, r_hat/(r_hat+ y_mean))

                zero_inflated = p_0>p0_nb
                k_inflated = p_k>pk_nb

                model_results=fit_models(X_train, X_test, y_train, y_test, k)
        
                results.append(
                    {
                        'beta':beta,
                        'gamma':gamma,
                        'alpha': alpha,
                        'p_0':p_0,
                        'p_k':p_k,
                        'p_p':p_p,
                        'y_mean':y_mean,
                        'y_std':y_std,
                        'n_unique':n_unique,
                        'r_hat':r_hat,
                        'p0_nb':p0_nb,
                        'pk_nb':pk_nb,
                        'zero_inflated':zero_inflated,
                        'k_infated':k_inflated,
                    } | model_results
                )    

    return results    

    
if __name__ == "__main__":
    
    results = main()

<class 'pandas.core.series.Series'>
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 1.712625
         Iterations: 8
         Function evaluations: 9
         Gradient evaluations: 9
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...
Fitting ZKIP model...
Fitting ZkICMP model...

MODEL COMPARISON RESULTS
<class 'pandas.core.series.Series'>
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 1.658644
         Iterations: 9
         Function evaluations: 10
         Gradient evaluations: 10
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...
Fitting ZKIP model...
Fitting ZkICMP model...

MODEL COMPARISON RESULTS
<class 'pandas.core.series.Series'>
Fitting Poisson model...
Fitting Negative Binomial model...


In [9]:
pd.DataFrame(results).to_csv('results_test.csv')

In [23]:
# Split data indexes
import numpy as np
n=300
train_ind, test_ind = train_test_split(
        np.arange(0,n), 
        test_size=0.3, random_state=42
)

train_ind

array([194, 101,  68, 224,  37,  16, 179, 147, 274,  67, 228,  69,  31,
       183, 265, 225, 140,  18, 181,  96, 132, 262,  86, 248, 245, 116,
       146, 292, 197, 206,  55, 172, 184, 167, 139, 253,  38, 125, 195,
       283, 137, 112, 168, 117, 277, 271, 155, 176, 178,   2, 115, 143,
       177, 120, 210, 260, 127,  74,  29,  83, 269, 107, 223, 158, 280,
       246, 222,  65, 198,  85, 213, 159,  12,  35,  28, 142, 284, 254,
       170,  51,  95, 208, 247,  41,  89, 244, 136,  26, 293, 141, 200,
         0, 268, 272, 100, 259, 255, 171,  98,  36,  61, 150, 236, 202,
       242,  11, 296, 267,  27, 219,   4, 122,  32, 204, 162, 209, 285,
       138,  62, 135, 128, 290,   8,  70, 264,  64,  44, 279, 156,  40,
       123, 275, 216, 153,  23, 261, 110,  81, 207, 212,  39, 240, 291,
       258, 199,  14,  47,  94, 263, 227, 273, 201, 161,  43, 217, 145,
       190, 220, 251,   3, 105,  53, 133,   1, 131, 103,  49,  80, 205,
        34,  91,  52, 241,  13,  88, 166, 294, 134, 287, 243,  5

In [None]:
abg=[]
for i in data:
    for beta in np.linspace(-2, 2, 9):
        for gamma in np.linspace(-2, 2, 9):
            for alpha in np.linspace(-2, 2, 9):
                if (i['params']['beta1']==beta) and (i['params']['gamma1']==gamma) and (i['params']['alpha1']==alpha):
                    df=i['data']
                    p_0 = df['y'].value_counts().get(0, 0) / len(df)
                    p_k = df['y'].value_counts().get(k, 0) / len(df)
                    p_p = max(1 - p_0 - p_k, 0)
                    y_mean = df['y'].mean()
                    y_std = df['y'].std()
                    abg.append({
                            'data': df,
                            'beta1':beta,
                            'gamma1':gamma,
                            'alpha1':alpha,
                            'n_unique': len(df['y'].value_counts()), 
                            'p_0': p_0,
                            'p_k': p_k,
                            'p_p': p_p,
                            'y_mean': y_mean,
                            'y_std': y_std
                    })
                    


NameError: name 'evaluator' is not defined

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from statsmodels.genmod.generalized_linear_model import SET_USE_BIC_LLF
from scipy.stats import nbinom

# Import custom modules and functions
from generate_hnb import generate_hnb
from generate_ZI import generate_ZI
from AIC_BIC import calculate_aic_bic

# Import model classes
from models.HurdlePoisson import ZeroKInflatedPoisson as ZKIHurdle
from models.ZINB import ZINB_EM, predict_mean as ZINB_pred_mean
from models.ZIP import ZIP_EM, predict_mean as ZIP_pred_mean
from models.ZKINB import ZkINB_EM
from models.ZKIP import ZKIP_EM
from models.ZkICMP import ZkICMP


class ModelEvaluator:
    """Class to evaluate and compare different count data models."""
    
    def __init__(self, X_train, X_test, y_train, y_test):
        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test
        self.n_train = X_train.shape[0]
        self.results = {}
    
    def evaluate_model(self, model_name, y_pred, llf, k_params):
        """Calculate evaluation metrics for a model."""
        metrics = {
            'mse': mean_squared_error(self.y_test, y_pred),
            'mae': mean_absolute_error(self.y_test, y_pred),
            'r2': r2_score(self.y_test, y_pred),
            'llf': llf,
            'aic': calculate_aic_bic(self.n_train, llf, k_params)[0],
            'bic': calculate_aic_bic(self.n_train, llf, k_params)[1]
        }
        self.results[model_name] = metrics
        return metrics


def define_model_parameters():
    """Define parameter counts for different models."""
    return {
        'poisson': 2,
        'negative_binomial': 3,  # if r known (2)
        'zk_hurdle_poisson': 4,  # 2+2
        'zinb': 5,  # if r known (2+2+1)
        'zip': 4,  # 2+2
        'zkinb': 7,  # if r known (2+2+2+1)
        'zkip': 6,  # 2+2+2
        'zkicmp': 7  # 2+2+2+1
    }


def fit_models(X_train, X_test, y_train, y_test, k):
    # Configuration
    SET_USE_BIC_LLF(True)
    
    # Get parameter counts
    param_counts = define_model_parameters()
    
    # Initialize evaluator
    evaluator = ModelEvaluator(X_train, X_test, y_train, y_test)
    
    # 1. Poisson Model
    print("Fitting Poisson model...")
    poisson_model = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
    poisson_pred = poisson_model.predict(X_test)
    evaluator.evaluate_model(
        'poisson', poisson_pred, poisson_model.llf, param_counts['poisson']
    )
    
    # 2. Negative Binomial Model
    print("Fitting Negative Binomial model...")
    nb_model = sm.NegativeBinomial(y_train, X_train).fit()
    nb_pred = nb_model.predict(X_test)
    evaluator.evaluate_model(
        'negative_binomial', nb_pred, nb_model.llf, param_counts['negative_binomial']
    )
    
    # 3. Zero-K Inflated Poisson Hurdle Model
    print("Fitting Zero-K Inflated Poisson Hurdle model...")
    zkihurdle_model = ZKIHurdle(k=k)
    zkihurdle_res = zkihurdle_model.fit(X_train, y_train)
    zkihurdle_ll = zkihurdle_model.loglikelihood(X_train, y_train)
    zkihurdle_pred = zkihurdle_model.predict_mean(X_test)
    evaluator.evaluate_model(
        'zk_hurdle_poisson', zkihurdle_pred, zkihurdle_ll, param_counts['zk_hurdle_poisson']
    )
    
    # 4. ZINB Model
    print("Fitting ZINB model...")
    alpha = 1/10
    beta, gamma, zinb_ll = ZINB_EM(y_train.values, X_train.values, X_train.values, alpha)
    zinb_pred = ZINB_pred_mean(X_test.values, X_test.values, beta, gamma)
    evaluator.evaluate_model(
        'zinb', zinb_pred, zinb_ll, param_counts['zinb']
    )
    
    # 5. ZIP Model
    print("Fitting ZIP model...")
    beta, gamma, zip_ll = ZIP_EM(y_train.values, X_train.values, X_train.values)
    zip_pred = ZIP_pred_mean(X_test.values, X_test.values, beta, gamma)
    evaluator.evaluate_model(
        'zip', zip_pred, zip_ll, param_counts['zip']
    )
    
    # 6. ZKINB Model
    print("Fitting ZKINB model...")
    zkinb_model = ZkINB_EM()
    zkinb_res = zkinb_model.fit_em(y_train.values, X_train.values, X_train.values, k)
    zkinb_pred = zkinb_model.predict(X_test.values, X_test.values)
    evaluator.evaluate_model(
        'zkinb', zkinb_pred, zkinb_res['final_loglik'], param_counts['zkinb']
    )
    
    # 7. ZKIP Model
    print("Fitting ZKIP model...")
    zkip_model = ZKIP_EM(k_inflated=k)
    zkip_res = zkip_model.fit(X_train.values, y_train.values)
    zkip_pred = zkip_model.predict_expected(X_test.values)
    evaluator.evaluate_model(
        'zkip', zkip_pred, zkip_res.final_loglik, param_counts['zkip']
    )
    '''
    # 8. ZkICMP Model
    print("Fitting ZkICMP model...")
    zkicmp_model = ZkICMP(k=k)
    zkicmp_res = zkicmp_model.fit(X_train.values, y_train.values)
    pred_results = zkicmp_model.predict(X_test.values)
    _, _, zkicmp_pred, _ = pred_results
    evaluator.evaluate_model(
        'zkicmp', zkicmp_pred, -zkicmp_res.final_loglik, param_counts['zkicmp']
    )
    '''
    # Print results
    print("\n" + "="*80)
    print("MODEL COMPARISON RESULTS")
    print("="*80)
    
    results = {}
    for model_name, metrics in evaluator.results.items():
        results[f'{model_name.upper()}_MSE'] = metrics['mse']
        results[f'{model_name.upper()}_MAE'] = metrics['mae']
        results[f'{model_name.upper()}_R2'] = metrics['r2']
        results[f'{model_name.upper()}_LLF'] = metrics['llf']
        results[f'{model_name.upper()}_AIC'] = metrics['aic']
        results[f'{model_name.upper()}_BIC'] = metrics['bic']
    
    return results


def main():
    k = 3
    n = 300
    test_size=0.3
    # Split data indexes
    train_ind, test_ind = train_test_split(
        np.arange(0, n), 
        test_size=test_size, random_state=42
    )

    results = []
    results_r2_minus = []
    
    # Only run a few iterations for testing
    for beta in np.linspace(-2, 2, 3):  
        for gamma in np.linspace(-2, 2, 3):
            for alpha in np.linspace(-2, 2, 3):
                print(f"\nIteration: beta={beta:.2f}, gamma={gamma:.2f}, alpha={alpha:.2f}")
                
                """Generate and prepare the dataset for modeling."""
                # Generate data - using the parameters from the loops
                valid = False
                while not valid:
                    df = generate_ZI(
                        n=n, k=k, beta0=beta, beta1=1, gamma0=gamma, gamma1=0.3,
                        alpha0=alpha, alpha1=1, r=10, cov_type="nbinary"
                    )
                    # Prepare features and target
                    X = df.loc[:, 'x'].values.reshape(-1, 1)
                    y_target = df.loc[:, 'y'].values
                    
                    # Add intercept
                    X = sm.add_constant(X)
                    X = pd.DataFrame(X, columns=['intercept', 'x'])
                    y_target = pd.Series(y_target, name='y')
                    
                    X_train = X.iloc[train_ind, :]
                    X_test = X.iloc[test_ind, :]
                    y_train = y_target.iloc[train_ind]
                    y_test = y_target.iloc[test_ind]

                    p_0 = (y_train == 0).mean()
                    p_k = (y_train == k).mean()
                    p_p = max(1 - p_0 - p_k, 0)
                    valid = min(p_0, p_k, p_p) >= 1/(n*(1-test_size))
                
                # Calculate statistics
                y_mean = y_train.mean()
                y_std = y_train.std()
                n_unique = len(pd.Series(y_train).value_counts())
                
                # Estimate r for negative binomial
                if y_std**2 > y_mean:
                    r_hat = y_mean**2 / (y_std**2 - y_mean)
                    p0_nb = nbinom.pmf(0, r_hat, r_hat/(r_hat + y_mean))
                    pk_nb = nbinom.pmf(k, r_hat, r_hat/(r_hat + y_mean))
                    zero_inflated = p_0 > p0_nb 
                    k_inflated = p_k > pk_nb 
                    # Fit models
                    try:
                        model_results = fit_models(X_train, X_test, y_train, y_test, k)
                        results.append({
                                    'beta': beta,
                                    'gamma': gamma,
                                    'alpha': alpha,
                                    'p_0': p_0,
                                    'p_k': p_k,
                                    'p_p': p_p,
                                    'y_mean': y_mean,
                                    'y_std': y_std,
                                    'n_unique': n_unique,
                                    'r_hat': r_hat,
                                    'p0_nb': p0_nb,
                                    'pk_nb': pk_nb,
                                    'zero_inflated': zero_inflated,
                                    'k_inflated': k_inflated,
                            } | model_results)
                    except Exception as e:
                        print(f"Error fitting models: {e}")
                    continue
                else:
                    r_hat = np.nan
                    p0_nb = np.nan
                    pk_nb = np.nan
                    results_r2_minus.append({
                                    'beta': beta,
                                    'gamma': gamma,
                                    'alpha': alpha,
                                    'p_0': p_0,
                                    'p_k': p_k,
                                    'p_p': p_p,
                                    'y_mean': y_mean,
                                    'y_std': y_std,
                                    'n_unique': n_unique,
                                })
    
    return pd.DataFrame(results), pd.DataFrame(results_r2_minus)


if __name__ == "__main__":
    results, results_r2_minus= main()
    


Iteration: beta=-2.00, gamma=-2.00, alpha=-2.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 0.890162
         Iterations: 11
         Function evaluations: 13
         Gradient evaluations: 13
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...
Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=-2.00, gamma=-2.00, alpha=0.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 1.475757
         Iterations: 8
         Function evaluations: 9
         Gradient evaluations: 9
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...
Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=-2.00, gamma=-2.00, alpha=2.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimizati



Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=0.00, gamma=2.00, alpha=0.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 1.608172
         Iterations: 9
         Function evaluations: 10
         Gradient evaluations: 10
Fitting Zero-K Inflated Poisson Hurdle model...
Error fitting models: Input contains NaN.

Iteration: beta=0.00, gamma=2.00, alpha=2.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 1.610728
         Iterations: 9
         Function evaluations: 10
         Gradient evaluations: 10
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...


  eXB = np.column_stack((np.ones(len(X)), np.exp(X)))
  return eXB/eXB.sum(1)[:,None]


Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=2.00, gamma=-2.00, alpha=-2.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 0.401271
         Iterations: 9
         Function evaluations: 14
         Gradient evaluations: 14
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...
Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=2.00, gamma=-2.00, alpha=0.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 0.630028
         Iterations: 15
         Function evaluations: 17
         Gradient evaluations: 17
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...
Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=2.00, gamma=-2.00, alpha=2.00
Fitting Poisson model..

  logprob = np.log(self.cdf(np.dot(self.exog,params)))
  return np.sum(d * logprob)


Fitting ZKIP model...





MODEL COMPARISON RESULTS

Iteration: beta=2.00, gamma=0.00, alpha=0.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 0.832348
         Iterations: 12
         Function evaluations: 14
         Gradient evaluations: 14
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...




Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=2.00, gamma=0.00, alpha=2.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 0.652676
         Iterations: 15
         Function evaluations: 18
         Gradient evaluations: 18
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...


  output_errors = _average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
  numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0)


Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=2.00, gamma=2.00, alpha=-2.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 0.914209
         Iterations: 12
         Function evaluations: 14
         Gradient evaluations: 14
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...




Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=2.00, gamma=2.00, alpha=0.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 0.675534
         Iterations: 13
         Function evaluations: 18
         Gradient evaluations: 18
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...




Fitting ZKIP model...

MODEL COMPARISON RESULTS

Iteration: beta=2.00, gamma=2.00, alpha=2.00
Fitting Poisson model...
Fitting Negative Binomial model...
Optimization terminated successfully.
         Current function value: 0.610383
         Iterations: 13
         Function evaluations: 15
         Gradient evaluations: 15
Fitting Zero-K Inflated Poisson Hurdle model...
Fitting ZINB model...
Fitting ZIP model...
Fitting ZKINB model...




Fitting ZKIP model...

MODEL COMPARISON RESULTS


In [16]:
results.to_csv('results.csv')
results_r2_minus.to_csv('results_r2_minus.csv')

In [6]:
test_size=0.3
n=300

In [8]:
1/(n*(1-test_size))

0.004761904761904762