## Getting the Predictions on the Small Test Set

In [20]:
import catboost as cb
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from statsmodels.stats.contingency_tables import mcnemar

In [2]:
# Read the input table from a CSV located relative to the notebook's working directory
# (presumably the pre-cleaned Higgs data, judging by the file name — confirm provenance).
df = pd.read_csv('higgs_cleaned.csv')

In [3]:
# Separate the label column from the feature columns.
y = df['class']
X = df.drop(columns='class')

# Hold out 20% of the rows as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1864,
)

In [9]:
# Report how many rows ended up in each partition.
print(f'Train / test split size: train set size: {len(X_train)}, test set size: {len(X_test)}')

Train / test split size: train set size: 78439, test set size: 19610


In [4]:
# Baseline model: CatBoost with only the random seed set (every other setting is left at
# the library default), trained without progress output and then used to predict the
# held-out test rows.
default_clf = cb.CatBoostClassifier(random_state=1864)
default_clf.fit(X_train, y_train, verbose= False)
default_preds = default_clf.predict(X_test)

In [23]:
# Hyperparameter set for the tuned CatBoost model
# (presumably the result of an earlier tuning run — the search itself is not in this notebook).
best_catboost = dict(
    bagging_temperature=0.2879213699517955,
    border_count=128,
    depth=10,
    eval_metric='Accuracy',
    l2_leaf_reg=5,
    learning_rate=0.031731849925222905,
    loss_function='Logloss',
    random_seed=1864,
    random_strength=59.7185747097968,
)

In [24]:
# Tuned model: CatBoost configured from the `best_catboost` hyperparameter dict.
# The cell previously unpacked `opt_params`, a name that is not defined anywhere in this
# notebook, so it raised NameError on a fresh kernel (Restart & Run All).
tuned_clf = cb.CatBoostClassifier(**best_catboost)
tuned_clf.fit(X_train, y_train, verbose=False)
tuned_preds = tuned_clf.predict(X_test)

In [27]:
# Score both prediction vectors against the held-out labels, then report them.
default_acc = accuracy_score(y_test, default_preds)
tuned_acc = accuracy_score(y_test, tuned_preds)

print(f'Test accuracy of the default catboost model: {default_acc}')
print(f'Test accuracy of the optimal catboost model: {tuned_acc}')

Test accuracy of the default catboost model: 0.7282508924018358
Test accuracy of the optimal catboost model: 0.7291177970423254


In [26]:
# Disabled export of the test labels and both prediction vectors to CSV.
# NOTE(review): `default_preds` is displayed further down as a NumPy array, which has no
# `.to_csv` method; the two prediction lines would need wrapping in a pandas Series
# (e.g. `pd.Series(default_preds).to_csv(...)`) before they could be re-enabled.
#y_test.to_csv('higgs_test_small.csv')
#default_preds.to_csv('higgs_default_preds_small.csv')
#tuned_preds.to_csv('higgs_tuned_preds_small.csv')

## Significance Testing

In [28]:
# Load tuned-model predictions from a headerless CSV, parsing every value as an integer.
# NOTE(review): this rebinds `tuned_preds` — until here the output of `tuned_clf.predict`
# — to a DataFrame read from disk, so the significance test below runs on the file's
# predictions rather than on the model trained in this notebook. The contingency counts
# shown further down (13648 + 699 = 14347 correct) do not match the tuned accuracy printed
# above (~0.7291 of 19610 rows, i.e. about 14298 correct), so the two prediction sets
# differ — confirm which one is intended.
tuned_preds = pd.read_csv('catboost_optimal_on_small_set.csv', header=None, dtype=int)

In [35]:
# Display the default-model predictions; the output shows a NumPy integer array, so it
# carries no index and is matched to other data purely by position.
default_preds

array([1, 0, 1, ..., 0, 0, 1], dtype=int64)

In [36]:
# Replace the shuffled row labels left over from the train/test split with a fresh
# 0..n-1 index so the labels line up with the predictions added next.
# `default_preds` is a plain NumPy array (no index), so it needs no equivalent step.
y_test = y_test.reset_index(drop=True)

In [37]:
# One row per test example: the true label next to each model's prediction.
# The frame takes its index from `y_test`; the NumPy array is attached by position.
mc_data = pd.DataFrame({'ground_truth': y_test})
mc_data['y_pred_cb_default'] = default_preds
# `tuned_preds` is a pandas object at this point, so pandas matches it to the frame by
# index label — presumably why `y_test` is re-indexed beforehand.
mc_data['y_pred_cb_tuned'] = tuned_preds
mc_data.head()

Unnamed: 0,ground_truth,y_pred_cb_default,y_pred_cb_tuned
0,0,1,1
1,0,0,0
2,1,1,1
3,0,0,0
4,1,0,0


In [38]:
# Flag, per test example, whether each model's prediction equals the true label.
default_hit = mc_data['ground_truth'].eq(mc_data['y_pred_cb_default'])
tuned_hit = mc_data['ground_truth'].eq(mc_data['y_pred_cb_tuned'])

# Encode the boolean outcome as the "Yes"/"No" strings the counting step looks for.
hit_label = {True: "Yes", False: "No"}
mc_data['correct_cb_default'] = default_hit.map(hit_label)
mc_data['correct_cb_tuned'] = tuned_hit.map(hit_label)
mc_data.head()

Unnamed: 0,ground_truth,y_pred_cb_default,y_pred_cb_tuned,correct_cb_default,correct_cb_tuned
0,0,1,1,No,No
1,0,0,0,Yes,Yes
2,1,1,1,Yes,Yes
3,0,0,0,Yes,Yes
4,1,0,0,No,No


In [39]:
# Count the four correct/incorrect combinations needed for the McNemar contingency table.
# Vectorised boolean masks replace the previous row-by-row `iterrows` loop: same counts,
# without building a Series per row in Python.
default_right = mc_data['correct_cb_default'] == "Yes"
default_wrong = mc_data['correct_cb_default'] == "No"
tuned_right = mc_data['correct_cb_tuned'] == "Yes"
tuned_wrong = mc_data['correct_cb_tuned'] == "No"

# `int(...)` keeps the counters plain Python ints, as the loop produced.
nr_corr_default_corr_tuned = int((default_right & tuned_right).sum())
nr_corr_default_incorr_tuned = int((default_right & tuned_wrong).sum())
nr_incorr_default_corr_tuned = int((default_wrong & tuned_right).sum())
nr_incorr_default_incorr_tuned = int((default_wrong & tuned_wrong).sum())

# Every row is labelled "Yes" or "No" for both models, so the four cells must cover the
# whole test set; a mismatch means the labelling step was skipped or altered.
assert (
    nr_corr_default_corr_tuned
    + nr_corr_default_incorr_tuned
    + nr_incorr_default_corr_tuned
    + nr_incorr_default_incorr_tuned
) == len(mc_data), "contingency counts do not cover every test row"

In [40]:
# Template of the 2x2 contingency table: rows are the tuned model's outcome, columns the
# default model's outcome. Each placeholder reads "tuned correct? / default correct?".
contingency_table_df = pd.DataFrame(
    [["Yes/Yes", "Yes/No"],
     ["No/Yes", "No/No"]],
    index=["nr_correct_cb_tuned", "nr_incorrect_cb_tuned"],
    columns=["nr_correct_cb_default", "nr_incorrect_cb_default"],
)
contingency_table_df

Unnamed: 0,nr_correct_cb_default,nr_incorrect_cb_default
nr_correct_cb_tuned,Yes/Yes,Yes/No
nr_incorrect_cb_tuned,No/Yes,No/No


In [41]:
# Overwrite the placeholders with the observed counts, addressing each cell by its
# row (tuned outcome) and column (default outcome) label.
contingency_table_df.loc["nr_correct_cb_tuned", "nr_correct_cb_default"] = nr_corr_default_corr_tuned
contingency_table_df.loc["nr_correct_cb_tuned", "nr_incorrect_cb_default"] = nr_incorr_default_corr_tuned
contingency_table_df.loc["nr_incorrect_cb_tuned", "nr_correct_cb_default"] = nr_corr_default_incorr_tuned
contingency_table_df.loc["nr_incorrect_cb_tuned", "nr_incorrect_cb_default"] = nr_incorr_default_incorr_tuned

contingency_table_df

Unnamed: 0,nr_correct_cb_default,nr_incorrect_cb_default
nr_correct_cb_tuned,13648,699
nr_incorrect_cb_tuned,633,4630


In [43]:
# Build the 2x2 table from the counts computed above instead of numbers copied by hand
# from an earlier output, so the test follows the data when models or inputs change.
# Layout: rows = tuned model correct / incorrect, columns = default model correct / incorrect.
contingency_table = [[nr_corr_default_corr_tuned, nr_incorr_default_corr_tuned],
                     [nr_corr_default_incorr_tuned, nr_incorr_default_incorr_tuned]]

# Apply McNemar test (chi-square approximation with continuity correction)
result = mcnemar(contingency_table, exact=False, correction=True)
print('Test statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))
print()
# interpret the p-value at the 5% significance level
alpha = 0.05
if result.pvalue > alpha:
    print("""Same proportions of errors for the used models (fail to reject H0). 
Therefore the performance difference between two models are not statistically significant.""")

else:
    print("""Different proportions of errors for the used models (reject H0). 
Therefore the performance difference between two models are statistically significant.""")

Test statistic=3.172, p-value=0.075

Same proportions of errors for the used models (fail to reject H0). 
Therefore the performance difference between two models are not statistically significant.
