In [1]:
from StarbucksAnalysis.model import pretty_coefficients, expand_grid, fit_logistic, model_outcome
import pandas as pd
import numpy as np
import pickle
import patsy

# Load data

In [2]:
offers = pickle.load(open("processed_data/offers_cleaned.p", "rb"))

# No interaction

In [3]:
lg_no_interaction, formula, exog_train_no_int, exog_val_no_int, _,  y_val_no_int = fit_logistic(offers, ["income_thousands", "gender", "young", "seniority", "social"])

In [4]:
model_outcome(lg_no_interaction, exog_val_no_int, y_val_no_int)

Accuracy:  0.6662656943086986

Confusion matrix
[[5987 1557]
 [2882 2875]]

Calssification report
              precision    recall  f1-score   support

           0       0.68      0.79      0.73      7544
           1       0.65      0.50      0.56      5757

    accuracy                           0.67     13301
   macro avg       0.66      0.65      0.65     13301
weighted avg       0.66      0.67      0.66     13301



In [5]:
pretty_coefficients(lg_no_interaction, exog_train_no_int)

Unnamed: 0,Feature,Coef,odds ratio
0,Intercept,-0.389,0.678
1,[T.Bogo],-0.542,0.581
2,[T.Discount],-0.05,0.951
3,gender[T.Male],-0.197,0.822
4,gender[T.Other],0.371,1.45
5,young[T.True],-0.262,0.769
6,seniority[T.Gen2],0.365,1.441
7,seniority[T.Gen3],-0.633,0.531
8,social[T.True],1.221,3.39
9,income_thousands,0.004,1.004


# Interaction

In [6]:

lg_interaction, formula, exog_train_int, exog_val_int, _, y_val_int = fit_logistic(offers, ["income_thousands", "gender", "young", "seniority", "social"], interaction = True)

In [7]:
pretty_coefficients(lg_interaction, exog_train_int)

Unnamed: 0,Feature,Coef,odds ratio
0,Intercept,0.196,1.216
1,[T.Bogo],-1.901,0.149
2,[T.Discount],-1.371,0.254
3,gender[T.Male]:[Bogo],-0.338,0.713
4,gender[T.Other]:[Bogo],0.531,1.7
5,gender[T.Male]:[Discount],-0.158,0.854
6,gender[T.Other]:[Discount],0.438,1.55
7,gender[T.Male]:[Informational],0.004,1.004
8,gender[T.Other]:[Informational],0.25,1.284
9,young[T.True]:[Bogo],-0.33,0.719


In [8]:
model_outcome(lg_interaction, exog_val_int, y_val_int)

Accuracy:  0.6734831967521239

Confusion matrix
[[5869 1675]
 [2668 3089]]

Calssification report
              precision    recall  f1-score   support

           0       0.69      0.78      0.73      7544
           1       0.65      0.54      0.59      5757

    accuracy                           0.67     13301
   macro avg       0.67      0.66      0.66     13301
weighted avg       0.67      0.67      0.67     13301



# Parameter tunning

The model with interaction terms performs slightly better. We go one step further and tune hyperparameters for that model. Specifically, we'll tune the solver parameter and the tol parameter, the algorithm for optimitazion and the stopping criteria respectivelly

In [9]:

param_grid = {
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    'tol': [1e-4, 1e-3, 1e-2]
}


In [10]:
lg_tunning, formula, exog_train_tun, exog_val_tun, X_val, y_val_tun = fit_logistic(offers, ["income_thousands", "gender", "young", "seniority", "social"], interaction = True, param_grid = param_grid)

In [11]:
pretty_coefficients(lg_tunning, exog_train_tun)

Unnamed: 0,Feature,Coef,odds ratio
0,Intercept,0.192,1.211
1,[T.Bogo],-1.913,0.148
2,[T.Discount],-1.374,0.253
3,gender[T.Male]:[Bogo],-0.341,0.711
4,gender[T.Other]:[Bogo],0.462,1.587
5,gender[T.Male]:[Discount],-0.166,0.847
6,gender[T.Other]:[Discount],0.393,1.481
7,gender[T.Male]:[Informational],-0.001,0.999
8,gender[T.Other]:[Informational],0.184,1.202
9,young[T.True]:[Bogo],-0.328,0.72


In [12]:
model_outcome(lg_tunning, exog_val_tun, y_val_tun)

Accuracy:  0.674235019923314

Confusion matrix
[[5884 1660]
 [2673 3084]]

Calssification report
              precision    recall  f1-score   support

           0       0.69      0.78      0.73      7544
           1       0.65      0.54      0.59      5757

    accuracy                           0.67     13301
   macro avg       0.67      0.66      0.66     13301
weighted avg       0.67      0.67      0.67     13301



In [13]:
f1_df = pd.DataFrame({"No interaction": [np.round(f1_score(y_val_no_int, lg_no_interaction.predict(exog_val_no_int)), 2)],
                      "Interaction": [np.round(f1_score(y_val_int, lg_interaction.predict(exog_val_int)), 2)],
                      "Tunned": [np.round(f1_score(y_val_tun, lg_tunning.predict(exog_val_tun)), 2)]})

NameError: name 'f1_score' is not defined

In [None]:
f1_df

There is an issue with patsy: it can not be save, in some circunstances, using the pickle module. The fitted model must be use for some visualizations in the notebook number for. The model will just be fitted again using the same code