In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 

from sklearn.multioutput import MultiOutputClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler

from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.feature_selection import SelectFromModel

from sklearn.metrics import plot_confusion_matrix, confusion_matrix
from sklearn.metrics import plot_roc_curve, roc_curve, roc_auc_score

from sklearn.pipeline import Pipeline

# GridSearch
from sklearn.model_selection import GridSearchCV

# Recursive Feature Selection 
from sklearn.feature_selection import RFE 

In [2]:
X_train= pd.read_csv('X_train_imputed.csv')
y_train = pd.read_csv('y_train.csv')

X_test = pd.read_csv('X_test_imputed.csv')
y_test = pd.read_csv('y_test.csv')


In [3]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15304 entries, 0 to 15303
Data columns (total 43 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   respondent_id                15304 non-null  int64  
 1   h1n1_concern                 15304 non-null  float64
 2   h1n1_knowledge               15304 non-null  float64
 3   behavioral_antiviral_meds    15304 non-null  float64
 4   behavioral_avoidance         15304 non-null  float64
 5   behavioral_face_mask         15304 non-null  float64
 6   behavioral_wash_hands        15304 non-null  float64
 7   behavioral_large_gatherings  15304 non-null  float64
 8   behavioral_outside_home      15304 non-null  float64
 9   behavioral_touch_face        15304 non-null  float64
 10  doctor_recc_h1n1             15304 non-null  float64
 11  doctor_recc_seasonal         15304 non-null  float64
 12  chronic_med_condition        15304 non-null  float64
 13  child_under_6_mo

In [4]:
X_train=X_train.drop('respondent_id', axis=1)
y_train=y_train.drop('respondent_id', axis=1)

X_test=X_test.drop('respondent_id', axis=1)
y_test=y_test.drop('respondent_id', axis=1)


In [19]:
X_test.shape

(6559, 34)

In [5]:
# One hot encoding for non-binary features 

non_binary = ['h1n1_concern', 'h1n1_knowledge', 'opinion_h1n1_vacc_effective',
'opinion_h1n1_risk', 'opinion_h1n1_sick_from_vacc', 'opinion_seas_vacc_effective',
'opinion_seas_risk', 'opinion_seas_sick_from_vacc']


# X_test data
nb_train = X_train[non_binary]

ohe = OneHotEncoder(drop='first', sparse=False)

dums = ohe.fit_transform(nb_train)

dums_df = pd.DataFrame(dums,
                       columns=ohe.get_feature_names(),
                       index=nb_train.index)




# X_test data

nb_test = X_test[non_binary]

dums_t = ohe.transform(nb_test)

dums_t_df = pd.DataFrame(dums_t,
                       columns=ohe.get_feature_names(),
                       index=nb_test.index)





In [6]:
# concat one hot encoded df and X 

# X_train data
X_train = X_train.drop(non_binary, axis=1)

X_train_ohe = pd.concat([X_train, dums_df], axis=1)

# X_test data
X_test = X_test.drop(non_binary, axis=1)

X_test_ohe = pd.concat([X_test, dums_t_df], axis=1)


In [17]:
X_test_ohe.shape

(6559, 63)

### Baseline Model 

In [7]:
y_train.h1n1_vaccine.value_counts(normalize=True)

0    0.783521
1    0.216479
Name: h1n1_vaccine, dtype: float64

In [8]:
y_train.seasonal_vaccine.value_counts(normalize=True)

0    0.531887
1    0.468113
Name: seasonal_vaccine, dtype: float64

### Logistic Regression

#### Model 1

In [14]:

logreg_h1n1=LogisticRegression(C=1,penalty="l1", class_weight='balanced', max_iter=1000000,
                              solver='saga')

logreg_h1n1.fit(X_train_ohe, y_train.h1n1_vaccine)


LogisticRegression(C=1, class_weight='balanced', max_iter=1000000, penalty='l1',
                   solver='saga')

In [15]:
logreg_h1n1.predict_log_proba(X_test_ohe)

array([[-0.29322716, -1.36984123],
       [-1.74559343, -0.19181614],
       [-1.7108865 , -0.19931171],
       ...,
       [-0.35599052, -1.20557161],
       [-0.28132573, -1.40560945],
       [-1.55138772, -0.23819835]])

In [13]:
lr.fit(X_train_ohe, y_train.h1n1_vaccine)
lr.predict_proba(X_test_ohe)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


array([[0.90610263, 0.09389737],
       [0.46649074, 0.53350926],
       [0.4594126 , 0.5405874 ],
       ...,
       [0.88718811, 0.11281189],
       [0.90517618, 0.09482382],
       [0.47959744, 0.52040256]])

In [11]:


lr = LogisticRegression()

# H1N1 
h1n1 = cross_val_score(estimator=lr, X=X_train_ohe, y=y_train.h1n1_vaccine, 
                cv=5, scoring='roc_auc').mean() 


# Seasonal
sea = cross_val_score(estimator=lr, X=X_train_ohe, y=y_train.seasonal_vaccine, 
                cv=5, scoring='roc_auc').mean() 

print(f'H1N1:{h1n1}')
print(f'Seasonal:{sea}')


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

H1N1:0.8392587424976716
Seasonal:0.8563744952638093


#### Grid Search 

In [10]:
# C, penalty, max_iter, solver, class_weight   

grid={"C":np.logspace(-3,3,7), "penalty":["l1","l2", 'elasticnet', 'none'],
     "max_iter":[100, 1000, 10000, 100000, 1000000], 
      'solver':['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga' ],
     "class_weight":['balanced', None]} # l1 lasso l2 ridge

logreg=LogisticRegression()
logreg_cv=GridSearchCV(logreg,grid,cv=5, scoring='roc_auc')
logreg_cv.fit(X_train,y_train.h1n1_vaccine)


Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 442, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/U

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-pa

Traceback (most recent call last):
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/Users/emikonaomasa/opt/anaconda3/envs/learn-env/lib/python3.8/site-packages/sklearn/linear_model/_logistic.py", line 453, in _check_solver
    raise ValueError(
ValueError: penalty='none' is not supported for the liblinear solver



KeyboardInterrupt: 

#### All binary features model

#### H1N1

{'C': 1.0,
 'class_weight': 'balanced',
 'max_iter': 1000000,
 'penalty': 'l1',
 'solver': 'saga'}
 
 Mean of Cross val ROC_AUC: 0.8396008401858026

In [None]:
# C, penalty, max_iter, solver, class_weight   

grid={"C":np.logspace(-3,3,7), "penalty":["l1","l2", 'elasticnet', 'none'],
     "max_iter":[100, 1000, 10000, 100000, 1000000], 
      'solver':['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga' ],
     "class_weight":['balanced', None]} # l1 lasso l2 ridge

logreg=LogisticRegression()
logreg_cv=GridSearchCV(logreg,grid,cv=5, scoring='roc_auc')
logreg_cv.fit(X_train_ohe,y_train.h1n1_vaccine)



In [None]:
logreg_cv.best_params_

In [None]:
logreg_cv.best_score_

In [None]:
logreg_h1n1=LogisticRegression(C=1,penalty="l1", class_weight='balanced', max_iter=1000000,
                              solver='saga')

logreg_h1n1.fit(X_train_ohe, y_train.h1n1_vaccine)
yhat_proba = logreg_h1n1.predict_proba(X_test_ohe)[:, 1]
print("Accuracy score w/test",logreg_h1n1.score(X_test_ohe, y_test.h1n1_vaccine))
print("ROC_AUC w/test", roc_auc_score(y_test.h1n1_vaccine, yhat_proba) )



#### Seasonal Flu

In [None]:
# C, penalty, max_iter, solver, class_weight   

grid_s={"C":np.logspace(-3,3,7), "penalty":["l1","l2", 'elasticnet', 'none'],
     "max_iter":[100, 1000, 10000, 100000, 1000000], 
      'solver':['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga' ],
     "class_weight":['balanced', None]} # l1 lasso l2 ridge

logreg=LogisticRegression()
logreg_cv=GridSearchCV(logreg,grid_s,cv=5, scoring='roc_auc')
logreg_cv.fit(X_train_ohe,y_train.seasonal_vaccine)



In [None]:
print("tuned hpyerparameters :(best parameters) ",logreg_cv.best_params_)
print("ROC_AUC :",logreg_cv.best_score_)


In [None]:
logreg_cv.best_params_

In [None]:
# C, penalty  
grid={"C":np.logspace(-3,3,7), "penalty":["l1","l2", 'elasticnet', 'none']}# l1 lasso l2 ridge
logreg=LogisticRegression()
logreg_cv=GridSearchCV(logreg,grid,cv=5, scoring='roc_auc')
logreg_cv.fit(X_train_ohe,y_train.h1n1_vaccine)

print("tuned hpyerparameters :(best parameters) ",logreg_cv.best_params_)
print("ROC_AUC :",logreg_cv.best_score_)


tuned hpyerparameters :(best parameters)  {'C': 1.0, 'penalty': 'l2'}
ROC_AUC : 0.8392587424976716

In [None]:
logreg2=LogisticRegression(C=1,penalty="l2")
logreg2.fit(X_train_ohe, y_train.h1n1_vaccine)
yhat_proba = logreg2.predict_proba(X_test_ohe)[:, 1]
print("Accuracy score w/test",logreg2.score(X_test_ohe, y_test.h1n1_vaccine))
print("ROC_AUC w/test", roc_auc_score(y_test.h1n1_vaccine, yhat_proba) )

In [None]:
# max_iter , solver 
grid={"max_iter":[100, 1000, 10000, 100000, 1000000], 
      'solver':['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga' ]}
# use logreg2 
logreg_cv=GridSearchCV(logreg2, grid,cv=5)
logreg_cv.fit(X_train_ohe, y_train.h1n1_vaccine)

print("tuned hpyerparameters :(best parameters) ",logreg_cv.best_params_)
print("ROC_AUC:",logreg_cv.best_score_)


In [None]:
logreg3=LogisticRegression(C=1,penalty="l2", solver="liblinear", max_iter=100) #max_iter=100 is default
logreg3.fit(X_train_ohe, y_train.h1n1_vaccine)
yhat_proba = logreg3.predict_proba(X_test_ohe)[:, 1]
print("Accuracy score w/test",logreg3.score(X_test_ohe, y_test.h1n1_vaccine))
print("ROC_AUC w/test", roc_auc_score(y_test.h1n1_vaccine, yhat_proba) )

In [None]:
# class_weight 

grid={"class_weight":['balanced', None]} # default = None
# use logreg3 
logreg_cv=GridSearchCV(logreg3, grid,cv=5)
logreg_cv.fit(X_train_ohe, y_train.h1n1_vaccine)

print("tuned hpyerparameters :(best parameters) ",logreg_cv.best_params_)
print("ROC_AUC:",logreg_cv.best_score_)


In [None]:
logreg_h1n1=LogisticRegression(C=1,penalty="l2", solver="liblinear", max_iter=100) #max_iter=100 is default
logreg_h1n1.fit(X_train_ohe, y_train.h1n1_vaccine)
yhat_proba = logreg_h1n1.predict_proba(X_test_ohe)[:, 1]
print("Accuracy score w/test",logreg_h1n1.score(X_test_ohe, y_test.h1n1_vaccine))
print("ROC_AUC w/test", roc_auc_score(y_test.h1n1_vaccine, yhat_proba) )

In [None]:
# H1N1 model: C=1,penalty="l2", solver="liblinear", max_iter=100, class_weight=None  

#### Seasonal

In [None]:
# C, penalty 

grid={"C":np.logspace(-3,3,7), "penalty":["l1","l2", 'elasticnet', 'none']}# l1 lasso l2 ridge
logreg=LogisticRegression()
logreg_cv=GridSearchCV(logreg,grid,cv=5, scoring='roc_auc')
logreg_cv.fit(X_train_ohe,y_train.seasonal_vaccine)

print("tuned hpyerparameters :(best parameters) ",logreg_cv.best_params_)
print("ROC_AUC :",logreg_cv.best_score_)


tuned hpyerparameters :(best parameters)  {'C': 10.0, 'penalty': 'l2'}
ROC_AUC : 0.8563812662042194

In [None]:
logreg2=LogisticRegression(C=10,penalty="l2")
logreg2.fit(X_train_ohe, y_train.seasonal_vaccine)
yhat_proba = logreg2.predict_proba(X_test_ohe)[:, 1]
print("Accuracy score w/test",logreg2.score(X_test_ohe, y_test.seasonal_vaccine))
print("ROC_AUC w/test", roc_auc_score(y_test.seasonal_vaccine, yhat_proba) )

In [None]:
# max_iter , solver 
grid={"max_iter":[100, 1000, 10000, 100000, 1000000], 
      'solver':['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga' ]}
# use logreg2 
logreg_cv=GridSearchCV(logreg2, grid,cv=5)
logreg_cv.fit(X_train_ohe, y_train.seasonal_vaccine)

print("tuned hpyerparameters :(best parameters) ",logreg_cv.best_params_)
print("ROC_AUC:",logreg_cv.best_score_)

In [None]:
logreg3=LogisticRegression(C=10,penalty="l2", solver='liblinear', max_iter=100)
logreg3.fit(X_train_ohe, y_train.seasonal_vaccine)
yhat_proba = logreg3.predict_proba(X_test_ohe)[:, 1]
print("Accuracy score w/test",logreg3.score(X_test_ohe, y_test.seasonal_vaccine))
print("ROC_AUC w/test", roc_auc_score(y_test.seasonal_vaccine, yhat_proba) )

In [None]:
# class_weight 

grid={"class_weight":['balanced', None]} # default = None
# use logreg3 
logreg_cv=GridSearchCV(logreg3, grid,cv=5)
logreg_cv.fit(X_train_ohe, y_train.seasonal_vaccine)

print("tuned hpyerparameters :(best parameters) ",logreg_cv.best_params_)
print("ROC_AUC:",logreg_cv.best_score_)

In [None]:
# why class_weight = balanced is the pest parameter ? 

logreg_sea=LogisticRegression(C=10,penalty="l2", solver='liblinear', max_iter=100, class_weight='balanced')
logreg_sea.fit(X_train_ohe, y_train.seasonal_vaccine)
yhat_proba = logreg_sea.predict_proba(X_test_ohe)[:, 1]
print("Accuracy score w/test",logreg_sea.score(X_test_ohe, y_test.seasonal_vaccine))
print("ROC_AUC w/test", roc_auc_score(y_test.seasonal_vaccine, yhat_proba) )

In [None]:
# Seasonal Model: C=10,penalty="l2", solver='liblinear', max_iter=100, class_weight='balanced'