In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score, make_scorer
from sklearn.tree import DecisionTreeClassifier

# Read data and split into train and test sets

In [None]:
# Load the preprocessed feature table; the first CSV column becomes the row index.
X = pd.read_csv(
    'processed_data.csv',
    index_col=0,
)

In [None]:
# Load the training labels; the first CSV column becomes the row index.
y = pd.read_csv(
    'training_set_labels.csv',
    index_col=0,
)

In [None]:
# Load the features that the fitted model will later score with predict_proba;
# the first CSV column becomes the row index.
prediction_features = pd.read_csv(
    'processed_pred.csv',
    index_col=0,
)

In [None]:
# Peek at the first five label rows to see which target columns are present.
y.head(n=5)

In [None]:
# Remove the 'seasonal_vaccine' label so it is not used as a target here
# (presumably leaving only the H1N1 label, given the output file name -- confirm).
# errors='ignore' keeps this cell idempotent: re-running it after the column has
# already been dropped is a no-op instead of raising KeyError.
y = y.drop(columns='seasonal_vaccine', errors='ignore')

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Grid-search AdaBoost over ensemble size and learning rate, scored by ROC AUC
# with 5-fold cross-validation. The estimator is seeded so that repeated runs of
# the search yield the same scores and ranking.
grid = GridSearchCV(
    AdaBoostClassifier(random_state=42),
    param_grid={
        'n_estimators': [300, 350, 400],
        'learning_rate': [0.5, 1, 2],
    },
    scoring='roc_auc',
    cv=5,
)

In [None]:
# Run the search on the training split; the label frame is flattened to a 1-D
# array before being passed to the estimator.
grid.fit(X_train, y_train.to_numpy().ravel())

In [None]:
# Tabulate the per-candidate cross-validation results for inspection.
grid_results = pd.DataFrame(data=grid.cv_results_)

In [None]:
# Display the cross-validation results table built from grid.cv_results_.
grid_results

In [None]:
# Final AdaBoost model with hand-picked hyperparameters (both values appear in
# the search grid above; presumably the best-scoring pair -- confirm against
# grid.best_params_). Seeded so the cross-validation scores and the exported
# probabilities are reproducible from a fresh kernel.
ada_model = AdaBoostClassifier(
    n_estimators=400,
    learning_rate=0.5,
    random_state=42,
)

In [None]:
# Fit the final model on the training split; labels are flattened to a 1-D array.
ada_model.fit(X_train, y_train.to_numpy().ravel())

In [None]:
# Print one line per training column with the importance the fitted ensemble
# assigns to it.
line_template = 'feature: {f}, importance: {i}'
column_importance_pairs = zip(X_train.columns, ada_model.feature_importances_)
for feat, importance in column_importance_pairs:
    print(line_template.format(f=feat, i=importance))

In [None]:
# 10-fold cross-validated ROC AUC of the final configuration on the training split.
cross_val_score(
    ada_model,
    X_train,
    y_train.to_numpy().ravel(),
    scoring='roc_auc',
    cv=10,
)

In [None]:
# Class probabilities for the prediction set. The frame keeps the row index of
# prediction_features (predict_proba returns a bare array, which would otherwise
# be re-indexed 0..n-1 and lose the link to the input rows), and its columns are
# labelled with the model's class labels so each probability column is
# identifiable.
prediction = pd.DataFrame(
    ada_model.predict_proba(prediction_features),
    index=prediction_features.index,
    columns=ada_model.classes_,
)

In [None]:
# Write the predicted probabilities to disk, including the frame's index.
prediction.to_csv(path_or_buf='H1N1_pred.csv')