Notes on MLFlow:
- 'Run name' field: model name, type of output (multilabel vs unilabel), which vaccine (for multiclass only)
- 'Parameters' field: methods applied for data (data cleaning, data balancing, hyperparameters)--insert feature engineering info here, if relevant?
- 'Tags' field: details about the features used for the run (is one of the vaccines in the features?)

In [None]:
import sys
# adding to the path variables the one folder higher (locally, not changing system variables)
sys.path.append("..")
import pandas as pd
import numpy as np
import warnings

#mlflow import
import mlflow
from modeling.config import EXPERIMENT_NAME_multilabel, EXPERIMENT_NAME_h1n1, EXPERIMENT_NAME_seasonal, EXPERIMENT_NAME_multiclass
# Read the MLflow tracking URI from the dotfile one folder up; the context
# manager closes the file handle instead of leaving it open.
with open("../.mlflow_uri") as uri_file:
    TRACKING_URI = uri_file.read().strip()

# Shared random seed (used for the train/test split and the resampling steps).
RSEED = 42

# Modeling Libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

!pip install plotly
import plotly.express as px


from sklearn.dummy import DummyClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_val_predict, cross_val_score, cross_validate
from sklearn.metrics import roc_curve, confusion_matrix, accuracy_score, recall_score, precision_score, f1_score, roc_auc_score

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn import svm

warnings.filterwarnings('ignore')

# for resampling (data balancing)
from sklearn.utils import resample
!pip install -U imbalanced-learn
from imblearn.over_sampling import SMOTE

Import data from a previously prepared dataframe:

In [None]:
# Load the previously prepared (cleaned) dataframe from disk.
df = pd.read_csv(filepath_or_buffer='../data/Flu_Shot_Data_cleaned_2.csv')

In [None]:
df.head()

Dropping the strange 'Unnamed: 0' column:

In [None]:
# Drop the stray 'Unnamed: 0' column (presumably an index that was saved into
# the CSV -- verify). errors='ignore' keeps the cell re-runnable: without it a
# second run raises KeyError because the column is already gone.
df.drop(columns='Unnamed: 0', inplace=True, errors='ignore')

In [None]:
df.shape

# Set up for modelling (stays the same for all experiments)

Set up of pipeline preprocessor:

In [None]:
# Preprocessing for categorical features: a single one-hot encoding step.
# The first level of every feature is dropped and unseen categories raise an
# error at transform time. Shared unchanged by every experiment.
cat_pipeline = Pipeline(
    steps=[('1hot', OneHotEncoder(drop='first', handle_unknown='error'))]
)

In [None]:
# Start from every column of the dataframe; the target columns are removed
# from a copy of this list further down.
cat_features = df.columns.tolist()

Instantiating the models:

In [None]:
# Base estimators used by the experiments below.

# for Logistic Regression
logreg = LogisticRegression()

# for KNN
knn = KNeighborsClassifier()

# for Random Forest
# A fixed random_state makes the forest (and its scores) reproducible between
# runs; an unseeded forest gives slightly different results every time.
rand_forst = RandomForestClassifier(random_state=RSEED)

# for SVM
# Built from the directly imported SVC class: `svm = svm.SVC(...)` rebound the
# imported `svm` module name to the estimator, so re-running the cell raised
# AttributeError ('SVC' object has no attribute 'SVC').
svm = SVC(kernel='rbf')

# TheFluShot_multilabel: Multilabel prediction (both vaccinations)

Removal of target variables from cat_features list (this needs to be adjusted for each dataset):

In [None]:
# Feature columns = every column except the two target labels.
# Built in a single expression so the cell can be re-run safely; the previous
# copy-then-`list.remove` sequence raised ValueError on a second run because
# the label had already been removed.
cat_features_no_vacc = [
    column
    for column in cat_features
    if column not in ('h1n1_vaccine', 'seasonal_vaccine')
]

In [None]:
cat_features_no_vacc

Rename the features and target to 'X' and 'y', to make the test-train split easier:

In [None]:
# Targets for the multilabel task: both vaccine columns, H1N1 first.
y_both_vacc = df.loc[:, ['h1n1_vaccine', 'seasonal_vaccine']]

In [None]:
#y_both_vacc = y_both_vacc.to_numpy()
y_both_vacc

In [None]:
# Features: everything except the two vaccine (target) columns.
X_no_vacc = df.drop(['h1n1_vaccine', 'seasonal_vaccine'], axis=1)

Performing test-train split (the same data can be used for each model in multilabelling):

In [None]:
# 80/20 train/test split, stratified on both target columns and seeded with
# RSEED so the split is reproducible.
(
    X_no_vacc_train,
    X_no_vacc_test,
    y_both_vacc_train,
    y_both_vacc_test,
) = train_test_split(
    X_no_vacc,
    y_both_vacc,
    test_size=0.2,
    stratify=y_both_vacc,
    random_state=RSEED,
)

In [None]:
# Sanity check: report the shape of every split.
print(f'X_no_vacc_train shape: {X_no_vacc_train.shape}')
print(f'X_no_vacc_test shape: {X_no_vacc_test.shape}')
print(f'y_both_vacc_train: {y_both_vacc_train.shape}')
print(f'y_both_vacc_test: {y_both_vacc_test.shape}')

Setting up the preprocessor (the same one can be used for each modelling in multilabelling):

In [None]:
# Apply the categorical pipeline to every non-target feature column.
preprocessor = ColumnTransformer(
    transformers=[('cat', cat_pipeline, cat_features_no_vacc)]
)

Setting up the multilabel estimators for each model:

In [None]:
# Wrap each base estimator in a MultiOutputClassifier so it can be trained on
# both target columns at once.

# for logistic regression
multilabel_est_logreg = MultiOutputClassifier(logreg)

# for KNN
multilabel_est_knn = MultiOutputClassifier(knn)

# for Random Forest
multilabel_est_rand_forst = MultiOutputClassifier(rand_forst)

# for SVM
multilabel_est_SVC = MultiOutputClassifier(svm)

Setting up the pipeline for each model:

In [None]:
# One pipeline per model: the preprocessor followed by the multilabel estimator.
# NOTE: all four pipelines reference the same `preprocessor` object.

# for logreg
logreg_multilabel_pipeline = Pipeline(
    steps=[("preprocessor", preprocessor), ("estimators", multilabel_est_logreg)]
)

# for KNN
knn_multilabel_pipeline = Pipeline(
    steps=[("preprocessor", preprocessor), ("estimators", multilabel_est_knn)]
)

# for Random Forest
rand_forst_multilabel_pipeline = Pipeline(
    steps=[("preprocessor", preprocessor), ("estimators", multilabel_est_rand_forst)]
)

# for SVM
svm_multilabel_pipeline = Pipeline(
    steps=[("preprocessor", preprocessor), ("estimators", multilabel_est_SVC)]
)

### Data Balancing

https://towardsdatascience.com/methods-for-dealing-with-imbalanced-data-5b761be45a18

Since the H1N1 label is unbalanced, two approaches will be tried to balance it:

1. Oversampling of the minority class (h1n1_vaccine == 1)
2. Undersampling of the majority class (h1n1_vaccine == 0)

**Creating concatenated training dataframe and separating into minority and majority class  (for data balancing)**

In [None]:
# Re-attach the targets to the training features so that whole rows
# (features + labels) can be resampled together.
concat_train_df = pd.concat(objs=[X_no_vacc_train, y_both_vacc_train], axis='columns')

# Split the training rows by H1N1 label.

# majority class (not vaccinated against H1N1)
no_h1n1_vacc = concat_train_df.loc[concat_train_df['h1n1_vaccine'] == 0]

# minority class (vaccinated against H1N1)
yes_h1n1_vacc = concat_train_df.loc[concat_train_df['h1n1_vaccine'] == 1]


In [None]:
yes_h1n1_vacc.head()

**1. Oversampling of the minority class (upsampling)**

In [None]:
# Upsample the minority class: draw rows with replacement until it has as many
# rows as the majority class.
yes_h1n1_vacc_upsampled = resample(
    yes_h1n1_vacc,
    n_samples=no_h1n1_vacc.shape[0],
    replace=True,
    random_state=RSEED,
)

In [None]:
# Stack the untouched majority rows on top of the upsampled minority rows.
upsampled = pd.concat(objs=[no_h1n1_vacc, yes_h1n1_vacc_upsampled], axis=0)

In [None]:
# checking new class counts
upsampled.h1n1_vaccine.value_counts()

In [None]:
upsampled.head()

In [None]:
# Targets of the upsampled training set (both vaccine columns, H1N1 first).
y_multi_vacc_upsamp_train = upsampled.loc[:, ['h1n1_vaccine', 'seasonal_vaccine']]

In [None]:
# Features of the upsampled training set (target columns removed).
X_no_vacc_upsamp_train = upsampled.drop(columns=['h1n1_vaccine', 'seasonal_vaccine'])

In [None]:
X_no_vacc_upsamp_train

**2. Undersampling of majority class (downsampling)**

In [None]:
# Downsample the majority class: draw rows without replacement until it has
# as many rows as the minority class.
no_h1n1_vacc_downsampled = resample(
    no_h1n1_vacc,
    n_samples=yes_h1n1_vacc.shape[0],
    replace=False,
    random_state=RSEED,
)

In [None]:
# Stack the downsampled majority rows on top of the untouched minority rows.
downsampled = pd.concat(objs=[no_h1n1_vacc_downsampled, yes_h1n1_vacc], axis=0)

In [None]:
# checking new class counts
downsampled.h1n1_vaccine.value_counts()

In [None]:
# Split the downsampled frame back into targets (y) and features (X).
y_multi_vacc_downsamp_train = downsampled.loc[:, ['h1n1_vaccine', 'seasonal_vaccine']]

X_no_vacc_downsamp_train = downsampled.drop(columns=['h1n1_vaccine', 'seasonal_vaccine'])

---

Convert target variable to array, otherwise the evaluation metrics fail:

In [None]:
# Convert the training targets to a 2-D array so the metric cells can slice
# them with [:, 0] / [:, 1]. np.asarray is idempotent, so re-running this cell
# no longer fails with "'numpy.ndarray' object has no attribute 'to_numpy'".
y_both_vacc_train = np.asarray(y_both_vacc_train)

In [None]:
# Convert the test targets to a 2-D array so the metric cells can slice them
# with [:, 0] / [:, 1]. np.asarray is idempotent, so re-running this cell no
# longer fails with "'numpy.ndarray' object has no attribute 'to_numpy'".
y_both_vacc_test = np.asarray(y_both_vacc_test)

### Model training and predictions (run predictions immediately after training the model!)

No resampling--fitting model:

In [None]:
from sklearn.base import clone

# Fit an independent copy of each pipeline on the original training data.
# Pipeline.fit() fits in place and returns the same object, so fitting the
# pipeline itself would make these variables aliases of a pipeline that is
# overwritten as soon as it is re-fitted on other data.

# for logreg
logreg_multilabel = clone(logreg_multilabel_pipeline).fit(X_no_vacc_train, y_both_vacc_train)

# for KNN
knn_multilabel = clone(knn_multilabel_pipeline).fit(X_no_vacc_train, y_both_vacc_train)

# for Random Forest
rand_forst_multilabel = clone(rand_forst_multilabel_pipeline).fit(X_no_vacc_train, y_both_vacc_train)

# for SVM
svm_multilabel = clone(svm_multilabel_pipeline).fit(X_no_vacc_train, y_both_vacc_train)

In [None]:
# Figure out later what this does and if we want to use it


#y_train_predicted = cross_val_predict(full_pipeline, X_train, y_train, cv=5)

No resampling--making predictions:

In [None]:
# Predict on the train and test features with each model fitted without
# resampling. Each pair is (train predictions, test predictions).

# for logreg
logreg_multilabel_trainpreds, logreg_multilabel_testpreds = (
    logreg_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# for KNN
knn_multilabel_trainpreds, knn_multilabel_testpreds = (
    knn_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# for Random Forest
rand_forst_multilabel_trainpreds, rand_forst_multilabel_testpreds = (
    rand_forst_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# for SVM
svm_multilabel_trainpreds, svm_multilabel_testpreds = (
    svm_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

Upsampling--fitting model:

In [None]:
from sklearn.base import clone

# Fit an independent copy of each pipeline on the upsampled training data.
# Pipeline.fit() fits in place and returns the same object, so fitting the
# pipeline itself would silently overwrite any model fitted earlier from the
# same pipeline (and be overwritten by any later fit).

# for logreg
logreg_upsamp_multilabel = clone(logreg_multilabel_pipeline).fit(X_no_vacc_upsamp_train, y_multi_vacc_upsamp_train)

# for KNN
knn_upsamp_multilabel = clone(knn_multilabel_pipeline).fit(X_no_vacc_upsamp_train, y_multi_vacc_upsamp_train)

# for Random Forest
rand_forst_upsamp_multilabel = clone(rand_forst_multilabel_pipeline).fit(X_no_vacc_upsamp_train, y_multi_vacc_upsamp_train)

# for SVM
svm_upsamp_multilabel = clone(svm_multilabel_pipeline).fit(X_no_vacc_upsamp_train, y_multi_vacc_upsamp_train)

Upsampling--making predictions:

In [None]:
# Predict on the original (non-resampled) train and test features with each
# model fitted on the upsampled data. Each pair is (train, test) predictions.

# for logreg
logreg_upsamp_multilabel_trainpreds, logreg_upsamp_multilabel_testpreds = (
    logreg_upsamp_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# for KNN
knn_upsamp_multilabel_trainpreds, knn_upsamp_multilabel_testpreds = (
    knn_upsamp_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# for Random Forest
rand_forst_upsamp_multilabel_trainpreds, rand_forst_upsamp_multilabel_testpreds = (
    rand_forst_upsamp_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# for SVM
svm_upsamp_multilabel_trainpreds, svm_upsamp_multilabel_testpreds = (
    svm_upsamp_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

Downsampling--fitting model:

In [None]:
from sklearn.base import clone

# Fit an independent copy of each pipeline on the downsampled training data.
# Pipeline.fit() fits in place and returns the same object, so fitting the
# pipeline itself would silently overwrite any model fitted earlier from the
# same pipeline.

# for logreg
logreg_downsamp_multilabel = clone(logreg_multilabel_pipeline).fit(X_no_vacc_downsamp_train, y_multi_vacc_downsamp_train)

# for KNN
knn_downsamp_multilabel = clone(knn_multilabel_pipeline).fit(X_no_vacc_downsamp_train, y_multi_vacc_downsamp_train)

# for Random Forest
rand_forst_downsamp_multilabel = clone(rand_forst_multilabel_pipeline).fit(X_no_vacc_downsamp_train, y_multi_vacc_downsamp_train)

# for SVM
svm_downsamp_multilabel = clone(svm_multilabel_pipeline).fit(X_no_vacc_downsamp_train, y_multi_vacc_downsamp_train)

Downsampling--making predictions:

In [None]:
# Predict on the original (non-resampled) train and test features with each
# model fitted on the downsampled data. Each pair is (train, test) predictions.

# for logreg
logreg_downsamp_multilabel_trainpreds, logreg_downsamp_multilabel_testpreds = (
    logreg_downsamp_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# for KNN
knn_downsamp_multilabel_trainpreds, knn_downsamp_multilabel_testpreds = (
    knn_downsamp_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# for Random Forest
rand_forst_downsamp_multilabel_trainpreds, rand_forst_downsamp_multilabel_testpreds = (
    rand_forst_downsamp_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# for SVM
svm_downsamp_multilabel_trainpreds, svm_downsamp_multilabel_testpreds = (
    svm_downsamp_multilabel.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

### Model evaluation

#### Train data--no resampling

In [None]:
# Logreg--Train data Evaluation Metrics for H1N1 Vaccines (target column 0)
# NOTE: the ROC AUC is computed from hard class predictions, not probabilities.
h1n1_logreg_multilabel_train_acc = accuracy_score(y_both_vacc_train[:, 0], logreg_multilabel_trainpreds[:, 0])
h1n1_logreg_multilabel_train_recall = recall_score(y_both_vacc_train[:, 0], logreg_multilabel_trainpreds[:, 0])
h1n1_logreg_multilabel_train_precision = precision_score(y_both_vacc_train[:, 0], logreg_multilabel_trainpreds[:, 0])
# Fixed: this line called the non-existent `f0_score` (NameError) and stored
# the result under `..._f0`; every other cell uses f1_score / `..._f1`.
h1n1_logreg_multilabel_train_f1 = f1_score(y_both_vacc_train[:, 0], logreg_multilabel_trainpreds[:, 0])
h1n1_logreg_multilabel_train_roc = roc_auc_score(y_both_vacc_train[:, 0], logreg_multilabel_trainpreds[:, 0])

In [None]:
# Logreg, no resampling: train-set metrics for the seasonal vaccine (target column 1).
(
    seasonal_logreg_multilabel_train_acc,
    seasonal_logreg_multilabel_train_recall,
    seasonal_logreg_multilabel_train_precision,
    seasonal_logreg_multilabel_train_f1,
    seasonal_logreg_multilabel_train_roc,
) = (
    metric(y_both_vacc_train[:, 1], logreg_multilabel_trainpreds[:, 1])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_train[:, 1], logreg_multilabel_trainpreds[:, 1])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_train[:, 1], logreg_multilabel_trainpreds[:, 1])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_train[:, 1], logreg_multilabel_trainpreds[:, 1])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_train[:, 1], logreg_multilabel_trainpreds[:, 1])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_train[:, 1], logreg_multilabel_trainpreds[:, 1])))

In [None]:
# KNN, no resampling: train-set metrics for the H1N1 vaccine (target column 0).
(
    h1n1_knn_multilabel_train_acc,
    h1n1_knn_multilabel_train_recall,
    h1n1_knn_multilabel_train_precision,
    h1n1_knn_multilabel_train_f1,
    h1n1_knn_multilabel_train_roc,
) = (
    metric(y_both_vacc_train[:, 0], knn_multilabel_trainpreds[:, 0])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_train[:, 0], knn_multilabel_trainpreds[:, 0])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_train[:, 0], knn_multilabel_trainpreds[:, 0])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_train[:, 0], knn_multilabel_trainpreds[:, 0])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_train[:, 0], knn_multilabel_trainpreds[:, 0])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_train[:, 0], knn_multilabel_trainpreds[:, 0])))

In [None]:
# KNN, no resampling: train-set metrics for the seasonal vaccine (target column 1).
(
    seasonal_knn_multilabel_train_acc,
    seasonal_knn_multilabel_train_recall,
    seasonal_knn_multilabel_train_precision,
    seasonal_knn_multilabel_train_f1,
    seasonal_knn_multilabel_train_roc,
) = (
    metric(y_both_vacc_train[:, 1], knn_multilabel_trainpreds[:, 1])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_train[:, 1], knn_multilabel_trainpreds[:, 1])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_train[:, 1], knn_multilabel_trainpreds[:, 1])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_train[:, 1], knn_multilabel_trainpreds[:, 1])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_train[:, 1], knn_multilabel_trainpreds[:, 1])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_train[:, 1], knn_multilabel_trainpreds[:, 1])))

In [None]:
# Random Forest, no resampling: train-set metrics for the H1N1 vaccine (target column 0).
# NOTE: these results were observed to be ~0.01 off from the previous notebook.
# Random forests are randomised, so scores vary between runs unless the
# estimator is given a fixed random_state; the train/test split is another
# candidate (export the split as CSV to rule it out).
(
    h1n1_rand_forst_multilabel_train_acc,
    h1n1_rand_forst_multilabel_train_recall,
    h1n1_rand_forst_multilabel_train_precision,
    h1n1_rand_forst_multilabel_train_f1,
    h1n1_rand_forst_multilabel_train_roc,
) = (
    metric(y_both_vacc_train[:, 0], rand_forst_multilabel_trainpreds[:, 0])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_train[:, 0], rand_forst_multilabel_trainpreds[:, 0])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_train[:, 0], rand_forst_multilabel_trainpreds[:, 0])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_train[:, 0], rand_forst_multilabel_trainpreds[:, 0])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_train[:, 0], rand_forst_multilabel_trainpreds[:, 0])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_train[:, 0], rand_forst_multilabel_trainpreds[:, 0])))

In [None]:
# Random Forest, no resampling: train-set metrics for the seasonal vaccine (target column 1).
# NOTE: these results were observed to be ~0.01 off from the previous notebook.
# Random forests are randomised, so scores vary between runs unless the
# estimator is given a fixed random_state; the train/test split is another
# candidate (export the split as CSV to rule it out).
(
    seasonal_rand_forst_multilabel_train_acc,
    seasonal_rand_forst_multilabel_train_recall,
    seasonal_rand_forst_multilabel_train_precision,
    seasonal_rand_forst_multilabel_train_f1,
    seasonal_rand_forst_multilabel_train_roc,
) = (
    metric(y_both_vacc_train[:, 1], rand_forst_multilabel_trainpreds[:, 1])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_train[:, 1], rand_forst_multilabel_trainpreds[:, 1])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_train[:, 1], rand_forst_multilabel_trainpreds[:, 1])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_train[:, 1], rand_forst_multilabel_trainpreds[:, 1])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_train[:, 1], rand_forst_multilabel_trainpreds[:, 1])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_train[:, 1], rand_forst_multilabel_trainpreds[:, 1])))

In [None]:
# SVM, no resampling: train-set metrics for the H1N1 vaccine (target column 0).
(
    h1n1_svm_multilabel_train_acc,
    h1n1_svm_multilabel_train_recall,
    h1n1_svm_multilabel_train_precision,
    h1n1_svm_multilabel_train_f1,
    h1n1_svm_multilabel_train_roc,
) = (
    metric(y_both_vacc_train[:, 0], svm_multilabel_trainpreds[:, 0])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_train[:, 0], svm_multilabel_trainpreds[:, 0])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_train[:, 0], svm_multilabel_trainpreds[:, 0])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_train[:, 0], svm_multilabel_trainpreds[:, 0])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_train[:, 0], svm_multilabel_trainpreds[:, 0])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_train[:, 0], svm_multilabel_trainpreds[:, 0])))

In [None]:
# SVM, no resampling: train-set metrics for the seasonal vaccine (target column 1).
(
    seasonal_svm_multilabel_train_acc,
    seasonal_svm_multilabel_train_recall,
    seasonal_svm_multilabel_train_precision,
    seasonal_svm_multilabel_train_f1,
    seasonal_svm_multilabel_train_roc,
) = (
    metric(y_both_vacc_train[:, 1], svm_multilabel_trainpreds[:, 1])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_train[:, 1], svm_multilabel_trainpreds[:, 1])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_train[:, 1], svm_multilabel_trainpreds[:, 1])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_train[:, 1], svm_multilabel_trainpreds[:, 1])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_train[:, 1], svm_multilabel_trainpreds[:, 1])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_train[:, 1], svm_multilabel_trainpreds[:, 1])))

#### Test data--no resampling

In [None]:
# Logreg, no resampling: test-set metrics for the H1N1 vaccine (target column 0).
(
    h1n1_logreg_multilabel_test_acc,
    h1n1_logreg_multilabel_test_recall,
    h1n1_logreg_multilabel_test_precision,
    h1n1_logreg_multilabel_test_f1,
    h1n1_logreg_multilabel_test_roc,
) = (
    metric(y_both_vacc_test[:, 0], logreg_multilabel_testpreds[:, 0])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_test[:, 0], logreg_multilabel_testpreds[:, 0])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_test[:, 0], logreg_multilabel_testpreds[:, 0])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_test[:, 0], logreg_multilabel_testpreds[:, 0])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_test[:, 0], logreg_multilabel_testpreds[:, 0])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_test[:, 0], logreg_multilabel_testpreds[:, 0])))

In [None]:
# Logreg, no resampling: test-set metrics for the seasonal vaccine (target column 1).
(
    seasonal_logreg_multilabel_test_acc,
    seasonal_logreg_multilabel_test_recall,
    seasonal_logreg_multilabel_test_precision,
    seasonal_logreg_multilabel_test_f1,
    seasonal_logreg_multilabel_test_roc,
) = (
    metric(y_both_vacc_test[:, 1], logreg_multilabel_testpreds[:, 1])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_test[:, 1], logreg_multilabel_testpreds[:, 1])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_test[:, 1], logreg_multilabel_testpreds[:, 1])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_test[:, 1], logreg_multilabel_testpreds[:, 1])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_test[:, 1], logreg_multilabel_testpreds[:, 1])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_test[:, 1], logreg_multilabel_testpreds[:, 1])))

In [None]:
# KNN, no resampling: test-set metrics for the H1N1 vaccine (target column 0).
(
    h1n1_knn_multilabel_test_acc,
    h1n1_knn_multilabel_test_recall,
    h1n1_knn_multilabel_test_precision,
    h1n1_knn_multilabel_test_f1,
    h1n1_knn_multilabel_test_roc,
) = (
    metric(y_both_vacc_test[:, 0], knn_multilabel_testpreds[:, 0])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_test[:, 0], knn_multilabel_testpreds[:, 0])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_test[:, 0], knn_multilabel_testpreds[:, 0])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_test[:, 0], knn_multilabel_testpreds[:, 0])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_test[:, 0], knn_multilabel_testpreds[:, 0])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_test[:, 0], knn_multilabel_testpreds[:, 0])))

In [None]:
# KNN, no resampling: test-set metrics for the seasonal vaccine (target column 1).
(
    seasonal_knn_multilabel_test_acc,
    seasonal_knn_multilabel_test_recall,
    seasonal_knn_multilabel_test_precision,
    seasonal_knn_multilabel_test_f1,
    seasonal_knn_multilabel_test_roc,
) = (
    metric(y_both_vacc_test[:, 1], knn_multilabel_testpreds[:, 1])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_test[:, 1], knn_multilabel_testpreds[:, 1])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_test[:, 1], knn_multilabel_testpreds[:, 1])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_test[:, 1], knn_multilabel_testpreds[:, 1])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_test[:, 1], knn_multilabel_testpreds[:, 1])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_test[:, 1], knn_multilabel_testpreds[:, 1])))

In [None]:
# Random Forest, no resampling: test-set metrics for the H1N1 vaccine (target column 0).
# NOTE: these results were observed to be ~0.01 off from the previous notebook.
# Random forests are randomised, so scores vary between runs unless the
# estimator is given a fixed random_state; the train/test split is another
# candidate (export the split as CSV to rule it out).
(
    h1n1_rand_forst_multilabel_test_acc,
    h1n1_rand_forst_multilabel_test_recall,
    h1n1_rand_forst_multilabel_test_precision,
    h1n1_rand_forst_multilabel_test_f1,
    h1n1_rand_forst_multilabel_test_roc,
) = (
    metric(y_both_vacc_test[:, 0], rand_forst_multilabel_testpreds[:, 0])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_test[:, 0], rand_forst_multilabel_testpreds[:, 0])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_test[:, 0], rand_forst_multilabel_testpreds[:, 0])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_test[:, 0], rand_forst_multilabel_testpreds[:, 0])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_test[:, 0], rand_forst_multilabel_testpreds[:, 0])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_test[:, 0], rand_forst_multilabel_testpreds[:, 0])))

In [None]:
# Random Forest, no resampling: test-set metrics for the seasonal vaccine (target column 1).
# NOTE: these results were observed to be ~0.01 off from the previous notebook.
# Random forests are randomised, so scores vary between runs unless the
# estimator is given a fixed random_state; the train/test split is another
# candidate (export the split as CSV to rule it out).
(
    seasonal_rand_forst_multilabel_test_acc,
    seasonal_rand_forst_multilabel_test_recall,
    seasonal_rand_forst_multilabel_test_precision,
    seasonal_rand_forst_multilabel_test_f1,
    seasonal_rand_forst_multilabel_test_roc,
) = (
    metric(y_both_vacc_test[:, 1], rand_forst_multilabel_testpreds[:, 1])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_test[:, 1], rand_forst_multilabel_testpreds[:, 1])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_test[:, 1], rand_forst_multilabel_testpreds[:, 1])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_test[:, 1], rand_forst_multilabel_testpreds[:, 1])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_test[:, 1], rand_forst_multilabel_testpreds[:, 1])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_test[:, 1], rand_forst_multilabel_testpreds[:, 1])))

In [None]:
# SVM, no resampling: test-set metrics for the H1N1 vaccine (target column 0).
(
    h1n1_svm_multilabel_test_acc,
    h1n1_svm_multilabel_test_recall,
    h1n1_svm_multilabel_test_precision,
    h1n1_svm_multilabel_test_f1,
    h1n1_svm_multilabel_test_roc,
) = (
    metric(y_both_vacc_test[:, 0], svm_multilabel_testpreds[:, 0])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_test[:, 0], svm_multilabel_testpreds[:, 0])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_test[:, 0], svm_multilabel_testpreds[:, 0])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_test[:, 0], svm_multilabel_testpreds[:, 0])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_test[:, 0], svm_multilabel_testpreds[:, 0])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_test[:, 0], svm_multilabel_testpreds[:, 0])))

In [None]:
# SVM, no resampling: test-set metrics for the seasonal vaccine (target column 1).
(
    seasonal_svm_multilabel_test_acc,
    seasonal_svm_multilabel_test_recall,
    seasonal_svm_multilabel_test_precision,
    seasonal_svm_multilabel_test_f1,
    seasonal_svm_multilabel_test_roc,
) = (
    metric(y_both_vacc_test[:, 1], svm_multilabel_testpreds[:, 1])
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#print("Accuracy: {:.2f}".format(accuracy_score(y_both_vacc_test[:, 1], svm_multilabel_testpreds[:, 1])))
#print("Recall: {:.2f}".format(recall_score(y_both_vacc_test[:, 1], svm_multilabel_testpreds[:, 1])))
#print("Precision: {:.2f}".format(precision_score(y_both_vacc_test[:, 1], svm_multilabel_testpreds[:, 1])))
#print("F1: {:.2f}".format(f1_score(y_both_vacc_test[:, 1], svm_multilabel_testpreds[:, 1])))
#print("ROC: {:.2f}".format(roc_auc_score(y_both_vacc_test[:, 1], svm_multilabel_testpreds[:, 1])))

In [None]:
# Convert the training targets to a NumPy array so the cells below can slice
# them positionally with [:, 0] / [:, 1].
# np.asarray is used instead of .to_numpy() so that re-running this cell is
# harmless: an ndarray has no .to_numpy() and the original call would raise
# AttributeError on the second execution.
y_both_vacc_train = np.asarray(y_both_vacc_train)

#### Train data--upsampling

In [None]:
# Logreg (upsampling)--train-data metrics for the H1N1 vaccine (target column 0)
h1n1_logreg_upsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 0], y_pred=logreg_upsamp_multilabel_trainpreds[:, 0])
h1n1_logreg_upsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 0], y_pred=logreg_upsamp_multilabel_trainpreds[:, 0])
h1n1_logreg_upsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 0], y_pred=logreg_upsamp_multilabel_trainpreds[:, 0])
h1n1_logreg_upsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 0], y_pred=logreg_upsamp_multilabel_trainpreds[:, 0])
h1n1_logreg_upsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 0], y_score=logreg_upsamp_multilabel_trainpreds[:, 0])

In [None]:
# Logreg (upsampling)--train-data metrics for the seasonal vaccine (target column 1)
seasonal_logreg_upsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 1], y_pred=logreg_upsamp_multilabel_trainpreds[:, 1])
seasonal_logreg_upsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 1], y_pred=logreg_upsamp_multilabel_trainpreds[:, 1])
seasonal_logreg_upsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 1], y_pred=logreg_upsamp_multilabel_trainpreds[:, 1])
seasonal_logreg_upsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 1], y_pred=logreg_upsamp_multilabel_trainpreds[:, 1])
seasonal_logreg_upsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 1], y_score=logreg_upsamp_multilabel_trainpreds[:, 1])

In [None]:
# KNN (upsampling)--train-data metrics for the H1N1 vaccine (target column 0)
h1n1_knn_upsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 0], y_pred=knn_upsamp_multilabel_trainpreds[:, 0])
h1n1_knn_upsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 0], y_pred=knn_upsamp_multilabel_trainpreds[:, 0])
h1n1_knn_upsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 0], y_pred=knn_upsamp_multilabel_trainpreds[:, 0])
h1n1_knn_upsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 0], y_pred=knn_upsamp_multilabel_trainpreds[:, 0])
h1n1_knn_upsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 0], y_score=knn_upsamp_multilabel_trainpreds[:, 0])

In [None]:
# KNN (upsampling)--train-data metrics for the seasonal vaccine (target column 1)
seasonal_knn_upsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 1], y_pred=knn_upsamp_multilabel_trainpreds[:, 1])
seasonal_knn_upsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 1], y_pred=knn_upsamp_multilabel_trainpreds[:, 1])
seasonal_knn_upsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 1], y_pred=knn_upsamp_multilabel_trainpreds[:, 1])
seasonal_knn_upsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 1], y_pred=knn_upsamp_multilabel_trainpreds[:, 1])
seasonal_knn_upsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 1], y_score=knn_upsamp_multilabel_trainpreds[:, 1])

In [None]:
# Random forest (upsampling)--train-data metrics for the H1N1 vaccine (target column 0)
h1n1_rand_forst_upsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 0], y_pred=rand_forst_upsamp_multilabel_trainpreds[:, 0])
h1n1_rand_forst_upsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 0], y_pred=rand_forst_upsamp_multilabel_trainpreds[:, 0])
h1n1_rand_forst_upsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 0], y_pred=rand_forst_upsamp_multilabel_trainpreds[:, 0])
h1n1_rand_forst_upsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 0], y_pred=rand_forst_upsamp_multilabel_trainpreds[:, 0])
h1n1_rand_forst_upsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 0], y_score=rand_forst_upsamp_multilabel_trainpreds[:, 0])

In [None]:
# Random forest (upsampling)--train-data metrics for the seasonal vaccine (target column 1)
seasonal_rand_forst_upsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 1], y_pred=rand_forst_upsamp_multilabel_trainpreds[:, 1])
seasonal_rand_forst_upsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 1], y_pred=rand_forst_upsamp_multilabel_trainpreds[:, 1])
seasonal_rand_forst_upsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 1], y_pred=rand_forst_upsamp_multilabel_trainpreds[:, 1])
seasonal_rand_forst_upsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 1], y_pred=rand_forst_upsamp_multilabel_trainpreds[:, 1])
seasonal_rand_forst_upsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 1], y_score=rand_forst_upsamp_multilabel_trainpreds[:, 1])

In [None]:
# SVM (upsampling)--train-data metrics for the H1N1 vaccine (target column 0)
h1n1_svm_upsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 0], y_pred=svm_upsamp_multilabel_trainpreds[:, 0])
h1n1_svm_upsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 0], y_pred=svm_upsamp_multilabel_trainpreds[:, 0])
h1n1_svm_upsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 0], y_pred=svm_upsamp_multilabel_trainpreds[:, 0])
h1n1_svm_upsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 0], y_pred=svm_upsamp_multilabel_trainpreds[:, 0])
h1n1_svm_upsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 0], y_score=svm_upsamp_multilabel_trainpreds[:, 0])

In [None]:
# SVM (upsampling)--train-data metrics for the seasonal vaccine (target column 1)
seasonal_svm_upsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 1], y_pred=svm_upsamp_multilabel_trainpreds[:, 1])
seasonal_svm_upsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 1], y_pred=svm_upsamp_multilabel_trainpreds[:, 1])
seasonal_svm_upsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 1], y_pred=svm_upsamp_multilabel_trainpreds[:, 1])
seasonal_svm_upsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 1], y_pred=svm_upsamp_multilabel_trainpreds[:, 1])
seasonal_svm_upsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 1], y_score=svm_upsamp_multilabel_trainpreds[:, 1])

#### Test data--upsampling

In [None]:
# Logreg (upsampling)--test-data metrics for the H1N1 vaccine (target column 0)
h1n1_logreg_upsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 0], y_pred=logreg_upsamp_multilabel_testpreds[:, 0])
h1n1_logreg_upsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 0], y_pred=logreg_upsamp_multilabel_testpreds[:, 0])
h1n1_logreg_upsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 0], y_pred=logreg_upsamp_multilabel_testpreds[:, 0])
h1n1_logreg_upsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 0], y_pred=logreg_upsamp_multilabel_testpreds[:, 0])
h1n1_logreg_upsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 0], y_score=logreg_upsamp_multilabel_testpreds[:, 0])

In [None]:
# Logreg (upsampling)--test-data metrics for the seasonal vaccine (target column 1)
seasonal_logreg_upsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 1], y_pred=logreg_upsamp_multilabel_testpreds[:, 1])
seasonal_logreg_upsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 1], y_pred=logreg_upsamp_multilabel_testpreds[:, 1])
seasonal_logreg_upsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 1], y_pred=logreg_upsamp_multilabel_testpreds[:, 1])
seasonal_logreg_upsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 1], y_pred=logreg_upsamp_multilabel_testpreds[:, 1])
seasonal_logreg_upsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 1], y_score=logreg_upsamp_multilabel_testpreds[:, 1])

In [None]:
# KNN (upsampling)--test-data metrics for the H1N1 vaccine (target column 0)
h1n1_knn_upsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 0], y_pred=knn_upsamp_multilabel_testpreds[:, 0])
h1n1_knn_upsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 0], y_pred=knn_upsamp_multilabel_testpreds[:, 0])
h1n1_knn_upsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 0], y_pred=knn_upsamp_multilabel_testpreds[:, 0])
h1n1_knn_upsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 0], y_pred=knn_upsamp_multilabel_testpreds[:, 0])
h1n1_knn_upsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 0], y_score=knn_upsamp_multilabel_testpreds[:, 0])

In [None]:
# KNN (upsampling)--test-data metrics for the seasonal vaccine (target column 1)
seasonal_knn_upsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 1], y_pred=knn_upsamp_multilabel_testpreds[:, 1])
seasonal_knn_upsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 1], y_pred=knn_upsamp_multilabel_testpreds[:, 1])
seasonal_knn_upsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 1], y_pred=knn_upsamp_multilabel_testpreds[:, 1])
seasonal_knn_upsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 1], y_pred=knn_upsamp_multilabel_testpreds[:, 1])
seasonal_knn_upsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 1], y_score=knn_upsamp_multilabel_testpreds[:, 1])

In [None]:
# Random forest (upsampling)--test-data metrics for the H1N1 vaccine (target column 0)
h1n1_rand_forst_upsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 0], y_pred=rand_forst_upsamp_multilabel_testpreds[:, 0])
h1n1_rand_forst_upsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 0], y_pred=rand_forst_upsamp_multilabel_testpreds[:, 0])
h1n1_rand_forst_upsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 0], y_pred=rand_forst_upsamp_multilabel_testpreds[:, 0])
h1n1_rand_forst_upsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 0], y_pred=rand_forst_upsamp_multilabel_testpreds[:, 0])
h1n1_rand_forst_upsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 0], y_score=rand_forst_upsamp_multilabel_testpreds[:, 0])

In [None]:
# Random forest (upsampling)--test-data metrics for the seasonal vaccine (target column 1)
seasonal_rand_forst_upsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 1], y_pred=rand_forst_upsamp_multilabel_testpreds[:, 1])
seasonal_rand_forst_upsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 1], y_pred=rand_forst_upsamp_multilabel_testpreds[:, 1])
seasonal_rand_forst_upsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 1], y_pred=rand_forst_upsamp_multilabel_testpreds[:, 1])
seasonal_rand_forst_upsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 1], y_pred=rand_forst_upsamp_multilabel_testpreds[:, 1])
seasonal_rand_forst_upsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 1], y_score=rand_forst_upsamp_multilabel_testpreds[:, 1])

In [None]:
# SVM (upsampling)--test-data metrics for the H1N1 vaccine (target column 0)
h1n1_svm_upsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 0], y_pred=svm_upsamp_multilabel_testpreds[:, 0])
h1n1_svm_upsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 0], y_pred=svm_upsamp_multilabel_testpreds[:, 0])
h1n1_svm_upsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 0], y_pred=svm_upsamp_multilabel_testpreds[:, 0])
h1n1_svm_upsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 0], y_pred=svm_upsamp_multilabel_testpreds[:, 0])
h1n1_svm_upsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 0], y_score=svm_upsamp_multilabel_testpreds[:, 0])

In [None]:
# SVM (upsampling)--test-data metrics for the seasonal vaccine (target column 1)
seasonal_svm_upsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 1], y_pred=svm_upsamp_multilabel_testpreds[:, 1])
seasonal_svm_upsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 1], y_pred=svm_upsamp_multilabel_testpreds[:, 1])
seasonal_svm_upsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 1], y_pred=svm_upsamp_multilabel_testpreds[:, 1])
seasonal_svm_upsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 1], y_pred=svm_upsamp_multilabel_testpreds[:, 1])
seasonal_svm_upsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 1], y_score=svm_upsamp_multilabel_testpreds[:, 1])

#### Train data--downsampling

In [None]:
# Logreg (downsampling)--train-data metrics for the H1N1 vaccine (target column 0)
h1n1_logreg_downsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 0], y_pred=logreg_downsamp_multilabel_trainpreds[:, 0])
h1n1_logreg_downsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 0], y_pred=logreg_downsamp_multilabel_trainpreds[:, 0])
h1n1_logreg_downsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 0], y_pred=logreg_downsamp_multilabel_trainpreds[:, 0])
h1n1_logreg_downsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 0], y_pred=logreg_downsamp_multilabel_trainpreds[:, 0])
h1n1_logreg_downsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 0], y_score=logreg_downsamp_multilabel_trainpreds[:, 0])

In [None]:
# Logreg (downsampling)--train-data metrics for the seasonal vaccine (target column 1)
seasonal_logreg_downsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 1], y_pred=logreg_downsamp_multilabel_trainpreds[:, 1])
seasonal_logreg_downsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 1], y_pred=logreg_downsamp_multilabel_trainpreds[:, 1])
seasonal_logreg_downsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 1], y_pred=logreg_downsamp_multilabel_trainpreds[:, 1])
seasonal_logreg_downsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 1], y_pred=logreg_downsamp_multilabel_trainpreds[:, 1])
seasonal_logreg_downsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 1], y_score=logreg_downsamp_multilabel_trainpreds[:, 1])

In [None]:
# KNN (downsampling)--train-data metrics for the H1N1 vaccine (target column 0)
h1n1_knn_downsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 0], y_pred=knn_downsamp_multilabel_trainpreds[:, 0])
h1n1_knn_downsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 0], y_pred=knn_downsamp_multilabel_trainpreds[:, 0])
h1n1_knn_downsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 0], y_pred=knn_downsamp_multilabel_trainpreds[:, 0])
h1n1_knn_downsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 0], y_pred=knn_downsamp_multilabel_trainpreds[:, 0])
h1n1_knn_downsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 0], y_score=knn_downsamp_multilabel_trainpreds[:, 0])

In [None]:
# KNN (downsampling)--train-data metrics for the seasonal vaccine (target column 1)
seasonal_knn_downsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 1], y_pred=knn_downsamp_multilabel_trainpreds[:, 1])
seasonal_knn_downsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 1], y_pred=knn_downsamp_multilabel_trainpreds[:, 1])
seasonal_knn_downsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 1], y_pred=knn_downsamp_multilabel_trainpreds[:, 1])
seasonal_knn_downsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 1], y_pred=knn_downsamp_multilabel_trainpreds[:, 1])
seasonal_knn_downsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 1], y_score=knn_downsamp_multilabel_trainpreds[:, 1])

In [None]:
# Random forest (downsampling)--train-data metrics for the H1N1 vaccine (target column 0)
h1n1_rand_forst_downsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 0], y_pred=rand_forst_downsamp_multilabel_trainpreds[:, 0])
h1n1_rand_forst_downsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 0], y_pred=rand_forst_downsamp_multilabel_trainpreds[:, 0])
h1n1_rand_forst_downsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 0], y_pred=rand_forst_downsamp_multilabel_trainpreds[:, 0])
h1n1_rand_forst_downsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 0], y_pred=rand_forst_downsamp_multilabel_trainpreds[:, 0])
h1n1_rand_forst_downsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 0], y_score=rand_forst_downsamp_multilabel_trainpreds[:, 0])

In [None]:
# Random forest (downsampling)--train-data metrics for the seasonal vaccine (target column 1)
seasonal_rand_forst_downsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 1], y_pred=rand_forst_downsamp_multilabel_trainpreds[:, 1])
seasonal_rand_forst_downsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 1], y_pred=rand_forst_downsamp_multilabel_trainpreds[:, 1])
seasonal_rand_forst_downsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 1], y_pred=rand_forst_downsamp_multilabel_trainpreds[:, 1])
seasonal_rand_forst_downsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 1], y_pred=rand_forst_downsamp_multilabel_trainpreds[:, 1])
seasonal_rand_forst_downsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 1], y_score=rand_forst_downsamp_multilabel_trainpreds[:, 1])

In [None]:
# SVM (downsampling)--train-data metrics for the H1N1 vaccine (target column 0)
h1n1_svm_downsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 0], y_pred=svm_downsamp_multilabel_trainpreds[:, 0])
h1n1_svm_downsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 0], y_pred=svm_downsamp_multilabel_trainpreds[:, 0])
h1n1_svm_downsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 0], y_pred=svm_downsamp_multilabel_trainpreds[:, 0])
h1n1_svm_downsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 0], y_pred=svm_downsamp_multilabel_trainpreds[:, 0])
h1n1_svm_downsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 0], y_score=svm_downsamp_multilabel_trainpreds[:, 0])

In [None]:
# SVM (downsampling)--train-data metrics for the seasonal vaccine (target column 1)
seasonal_svm_downsamp_multilabel_train_acc = accuracy_score(y_true=y_both_vacc_train[:, 1], y_pred=svm_downsamp_multilabel_trainpreds[:, 1])
seasonal_svm_downsamp_multilabel_train_recall = recall_score(y_true=y_both_vacc_train[:, 1], y_pred=svm_downsamp_multilabel_trainpreds[:, 1])
seasonal_svm_downsamp_multilabel_train_precision = precision_score(y_true=y_both_vacc_train[:, 1], y_pred=svm_downsamp_multilabel_trainpreds[:, 1])
seasonal_svm_downsamp_multilabel_train_f1 = f1_score(y_true=y_both_vacc_train[:, 1], y_pred=svm_downsamp_multilabel_trainpreds[:, 1])
seasonal_svm_downsamp_multilabel_train_roc = roc_auc_score(y_true=y_both_vacc_train[:, 1], y_score=svm_downsamp_multilabel_trainpreds[:, 1])

#### Test data--downsampling

In [None]:
# Logreg (downsampling)--test-data metrics for the H1N1 vaccine (target column 0)
h1n1_logreg_downsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 0], y_pred=logreg_downsamp_multilabel_testpreds[:, 0])
h1n1_logreg_downsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 0], y_pred=logreg_downsamp_multilabel_testpreds[:, 0])
h1n1_logreg_downsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 0], y_pred=logreg_downsamp_multilabel_testpreds[:, 0])
h1n1_logreg_downsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 0], y_pred=logreg_downsamp_multilabel_testpreds[:, 0])
h1n1_logreg_downsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 0], y_score=logreg_downsamp_multilabel_testpreds[:, 0])

In [None]:
# Logreg (downsampling)--test-data metrics for the seasonal vaccine (target column 1)
seasonal_logreg_downsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 1], y_pred=logreg_downsamp_multilabel_testpreds[:, 1])
seasonal_logreg_downsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 1], y_pred=logreg_downsamp_multilabel_testpreds[:, 1])
seasonal_logreg_downsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 1], y_pred=logreg_downsamp_multilabel_testpreds[:, 1])
seasonal_logreg_downsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 1], y_pred=logreg_downsamp_multilabel_testpreds[:, 1])
seasonal_logreg_downsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 1], y_score=logreg_downsamp_multilabel_testpreds[:, 1])

In [None]:
# KNN (downsampling)--test-data metrics for the H1N1 vaccine (target column 0)
h1n1_knn_downsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 0], y_pred=knn_downsamp_multilabel_testpreds[:, 0])
h1n1_knn_downsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 0], y_pred=knn_downsamp_multilabel_testpreds[:, 0])
h1n1_knn_downsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 0], y_pred=knn_downsamp_multilabel_testpreds[:, 0])
h1n1_knn_downsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 0], y_pred=knn_downsamp_multilabel_testpreds[:, 0])
h1n1_knn_downsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 0], y_score=knn_downsamp_multilabel_testpreds[:, 0])

In [None]:
# KNN (downsampling)--test-data metrics for the seasonal vaccine (target column 1)
seasonal_knn_downsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 1], y_pred=knn_downsamp_multilabel_testpreds[:, 1])
seasonal_knn_downsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 1], y_pred=knn_downsamp_multilabel_testpreds[:, 1])
seasonal_knn_downsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 1], y_pred=knn_downsamp_multilabel_testpreds[:, 1])
seasonal_knn_downsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 1], y_pred=knn_downsamp_multilabel_testpreds[:, 1])
seasonal_knn_downsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 1], y_score=knn_downsamp_multilabel_testpreds[:, 1])

In [None]:
# Random forest (downsampling)--test-data metrics for the H1N1 vaccine (target column 0)
h1n1_rand_forst_downsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 0], y_pred=rand_forst_downsamp_multilabel_testpreds[:, 0])
h1n1_rand_forst_downsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 0], y_pred=rand_forst_downsamp_multilabel_testpreds[:, 0])
h1n1_rand_forst_downsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 0], y_pred=rand_forst_downsamp_multilabel_testpreds[:, 0])
h1n1_rand_forst_downsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 0], y_pred=rand_forst_downsamp_multilabel_testpreds[:, 0])
h1n1_rand_forst_downsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 0], y_score=rand_forst_downsamp_multilabel_testpreds[:, 0])

In [None]:
# Random forest (downsampling)--test-data metrics for the seasonal vaccine (target column 1)
seasonal_rand_forst_downsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 1], y_pred=rand_forst_downsamp_multilabel_testpreds[:, 1])
seasonal_rand_forst_downsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 1], y_pred=rand_forst_downsamp_multilabel_testpreds[:, 1])
seasonal_rand_forst_downsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 1], y_pred=rand_forst_downsamp_multilabel_testpreds[:, 1])
seasonal_rand_forst_downsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 1], y_pred=rand_forst_downsamp_multilabel_testpreds[:, 1])
seasonal_rand_forst_downsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 1], y_score=rand_forst_downsamp_multilabel_testpreds[:, 1])

In [None]:
# SVM (downsampling)--test-data metrics for the H1N1 vaccine (target column 0)
h1n1_svm_downsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 0], y_pred=svm_downsamp_multilabel_testpreds[:, 0])
h1n1_svm_downsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 0], y_pred=svm_downsamp_multilabel_testpreds[:, 0])
h1n1_svm_downsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 0], y_pred=svm_downsamp_multilabel_testpreds[:, 0])
h1n1_svm_downsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 0], y_pred=svm_downsamp_multilabel_testpreds[:, 0])
h1n1_svm_downsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 0], y_score=svm_downsamp_multilabel_testpreds[:, 0])

In [None]:
# SVM (downsampling)--test-data metrics for the seasonal vaccine (target column 1)
seasonal_svm_downsamp_multilabel_test_acc = accuracy_score(y_true=y_both_vacc_test[:, 1], y_pred=svm_downsamp_multilabel_testpreds[:, 1])
seasonal_svm_downsamp_multilabel_test_recall = recall_score(y_true=y_both_vacc_test[:, 1], y_pred=svm_downsamp_multilabel_testpreds[:, 1])
seasonal_svm_downsamp_multilabel_test_precision = precision_score(y_true=y_both_vacc_test[:, 1], y_pred=svm_downsamp_multilabel_testpreds[:, 1])
seasonal_svm_downsamp_multilabel_test_f1 = f1_score(y_true=y_both_vacc_test[:, 1], y_pred=svm_downsamp_multilabel_testpreds[:, 1])
seasonal_svm_downsamp_multilabel_test_roc = roc_auc_score(y_true=y_both_vacc_test[:, 1], y_score=svm_downsamp_multilabel_testpreds[:, 1])

## Tracking the model with MLFlow

### H1N1 vaccine output

#### Logistic regression

*No resampling*

In [None]:
# Configure the MLflow tracking URI and experiment, then open a run for the
# logistic-regression H1N1 results. The run is closed by the logging cell below.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)  # change this for each experiment
name = "logreg_multilabel_h1n1"  # run name; name it after the model used
mlflow.start_run(run_name=name)
run = mlflow.active_run()
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Parameters record everything needed to re-run the experiment (engineered
# features, model hyperparameters, ...). Adjust as needed for each run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Record parameters, the feature tag and the train/test metrics on the run
# opened in the setup cell, then close that run.
mlflow.log_params(params)
# Tag describes the data used: whether a vaccine column is among the features.
mlflow.set_tag(key="Vaccines in features", value="None")
mlflow.log_metric(key="train -ROC", value=h1n1_logreg_multilabel_train_roc)
mlflow.log_metric(key="test -ROC", value=h1n1_logreg_multilabel_test_roc)
mlflow.log_metric(key="train -accuracy", value=h1n1_logreg_multilabel_train_acc)
mlflow.log_metric(key="test -accuracy", value=h1n1_logreg_multilabel_test_acc)
mlflow.log_metric(key="train -recall", value=h1n1_logreg_multilabel_train_recall)
mlflow.log_metric(key="test -recall", value=h1n1_logreg_multilabel_test_recall)
mlflow.log_metric(key="train -precision", value=h1n1_logreg_multilabel_train_precision)
mlflow.log_metric(key="test -precision", value=h1n1_logreg_multilabel_test_precision)
mlflow.log_metric(key="train -f1", value=h1n1_logreg_multilabel_train_f1)
mlflow.log_metric(key="test -f1", value=h1n1_logreg_multilabel_test_f1)
# Artifact / model logging is currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Log the logistic-regression H1N1 results (upsampling) as one MLflow run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)  # change this for each experiment
name = 'logreg_multilabel_h1n1'  # run name; name it after the model used

# Parameters record everything needed to re-run the experiment (engineered
# features, model hyperparameters, ...). Adjust as needed for each run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling H1N1",
    "Hyperparameters": "None",
}

# The context manager closes the run even if a logging call raises, so a
# failed cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    # Tag describes the data used: whether a vaccine column is among the features.
    mlflow.set_tag("Vaccines in features", "None")
    mlflow.log_metric("train -ROC", h1n1_logreg_upsamp_multilabel_train_roc)
    mlflow.log_metric("test -ROC", h1n1_logreg_upsamp_multilabel_test_roc)
    mlflow.log_metric("train -accuracy", h1n1_logreg_upsamp_multilabel_train_acc)
    mlflow.log_metric("test -accuracy", h1n1_logreg_upsamp_multilabel_test_acc)
    mlflow.log_metric("train -recall", h1n1_logreg_upsamp_multilabel_train_recall)
    mlflow.log_metric("test -recall", h1n1_logreg_upsamp_multilabel_test_recall)
    mlflow.log_metric("train -precision", h1n1_logreg_upsamp_multilabel_train_precision)
    mlflow.log_metric("test -precision", h1n1_logreg_upsamp_multilabel_test_precision)
    mlflow.log_metric("train -f1", h1n1_logreg_upsamp_multilabel_train_f1)
    mlflow.log_metric("test -f1", h1n1_logreg_upsamp_multilabel_test_f1)
    # Artifact / model logging is currently disabled:
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

*Downsampling*

In [None]:
# Log the logistic-regression H1N1 results (downsampling) as one MLflow run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)  # change this for each experiment
name = 'logreg_multilabel_h1n1'  # run name; name it after the model used

# Parameters record everything needed to re-run the experiment (engineered
# features, model hyperparameters, ...). Adjust as needed for each run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling H1N1",
    "Hyperparameters": "None",
}

# The context manager closes the run even if a logging call raises, so a
# failed cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    # Tag describes the data used: whether a vaccine column is among the features.
    mlflow.set_tag("Vaccines in features", "None")
    mlflow.log_metric("train -ROC", h1n1_logreg_downsamp_multilabel_train_roc)
    mlflow.log_metric("test -ROC", h1n1_logreg_downsamp_multilabel_test_roc)
    mlflow.log_metric("train -accuracy", h1n1_logreg_downsamp_multilabel_train_acc)
    mlflow.log_metric("test -accuracy", h1n1_logreg_downsamp_multilabel_test_acc)
    mlflow.log_metric("train -recall", h1n1_logreg_downsamp_multilabel_train_recall)
    mlflow.log_metric("test -recall", h1n1_logreg_downsamp_multilabel_test_recall)
    mlflow.log_metric("train -precision", h1n1_logreg_downsamp_multilabel_train_precision)
    mlflow.log_metric("test -precision", h1n1_logreg_downsamp_multilabel_test_precision)
    mlflow.log_metric("train -f1", h1n1_logreg_downsamp_multilabel_train_f1)
    mlflow.log_metric("test -f1", h1n1_logreg_downsamp_multilabel_test_f1)
    # Artifact / model logging is currently disabled:
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

#### KNN

*No resampling*

In [None]:
# Configure the MLflow tracking URI and experiment, then open a run for the
# KNN H1N1 results. The run is closed by the logging cell further below.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)
name = "knn_multilabel_h1n1"  # run name; name it after the model used
mlflow.start_run(run_name=name)
run = mlflow.active_run()

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Parameters record everything needed to re-run the experiment (engineered
# features, model hyperparameters, ...). Adjust as needed for each run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Record parameters, the feature tag and the train/test metrics on the run
# opened in the setup cell, then close that run.
mlflow.log_params(params)
# Tag describes the data used: whether a vaccine column is among the features.
mlflow.set_tag(key="Vaccines in features", value="None")
mlflow.log_metric(key="train -ROC", value=h1n1_knn_multilabel_train_roc)
mlflow.log_metric(key="test -ROC", value=h1n1_knn_multilabel_test_roc)
mlflow.log_metric(key="train -accuracy", value=h1n1_knn_multilabel_train_acc)
mlflow.log_metric(key="test -accuracy", value=h1n1_knn_multilabel_test_acc)
mlflow.log_metric(key="train -recall", value=h1n1_knn_multilabel_train_recall)
mlflow.log_metric(key="test -recall", value=h1n1_knn_multilabel_test_recall)
mlflow.log_metric(key="train -precision", value=h1n1_knn_multilabel_train_precision)
mlflow.log_metric(key="test -precision", value=h1n1_knn_multilabel_test_precision)
mlflow.log_metric(key="train -f1", value=h1n1_knn_multilabel_train_f1)
mlflow.log_metric(key="test -f1", value=h1n1_knn_multilabel_test_f1)
# Artifact / model logging is currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Log the KNN H1N1 results (upsampling) as one MLflow run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)  # change this for each experiment
name = 'knn_multilabel_h1n1'  # run name; name it after the model used

# Parameters record everything needed to re-run the experiment (engineered
# features, model hyperparameters, ...). Adjust as needed for each run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling H1N1",
    "Hyperparameters": "None",
}

# The context manager closes the run even if a logging call raises, so a
# failed cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    # Tag describes the data used: whether a vaccine column is among the features.
    mlflow.set_tag("Vaccines in features", "None")
    mlflow.log_metric("train -ROC", h1n1_knn_upsamp_multilabel_train_roc)
    mlflow.log_metric("test -ROC", h1n1_knn_upsamp_multilabel_test_roc)
    mlflow.log_metric("train -accuracy", h1n1_knn_upsamp_multilabel_train_acc)
    mlflow.log_metric("test -accuracy", h1n1_knn_upsamp_multilabel_test_acc)
    mlflow.log_metric("train -recall", h1n1_knn_upsamp_multilabel_train_recall)
    mlflow.log_metric("test -recall", h1n1_knn_upsamp_multilabel_test_recall)
    mlflow.log_metric("train -precision", h1n1_knn_upsamp_multilabel_train_precision)
    mlflow.log_metric("test -precision", h1n1_knn_upsamp_multilabel_test_precision)
    mlflow.log_metric("train -f1", h1n1_knn_upsamp_multilabel_train_f1)
    mlflow.log_metric("test -f1", h1n1_knn_upsamp_multilabel_test_f1)
    # Artifact / model logging is currently disabled:
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

*Downsampling*

In [None]:
# Log the KNN H1N1 results (downsampling) as one MLflow run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)  # change this for each experiment
name = 'knn_multilabel_h1n1'  # run name; name it after the model used

# Parameters record everything needed to re-run the experiment (engineered
# features, model hyperparameters, ...). Adjust as needed for each run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling H1N1",
    "Hyperparameters": "None",
}

# The context manager closes the run even if a logging call raises, so a
# failed cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    # Tag describes the data used: whether a vaccine column is among the features.
    mlflow.set_tag("Vaccines in features", "None")
    mlflow.log_metric("train -ROC", h1n1_knn_downsamp_multilabel_train_roc)
    mlflow.log_metric("test -ROC", h1n1_knn_downsamp_multilabel_test_roc)
    mlflow.log_metric("train -accuracy", h1n1_knn_downsamp_multilabel_train_acc)
    mlflow.log_metric("test -accuracy", h1n1_knn_downsamp_multilabel_test_acc)
    mlflow.log_metric("train -recall", h1n1_knn_downsamp_multilabel_train_recall)
    mlflow.log_metric("test -recall", h1n1_knn_downsamp_multilabel_test_recall)
    mlflow.log_metric("train -precision", h1n1_knn_downsamp_multilabel_train_precision)
    mlflow.log_metric("test -precision", h1n1_knn_downsamp_multilabel_test_precision)
    mlflow.log_metric("train -f1", h1n1_knn_downsamp_multilabel_train_f1)
    mlflow.log_metric("test -f1", h1n1_knn_downsamp_multilabel_test_f1)
    # Artifact / model logging is currently disabled:
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

#### Random forest

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the multilabel experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)

# Run name: model used, output type, vaccine.
name = 'rand_forst_multilabel_h1n1'

# start_run() returns the run it has just made active; the run stays open
# until mlflow.end_run() is called in a later cell.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run description logged as MLflow parameters: everything needed to re-run the
# experiment (engineered features, model hyperparameters, ...). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Record the run description, then the train/test scores, and close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")  # tag = data used for this run

# (metric name, train score, test score)
run_metrics = [
    ("ROC", h1n1_rand_forst_multilabel_train_roc, h1n1_rand_forst_multilabel_test_roc),
    ("accuracy", h1n1_rand_forst_multilabel_train_acc, h1n1_rand_forst_multilabel_test_acc),
    ("recall", h1n1_rand_forst_multilabel_train_recall, h1n1_rand_forst_multilabel_test_recall),
    ("precision", h1n1_rand_forst_multilabel_train_precision, h1n1_rand_forst_multilabel_test_precision),
    ("f1", h1n1_rand_forst_multilabel_train_f1, h1n1_rand_forst_multilabel_test_f1),
]
for metric_name, train_value, test_value in run_metrics:
    mlflow.log_metric("train -" + metric_name, train_value)
    mlflow.log_metric("test -" + metric_name, test_value)

#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Log the random forest multilabel run (H1N1 output, upsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'rand_forst_multilabel_h1n1' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", h1n1_rand_forst_upsamp_multilabel_train_roc, h1n1_rand_forst_upsamp_multilabel_test_roc),
    ("accuracy", h1n1_rand_forst_upsamp_multilabel_train_acc, h1n1_rand_forst_upsamp_multilabel_test_acc),
    ("recall", h1n1_rand_forst_upsamp_multilabel_train_recall, h1n1_rand_forst_upsamp_multilabel_test_recall),
    ("precision", h1n1_rand_forst_upsamp_multilabel_train_precision, h1n1_rand_forst_upsamp_multilabel_test_precision),
    ("f1", h1n1_rand_forst_upsamp_multilabel_train_f1, h1n1_rand_forst_upsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

*Downsampling*

In [None]:
# Log the random forest multilabel run (H1N1 output, downsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'rand_forst_multilabel_h1n1' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", h1n1_rand_forst_downsamp_multilabel_train_roc, h1n1_rand_forst_downsamp_multilabel_test_roc),
    ("accuracy", h1n1_rand_forst_downsamp_multilabel_train_acc, h1n1_rand_forst_downsamp_multilabel_test_acc),
    ("recall", h1n1_rand_forst_downsamp_multilabel_train_recall, h1n1_rand_forst_downsamp_multilabel_test_recall),
    ("precision", h1n1_rand_forst_downsamp_multilabel_train_precision, h1n1_rand_forst_downsamp_multilabel_test_precision),
    ("f1", h1n1_rand_forst_downsamp_multilabel_train_f1, h1n1_rand_forst_downsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

#### SVM

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the multilabel experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)

# Run name: model used, output type, vaccine.
name = 'svm_multilabel_h1n1'

# start_run() returns the run it has just made active; the run stays open
# until mlflow.end_run() is called in a later cell.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run description logged as MLflow parameters: everything needed to re-run the
# experiment (engineered features, model hyperparameters, ...). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Record the run description, then the train/test scores, and close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")  # tag = data used for this run

# (metric name, train score, test score)
run_metrics = [
    ("ROC", h1n1_svm_multilabel_train_roc, h1n1_svm_multilabel_test_roc),
    ("accuracy", h1n1_svm_multilabel_train_acc, h1n1_svm_multilabel_test_acc),
    ("recall", h1n1_svm_multilabel_train_recall, h1n1_svm_multilabel_test_recall),
    ("precision", h1n1_svm_multilabel_train_precision, h1n1_svm_multilabel_test_precision),
    ("f1", h1n1_svm_multilabel_train_f1, h1n1_svm_multilabel_test_f1),
]
for metric_name, train_value, test_value in run_metrics:
    mlflow.log_metric("train -" + metric_name, train_value)
    mlflow.log_metric("test -" + metric_name, test_value)

#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Log the SVM multilabel run (H1N1 output, upsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'svm_multilabel_h1n1' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", h1n1_svm_upsamp_multilabel_train_roc, h1n1_svm_upsamp_multilabel_test_roc),
    ("accuracy", h1n1_svm_upsamp_multilabel_train_acc, h1n1_svm_upsamp_multilabel_test_acc),
    ("recall", h1n1_svm_upsamp_multilabel_train_recall, h1n1_svm_upsamp_multilabel_test_recall),
    ("precision", h1n1_svm_upsamp_multilabel_train_precision, h1n1_svm_upsamp_multilabel_test_precision),
    ("f1", h1n1_svm_upsamp_multilabel_train_f1, h1n1_svm_upsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

*Downsampling*

In [None]:
# Log the SVM multilabel run (H1N1 output, downsampled training data) to MLflow.
# NOTE(review): this cell sits in the SVM / Downsampling section but used the
# run name 'logreg_multilabel_h1n1' and the h1n1_logreg_downsamp_* scores. It
# now uses the SVM names, assuming h1n1_svm_downsamp_multilabel_* are computed
# earlier in the notebook like the other SVM scores - confirm before running.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'svm_multilabel_h1n1' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", h1n1_svm_downsamp_multilabel_train_roc, h1n1_svm_downsamp_multilabel_test_roc),
    ("accuracy", h1n1_svm_downsamp_multilabel_train_acc, h1n1_svm_downsamp_multilabel_test_acc),
    ("recall", h1n1_svm_downsamp_multilabel_train_recall, h1n1_svm_downsamp_multilabel_test_recall),
    ("precision", h1n1_svm_downsamp_multilabel_train_precision, h1n1_svm_downsamp_multilabel_test_precision),
    ("f1", h1n1_svm_downsamp_multilabel_train_f1, h1n1_svm_downsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

### Seasonal vaccine output

#### Logistic regression

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the multilabel experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)

# Run name: model used, output type, vaccine.
name = 'logreg_multilabel_seasonal'

# start_run() returns the run it has just made active; the run stays open
# until mlflow.end_run() is called in a later cell.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run description logged as MLflow parameters: everything needed to re-run the
# experiment (engineered features, model hyperparameters, ...). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Record the run description, then the train/test scores, and close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")  # tag = data used for this run

# (metric name, train score, test score)
run_metrics = [
    ("ROC", seasonal_logreg_multilabel_train_roc, seasonal_logreg_multilabel_test_roc),
    ("accuracy", seasonal_logreg_multilabel_train_acc, seasonal_logreg_multilabel_test_acc),
    ("recall", seasonal_logreg_multilabel_train_recall, seasonal_logreg_multilabel_test_recall),
    ("precision", seasonal_logreg_multilabel_train_precision, seasonal_logreg_multilabel_test_precision),
    ("f1", seasonal_logreg_multilabel_train_f1, seasonal_logreg_multilabel_test_f1),
]
for metric_name, train_value, test_value in run_metrics:
    mlflow.log_metric("train -" + metric_name, train_value)
    mlflow.log_metric("test -" + metric_name, test_value)

#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Log the logistic regression multilabel run (seasonal output, upsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'logreg_multilabel_seasonal' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", seasonal_logreg_upsamp_multilabel_train_roc, seasonal_logreg_upsamp_multilabel_test_roc),
    ("accuracy", seasonal_logreg_upsamp_multilabel_train_acc, seasonal_logreg_upsamp_multilabel_test_acc),
    ("recall", seasonal_logreg_upsamp_multilabel_train_recall, seasonal_logreg_upsamp_multilabel_test_recall),
    ("precision", seasonal_logreg_upsamp_multilabel_train_precision, seasonal_logreg_upsamp_multilabel_test_precision),
    ("f1", seasonal_logreg_upsamp_multilabel_train_f1, seasonal_logreg_upsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

*Downsampling*

In [None]:
# Log the logistic regression multilabel run (seasonal output, downsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'logreg_multilabel_seasonal' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", seasonal_logreg_downsamp_multilabel_train_roc, seasonal_logreg_downsamp_multilabel_test_roc),
    ("accuracy", seasonal_logreg_downsamp_multilabel_train_acc, seasonal_logreg_downsamp_multilabel_test_acc),
    ("recall", seasonal_logreg_downsamp_multilabel_train_recall, seasonal_logreg_downsamp_multilabel_test_recall),
    ("precision", seasonal_logreg_downsamp_multilabel_train_precision, seasonal_logreg_downsamp_multilabel_test_precision),
    ("f1", seasonal_logreg_downsamp_multilabel_train_f1, seasonal_logreg_downsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

#### KNN

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the multilabel experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)

# Run name: model used, output type, vaccine.
name = 'knn_multilabel_seasonal'

# start_run() returns the run it has just made active; the run stays open
# until mlflow.end_run() is called in a later cell.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Record the description and scores of the KNN multilabel run (seasonal output,
# no resampling) and close the run.

# This section has no separate params cell, so the description is defined here;
# otherwise the dict left over from the previously executed run would be logged
# and this run would be labelled with the wrong data balancing.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

mlflow.log_params(params)
#tags = data used
mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done

# (metric name, train score, test score)
run_metrics = [
    ("ROC", seasonal_knn_multilabel_train_roc, seasonal_knn_multilabel_test_roc),
    ("accuracy", seasonal_knn_multilabel_train_acc, seasonal_knn_multilabel_test_acc),
    ("recall", seasonal_knn_multilabel_train_recall, seasonal_knn_multilabel_test_recall),
    ("precision", seasonal_knn_multilabel_train_precision, seasonal_knn_multilabel_test_precision),
    ("f1", seasonal_knn_multilabel_train_f1, seasonal_knn_multilabel_test_f1),
]
for metric_name, train_value, test_value in run_metrics:
    mlflow.log_metric("train -" + metric_name, train_value)
    mlflow.log_metric("test -" + metric_name, test_value)

#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Log the KNN multilabel run (seasonal output, upsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'knn_multilabel_seasonal' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", seasonal_knn_upsamp_multilabel_train_roc, seasonal_knn_upsamp_multilabel_test_roc),
    ("accuracy", seasonal_knn_upsamp_multilabel_train_acc, seasonal_knn_upsamp_multilabel_test_acc),
    ("recall", seasonal_knn_upsamp_multilabel_train_recall, seasonal_knn_upsamp_multilabel_test_recall),
    ("precision", seasonal_knn_upsamp_multilabel_train_precision, seasonal_knn_upsamp_multilabel_test_precision),
    ("f1", seasonal_knn_upsamp_multilabel_train_f1, seasonal_knn_upsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

*Downsampling*

In [None]:
# Log the KNN multilabel run (seasonal output, downsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'knn_multilabel_seasonal' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", seasonal_knn_downsamp_multilabel_train_roc, seasonal_knn_downsamp_multilabel_test_roc),
    ("accuracy", seasonal_knn_downsamp_multilabel_train_acc, seasonal_knn_downsamp_multilabel_test_acc),
    ("recall", seasonal_knn_downsamp_multilabel_train_recall, seasonal_knn_downsamp_multilabel_test_recall),
    ("precision", seasonal_knn_downsamp_multilabel_train_precision, seasonal_knn_downsamp_multilabel_test_precision),
    ("f1", seasonal_knn_downsamp_multilabel_train_f1, seasonal_knn_downsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

#### Random forest

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the multilabel experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)

# Run name: model used, output type, vaccine.
name = 'rand_forst_multilabel_seasonal'

# start_run() returns the run it has just made active; the run stays open
# until mlflow.end_run() is called in a later cell.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run description logged as MLflow parameters: everything needed to re-run the
# experiment (engineered features, model hyperparameters, ...). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Record the run description, then the train/test scores, and close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")  # tag = data used for this run

# (metric name, train score, test score)
run_metrics = [
    ("ROC", seasonal_rand_forst_multilabel_train_roc, seasonal_rand_forst_multilabel_test_roc),
    ("accuracy", seasonal_rand_forst_multilabel_train_acc, seasonal_rand_forst_multilabel_test_acc),
    ("recall", seasonal_rand_forst_multilabel_train_recall, seasonal_rand_forst_multilabel_test_recall),
    ("precision", seasonal_rand_forst_multilabel_train_precision, seasonal_rand_forst_multilabel_test_precision),
    ("f1", seasonal_rand_forst_multilabel_train_f1, seasonal_rand_forst_multilabel_test_f1),
]
for metric_name, train_value, test_value in run_metrics:
    mlflow.log_metric("train -" + metric_name, train_value)
    mlflow.log_metric("test -" + metric_name, test_value)

#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Log the random forest multilabel run (seasonal output, upsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'rand_forst_multilabel_seasonal' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", seasonal_rand_forst_upsamp_multilabel_train_roc, seasonal_rand_forst_upsamp_multilabel_test_roc),
    ("accuracy", seasonal_rand_forst_upsamp_multilabel_train_acc, seasonal_rand_forst_upsamp_multilabel_test_acc),
    ("recall", seasonal_rand_forst_upsamp_multilabel_train_recall, seasonal_rand_forst_upsamp_multilabel_test_recall),
    ("precision", seasonal_rand_forst_upsamp_multilabel_train_precision, seasonal_rand_forst_upsamp_multilabel_test_precision),
    ("f1", seasonal_rand_forst_upsamp_multilabel_train_f1, seasonal_rand_forst_upsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

*Downsampling*

In [None]:
# Log the random forest multilabel run (seasonal output, downsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'rand_forst_multilabel_seasonal' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", seasonal_rand_forst_downsamp_multilabel_train_roc, seasonal_rand_forst_downsamp_multilabel_test_roc),
    ("accuracy", seasonal_rand_forst_downsamp_multilabel_train_acc, seasonal_rand_forst_downsamp_multilabel_test_acc),
    ("recall", seasonal_rand_forst_downsamp_multilabel_train_recall, seasonal_rand_forst_downsamp_multilabel_test_recall),
    ("precision", seasonal_rand_forst_downsamp_multilabel_train_precision, seasonal_rand_forst_downsamp_multilabel_test_precision),
    ("f1", seasonal_rand_forst_downsamp_multilabel_train_f1, seasonal_rand_forst_downsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

#### SVM

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the multilabel experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel)

# Run name: model used, output type, vaccine.
name = 'svm_multilabel_seasonal'

# start_run() returns the run it has just made active; the run stays open
# until mlflow.end_run() is called in a later cell.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run description logged as MLflow parameters: everything needed to re-run the
# experiment (engineered features, model hyperparameters, ...). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Record the run description, then the train/test scores, and close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")  # tag = data used for this run

# (metric name, train score, test score)
run_metrics = [
    ("ROC", seasonal_svm_multilabel_train_roc, seasonal_svm_multilabel_test_roc),
    ("accuracy", seasonal_svm_multilabel_train_acc, seasonal_svm_multilabel_test_acc),
    ("recall", seasonal_svm_multilabel_train_recall, seasonal_svm_multilabel_test_recall),
    ("precision", seasonal_svm_multilabel_train_precision, seasonal_svm_multilabel_test_precision),
    ("f1", seasonal_svm_multilabel_train_f1, seasonal_svm_multilabel_test_f1),
]
for metric_name, train_value, test_value in run_metrics:
    mlflow.log_metric("train -" + metric_name, train_value)
    mlflow.log_metric("test -" + metric_name, test_value)

#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Log the SVM multilabel run (seasonal output, upsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'svm_multilabel_seasonal' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", seasonal_svm_upsamp_multilabel_train_roc, seasonal_svm_upsamp_multilabel_test_roc),
    ("accuracy", seasonal_svm_upsamp_multilabel_train_acc, seasonal_svm_upsamp_multilabel_test_acc),
    ("recall", seasonal_svm_upsamp_multilabel_train_recall, seasonal_svm_upsamp_multilabel_test_recall),
    ("precision", seasonal_svm_upsamp_multilabel_train_precision, seasonal_svm_upsamp_multilabel_test_precision),
    ("f1", seasonal_svm_upsamp_multilabel_train_f1, seasonal_svm_upsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

*Downsampling*

In [None]:
# Log the SVM multilabel run (seasonal output, downsampled training data) to MLflow.
# setting the MLFlow connection and experiment
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_multilabel) #this needs to be adjusted for each experiment
name = 'svm_multilabel_seasonal' #specify the run name here; name it with model used

#parameters are to keep track of everything for re-running the experiment
#(e.g. what features are being engineered, hyperparameters of model, etc); adjust as needed
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling H1N1",
    "Hyperparameters": "None",
}

# (metric name, train score, test score); gathered before the run starts so a
# missing score variable fails here instead of leaving a half-filled run behind
run_metrics = [
    ("ROC", seasonal_svm_downsamp_multilabel_train_roc, seasonal_svm_downsamp_multilabel_test_roc),
    ("accuracy", seasonal_svm_downsamp_multilabel_train_acc, seasonal_svm_downsamp_multilabel_test_acc),
    ("recall", seasonal_svm_downsamp_multilabel_train_recall, seasonal_svm_downsamp_multilabel_test_recall),
    ("precision", seasonal_svm_downsamp_multilabel_train_precision, seasonal_svm_downsamp_multilabel_test_precision),
    ("f1", seasonal_svm_downsamp_multilabel_train_f1, seasonal_svm_downsamp_multilabel_test_f1),
]

# The context manager ends the run even when a logging call raises, so a failed
# cell cannot leave a run active and make the next start_run() fail.
with mlflow.start_run(run_name=name) as run:
    print("Active run_id: {}".format(run.info.run_id))
    mlflow.log_params(params)
    #tags = data used
    mlflow.set_tag("Vaccines in features", "None") #set tags for more details of what we've done
    for metric_name, train_value, test_value in run_metrics:
        mlflow.log_metric("train -" + metric_name, train_value)
        mlflow.log_metric("test -" + metric_name, test_value)
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")

# TheFluShot_H1N1: Single Label Modelling, output H1N1 vaccine

## Single Label Modelling, output H1N1 vaccine -> Seasonal Flu Vaccine not in features

The `cat_features_no_vacc` and `X_no_vacc` variables and the preprocessor are reused unchanged from the multilabel modelling.

Set up the target variable:

In [None]:
# Target for the single-label task: the H1N1 vaccine column of the full dataframe.
y_h1n1_vacc = df["h1n1_vaccine"]

In [None]:
# Display the target for a quick visual check.
# The two disabled lines below would convert it to a NumPy array and squeeze it;
# they are left commented out, so the target stays exactly as selected from `df`.
#y_h1n1_vacc = y_h1n1_vacc.to_numpy()
#y_h1n1_vacc = y_h1n1_vacc.squeeze()
y_h1n1_vacc

Performing test-train split:

In [None]:
# Hold out 20% of the rows as a test set. The split is stratified on the H1N1 label
# and seeded with RSEED so it is reproducible.
(
    X_no_vacc_train,
    X_no_vacc_test,
    y_h1n1_vacc_train,
    y_h1n1_vacc_test,
) = train_test_split(
    X_no_vacc,
    y_h1n1_vacc,
    test_size=0.2,
    stratify=y_h1n1_vacc,
    random_state=RSEED,
)

In [None]:
# Sanity check: report the shape of each piece of the train/test split.
print(f"X_no_vacc_train shape: {X_no_vacc_train.shape}")
print(f"X_no_vacc_test shape: {X_no_vacc_test.shape}")
print(f"y_h1n1_vacc_train: {y_h1n1_vacc_train.shape}")
print(f"y_h1n1_vacc_test: {y_h1n1_vacc_test.shape}")

Setting up the pipeline for each model:

In [None]:
# One pipeline per classifier for the single-label (H1N1) task. Each chains
# `preprocessor` with one estimator object (`logreg`, `knn`, `rand_forst`, `svm`);
# all of these are defined outside this cell, and the same `preprocessor` object is
# referenced by all four pipelines.
# NOTE(review): the notebook header also runs `from sklearn import svm`; `svm` has to
# be bound to an estimator instance by the time this cell runs -- confirm.

# Logistic regression
logreg_unilabel_pipeline = Pipeline(
    steps=[("preprocessor", preprocessor), ("estimators", logreg)]
)

# K-nearest neighbours
knn_unilabel_pipeline = Pipeline(
    steps=[("preprocessor", preprocessor), ("estimators", knn)]
)

# Random forest
rand_forst_unilabel_pipeline = Pipeline(
    steps=[("preprocessor", preprocessor), ("estimators", rand_forst)]
)

# Support vector machine
svm_unilabel_pipeline = Pipeline(
    steps=[("preprocessor", preprocessor), ("estimators", svm)]
)

### Data Balancing

https://towardsdatascience.com/methods-for-dealing-with-imbalanced-data-5b761be45a18

Since the H1N1 label is unbalanced, three approaches will be tried to balance it:

1. Oversampling of the minority class (h1n1_vaccine == 1)
2. Undersampling of the majority class (h1n1_vaccine == 0)
3. Creation of synthetic samples using SMOTE

**Creating concatenated training dataframe and separating into minority and majority class  (for data balancing)**

In [None]:
# Re-attach the H1N1 label to the training features so that whole rows
# (features + label) can be resampled together.
concat_train_df = pd.concat([X_no_vacc_train, y_h1n1_vacc_train], axis=1)

# Split the training rows by class.
# Majority class: not vaccinated against H1N1.
no_h1n1_vacc = concat_train_df[concat_train_df["h1n1_vaccine"] == 0]
# Minority class: vaccinated against H1N1.
yes_h1n1_vacc = concat_train_df[concat_train_df["h1n1_vaccine"] == 1]


**1. Oversampling of the minority class (upsampling)**

In [None]:
# Upsample the minority class: draw rows with replacement until there are as many
# vaccinated rows as there are rows in the majority class.
yes_h1n1_vacc_upsampled = resample(
    yes_h1n1_vacc,
    n_samples=len(no_h1n1_vacc),
    replace=True,
    random_state=RSEED,
)

In [None]:
# Stack the untouched majority rows on top of the upsampled minority rows.
upsampled = pd.concat(objs=[no_h1n1_vacc, yes_h1n1_vacc_upsampled])

In [None]:
# Verify the class balance after upsampling.
upsampled["h1n1_vaccine"].value_counts()

In [None]:
# Split the upsampled frame back into label and features; these serve as the
# upsampled training set.
y_h1n1_vacc_upsamp_train = upsampled["h1n1_vaccine"]
X_no_vacc_upsamp_train = upsampled.drop(columns='h1n1_vaccine')

In [None]:
# Print the column summary of the upsampled training features as a sanity check.
X_no_vacc_upsamp_train.info()

**2. Undersampling of majority class (downsampling)**

In [None]:
# Downsample the majority class: draw rows without replacement until only as many
# unvaccinated rows remain as there are rows in the minority class.
no_h1n1_vacc_downsampled = resample(
    no_h1n1_vacc,
    n_samples=len(yes_h1n1_vacc),
    replace=False,
    random_state=RSEED,
)

In [None]:
# Stack the downsampled majority rows on top of the untouched minority rows.
downsampled = pd.concat(objs=[no_h1n1_vacc_downsampled, yes_h1n1_vacc])

In [None]:
# Verify the class balance after downsampling.
downsampled["h1n1_vaccine"].value_counts()

In [None]:
# Split the downsampled frame back into label and features; these serve as the
# downsampled training set.
y_h1n1_vacc_downsamp_train = downsampled["h1n1_vaccine"]
X_no_vacc_downsamp_train = downsampled.drop(columns='h1n1_vaccine')

**3. Generate synthetic samples (SMOTE)**

SMOTE does not work with missing values and was therefore not trialled.
If SMOTE is used in the future, note that it also does not work with string data, so the string-valued columns would first need to be encoded as numbers.

### Model training and predictions (run predictions immediately after training the model!)

No resampling--fitting model:

In [None]:
# Fit every pipeline on the original (non-resampled) training split.
#
# Pipeline.fit() returns the pipeline itself, so assigning its result directly would
# make each name below an alias of the shared pipeline object, and any later re-fit of
# that pipeline (e.g. on resampled data) would silently replace this model too.
# A deep copy of the freshly fitted pipeline is stored instead, so each model stays
# frozen in the state it had right after this fit. The shared pipeline object is
# still fitted, exactly as before.
from copy import deepcopy

# Logistic regression
logreg_unilabel_no_vacc = deepcopy(
    logreg_unilabel_pipeline.fit(X_no_vacc_train, y_h1n1_vacc_train)
)

# KNN
knn_unilabel_no_vacc = deepcopy(
    knn_unilabel_pipeline.fit(X_no_vacc_train, y_h1n1_vacc_train)
)

# Random Forest
rand_forst_unilabel_no_vacc = deepcopy(
    rand_forst_unilabel_pipeline.fit(X_no_vacc_train, y_h1n1_vacc_train)
)

# SVM
svm_unilabel_no_vacc = deepcopy(
    svm_unilabel_pipeline.fit(X_no_vacc_train, y_h1n1_vacc_train)
)

No resampling--making predictions:

In [None]:
# Predict class labels on the train and test splits with each model fitted on the
# non-resampled data. Each pair is (train predictions, test predictions).

# Logistic regression
logreg_unilabel_no_vacc_trainpreds, logreg_unilabel_no_vacc_testpreds = [
    logreg_unilabel_no_vacc.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

# KNN
knn_unilabel_no_vacc_trainpreds, knn_unilabel_no_vacc_testpreds = [
    knn_unilabel_no_vacc.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

# Random Forest
rand_forst_unilabel_no_vacc_trainpreds, rand_forst_unilabel_no_vacc_testpreds = [
    rand_forst_unilabel_no_vacc.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

# SVM
svm_unilabel_no_vacc_trainpreds, svm_unilabel_no_vacc_testpreds = [
    svm_unilabel_no_vacc.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

Upsampled data--fitting model: 

In [None]:
# Fit every pipeline on the upsampled training set.
#
# Pipeline.fit() returns the pipeline itself, so assigning its result directly would
# make each name below an alias of the shared pipeline object, and any other fit of
# that pipeline (before or after this cell) would end up behind the same name.
# A deep copy of the freshly fitted pipeline is stored instead, so each model stays
# frozen in the state it had right after this fit. The shared pipeline object is
# still fitted, exactly as before.
from copy import deepcopy

# Logistic regression
logreg_unilabel_no_vacc_upsamp = deepcopy(
    logreg_unilabel_pipeline.fit(X_no_vacc_upsamp_train, y_h1n1_vacc_upsamp_train)
)

# KNN
knn_unilabel_no_vacc_upsamp = deepcopy(
    knn_unilabel_pipeline.fit(X_no_vacc_upsamp_train, y_h1n1_vacc_upsamp_train)
)

# Random Forest
rand_forst_unilabel_no_vacc_upsamp = deepcopy(
    rand_forst_unilabel_pipeline.fit(X_no_vacc_upsamp_train, y_h1n1_vacc_upsamp_train)
)

# SVM
svm_unilabel_no_vacc_upsamp = deepcopy(
    svm_unilabel_pipeline.fit(X_no_vacc_upsamp_train, y_h1n1_vacc_upsamp_train)
)

Upsampled data--making predictions:

In [None]:
# Predict class labels with the models fitted on upsampled data. Predictions are
# made on the original train and test splits, not on the upsampled rows.
# Each pair is (train predictions, test predictions).

# Logistic regression
logreg_unilabel_no_vacc_upsamp_trainpreds, logreg_unilabel_no_vacc_upsamp_testpreds = [
    logreg_unilabel_no_vacc_upsamp.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

# KNN
knn_unilabel_no_vacc_upsamp_trainpreds, knn_unilabel_no_vacc_upsamp_testpreds = [
    knn_unilabel_no_vacc_upsamp.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

# Random Forest
rand_forst_unilabel_no_vacc_upsamp_trainpreds, rand_forst_unilabel_no_vacc_upsamp_testpreds = [
    rand_forst_unilabel_no_vacc_upsamp.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

# SVM
svm_unilabel_no_vacc_upsamp_trainpreds, svm_unilabel_no_vacc_upsamp_testpreds = [
    svm_unilabel_no_vacc_upsamp.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

Downsampled data--fitting the model:

In [None]:
# Fit every pipeline on the downsampled training set.
#
# Pipeline.fit() returns the pipeline itself, so assigning its result directly would
# make each name below an alias of the shared pipeline object, and any other fit of
# that pipeline (before or after this cell) would end up behind the same name.
# A deep copy of the freshly fitted pipeline is stored instead, so each model stays
# frozen in the state it had right after this fit. The shared pipeline object is
# still fitted, exactly as before.
from copy import deepcopy

# Logistic regression
logreg_unilabel_no_vacc_downsamp = deepcopy(
    logreg_unilabel_pipeline.fit(X_no_vacc_downsamp_train, y_h1n1_vacc_downsamp_train)
)

# KNN
knn_unilabel_no_vacc_downsamp = deepcopy(
    knn_unilabel_pipeline.fit(X_no_vacc_downsamp_train, y_h1n1_vacc_downsamp_train)
)

# Random Forest
rand_forst_unilabel_no_vacc_downsamp = deepcopy(
    rand_forst_unilabel_pipeline.fit(X_no_vacc_downsamp_train, y_h1n1_vacc_downsamp_train)
)

# SVM
svm_unilabel_no_vacc_downsamp = deepcopy(
    svm_unilabel_pipeline.fit(X_no_vacc_downsamp_train, y_h1n1_vacc_downsamp_train)
)

Downsampled data--making predictions:

In [None]:
# Predict class labels with the models fitted on downsampled data. Predictions are
# made on the original train and test splits, not on the downsampled rows.
# Each pair is (train predictions, test predictions).

# Logistic regression
logreg_unilabel_no_vacc_downsamp_trainpreds, logreg_unilabel_no_vacc_downsamp_testpreds = [
    logreg_unilabel_no_vacc_downsamp.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

# KNN
knn_unilabel_no_vacc_downsamp_trainpreds, knn_unilabel_no_vacc_downsamp_testpreds = [
    knn_unilabel_no_vacc_downsamp.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

# Random Forest
rand_forst_unilabel_no_vacc_downsamp_trainpreds, rand_forst_unilabel_no_vacc_downsamp_testpreds = [
    rand_forst_unilabel_no_vacc_downsamp.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

# SVM
svm_unilabel_no_vacc_downsamp_trainpreds, svm_unilabel_no_vacc_downsamp_testpreds = [
    svm_unilabel_no_vacc_downsamp.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
]

In [None]:
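# TODO: decide whether to evaluate with cross-validated predictions instead of
# predictions on the data the model was fitted on. cross_val_predict returns, for each
# training sample, the prediction made while that sample was held out of fitting.

#y_train_predicted = cross_val_predict(full_pipeline, X_train, y_train, cv=5)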

### Model evaluation

#### Train data--no resampling

In [None]:
# Logistic regression, no resampling -- training-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# NOTE(review): ROC AUC is computed from the hard labels returned by .predict(),
# not from probabilities/decision scores -- confirm this is intended.
(
    h1n1_logreg_unilabel_no_vacc_train_acc,
    h1n1_logreg_unilabel_no_vacc_train_recall,
    h1n1_logreg_unilabel_no_vacc_train_precision,
    h1n1_logreg_unilabel_no_vacc_train_f1,
    h1n1_logreg_unilabel_no_vacc_train_roc,
) = [
    score(y_h1n1_vacc_train, logreg_unilabel_no_vacc_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# For a quick look, print any value above, e.g. "Accuracy: {:.2f}".format(<metric>).

In [None]:
# KNN, no resampling -- training-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# NOTE(review): ROC AUC is computed from the hard labels returned by .predict(),
# not from probabilities/decision scores -- confirm this is intended.
(
    h1n1_knn_unilabel_no_vacc_train_acc,
    h1n1_knn_unilabel_no_vacc_train_recall,
    h1n1_knn_unilabel_no_vacc_train_precision,
    h1n1_knn_unilabel_no_vacc_train_f1,
    h1n1_knn_unilabel_no_vacc_train_roc,
) = [
    score(y_h1n1_vacc_train, knn_unilabel_no_vacc_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# For a quick look, print any value above, e.g. "Accuracy: {:.2f}".format(<metric>).

In [None]:
# Random Forest, no resampling -- training-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): ROC AUC is computed from the hard labels returned by .predict(),
# not from probabilities/decision scores -- confirm this is intended.
(
    h1n1_rand_forst_unilabel_no_vacc_train_acc,
    h1n1_rand_forst_unilabel_no_vacc_train_recall,
    h1n1_rand_forst_unilabel_no_vacc_train_precision,
    h1n1_rand_forst_unilabel_no_vacc_train_f1,
    h1n1_rand_forst_unilabel_no_vacc_train_roc,
) = [
    score(y_h1n1_vacc_train, rand_forst_unilabel_no_vacc_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# For a quick look, print any value above, e.g. "Accuracy: {:.2f}".format(<metric>).

In [None]:
# SVM, no resampling -- training-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# NOTE(review): ROC AUC is computed from the hard labels returned by .predict(),
# not from probabilities/decision scores -- confirm this is intended.
(
    h1n1_svm_unilabel_no_vacc_train_acc,
    h1n1_svm_unilabel_no_vacc_train_recall,
    h1n1_svm_unilabel_no_vacc_train_precision,
    h1n1_svm_unilabel_no_vacc_train_f1,
    h1n1_svm_unilabel_no_vacc_train_roc,
) = [
    score(y_h1n1_vacc_train, svm_unilabel_no_vacc_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# For a quick look, print any value above, e.g. "Accuracy: {:.2f}".format(<metric>).

#### Test data--no resampling

In [None]:
# Logistic regression, no resampling -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# NOTE(review): ROC AUC is computed from the hard labels returned by .predict(),
# not from probabilities/decision scores -- confirm this is intended.
(
    h1n1_logreg_unilabel_no_vacc_test_acc,
    h1n1_logreg_unilabel_no_vacc_test_recall,
    h1n1_logreg_unilabel_no_vacc_test_precision,
    h1n1_logreg_unilabel_no_vacc_test_f1,
    h1n1_logreg_unilabel_no_vacc_test_roc,
) = [
    score(y_h1n1_vacc_test, logreg_unilabel_no_vacc_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# For a quick look, print any value above, e.g. "Accuracy: {:.2f}".format(<metric>).

In [None]:
# KNN, no resampling -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# NOTE(review): ROC AUC is computed from the hard labels returned by .predict(),
# not from probabilities/decision scores -- confirm this is intended.
(
    h1n1_knn_unilabel_no_vacc_test_acc,
    h1n1_knn_unilabel_no_vacc_test_recall,
    h1n1_knn_unilabel_no_vacc_test_precision,
    h1n1_knn_unilabel_no_vacc_test_f1,
    h1n1_knn_unilabel_no_vacc_test_roc,
) = [
    score(y_h1n1_vacc_test, knn_unilabel_no_vacc_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# For a quick look, print any value above, e.g. "Accuracy: {:.2f}".format(<metric>).

In [None]:
# Random Forest, no resampling -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): ROC AUC is computed from the hard labels returned by .predict(),
# not from probabilities/decision scores -- confirm this is intended.
(
    h1n1_rand_forst_unilabel_no_vacc_test_acc,
    h1n1_rand_forst_unilabel_no_vacc_test_recall,
    h1n1_rand_forst_unilabel_no_vacc_test_precision,
    h1n1_rand_forst_unilabel_no_vacc_test_f1,
    h1n1_rand_forst_unilabel_no_vacc_test_roc,
) = [
    score(y_h1n1_vacc_test, rand_forst_unilabel_no_vacc_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# For a quick look, print any value above, e.g. "Accuracy: {:.2f}".format(<metric>).

In [None]:
# SVM, no resampling -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# NOTE(review): ROC AUC is computed from the hard labels returned by .predict(),
# not from probabilities/decision scores -- confirm this is intended.
(
    h1n1_svm_unilabel_no_vacc_test_acc,
    h1n1_svm_unilabel_no_vacc_test_recall,
    h1n1_svm_unilabel_no_vacc_test_precision,
    h1n1_svm_unilabel_no_vacc_test_f1,
    h1n1_svm_unilabel_no_vacc_test_roc,
) = [
    score(y_h1n1_vacc_test, svm_unilabel_no_vacc_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# For a quick look, print any value above, e.g. "Accuracy: {:.2f}".format(<metric>).

#### Train data--upsampling

In [None]:
# Logistic regression fitted on upsampled data -- metrics against the original
# (non-resampled) training labels.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_logreg_unilabel_no_vacc_upsamp_train_acc,
    h1n1_logreg_unilabel_no_vacc_upsamp_train_recall,
    h1n1_logreg_unilabel_no_vacc_upsamp_train_precision,
    h1n1_logreg_unilabel_no_vacc_upsamp_train_f1,
    h1n1_logreg_unilabel_no_vacc_upsamp_train_roc,
) = [
    score(y_h1n1_vacc_train, logreg_unilabel_no_vacc_upsamp_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# KNN fitted on upsampled data -- metrics against the original (non-resampled)
# training labels.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_knn_unilabel_no_vacc_upsamp_train_acc,
    h1n1_knn_unilabel_no_vacc_upsamp_train_recall,
    h1n1_knn_unilabel_no_vacc_upsamp_train_precision,
    h1n1_knn_unilabel_no_vacc_upsamp_train_f1,
    h1n1_knn_unilabel_no_vacc_upsamp_train_roc,
) = [
    score(y_h1n1_vacc_train, knn_unilabel_no_vacc_upsamp_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# Random Forest fitted on upsampled data -- metrics against the original
# (non-resampled) training labels.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# TODO: check whether these metrics change compared to previous results.
(
    h1n1_rand_forst_unilabel_no_vacc_upsamp_train_acc,
    h1n1_rand_forst_unilabel_no_vacc_upsamp_train_recall,
    h1n1_rand_forst_unilabel_no_vacc_upsamp_train_precision,
    h1n1_rand_forst_unilabel_no_vacc_upsamp_train_f1,
    h1n1_rand_forst_unilabel_no_vacc_upsamp_train_roc,
) = [
    score(y_h1n1_vacc_train, rand_forst_unilabel_no_vacc_upsamp_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# SVM fitted on upsampled data -- metrics against the original (non-resampled)
# training labels.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_svm_unilabel_no_vacc_upsamp_train_acc,
    h1n1_svm_unilabel_no_vacc_upsamp_train_recall,
    h1n1_svm_unilabel_no_vacc_upsamp_train_precision,
    h1n1_svm_unilabel_no_vacc_upsamp_train_f1,
    h1n1_svm_unilabel_no_vacc_upsamp_train_roc,
) = [
    score(y_h1n1_vacc_train, svm_unilabel_no_vacc_upsamp_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

#### Test data--upsampling

In [None]:
# Logistic regression fitted on upsampled data -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_logreg_unilabel_no_vacc_upsamp_test_acc,
    h1n1_logreg_unilabel_no_vacc_upsamp_test_recall,
    h1n1_logreg_unilabel_no_vacc_upsamp_test_precision,
    h1n1_logreg_unilabel_no_vacc_upsamp_test_f1,
    h1n1_logreg_unilabel_no_vacc_upsamp_test_roc,
) = [
    score(y_h1n1_vacc_test, logreg_unilabel_no_vacc_upsamp_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# KNN fitted on upsampled data -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_knn_unilabel_no_vacc_upsamp_test_acc,
    h1n1_knn_unilabel_no_vacc_upsamp_test_recall,
    h1n1_knn_unilabel_no_vacc_upsamp_test_precision,
    h1n1_knn_unilabel_no_vacc_upsamp_test_f1,
    h1n1_knn_unilabel_no_vacc_upsamp_test_roc,
) = [
    score(y_h1n1_vacc_test, knn_unilabel_no_vacc_upsamp_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# Random Forest fitted on upsampled data -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# TODO: check whether these metrics change compared to previous results.
(
    h1n1_rand_forst_unilabel_no_vacc_upsamp_test_acc,
    h1n1_rand_forst_unilabel_no_vacc_upsamp_test_recall,
    h1n1_rand_forst_unilabel_no_vacc_upsamp_test_precision,
    h1n1_rand_forst_unilabel_no_vacc_upsamp_test_f1,
    h1n1_rand_forst_unilabel_no_vacc_upsamp_test_roc,
) = [
    score(y_h1n1_vacc_test, rand_forst_unilabel_no_vacc_upsamp_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# SVM fitted on upsampled data -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_svm_unilabel_no_vacc_upsamp_test_acc,
    h1n1_svm_unilabel_no_vacc_upsamp_test_recall,
    h1n1_svm_unilabel_no_vacc_upsamp_test_precision,
    h1n1_svm_unilabel_no_vacc_upsamp_test_f1,
    h1n1_svm_unilabel_no_vacc_upsamp_test_roc,
) = [
    score(y_h1n1_vacc_test, svm_unilabel_no_vacc_upsamp_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

#### Train data--downsampling

In [None]:
# Logistic regression fitted on downsampled data -- metrics against the original
# (non-resampled) training labels.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_logreg_unilabel_no_vacc_downsamp_train_acc,
    h1n1_logreg_unilabel_no_vacc_downsamp_train_recall,
    h1n1_logreg_unilabel_no_vacc_downsamp_train_precision,
    h1n1_logreg_unilabel_no_vacc_downsamp_train_f1,
    h1n1_logreg_unilabel_no_vacc_downsamp_train_roc,
) = [
    score(y_h1n1_vacc_train, logreg_unilabel_no_vacc_downsamp_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# KNN fitted on downsampled data -- metrics against the original (non-resampled)
# training labels.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_knn_unilabel_no_vacc_downsamp_train_acc,
    h1n1_knn_unilabel_no_vacc_downsamp_train_recall,
    h1n1_knn_unilabel_no_vacc_downsamp_train_precision,
    h1n1_knn_unilabel_no_vacc_downsamp_train_f1,
    h1n1_knn_unilabel_no_vacc_downsamp_train_roc,
) = [
    score(y_h1n1_vacc_train, knn_unilabel_no_vacc_downsamp_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# Random Forest fitted on downsampled data -- metrics against the original
# (non-resampled) training labels.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# TODO: check whether these metrics change compared to previous results.
(
    h1n1_rand_forst_unilabel_no_vacc_downsamp_train_acc,
    h1n1_rand_forst_unilabel_no_vacc_downsamp_train_recall,
    h1n1_rand_forst_unilabel_no_vacc_downsamp_train_precision,
    h1n1_rand_forst_unilabel_no_vacc_downsamp_train_f1,
    h1n1_rand_forst_unilabel_no_vacc_downsamp_train_roc,
) = [
    score(y_h1n1_vacc_train, rand_forst_unilabel_no_vacc_downsamp_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# SVM fitted on downsampled data -- metrics against the original (non-resampled)
# training labels.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_svm_unilabel_no_vacc_downsamp_train_acc,
    h1n1_svm_unilabel_no_vacc_downsamp_train_recall,
    h1n1_svm_unilabel_no_vacc_downsamp_train_precision,
    h1n1_svm_unilabel_no_vacc_downsamp_train_f1,
    h1n1_svm_unilabel_no_vacc_downsamp_train_roc,
) = [
    score(y_h1n1_vacc_train, svm_unilabel_no_vacc_downsamp_trainpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

#### Test data--downsampling

In [None]:
# Logistic regression fitted on downsampled data -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_logreg_unilabel_no_vacc_downsamp_test_acc,
    h1n1_logreg_unilabel_no_vacc_downsamp_test_recall,
    h1n1_logreg_unilabel_no_vacc_downsamp_test_precision,
    h1n1_logreg_unilabel_no_vacc_downsamp_test_f1,
    h1n1_logreg_unilabel_no_vacc_downsamp_test_roc,
) = [
    score(y_h1n1_vacc_test, logreg_unilabel_no_vacc_downsamp_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# KNN fitted on downsampled data -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_knn_unilabel_no_vacc_downsamp_test_acc,
    h1n1_knn_unilabel_no_vacc_downsamp_test_recall,
    h1n1_knn_unilabel_no_vacc_downsamp_test_precision,
    h1n1_knn_unilabel_no_vacc_downsamp_test_f1,
    h1n1_knn_unilabel_no_vacc_downsamp_test_roc,
) = [
    score(y_h1n1_vacc_test, knn_unilabel_no_vacc_downsamp_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# Random Forest fitted on downsampled data -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
# TODO: check whether these metrics change compared to previous results.
(
    h1n1_rand_forst_unilabel_no_vacc_downsamp_test_acc,
    h1n1_rand_forst_unilabel_no_vacc_downsamp_test_recall,
    h1n1_rand_forst_unilabel_no_vacc_downsamp_test_precision,
    h1n1_rand_forst_unilabel_no_vacc_downsamp_test_f1,
    h1n1_rand_forst_unilabel_no_vacc_downsamp_test_roc,
) = [
    score(y_h1n1_vacc_test, rand_forst_unilabel_no_vacc_downsamp_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

In [None]:
# SVM fitted on downsampled data -- test-set metrics.
# Computed in the order: accuracy, recall, precision, F1, ROC AUC.
(
    h1n1_svm_unilabel_no_vacc_downsamp_test_acc,
    h1n1_svm_unilabel_no_vacc_downsamp_test_recall,
    h1n1_svm_unilabel_no_vacc_downsamp_test_precision,
    h1n1_svm_unilabel_no_vacc_downsamp_test_f1,
    h1n1_svm_unilabel_no_vacc_downsamp_test_roc,
) = [
    score(y_h1n1_vacc_test, svm_unilabel_no_vacc_downsamp_testpreds)
    for score in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

## Tracking the model with MLFlow

### H1N1 vaccine output

#### Logistic regression

*No resampling*

In [None]:
# Point MLflow at the tracking server and select the H1N1 experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)  # adjust for each experiment

# Run name; name it after the model used.
name = 'logreg_unilabel_h1n1'

# start_run() returns the run it has just made active, so its return value is kept
# directly as `run`.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the id of the run that was just started.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run parameters: everything needed to reproduce the experiment (data cleaning,
# balancing strategy, model hyperparameters, feature engineering if any).
# Adjust per run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and metrics to the run started in an earlier cell, then close it.
# The run is closed on every path: if any logging call raises (for instance a
# NameError on a metric that was never computed) it is ended as FAILED and the error
# is re-raised, so no active run is left behind to block the next start_run().
try:
    mlflow.log_params(params)
    # Tags describe the data used for the run.
    mlflow.set_tag("Vaccines in features", "None")
    mlflow.log_metrics(
        {
            "train -ROC": h1n1_logreg_unilabel_no_vacc_train_roc,
            "test -ROC": h1n1_logreg_unilabel_no_vacc_test_roc,
            "train -accuracy": h1n1_logreg_unilabel_no_vacc_train_acc,
            "test -accuracy": h1n1_logreg_unilabel_no_vacc_test_acc,
            "train -recall": h1n1_logreg_unilabel_no_vacc_train_recall,
            "test -recall": h1n1_logreg_unilabel_no_vacc_test_recall,
            "train -precision": h1n1_logreg_unilabel_no_vacc_train_precision,
            "test -precision": h1n1_logreg_unilabel_no_vacc_test_precision,
            "train -f1": h1n1_logreg_unilabel_no_vacc_train_f1,
            "test -f1": h1n1_logreg_unilabel_no_vacc_test_f1,
        }
    )
    # Artifact/model logging is currently disabled:
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Point MLflow at the tracking server and select the H1N1 experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)  # adjust for each experiment

# Run name; name it after the model used.
name = 'logreg_unilabel_h1n1'

# start_run() returns the run it has just made active, so its return value is kept
# directly as `run`.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the id of the run that was just started.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run parameters: everything needed to reproduce the experiment (data cleaning,
# balancing strategy, model hyperparameters, feature engineering if any).
# Adjust per run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_logreg_unilabel_no_vacc_upsamp_train_roc, h1n1_logreg_unilabel_no_vacc_upsamp_test_roc),
    "accuracy": (h1n1_logreg_unilabel_no_vacc_upsamp_train_acc, h1n1_logreg_unilabel_no_vacc_upsamp_test_acc),
    "recall": (h1n1_logreg_unilabel_no_vacc_upsamp_train_recall, h1n1_logreg_unilabel_no_vacc_upsamp_test_recall),
    "precision": (h1n1_logreg_unilabel_no_vacc_upsamp_train_precision, h1n1_logreg_unilabel_no_vacc_upsamp_test_precision),
    "f1": (h1n1_logreg_unilabel_no_vacc_upsamp_train_f1, h1n1_logreg_unilabel_no_vacc_upsamp_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

*Downsampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment
# (the experiment constant needs to be adjusted for each experiment).
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'logreg_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_logreg_unilabel_no_vacc_downsamp_train_roc, h1n1_logreg_unilabel_no_vacc_downsamp_test_roc),
    "accuracy": (h1n1_logreg_unilabel_no_vacc_downsamp_train_acc, h1n1_logreg_unilabel_no_vacc_downsamp_test_acc),
    "recall": (h1n1_logreg_unilabel_no_vacc_downsamp_train_recall, h1n1_logreg_unilabel_no_vacc_downsamp_test_recall),
    "precision": (h1n1_logreg_unilabel_no_vacc_downsamp_train_precision, h1n1_logreg_unilabel_no_vacc_downsamp_test_precision),
    "f1": (h1n1_logreg_unilabel_no_vacc_downsamp_train_f1, h1n1_logreg_unilabel_no_vacc_downsamp_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

#### KNN

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'knn_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

In [None]:
# Print the ID of the run held in `run`.
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_knn_unilabel_no_vacc_train_roc, h1n1_knn_unilabel_no_vacc_test_roc),
    "accuracy": (h1n1_knn_unilabel_no_vacc_train_acc, h1n1_knn_unilabel_no_vacc_test_acc),
    "recall": (h1n1_knn_unilabel_no_vacc_train_recall, h1n1_knn_unilabel_no_vacc_test_recall),
    "precision": (h1n1_knn_unilabel_no_vacc_train_precision, h1n1_knn_unilabel_no_vacc_test_precision),
    "f1": (h1n1_knn_unilabel_no_vacc_train_f1, h1n1_knn_unilabel_no_vacc_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'knn_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_knn_unilabel_no_vacc_upsamp_train_roc, h1n1_knn_unilabel_no_vacc_upsamp_test_roc),
    "accuracy": (h1n1_knn_unilabel_no_vacc_upsamp_train_acc, h1n1_knn_unilabel_no_vacc_upsamp_test_acc),
    "recall": (h1n1_knn_unilabel_no_vacc_upsamp_train_recall, h1n1_knn_unilabel_no_vacc_upsamp_test_recall),
    "precision": (h1n1_knn_unilabel_no_vacc_upsamp_train_precision, h1n1_knn_unilabel_no_vacc_upsamp_test_precision),
    "f1": (h1n1_knn_unilabel_no_vacc_upsamp_train_f1, h1n1_knn_unilabel_no_vacc_upsamp_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

*Downsampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'knn_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_knn_unilabel_no_vacc_downsamp_train_roc, h1n1_knn_unilabel_no_vacc_downsamp_test_roc),
    "accuracy": (h1n1_knn_unilabel_no_vacc_downsamp_train_acc, h1n1_knn_unilabel_no_vacc_downsamp_test_acc),
    "recall": (h1n1_knn_unilabel_no_vacc_downsamp_train_recall, h1n1_knn_unilabel_no_vacc_downsamp_test_recall),
    "precision": (h1n1_knn_unilabel_no_vacc_downsamp_train_precision, h1n1_knn_unilabel_no_vacc_downsamp_test_precision),
    "f1": (h1n1_knn_unilabel_no_vacc_downsamp_train_f1, h1n1_knn_unilabel_no_vacc_downsamp_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

#### Random forest

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'rand_forst_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

In [None]:
# Print the ID of the run held in `run`.
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_rand_forst_unilabel_no_vacc_train_roc, h1n1_rand_forst_unilabel_no_vacc_test_roc),
    "accuracy": (h1n1_rand_forst_unilabel_no_vacc_train_acc, h1n1_rand_forst_unilabel_no_vacc_test_acc),
    "recall": (h1n1_rand_forst_unilabel_no_vacc_train_recall, h1n1_rand_forst_unilabel_no_vacc_test_recall),
    "precision": (h1n1_rand_forst_unilabel_no_vacc_train_precision, h1n1_rand_forst_unilabel_no_vacc_test_precision),
    "f1": (h1n1_rand_forst_unilabel_no_vacc_train_f1, h1n1_rand_forst_unilabel_no_vacc_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'rand_forst_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_rand_forst_unilabel_no_vacc_upsamp_train_roc, h1n1_rand_forst_unilabel_no_vacc_upsamp_test_roc),
    "accuracy": (h1n1_rand_forst_unilabel_no_vacc_upsamp_train_acc, h1n1_rand_forst_unilabel_no_vacc_upsamp_test_acc),
    "recall": (h1n1_rand_forst_unilabel_no_vacc_upsamp_train_recall, h1n1_rand_forst_unilabel_no_vacc_upsamp_test_recall),
    "precision": (h1n1_rand_forst_unilabel_no_vacc_upsamp_train_precision, h1n1_rand_forst_unilabel_no_vacc_upsamp_test_precision),
    "f1": (h1n1_rand_forst_unilabel_no_vacc_upsamp_train_f1, h1n1_rand_forst_unilabel_no_vacc_upsamp_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

*Downsampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'rand_forst_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_rand_forst_unilabel_no_vacc_downsamp_train_roc, h1n1_rand_forst_unilabel_no_vacc_downsamp_test_roc),
    "accuracy": (h1n1_rand_forst_unilabel_no_vacc_downsamp_train_acc, h1n1_rand_forst_unilabel_no_vacc_downsamp_test_acc),
    "recall": (h1n1_rand_forst_unilabel_no_vacc_downsamp_train_recall, h1n1_rand_forst_unilabel_no_vacc_downsamp_test_recall),
    "precision": (h1n1_rand_forst_unilabel_no_vacc_downsamp_train_precision, h1n1_rand_forst_unilabel_no_vacc_downsamp_test_precision),
    "f1": (h1n1_rand_forst_unilabel_no_vacc_downsamp_train_f1, h1n1_rand_forst_unilabel_no_vacc_downsamp_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

#### SVM

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'svm_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

In [None]:
# Print the ID of the run held in `run`.
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_svm_unilabel_no_vacc_train_roc, h1n1_svm_unilabel_no_vacc_test_roc),
    "accuracy": (h1n1_svm_unilabel_no_vacc_train_acc, h1n1_svm_unilabel_no_vacc_test_acc),
    "recall": (h1n1_svm_unilabel_no_vacc_train_recall, h1n1_svm_unilabel_no_vacc_test_recall),
    "precision": (h1n1_svm_unilabel_no_vacc_train_precision, h1n1_svm_unilabel_no_vacc_test_precision),
    "f1": (h1n1_svm_unilabel_no_vacc_train_f1, h1n1_svm_unilabel_no_vacc_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'svm_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_svm_unilabel_no_vacc_upsamp_train_roc, h1n1_svm_unilabel_no_vacc_upsamp_test_roc),
    "accuracy": (h1n1_svm_unilabel_no_vacc_upsamp_train_acc, h1n1_svm_unilabel_no_vacc_upsamp_test_acc),
    "recall": (h1n1_svm_unilabel_no_vacc_upsamp_train_recall, h1n1_svm_unilabel_no_vacc_upsamp_test_recall),
    "precision": (h1n1_svm_unilabel_no_vacc_upsamp_train_precision, h1n1_svm_unilabel_no_vacc_upsamp_test_precision),
    "f1": (h1n1_svm_unilabel_no_vacc_upsamp_train_f1, h1n1_svm_unilabel_no_vacc_upsamp_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

*Downsampling*

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)

# Run name: name it after the model used.
name = 'svm_unilabel_h1n1'
mlflow.start_run(run_name=name)
run = mlflow.active_run()  # handle to the currently active run

print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Run configuration logged with the MLflow run so the experiment can be re-run
# (data cleaning, data balancing, model hyperparameters). Adjust as needed.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling",
    "Hyperparameters": "None",
}

In [None]:
# Attach the run configuration and the feature-set tag to the active run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")

# Metric label -> (train score, test score). Iteration keeps the original
# logging order: train then test for ROC, accuracy, recall, precision, f1.
_run_scores = {
    "ROC": (h1n1_svm_unilabel_no_vacc_downsamp_train_roc, h1n1_svm_unilabel_no_vacc_downsamp_test_roc),
    "accuracy": (h1n1_svm_unilabel_no_vacc_downsamp_train_acc, h1n1_svm_unilabel_no_vacc_downsamp_test_acc),
    "recall": (h1n1_svm_unilabel_no_vacc_downsamp_train_recall, h1n1_svm_unilabel_no_vacc_downsamp_test_recall),
    "precision": (h1n1_svm_unilabel_no_vacc_downsamp_train_precision, h1n1_svm_unilabel_no_vacc_downsamp_test_precision),
    "f1": (h1n1_svm_unilabel_no_vacc_downsamp_train_f1, h1n1_svm_unilabel_no_vacc_downsamp_test_f1),
}
for _metric, (_train_score, _test_score) in _run_scores.items():
    mlflow.log_metric("train -" + _metric, _train_score)
    mlflow.log_metric("test -" + _metric, _test_score)

# Artifact / model logging is left disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()  # close the active run

## Single Label Modelling, output H1N1 vaccine -> Seasonal Flu Vaccine is in features

The y_h1n1_vacc remains the same from the previous model; the X feature and cat_features (for the preprocessor) need to be adjusted:

In [None]:
# Work on a copy so that editing the list for this section leaves `cat_features` untouched.
cat_features_seas_vacc = cat_features.copy()

In [None]:
# 'h1n1_vaccine' is the prediction target in this section, so it is taken out of the feature list.
# NOTE: list.remove raises ValueError if the entry is absent (e.g. when this cell is re-run).
cat_features_seas_vacc.remove('h1n1_vaccine')

In [None]:
# NB: only the target column 'h1n1_vaccine' is dropped here; 'seasonal_vaccine' is NOT dropped,
# so (if present in df) it remains among the features, as intended for this section.
X_seas_vacc = df.drop(columns=['h1n1_vaccine'])

In [None]:
# Display the remaining feature columns as a sanity check.
X_seas_vacc.columns

Performing train-test split (the same split is used for every model in this single-label section):

In [None]:
# 80/20 split, stratified on the H1N1 target and seeded with RSEED.
(
    X_seas_vacc_train,
    X_seas_vacc_test,
    y_h1n1_vacc_train,
    y_h1n1_vacc_test,
) = train_test_split(
    X_seas_vacc,
    y_h1n1_vacc,
    test_size=0.2,
    stratify=y_h1n1_vacc,
    random_state=RSEED,
)

In [None]:
# Report the shape of each piece of the split.
for _label, _part in (
    ('X_seas_vacc_train shape:', X_seas_vacc_train),
    ('X_seas_vacc_test shape:', X_seas_vacc_test),
    ('y_h1n1_vacc_train:', y_h1n1_vacc_train),
    ('y_h1n1_vacc_test:', y_h1n1_vacc_test),
):
    print(_label, _part.shape)

Preprocessor is adjusted:

In [None]:
# Preprocessor for this section: applies cat_pipeline to the adjusted feature list
# (cat_features_seas_vacc, i.e. without the 'h1n1_vaccine' target).
preprocessor_seas_vacc = ColumnTransformer(
    transformers=[('cat', cat_pipeline, cat_features_seas_vacc)],
)

Pipeline is adjusted:

In [None]:
# One pipeline per model: shared preprocessor followed by the estimator.
# NOTE: Pipeline does not copy its steps, so all four pipelines hold the SAME
# preprocessor_seas_vacc object, and each estimator object is whatever was
# defined earlier in the notebook.

# Logistic regression
logreg_seas_vacc_unilabel_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor_seas_vacc),
    ("estimators", logreg),
])

# K-nearest neighbours
knn_seas_vacc_unilabel_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor_seas_vacc),
    ("estimators", knn),
])

# Random forest
rand_forst_seas_vacc_unilabel_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor_seas_vacc),
    ("estimators", rand_forst),
])

# Support vector machine
# NOTE(review): the notebook's import cell binds `svm` to the sklearn.svm module;
# this step needs an estimator instance, so confirm `svm` is rebound to a
# classifier before this cell runs.
svm_seas_vacc_unilabel_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor_seas_vacc),
    ("estimators", svm),
])

**Creating concatenated training dataframe and separating into minority and majority class (for data balancing)**

In [None]:
# Rejoin training features and target column-wise so that rows can be
# resampled together with their labels.
concat_train_df = pd.concat([X_seas_vacc_train, y_h1n1_vacc_train], axis=1)

# Split the training rows by class label:
# label 0 (no H1N1 vaccine) is the majority class ...
no_h1n1_vacc = concat_train_df.loc[concat_train_df["h1n1_vaccine"] == 0]

# ... and label 1 (H1N1 vaccine received) is the minority class.
yes_h1n1_vacc = concat_train_df.loc[concat_train_df["h1n1_vaccine"] == 1]


**1. Oversampling of the minority class (upsampling)**

In [None]:
# Upsample the minority class: draw with replacement until it has as many
# rows as the majority class.
yes_h1n1_vacc_upsampled = resample(
    yes_h1n1_vacc,
    replace=True,
    n_samples=len(no_h1n1_vacc),
    random_state=RSEED,
)

In [None]:
# Stack the untouched majority rows on top of the upsampled minority rows.
upsampled = pd.concat(objs=[no_h1n1_vacc, yes_h1n1_vacc_upsampled])

In [None]:
# Sanity check: class counts after upsampling.
upsampled["h1n1_vaccine"].value_counts()

In [None]:
# Split the upsampled frame back into target and features; these serve as the
# training set for the "upsampled" models.
y_h1n1_vacc_upsamp_train = upsampled["h1n1_vaccine"]

X_seas_vacc_upsamp_train = upsampled.drop(columns=['h1n1_vaccine'])

In [None]:
# Print a summary (columns, non-null counts, dtypes) of the upsampled training features.
X_seas_vacc_upsamp_train.info()

**2. Undersampling of majority class (downsampling)**

In [None]:
# Downsample the MAJORITY class: draw without replacement so that it has as
# many rows as the minority class.
no_h1n1_vacc_downsampled = resample(
    no_h1n1_vacc,
    replace=False,
    n_samples=len(yes_h1n1_vacc),
    random_state=RSEED,
)

In [None]:
# Stack the downsampled majority rows on top of the untouched minority rows.
downsampled = pd.concat(objs=[no_h1n1_vacc_downsampled, yes_h1n1_vacc])

In [None]:
# Sanity check: class counts after downsampling.
downsampled["h1n1_vaccine"].value_counts()

In [None]:
# Split the downsampled frame back into target and features; these serve as
# the training set for the "downsampled" models.
y_h1n1_vacc_downsamp_train = downsampled["h1n1_vaccine"]

X_seas_vacc_downsamp_train = downsampled.drop(columns=['h1n1_vaccine'])

### Model training and predictions

No resampling--fitting model:

In [None]:
# Fit each model on the original (unbalanced) training data.
#
# Pipeline.fit returns the pipeline object itself, and the pipelines share their
# step objects. Fitting the shared pipelines directly would therefore make the
# no-resampling, upsampled and downsampled "models" aliases of one estimator
# (whichever fit ran last). clone() gives every fit its own unfitted copy with
# identical hyperparameters, so each fitted model stays independent.
from sklearn.base import clone

# for logreg
logreg_unilabel_seas_vacc = clone(logreg_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_train, y_h1n1_vacc_train)

# for KNN
knn_unilabel_seas_vacc = clone(knn_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_train, y_h1n1_vacc_train)

# for Random Forest
rand_forst_unilabel_seas_vacc = clone(rand_forst_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_train, y_h1n1_vacc_train)

# for SVM
svm_unilabel_seas_vacc = clone(svm_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_train, y_h1n1_vacc_train)

No resampling--making predictions:

In [None]:
# Hard class predictions on the train and test splits (in that order) for
# each model fitted without resampling.

# Logistic regression
logreg_unilabel_seas_vacc_trainpreds, logreg_unilabel_seas_vacc_testpreds = (
    logreg_unilabel_seas_vacc.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

# K-nearest neighbours
knn_unilabel_seas_vacc_trainpreds, knn_unilabel_seas_vacc_testpreds = (
    knn_unilabel_seas_vacc.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

# Random forest
rand_forst_unilabel_seas_vacc_trainpreds, rand_forst_unilabel_seas_vacc_testpreds = (
    rand_forst_unilabel_seas_vacc.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

# Support vector machine
svm_unilabel_seas_vacc_trainpreds, svm_unilabel_seas_vacc_testpreds = (
    svm_unilabel_seas_vacc.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

Upsampled data--fitting model:

In [None]:
# Fit each model on the upsampled training data.
#
# Pipeline.fit returns the pipeline object itself, and the pipelines share their
# step objects. Refitting the shared pipelines directly would silently turn the
# previously fitted no-resampling models into these upsampled ones (same object).
# clone() gives every fit its own unfitted copy with identical hyperparameters,
# so each fitted model stays independent.
from sklearn.base import clone

# for logreg
logreg_unilabel_seas_vacc_upsamp = clone(logreg_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_upsamp_train, y_h1n1_vacc_upsamp_train)

# for KNN
knn_unilabel_seas_vacc_upsamp = clone(knn_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_upsamp_train, y_h1n1_vacc_upsamp_train)

# for Random Forest
rand_forst_unilabel_seas_vacc_upsamp = clone(rand_forst_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_upsamp_train, y_h1n1_vacc_upsamp_train)

# for SVM
svm_unilabel_seas_vacc_upsamp = clone(svm_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_upsamp_train, y_h1n1_vacc_upsamp_train)

Upsampled data--making predictions:

In [None]:
# Hard class predictions from the models trained on upsampled data.
# Note that both predictions use the ORIGINAL splits (X_seas_vacc_train /
# X_seas_vacc_test), not the upsampled training frame.

# Logistic regression
logreg_unilabel_seas_vacc_upsamp_trainpreds, logreg_unilabel_seas_vacc_upsamp_testpreds = (
    logreg_unilabel_seas_vacc_upsamp.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

# K-nearest neighbours
knn_unilabel_seas_vacc_upsamp_trainpreds, knn_unilabel_seas_vacc_upsamp_testpreds = (
    knn_unilabel_seas_vacc_upsamp.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

# Random forest
rand_forst_unilabel_seas_vacc_upsamp_trainpreds, rand_forst_unilabel_seas_vacc_upsamp_testpreds = (
    rand_forst_unilabel_seas_vacc_upsamp.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

# Support vector machine
svm_unilabel_seas_vacc_upsamp_trainpreds, svm_unilabel_seas_vacc_upsamp_testpreds = (
    svm_unilabel_seas_vacc_upsamp.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

Downsampled data--fitting the model:

In [None]:
# Fit each model on the downsampled training data.
#
# Pipeline.fit returns the pipeline object itself, and the pipelines share their
# step objects. Refitting the shared pipelines directly would silently turn the
# previously fitted no-resampling / upsampled models into these downsampled ones
# (same object). clone() gives every fit its own unfitted copy with identical
# hyperparameters, so each fitted model stays independent.
from sklearn.base import clone

# for logreg
logreg_unilabel_seas_vacc_downsamp = clone(logreg_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_downsamp_train, y_h1n1_vacc_downsamp_train)

# for KNN
knn_unilabel_seas_vacc_downsamp = clone(knn_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_downsamp_train, y_h1n1_vacc_downsamp_train)

# for Random Forest
rand_forst_unilabel_seas_vacc_downsamp = clone(rand_forst_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_downsamp_train, y_h1n1_vacc_downsamp_train)

# for SVM
svm_unilabel_seas_vacc_downsamp = clone(svm_seas_vacc_unilabel_pipeline).fit(X_seas_vacc_downsamp_train, y_h1n1_vacc_downsamp_train)

Downsampled data--making predictions:

In [None]:
# Hard class predictions from the models trained on downsampled data.
# Note that both predictions use the ORIGINAL splits (X_seas_vacc_train /
# X_seas_vacc_test), not the downsampled training frame.

# Logistic regression
logreg_unilabel_seas_vacc_downsamp_trainpreds, logreg_unilabel_seas_vacc_downsamp_testpreds = (
    logreg_unilabel_seas_vacc_downsamp.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

# K-nearest neighbours
knn_unilabel_seas_vacc_downsamp_trainpreds, knn_unilabel_seas_vacc_downsamp_testpreds = (
    knn_unilabel_seas_vacc_downsamp.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

# Random forest
rand_forst_unilabel_seas_vacc_downsamp_trainpreds, rand_forst_unilabel_seas_vacc_downsamp_testpreds = (
    rand_forst_unilabel_seas_vacc_downsamp.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

# Support vector machine
svm_unilabel_seas_vacc_downsamp_trainpreds, svm_unilabel_seas_vacc_downsamp_testpreds = (
    svm_unilabel_seas_vacc_downsamp.predict(features)
    for features in (X_seas_vacc_train, X_seas_vacc_test)
)

In [None]:
# Figure out later what this does and if we want to use it


#y_train_predicted = cross_val_predict(full_pipeline, X_train, y_train, cv=5)

### Model evaluation

#### Train data--no resampling

In [None]:
# Logistic regression, train split, no resampling: accuracy, recall,
# precision, F1 and ROC AUC of the hard predictions against the true labels.
# NOTE(review): roc_auc_score receives class labels from .predict() here, not
# probabilities or decision scores, so it reflects a single operating point.
(
    h1n1_logreg_unilabel_seas_vacc_train_acc,
    h1n1_logreg_unilabel_seas_vacc_train_recall,
    h1n1_logreg_unilabel_seas_vacc_train_precision,
    h1n1_logreg_unilabel_seas_vacc_train_f1,
    h1n1_logreg_unilabel_seas_vacc_train_roc,
) = (
    score_fn(y_h1n1_vacc_train, logreg_unilabel_seas_vacc_trainpreds)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

# Optional printout (disabled):
#print("Accuracy: {:.2f}".format(accuracy_score(y_h1n1_vacc_train, logreg_unilabel_seas_vacc_trainpreds)))
#print("Recall: {:.2f}".format(recall_score(y_h1n1_vacc_train, logreg_unilabel_seas_vacc_trainpreds)))
#print("Precision: {:.2f}".format(precision_score(y_h1n1_vacc_train, logreg_unilabel_seas_vacc_trainpreds)))
#print("F1: {:.2f}".format(f1_score(y_h1n1_vacc_train, logreg_unilabel_seas_vacc_trainpreds)))
#print("ROC: {:.2f}".format(roc_auc_score(y_h1n1_vacc_train, logreg_unilabel_seas_vacc_trainpreds)))

In [None]:
# KNN, train split, no resampling: accuracy, recall, precision, F1 and
# ROC AUC of the hard predictions against the true labels.
# NOTE(review): roc_auc_score receives class labels from .predict() here, not
# probabilities or decision scores, so it reflects a single operating point.
(
    h1n1_knn_unilabel_seas_vacc_train_acc,
    h1n1_knn_unilabel_seas_vacc_train_recall,
    h1n1_knn_unilabel_seas_vacc_train_precision,
    h1n1_knn_unilabel_seas_vacc_train_f1,
    h1n1_knn_unilabel_seas_vacc_train_roc,
) = (
    score_fn(y_h1n1_vacc_train, knn_unilabel_seas_vacc_trainpreds)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

# Optional printout (disabled):
#print("Accuracy: {:.2f}".format(accuracy_score(y_h1n1_vacc_train, knn_unilabel_seas_vacc_trainpreds)))
#print("Recall: {:.2f}".format(recall_score(y_h1n1_vacc_train, knn_unilabel_seas_vacc_trainpreds)))
#print("Precision: {:.2f}".format(precision_score(y_h1n1_vacc_train, knn_unilabel_seas_vacc_trainpreds)))
#print("F1: {:.2f}".format(f1_score(y_h1n1_vacc_train, knn_unilabel_seas_vacc_trainpreds)))
#print("ROC: {:.2f}".format(roc_auc_score(y_h1n1_vacc_train, knn_unilabel_seas_vacc_trainpreds)))

In [None]:
# Random forest, train split, no resampling: accuracy, recall, precision, F1
# and ROC AUC of the hard predictions against the true labels.
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): roc_auc_score receives class labels from .predict() here, not
# probabilities or decision scores, so it reflects a single operating point.
(
    h1n1_rand_forst_unilabel_seas_vacc_train_acc,
    h1n1_rand_forst_unilabel_seas_vacc_train_recall,
    h1n1_rand_forst_unilabel_seas_vacc_train_precision,
    h1n1_rand_forst_unilabel_seas_vacc_train_f1,
    h1n1_rand_forst_unilabel_seas_vacc_train_roc,
) = (
    score_fn(y_h1n1_vacc_train, rand_forst_unilabel_seas_vacc_trainpreds)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

# Optional printout (disabled):
#print("Accuracy: {:.2f}".format(accuracy_score(y_h1n1_vacc_train, rand_forst_unilabel_seas_vacc_trainpreds)))
#print("Recall: {:.2f}".format(recall_score(y_h1n1_vacc_train, rand_forst_unilabel_seas_vacc_trainpreds)))
#print("Precision: {:.2f}".format(precision_score(y_h1n1_vacc_train, rand_forst_unilabel_seas_vacc_trainpreds)))
#print("F1: {:.2f}".format(f1_score(y_h1n1_vacc_train, rand_forst_unilabel_seas_vacc_trainpreds)))
#print("ROC: {:.2f}".format(roc_auc_score(y_h1n1_vacc_train, rand_forst_unilabel_seas_vacc_trainpreds)))

In [None]:
# SVM, train split, no resampling: accuracy, recall, precision, F1 and
# ROC AUC of the hard predictions against the true labels.
# NOTE(review): roc_auc_score receives class labels from .predict() here, not
# probabilities or decision scores, so it reflects a single operating point.
(
    h1n1_svm_unilabel_seas_vacc_train_acc,
    h1n1_svm_unilabel_seas_vacc_train_recall,
    h1n1_svm_unilabel_seas_vacc_train_precision,
    h1n1_svm_unilabel_seas_vacc_train_f1,
    h1n1_svm_unilabel_seas_vacc_train_roc,
) = (
    score_fn(y_h1n1_vacc_train, svm_unilabel_seas_vacc_trainpreds)
    for score_fn in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

# Optional printout (disabled):
#print("Accuracy: {:.2f}".format(accuracy_score(y_h1n1_vacc_train, svm_unilabel_seas_vacc_trainpreds)))
#print("Recall: {:.2f}".format(recall_score(y_h1n1_vacc_train, svm_unilabel_seas_vacc_trainpreds)))
#print("Precision: {:.2f}".format(precision_score(y_h1n1_vacc_train, svm_unilabel_seas_vacc_trainpreds)))
#print("F1: {:.2f}".format(f1_score(y_h1n1_vacc_train, svm_unilabel_seas_vacc_trainpreds)))
#print("ROC: {:.2f}".format(roc_auc_score(y_h1n1_vacc_train, svm_unilabel_seas_vacc_trainpreds)))

#### Test data--no resampling

In [None]:
# Logistic regression, no resampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_logreg_unilabel_seas_vacc_test_acc,
    h1n1_logreg_unilabel_seas_vacc_test_recall,
    h1n1_logreg_unilabel_seas_vacc_test_precision,
    h1n1_logreg_unilabel_seas_vacc_test_f1,
    h1n1_logreg_unilabel_seas_vacc_test_roc,
) = (
    metric(y_h1n1_vacc_test, logreg_unilabel_seas_vacc_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# KNN, no resampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_knn_unilabel_seas_vacc_test_acc,
    h1n1_knn_unilabel_seas_vacc_test_recall,
    h1n1_knn_unilabel_seas_vacc_test_precision,
    h1n1_knn_unilabel_seas_vacc_test_f1,
    h1n1_knn_unilabel_seas_vacc_test_roc,
) = (
    metric(y_h1n1_vacc_test, knn_unilabel_seas_vacc_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# Random forest, no resampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_rand_forst_unilabel_seas_vacc_test_acc,
    h1n1_rand_forst_unilabel_seas_vacc_test_recall,
    h1n1_rand_forst_unilabel_seas_vacc_test_precision,
    h1n1_rand_forst_unilabel_seas_vacc_test_f1,
    h1n1_rand_forst_unilabel_seas_vacc_test_roc,
) = (
    metric(y_h1n1_vacc_test, rand_forst_unilabel_seas_vacc_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# SVM, no resampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_svm_unilabel_seas_vacc_test_acc,
    h1n1_svm_unilabel_seas_vacc_test_recall,
    h1n1_svm_unilabel_seas_vacc_test_precision,
    h1n1_svm_unilabel_seas_vacc_test_f1,
    h1n1_svm_unilabel_seas_vacc_test_roc,
) = (
    metric(y_h1n1_vacc_test, svm_unilabel_seas_vacc_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#### Train data--upsampling

In [None]:
# Logistic regression, upsampling: train-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): scored against y_h1n1_vacc_train, presumably the non-resampled target —
# confirm the upsamp train preds were made on the matching (original) train features.
(
    h1n1_logreg_unilabel_seas_vacc_upsamp_train_acc,
    h1n1_logreg_unilabel_seas_vacc_upsamp_train_recall,
    h1n1_logreg_unilabel_seas_vacc_upsamp_train_precision,
    h1n1_logreg_unilabel_seas_vacc_upsamp_train_f1,
    h1n1_logreg_unilabel_seas_vacc_upsamp_train_roc,
) = (
    metric(y_h1n1_vacc_train, logreg_unilabel_seas_vacc_upsamp_trainpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# KNN, upsampling: train-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): scored against y_h1n1_vacc_train, presumably the non-resampled target —
# confirm the upsamp train preds were made on the matching (original) train features.
(
    h1n1_knn_unilabel_seas_vacc_upsamp_train_acc,
    h1n1_knn_unilabel_seas_vacc_upsamp_train_recall,
    h1n1_knn_unilabel_seas_vacc_upsamp_train_precision,
    h1n1_knn_unilabel_seas_vacc_upsamp_train_f1,
    h1n1_knn_unilabel_seas_vacc_upsamp_train_roc,
) = (
    metric(y_h1n1_vacc_train, knn_unilabel_seas_vacc_upsamp_trainpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# Random forest, upsampling: train-set accuracy, recall, precision, F1 and ROC AUC.
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): scored against y_h1n1_vacc_train, presumably the non-resampled target —
# confirm the upsamp train preds were made on the matching (original) train features.
(
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_acc,
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_recall,
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_precision,
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_f1,
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_roc,
) = (
    metric(y_h1n1_vacc_train, rand_forst_unilabel_seas_vacc_upsamp_trainpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# SVM, upsampling: train-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): scored against y_h1n1_vacc_train, presumably the non-resampled target —
# confirm the upsamp train preds were made on the matching (original) train features.
(
    h1n1_svm_unilabel_seas_vacc_upsamp_train_acc,
    h1n1_svm_unilabel_seas_vacc_upsamp_train_recall,
    h1n1_svm_unilabel_seas_vacc_upsamp_train_precision,
    h1n1_svm_unilabel_seas_vacc_upsamp_train_f1,
    h1n1_svm_unilabel_seas_vacc_upsamp_train_roc,
) = (
    metric(y_h1n1_vacc_train, svm_unilabel_seas_vacc_upsamp_trainpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#### Test data--upsampling

In [None]:
# Logistic regression, upsampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_logreg_unilabel_seas_vacc_upsamp_test_acc,
    h1n1_logreg_unilabel_seas_vacc_upsamp_test_recall,
    h1n1_logreg_unilabel_seas_vacc_upsamp_test_precision,
    h1n1_logreg_unilabel_seas_vacc_upsamp_test_f1,
    h1n1_logreg_unilabel_seas_vacc_upsamp_test_roc,
) = (
    metric(y_h1n1_vacc_test, logreg_unilabel_seas_vacc_upsamp_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# KNN, upsampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_knn_unilabel_seas_vacc_upsamp_test_acc,
    h1n1_knn_unilabel_seas_vacc_upsamp_test_recall,
    h1n1_knn_unilabel_seas_vacc_upsamp_test_precision,
    h1n1_knn_unilabel_seas_vacc_upsamp_test_f1,
    h1n1_knn_unilabel_seas_vacc_upsamp_test_roc,
) = (
    metric(y_h1n1_vacc_test, knn_unilabel_seas_vacc_upsamp_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# Random forest, upsampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_acc,
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_recall,
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_precision,
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_f1,
    h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_roc,
) = (
    metric(y_h1n1_vacc_test, rand_forst_unilabel_seas_vacc_upsamp_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# SVM, upsampling: test-set accuracy, recall, precision, F1 and ROC AUC
# (inputs are y_h1n1_vacc_test and the upsamp test preds).
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_svm_unilabel_seas_vacc_upsamp_test_acc,
    h1n1_svm_unilabel_seas_vacc_upsamp_test_recall,
    h1n1_svm_unilabel_seas_vacc_upsamp_test_precision,
    h1n1_svm_unilabel_seas_vacc_upsamp_test_f1,
    h1n1_svm_unilabel_seas_vacc_upsamp_test_roc,
) = (
    metric(y_h1n1_vacc_test, svm_unilabel_seas_vacc_upsamp_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#### Train data--downsampling

In [None]:
# Logistic regression, downsampling: train-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): scored against y_h1n1_vacc_train, presumably the non-resampled target —
# confirm the downsamp train preds were made on the matching (original) train features.
(
    h1n1_logreg_unilabel_seas_vacc_downsamp_train_acc,
    h1n1_logreg_unilabel_seas_vacc_downsamp_train_recall,
    h1n1_logreg_unilabel_seas_vacc_downsamp_train_precision,
    h1n1_logreg_unilabel_seas_vacc_downsamp_train_f1,
    h1n1_logreg_unilabel_seas_vacc_downsamp_train_roc,
) = (
    metric(y_h1n1_vacc_train, logreg_unilabel_seas_vacc_downsamp_trainpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# KNN, downsampling: train-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): scored against y_h1n1_vacc_train, presumably the non-resampled target —
# confirm the downsamp train preds were made on the matching (original) train features.
(
    h1n1_knn_unilabel_seas_vacc_downsamp_train_acc,
    h1n1_knn_unilabel_seas_vacc_downsamp_train_recall,
    h1n1_knn_unilabel_seas_vacc_downsamp_train_precision,
    h1n1_knn_unilabel_seas_vacc_downsamp_train_f1,
    h1n1_knn_unilabel_seas_vacc_downsamp_train_roc,
) = (
    metric(y_h1n1_vacc_train, knn_unilabel_seas_vacc_downsamp_trainpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# Random forest, downsampling: train-set accuracy, recall, precision, F1 and ROC AUC.
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): scored against y_h1n1_vacc_train, presumably the non-resampled target —
# confirm the downsamp train preds were made on the matching (original) train features.
(
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_acc,
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_recall,
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_precision,
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_f1,
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_roc,
) = (
    metric(y_h1n1_vacc_train, rand_forst_unilabel_seas_vacc_downsamp_trainpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# SVM, downsampling: train-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): scored against y_h1n1_vacc_train, presumably the non-resampled target —
# confirm the downsamp train preds were made on the matching (original) train features.
(
    h1n1_svm_unilabel_seas_vacc_downsamp_train_acc,
    h1n1_svm_unilabel_seas_vacc_downsamp_train_recall,
    h1n1_svm_unilabel_seas_vacc_downsamp_train_precision,
    h1n1_svm_unilabel_seas_vacc_downsamp_train_f1,
    h1n1_svm_unilabel_seas_vacc_downsamp_train_roc,
) = (
    metric(y_h1n1_vacc_train, svm_unilabel_seas_vacc_downsamp_trainpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#### Test data--downsampling

In [None]:
# Logistic regression, downsampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_logreg_unilabel_seas_vacc_downsamp_test_acc,
    h1n1_logreg_unilabel_seas_vacc_downsamp_test_recall,
    h1n1_logreg_unilabel_seas_vacc_downsamp_test_precision,
    h1n1_logreg_unilabel_seas_vacc_downsamp_test_f1,
    h1n1_logreg_unilabel_seas_vacc_downsamp_test_roc,
) = (
    metric(y_h1n1_vacc_test, logreg_unilabel_seas_vacc_downsamp_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# KNN, downsampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_knn_unilabel_seas_vacc_downsamp_test_acc,
    h1n1_knn_unilabel_seas_vacc_downsamp_test_recall,
    h1n1_knn_unilabel_seas_vacc_downsamp_test_precision,
    h1n1_knn_unilabel_seas_vacc_downsamp_test_f1,
    h1n1_knn_unilabel_seas_vacc_downsamp_test_roc,
) = (
    metric(y_h1n1_vacc_test, knn_unilabel_seas_vacc_downsamp_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# Random forest, downsampling: test-set accuracy, recall, precision, F1 and ROC AUC.
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_acc,
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_recall,
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_precision,
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_f1,
    h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_roc,
) = (
    metric(y_h1n1_vacc_test, rand_forst_unilabel_seas_vacc_downsamp_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# SVM, downsampling: test-set accuracy, recall, precision, F1 and ROC AUC
# (inputs are y_h1n1_vacc_test and the downsamp test preds).
# NOTE(review): roc_auc_score is given the same preds as the other metrics; confirm hard labels vs. scores is intended.
(
    h1n1_svm_unilabel_seas_vacc_downsamp_test_acc,
    h1n1_svm_unilabel_seas_vacc_downsamp_test_recall,
    h1n1_svm_unilabel_seas_vacc_downsamp_test_precision,
    h1n1_svm_unilabel_seas_vacc_downsamp_test_f1,
    h1n1_svm_unilabel_seas_vacc_downsamp_test_roc,
) = (
    metric(y_h1n1_vacc_test, svm_unilabel_seas_vacc_downsamp_testpreds)
    for metric in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

## Tracking the model with MLFlow

### H1N1 vaccine output

#### Logistic regression

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the H1N1 experiment
# (swap the experiment constant when tracking a different experiment).
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
# Run name identifies the model used: logistic regression, unilabel output, H1N1.
name = "logreg_unilabel_h1n1"
# start_run() returns the run it just activated, so keep that handle directly.
run = mlflow.start_run(run_name=name)

In [None]:
# Echo the id of the MLflow run held in `run` (set in the setup cell above).
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Parameters recorded with the run so the experiment can be re-run later
# (e.g. engineered features, model hyperparameters). Adjust per run.
# Values are strings; "None" here is the literal string, not Python's None.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and train/test metrics for the active run, then close it.
try:
    mlflow.log_params(params)
    # Tag records which vaccine is part of the features used for this run.
    mlflow.set_tag("Vaccines in features", "Seasonal")
    # Single batched call for all ten metrics.
    mlflow.log_metrics({
        "train -ROC": h1n1_logreg_unilabel_seas_vacc_train_roc,
        "test -ROC": h1n1_logreg_unilabel_seas_vacc_test_roc,
        "train -accuracy": h1n1_logreg_unilabel_seas_vacc_train_acc,
        "test -accuracy": h1n1_logreg_unilabel_seas_vacc_test_acc,
        "train -recall": h1n1_logreg_unilabel_seas_vacc_train_recall,
        "test -recall": h1n1_logreg_unilabel_seas_vacc_test_recall,
        "train -precision": h1n1_logreg_unilabel_seas_vacc_train_precision,
        "test -precision": h1n1_logreg_unilabel_seas_vacc_test_precision,
        "train -f1": h1n1_logreg_unilabel_seas_vacc_train_f1,
        "test -f1": h1n1_logreg_unilabel_seas_vacc_test_f1,
    })
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    # Close the run as FAILED so a logging error does not leave it active
    # (an active run would make the next mlflow.start_run() raise).
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Connect to the MLflow tracking server and select the H1N1 experiment
# (swap the experiment constant when tracking a different experiment).
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
# Run name identifies the model used: logistic regression, unilabel output, H1N1.
name = "logreg_unilabel_h1n1"
# start_run() returns the run it just activated, so keep that handle directly.
run = mlflow.start_run(run_name=name)
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Parameters recorded with the run so the experiment can be re-run later
# (e.g. engineered features, model hyperparameters). Adjust per run.
# Values are strings; "None" here is the literal string, not Python's None.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and train/test metrics for the active run, then close it.
try:
    mlflow.log_params(params)
    # Tag records which vaccine is part of the features used for this run.
    mlflow.set_tag("Vaccines in features", "Seasonal")
    # Single batched call for all ten metrics.
    mlflow.log_metrics({
        "train -ROC": h1n1_logreg_unilabel_seas_vacc_upsamp_train_roc,
        "test -ROC": h1n1_logreg_unilabel_seas_vacc_upsamp_test_roc,
        "train -accuracy": h1n1_logreg_unilabel_seas_vacc_upsamp_train_acc,
        "test -accuracy": h1n1_logreg_unilabel_seas_vacc_upsamp_test_acc,
        "train -recall": h1n1_logreg_unilabel_seas_vacc_upsamp_train_recall,
        "test -recall": h1n1_logreg_unilabel_seas_vacc_upsamp_test_recall,
        "train -precision": h1n1_logreg_unilabel_seas_vacc_upsamp_train_precision,
        "test -precision": h1n1_logreg_unilabel_seas_vacc_upsamp_test_precision,
        "train -f1": h1n1_logreg_unilabel_seas_vacc_upsamp_train_f1,
        "test -f1": h1n1_logreg_unilabel_seas_vacc_upsamp_test_f1,
    })
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    # Close the run as FAILED so a logging error does not leave it active
    # (an active run would make the next mlflow.start_run() raise).
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

*Downsampling*

In [None]:
# Connect to the MLflow tracking server and select the H1N1 experiment
# (swap the experiment constant when tracking a different experiment).
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
# Run name identifies the model used: logistic regression, unilabel output, H1N1.
name = "logreg_unilabel_h1n1"
# start_run() returns the run it just activated, so keep that handle directly.
run = mlflow.start_run(run_name=name)
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Parameters recorded with the run so the experiment can be re-run later
# (e.g. engineered features, model hyperparameters). Adjust per run.
# Values are strings; "None" here is the literal string, not Python's None.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and train/test metrics for the active run, then close it.
try:
    mlflow.log_params(params)
    # Tag records which vaccine is part of the features used for this run.
    mlflow.set_tag("Vaccines in features", "Seasonal")
    # Single batched call for all ten metrics.
    mlflow.log_metrics({
        "train -ROC": h1n1_logreg_unilabel_seas_vacc_downsamp_train_roc,
        "test -ROC": h1n1_logreg_unilabel_seas_vacc_downsamp_test_roc,
        "train -accuracy": h1n1_logreg_unilabel_seas_vacc_downsamp_train_acc,
        "test -accuracy": h1n1_logreg_unilabel_seas_vacc_downsamp_test_acc,
        "train -recall": h1n1_logreg_unilabel_seas_vacc_downsamp_train_recall,
        "test -recall": h1n1_logreg_unilabel_seas_vacc_downsamp_test_recall,
        "train -precision": h1n1_logreg_unilabel_seas_vacc_downsamp_train_precision,
        "test -precision": h1n1_logreg_unilabel_seas_vacc_downsamp_test_precision,
        "train -f1": h1n1_logreg_unilabel_seas_vacc_downsamp_train_f1,
        "test -f1": h1n1_logreg_unilabel_seas_vacc_downsamp_test_f1,
    })
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    # Close the run as FAILED so a logging error does not leave it active
    # (an active run would make the next mlflow.start_run() raise).
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

#### KNN

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the H1N1 experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
# Run name identifies the model used: KNN, unilabel output, H1N1.
name = "knn_unilabel_h1n1"
# start_run() returns the run it just activated, so keep that handle directly.
run = mlflow.start_run(run_name=name)

In [None]:
# Echo the id of the MLflow run held in `run` (set in the setup cell above).
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Parameters recorded with the run so the experiment can be re-run later
# (e.g. engineered features, model hyperparameters). Adjust per run.
# Values are strings; "None" here is the literal string, not Python's None.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and train/test metrics for the active run, then close it.
try:
    mlflow.log_params(params)
    # Tag records which vaccine is part of the features used for this run.
    mlflow.set_tag("Vaccines in features", "Seasonal")
    # Single batched call for all ten metrics.
    mlflow.log_metrics({
        "train -ROC": h1n1_knn_unilabel_seas_vacc_train_roc,
        "test -ROC": h1n1_knn_unilabel_seas_vacc_test_roc,
        "train -accuracy": h1n1_knn_unilabel_seas_vacc_train_acc,
        "test -accuracy": h1n1_knn_unilabel_seas_vacc_test_acc,
        "train -recall": h1n1_knn_unilabel_seas_vacc_train_recall,
        "test -recall": h1n1_knn_unilabel_seas_vacc_test_recall,
        "train -precision": h1n1_knn_unilabel_seas_vacc_train_precision,
        "test -precision": h1n1_knn_unilabel_seas_vacc_test_precision,
        "train -f1": h1n1_knn_unilabel_seas_vacc_train_f1,
        "test -f1": h1n1_knn_unilabel_seas_vacc_test_f1,
    })
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    # Close the run as FAILED so a logging error does not leave it active
    # (an active run would make the next mlflow.start_run() raise).
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Connect to the MLflow tracking server and select the H1N1 experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
# Run name identifies the model used: KNN, unilabel output, H1N1.
name = "knn_unilabel_h1n1"
# start_run() returns the run it just activated, so keep that handle directly.
run = mlflow.start_run(run_name=name)
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Parameters recorded with the run so the experiment can be re-run later
# (e.g. engineered features, model hyperparameters). Adjust per run.
# Values are strings; "None" here is the literal string, not Python's None.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and train/test metrics for the active run, then close it.
try:
    mlflow.log_params(params)
    # Tag records which vaccine is part of the features used for this run.
    mlflow.set_tag("Vaccines in features", "Seasonal")
    # Single batched call for all ten metrics.
    mlflow.log_metrics({
        "train -ROC": h1n1_knn_unilabel_seas_vacc_upsamp_train_roc,
        "test -ROC": h1n1_knn_unilabel_seas_vacc_upsamp_test_roc,
        "train -accuracy": h1n1_knn_unilabel_seas_vacc_upsamp_train_acc,
        "test -accuracy": h1n1_knn_unilabel_seas_vacc_upsamp_test_acc,
        "train -recall": h1n1_knn_unilabel_seas_vacc_upsamp_train_recall,
        "test -recall": h1n1_knn_unilabel_seas_vacc_upsamp_test_recall,
        "train -precision": h1n1_knn_unilabel_seas_vacc_upsamp_train_precision,
        "test -precision": h1n1_knn_unilabel_seas_vacc_upsamp_test_precision,
        "train -f1": h1n1_knn_unilabel_seas_vacc_upsamp_train_f1,
        "test -f1": h1n1_knn_unilabel_seas_vacc_upsamp_test_f1,
    })
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    # Close the run as FAILED so a logging error does not leave it active
    # (an active run would make the next mlflow.start_run() raise).
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

*Downsampling*

In [None]:
# Connect to the MLflow tracking server and select the H1N1 experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
# Run name identifies the model used: KNN, unilabel output, H1N1.
name = "knn_unilabel_h1n1"
# start_run() returns the run it just activated, so keep that handle directly.
run = mlflow.start_run(run_name=name)
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Parameters recorded with the run so the experiment can be re-run later
# (e.g. engineered features, model hyperparameters). Adjust per run.
# Values are strings; "None" here is the literal string, not Python's None.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and train/test metrics for the active run, then close it.
try:
    mlflow.log_params(params)
    # Tag records which vaccine is part of the features used for this run.
    mlflow.set_tag("Vaccines in features", "Seasonal")
    # Single batched call for all ten metrics.
    mlflow.log_metrics({
        "train -ROC": h1n1_knn_unilabel_seas_vacc_downsamp_train_roc,
        "test -ROC": h1n1_knn_unilabel_seas_vacc_downsamp_test_roc,
        "train -accuracy": h1n1_knn_unilabel_seas_vacc_downsamp_train_acc,
        "test -accuracy": h1n1_knn_unilabel_seas_vacc_downsamp_test_acc,
        "train -recall": h1n1_knn_unilabel_seas_vacc_downsamp_train_recall,
        "test -recall": h1n1_knn_unilabel_seas_vacc_downsamp_test_recall,
        "train -precision": h1n1_knn_unilabel_seas_vacc_downsamp_train_precision,
        "test -precision": h1n1_knn_unilabel_seas_vacc_downsamp_test_precision,
        "train -f1": h1n1_knn_unilabel_seas_vacc_downsamp_train_f1,
        "test -f1": h1n1_knn_unilabel_seas_vacc_downsamp_test_f1,
    })
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    # Close the run as FAILED so a logging error does not leave it active
    # (an active run would make the next mlflow.start_run() raise).
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

#### Random forest

*No resampling*

In [None]:
# Connect to the MLflow tracking server and select the H1N1 experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
# Run name identifies the model used: random forest, unilabel output, H1N1.
name = "rand_forst_unilabel_h1n1"
# start_run() returns the run it just activated, so keep that handle directly.
run = mlflow.start_run(run_name=name)

In [None]:
# Echo the id of the MLflow run held in `run` (set in the setup cell above).
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Parameters recorded with the run so the experiment can be re-run later
# (e.g. engineered features, model hyperparameters). Adjust per run.
# Values are strings; "None" here is the literal string, not Python's None.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and train/test metrics for the active run, then close it.
try:
    mlflow.log_params(params)
    # Tag records which vaccine is part of the features used for this run.
    mlflow.set_tag("Vaccines in features", "Seasonal")
    # Single batched call for all ten metrics.
    mlflow.log_metrics({
        "train -ROC": h1n1_rand_forst_unilabel_seas_vacc_train_roc,
        "test -ROC": h1n1_rand_forst_unilabel_seas_vacc_test_roc,
        "train -accuracy": h1n1_rand_forst_unilabel_seas_vacc_train_acc,
        "test -accuracy": h1n1_rand_forst_unilabel_seas_vacc_test_acc,
        "train -recall": h1n1_rand_forst_unilabel_seas_vacc_train_recall,
        "test -recall": h1n1_rand_forst_unilabel_seas_vacc_test_recall,
        "train -precision": h1n1_rand_forst_unilabel_seas_vacc_train_precision,
        "test -precision": h1n1_rand_forst_unilabel_seas_vacc_test_precision,
        "train -f1": h1n1_rand_forst_unilabel_seas_vacc_train_f1,
        "test -f1": h1n1_rand_forst_unilabel_seas_vacc_test_f1,
    })
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    # Close the run as FAILED so a logging error does not leave it active
    # (an active run would make the next mlflow.start_run() raise).
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Connect to the MLflow tracking server and select the H1N1 experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
# Run name identifies the model used: random forest, unilabel output, H1N1.
name = "rand_forst_unilabel_h1n1"
# start_run() returns the run it just activated, so keep that handle directly.
run = mlflow.start_run(run_name=name)
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Parameters recorded with the run so the experiment can be re-run later
# (e.g. engineered features, model hyperparameters). Adjust per run.
# Values are strings; "None" here is the literal string, not Python's None.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and train/test metrics for the active run, then close it.
try:
    mlflow.log_params(params)
    # Tag records which vaccine is part of the features used for this run.
    mlflow.set_tag("Vaccines in features", "Seasonal")
    # Single batched call for all ten metrics.
    mlflow.log_metrics({
        "train -ROC": h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_roc,
        "test -ROC": h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_roc,
        "train -accuracy": h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_acc,
        "test -accuracy": h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_acc,
        "train -recall": h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_recall,
        "test -recall": h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_recall,
        "train -precision": h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_precision,
        "test -precision": h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_precision,
        "train -f1": h1n1_rand_forst_unilabel_seas_vacc_upsamp_train_f1,
        "test -f1": h1n1_rand_forst_unilabel_seas_vacc_upsamp_test_f1,
    })
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    # Close the run as FAILED so a logging error does not leave it active
    # (an active run would make the next mlflow.start_run() raise).
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

*Downsampling*

In [None]:
# Connect to the MLflow tracking server and select the H1N1 experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
# Run name identifies the model used: random forest, unilabel output, H1N1.
name = "rand_forst_unilabel_h1n1"
# start_run() returns the run it just activated, so keep that handle directly.
run = mlflow.start_run(run_name=name)
print("Active run_id: {}".format(run.info.run_id))

In [None]:
# Parameters recorded with the run so the experiment can be re-run later
# (e.g. engineered features, model hyperparameters). Adjust per run.
# Values are strings; "None" here is the literal string, not Python's None.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag and train/test metrics for the active run, then close it.
try:
    mlflow.log_params(params)
    # Tag records which vaccine is part of the features used for this run.
    mlflow.set_tag("Vaccines in features", "Seasonal")
    # Single batched call for all ten metrics.
    mlflow.log_metrics({
        "train -ROC": h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_roc,
        "test -ROC": h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_roc,
        "train -accuracy": h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_acc,
        "test -accuracy": h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_acc,
        "train -recall": h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_recall,
        "test -recall": h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_recall,
        "train -precision": h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_precision,
        "test -precision": h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_precision,
        "train -f1": h1n1_rand_forst_unilabel_seas_vacc_downsamp_train_f1,
        "test -f1": h1n1_rand_forst_unilabel_seas_vacc_downsamp_test_f1,
    })
    #mlflow.log_artifact("../models")
    #mlflow.sklearn.log_model(reg, "model")
except Exception:
    # Close the run as FAILED so a logging error does not leave it active
    # (an active run would make the next mlflow.start_run() raise).
    mlflow.end_run(status="FAILED")
    raise
else:
    mlflow.end_run()

#### SVM

*No resampling*

In [None]:
# Connect to the MLflow tracking server, select the H1N1 experiment and open a run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
name = 'svm_unilabel_h1n1'  # run name: model used + output type + target vaccine
run = mlflow.start_run(run_name=name)  # start_run hands back the run that is now active

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run parameters: everything needed to re-run this experiment
# (data cleaning, class balancing, model hyperparameters). Adjust per run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag which vaccine column was among the features,
# record the train/test metrics pairwise, then close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "Seasonal")
for metric, (train_value, test_value) in {
    "ROC": (h1n1_svm_unilabel_seas_vacc_train_roc,
            h1n1_svm_unilabel_seas_vacc_test_roc),
    "accuracy": (h1n1_svm_unilabel_seas_vacc_train_acc,
                 h1n1_svm_unilabel_seas_vacc_test_acc),
    "recall": (h1n1_svm_unilabel_seas_vacc_train_recall,
               h1n1_svm_unilabel_seas_vacc_test_recall),
    "precision": (h1n1_svm_unilabel_seas_vacc_train_precision,
                  h1n1_svm_unilabel_seas_vacc_test_precision),
    "f1": (h1n1_svm_unilabel_seas_vacc_train_f1,
           h1n1_svm_unilabel_seas_vacc_test_f1),
}.items():
    mlflow.log_metric("train -" + metric, train_value)
    mlflow.log_metric("test -" + metric, test_value)
# Artifact / model logging is currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

*Upsampling*

In [None]:
# Connect to the MLflow tracking server, select the H1N1 experiment and open a run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
name = 'svm_unilabel_h1n1'  # run name: model used + output type + target vaccine
run = mlflow.start_run(run_name=name)  # start_run hands back the run that is now active
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run parameters: everything needed to re-run this experiment
# (data cleaning, class balancing, model hyperparameters). Adjust per run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Upsampling",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag which vaccine column was among the features,
# record the train/test metrics pairwise, then close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "Seasonal")
for metric, (train_value, test_value) in {
    "ROC": (h1n1_svm_unilabel_seas_vacc_upsamp_train_roc,
            h1n1_svm_unilabel_seas_vacc_upsamp_test_roc),
    "accuracy": (h1n1_svm_unilabel_seas_vacc_upsamp_train_acc,
                 h1n1_svm_unilabel_seas_vacc_upsamp_test_acc),
    "recall": (h1n1_svm_unilabel_seas_vacc_upsamp_train_recall,
               h1n1_svm_unilabel_seas_vacc_upsamp_test_recall),
    "precision": (h1n1_svm_unilabel_seas_vacc_upsamp_train_precision,
                  h1n1_svm_unilabel_seas_vacc_upsamp_test_precision),
    "f1": (h1n1_svm_unilabel_seas_vacc_upsamp_train_f1,
           h1n1_svm_unilabel_seas_vacc_upsamp_test_f1),
}.items():
    mlflow.log_metric("train -" + metric, train_value)
    mlflow.log_metric("test -" + metric, test_value)
# Artifact / model logging is currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

*Downsampling*

In [None]:
# Connect to the MLflow tracking server, select the H1N1 experiment and open a run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_h1n1)
name = 'svm_unilabel_h1n1'  # run name: model used + output type + target vaccine
run = mlflow.start_run(run_name=name)  # start_run hands back the run that is now active
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run parameters: everything needed to re-run this experiment
# (data cleaning, class balancing, model hyperparameters). Adjust per run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "Downsampling",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag which vaccine column was among the features,
# record the train/test metrics pairwise, then close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "Seasonal")
for metric, (train_value, test_value) in {
    "ROC": (h1n1_svm_unilabel_seas_vacc_downsamp_train_roc,
            h1n1_svm_unilabel_seas_vacc_downsamp_test_roc),
    "accuracy": (h1n1_svm_unilabel_seas_vacc_downsamp_train_acc,
                 h1n1_svm_unilabel_seas_vacc_downsamp_test_acc),
    "recall": (h1n1_svm_unilabel_seas_vacc_downsamp_train_recall,
               h1n1_svm_unilabel_seas_vacc_downsamp_test_recall),
    "precision": (h1n1_svm_unilabel_seas_vacc_downsamp_train_precision,
                  h1n1_svm_unilabel_seas_vacc_downsamp_test_precision),
    "f1": (h1n1_svm_unilabel_seas_vacc_downsamp_train_f1,
           h1n1_svm_unilabel_seas_vacc_downsamp_test_f1),
}.items():
    mlflow.log_metric("train -" + metric, train_value)
    mlflow.log_metric("test -" + metric, test_value)
# Artifact / model logging is currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

# TheFluShot_seasonal: Single Label Modelling, output seasonal vaccine

## Single Label Modelling, output seasonal vaccine -> H1N1 Flu Vaccine not in features

The cat_features_no_vacc and X_no_vacc variables and the preprocessor remain the same from the multilabel modelling:

Set up the target variable:

In [None]:
# Target for this section: a copy of the 'seasonal_vaccine' column of df.
y_seas_vacc = df['seasonal_vaccine'].copy()

In [None]:
# Convert the target with .to_numpy(); the bare name on the last line displays it in the notebook.
y_seas_vacc = y_seas_vacc.to_numpy()
y_seas_vacc

In [None]:
# Sanity check: number of target values.
len(y_seas_vacc)

Performing test-train split:

In [None]:
# Stratified 80/20 split on the seasonal target; random_state=RSEED fixes the shuffle.
(
    X_no_vacc_train,
    X_no_vacc_test,
    y_seas_vacc_train,
    y_seas_vacc_test,
) = train_test_split(
    X_no_vacc,
    y_seas_vacc,
    test_size=0.2,
    random_state=RSEED,
    stratify=y_seas_vacc,
)

In [None]:
# Report the shape of every piece of the split.
for label, part in (
    ('X_no_vacc_train shape:', X_no_vacc_train),
    ('X_no_vacc_test shape:', X_no_vacc_test),
    ('y_seas_vacc_train:', y_seas_vacc_train),
    ('y_seas_vacc_test:', y_seas_vacc_test),
):
    print(label, part.shape)

Setting up the pipeline for each model:

In [None]:
# One two-step pipeline (preprocessor -> estimator) per model, in the order
# logistic regression, KNN, random forest, SVM.
# NOTE(review): all four pipelines hold the same `preprocessor` object, and the
# estimator objects come from earlier in the notebook. Pipeline presumably does not
# copy its steps, so fitting here would re-fit those shared objects -- confirm no
# earlier fitted model is still needed afterwards.
(
    logreg_unilabel_pipeline,
    knn_unilabel_pipeline,
    rand_forst_unilabel_pipeline,
    svm_unilabel_pipeline,
) = (
    Pipeline([
        ("preprocessor", preprocessor),
        ("estimators", estimator),
    ])
    for estimator in (logreg, knn, rand_forst, svm)
)

Training the models:

In [None]:
# Fit each pipeline on the training split (order: logreg, KNN, random forest, SVM)
# and keep whatever .fit() returns under the per-model name.
(
    logreg_unilabel_no_vacc,
    knn_unilabel_no_vacc,
    rand_forst_unilabel_no_vacc,
    svm_unilabel_no_vacc,
) = (
    pipeline.fit(X_no_vacc_train, y_seas_vacc_train)
    for pipeline in (
        logreg_unilabel_pipeline,
        knn_unilabel_pipeline,
        rand_forst_unilabel_pipeline,
        svm_unilabel_pipeline,
    )
)

In [None]:
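# TODO: decide whether to evaluate with cross-validated predictions (cross_val_predict)
# rather than predicting on the same data the models were fitted on.
# The call below is a template; adapt its names to the pipeline and split in use.

#y_train_predicted = cross_val_predict(full_pipeline, X_train, y_train, cv=5)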

Making predictions based on train and test data:

In [None]:
# Class predictions on the train and test features, one (train, test) pair per model.

# Logistic regression
logreg_unilabel_no_vacc_trainpreds, logreg_unilabel_no_vacc_testpreds = (
    logreg_unilabel_no_vacc.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# KNN
knn_unilabel_no_vacc_trainpreds, knn_unilabel_no_vacc_testpreds = (
    knn_unilabel_no_vacc.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# Random forest
rand_forst_unilabel_no_vacc_trainpreds, rand_forst_unilabel_no_vacc_testpreds = (
    rand_forst_unilabel_no_vacc.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

# SVM
svm_unilabel_no_vacc_trainpreds, svm_unilabel_no_vacc_testpreds = (
    svm_unilabel_no_vacc.predict(features)
    for features in (X_no_vacc_train, X_no_vacc_test)
)

### Model evaluation

#### Train data

In [None]:
# Logistic regression -- metrics on the training split (accuracy, recall, precision, F1, ROC AUC).
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_logreg_unilabel_no_vacc_train_acc,
    seas_logreg_unilabel_no_vacc_train_recall,
    seas_logreg_unilabel_no_vacc_train_precision,
    seas_logreg_unilabel_no_vacc_train_f1,
    seas_logreg_unilabel_no_vacc_train_roc,
) = (
    scorer(y_seas_vacc_train, logreg_unilabel_no_vacc_trainpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# KNN -- metrics on the training split (accuracy, recall, precision, F1, ROC AUC).
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_knn_unilabel_no_vacc_train_acc,
    seas_knn_unilabel_no_vacc_train_recall,
    seas_knn_unilabel_no_vacc_train_precision,
    seas_knn_unilabel_no_vacc_train_f1,
    seas_knn_unilabel_no_vacc_train_roc,
) = (
    scorer(y_seas_vacc_train, knn_unilabel_no_vacc_trainpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# Random forest -- metrics on the training split (accuracy, recall, precision, F1, ROC AUC).
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_rand_forst_unilabel_no_vacc_train_acc,
    seas_rand_forst_unilabel_no_vacc_train_recall,
    seas_rand_forst_unilabel_no_vacc_train_precision,
    seas_rand_forst_unilabel_no_vacc_train_f1,
    seas_rand_forst_unilabel_no_vacc_train_roc,
) = (
    scorer(y_seas_vacc_train, rand_forst_unilabel_no_vacc_trainpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# SVM -- metrics on the training split (accuracy, recall, precision, F1, ROC AUC).
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_svm_unilabel_no_vacc_train_acc,
    seas_svm_unilabel_no_vacc_train_recall,
    seas_svm_unilabel_no_vacc_train_precision,
    seas_svm_unilabel_no_vacc_train_f1,
    seas_svm_unilabel_no_vacc_train_roc,
) = (
    scorer(y_seas_vacc_train, svm_unilabel_no_vacc_trainpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#### Test data

In [None]:
# Logistic regression -- metrics on the test split (accuracy, recall, precision, F1, ROC AUC).
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_logreg_unilabel_no_vacc_test_acc,
    seas_logreg_unilabel_no_vacc_test_recall,
    seas_logreg_unilabel_no_vacc_test_precision,
    seas_logreg_unilabel_no_vacc_test_f1,
    seas_logreg_unilabel_no_vacc_test_roc,
) = (
    scorer(y_seas_vacc_test, logreg_unilabel_no_vacc_testpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# KNN -- metrics on the test split (accuracy, recall, precision, F1, ROC AUC).
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_knn_unilabel_no_vacc_test_acc,
    seas_knn_unilabel_no_vacc_test_recall,
    seas_knn_unilabel_no_vacc_test_precision,
    seas_knn_unilabel_no_vacc_test_f1,
    seas_knn_unilabel_no_vacc_test_roc,
) = (
    scorer(y_seas_vacc_test, knn_unilabel_no_vacc_testpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

# Print the stored scores to two decimals.
for label, value in (
    ("Accuracy", seas_knn_unilabel_no_vacc_test_acc),
    ("Recall", seas_knn_unilabel_no_vacc_test_recall),
    ("Precision", seas_knn_unilabel_no_vacc_test_precision),
    ("F1", seas_knn_unilabel_no_vacc_test_f1),
    ("ROC", seas_knn_unilabel_no_vacc_test_roc),
):
    print("{}: {:.2f}".format(label, value))

In [None]:
# Unrounded KNN test metrics, one per line: accuracy, recall, precision, F1, ROC AUC.
print(
    seas_knn_unilabel_no_vacc_test_acc,
    seas_knn_unilabel_no_vacc_test_recall,
    seas_knn_unilabel_no_vacc_test_precision,
    seas_knn_unilabel_no_vacc_test_f1,
    seas_knn_unilabel_no_vacc_test_roc,
    sep="\n",
)

In [None]:
# Random forest -- metrics on the test split (accuracy, recall, precision, F1, ROC AUC).
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_rand_forst_unilabel_no_vacc_test_acc,
    seas_rand_forst_unilabel_no_vacc_test_recall,
    seas_rand_forst_unilabel_no_vacc_test_precision,
    seas_rand_forst_unilabel_no_vacc_test_f1,
    seas_rand_forst_unilabel_no_vacc_test_roc,
) = (
    scorer(y_seas_vacc_test, rand_forst_unilabel_no_vacc_testpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

# Print the stored scores to two decimals.
for label, value in (
    ("Accuracy", seas_rand_forst_unilabel_no_vacc_test_acc),
    ("Recall", seas_rand_forst_unilabel_no_vacc_test_recall),
    ("Precision", seas_rand_forst_unilabel_no_vacc_test_precision),
    ("F1", seas_rand_forst_unilabel_no_vacc_test_f1),
    ("ROC", seas_rand_forst_unilabel_no_vacc_test_roc),
):
    print("{}: {:.2f}".format(label, value))

In [None]:
# Unrounded random-forest test metrics, one per line: accuracy, recall, precision, F1, ROC AUC.
print(
    seas_rand_forst_unilabel_no_vacc_test_acc,
    seas_rand_forst_unilabel_no_vacc_test_recall,
    seas_rand_forst_unilabel_no_vacc_test_precision,
    seas_rand_forst_unilabel_no_vacc_test_f1,
    seas_rand_forst_unilabel_no_vacc_test_roc,
    sep="\n",
)

In [None]:
# SVM -- metrics on the test split (accuracy, recall, precision, F1, ROC AUC).
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_svm_unilabel_no_vacc_test_acc,
    seas_svm_unilabel_no_vacc_test_recall,
    seas_svm_unilabel_no_vacc_test_precision,
    seas_svm_unilabel_no_vacc_test_f1,
    seas_svm_unilabel_no_vacc_test_roc,
) = (
    scorer(y_seas_vacc_test, svm_unilabel_no_vacc_testpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

## Tracking the model with MLFlow

### Seasonal vaccine output

#### Logistic regression

In [None]:
# Connect to the MLflow tracking server, select the seasonal experiment and open a run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_seasonal)  # adjust per experiment
name = 'logreg_unilabel_seasonal'  # run name: model used + output type + target vaccine
run = mlflow.start_run(run_name=name)  # start_run hands back the run that is now active

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run parameters: everything needed to re-run this experiment
# (data cleaning, class balancing, model hyperparameters). Adjust per run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag which vaccine column was among the features,
# record the train/test metrics pairwise, then close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")
for metric, (train_value, test_value) in {
    "ROC": (seas_logreg_unilabel_no_vacc_train_roc,
            seas_logreg_unilabel_no_vacc_test_roc),
    "accuracy": (seas_logreg_unilabel_no_vacc_train_acc,
                 seas_logreg_unilabel_no_vacc_test_acc),
    "recall": (seas_logreg_unilabel_no_vacc_train_recall,
               seas_logreg_unilabel_no_vacc_test_recall),
    "precision": (seas_logreg_unilabel_no_vacc_train_precision,
                  seas_logreg_unilabel_no_vacc_test_precision),
    "f1": (seas_logreg_unilabel_no_vacc_train_f1,
           seas_logreg_unilabel_no_vacc_test_f1),
}.items():
    mlflow.log_metric("train -" + metric, train_value)
    mlflow.log_metric("test -" + metric, test_value)
# Artifact / model logging is currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

#### KNN

In [None]:
# Connect to the MLflow tracking server, select the seasonal experiment and open a run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_seasonal)
name = 'knn_unilabel_seasonal'  # run name: model used + output type + target vaccine
run = mlflow.start_run(run_name=name)  # start_run hands back the run that is now active

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run parameters: everything needed to re-run this experiment
# (data cleaning, class balancing, model hyperparameters). Adjust per run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag which vaccine column was among the features,
# record the train/test metrics pairwise, then close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")
for metric, (train_value, test_value) in {
    "ROC": (seas_knn_unilabel_no_vacc_train_roc,
            seas_knn_unilabel_no_vacc_test_roc),
    "accuracy": (seas_knn_unilabel_no_vacc_train_acc,
                 seas_knn_unilabel_no_vacc_test_acc),
    "recall": (seas_knn_unilabel_no_vacc_train_recall,
               seas_knn_unilabel_no_vacc_test_recall),
    "precision": (seas_knn_unilabel_no_vacc_train_precision,
                  seas_knn_unilabel_no_vacc_test_precision),
    "f1": (seas_knn_unilabel_no_vacc_train_f1,
           seas_knn_unilabel_no_vacc_test_f1),
}.items():
    mlflow.log_metric("train -" + metric, train_value)
    mlflow.log_metric("test -" + metric, test_value)
# Artifact / model logging is currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

#### Random forest

In [None]:
# Connect to the MLflow tracking server, select the seasonal experiment and open a run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_seasonal)
name = 'rand_forst_unilabel_seasonal'  # run name: model used + output type + target vaccine
run = mlflow.start_run(run_name=name)  # start_run hands back the run that is now active

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run parameters: everything needed to re-run this experiment
# (data cleaning, class balancing, model hyperparameters). Adjust per run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag which vaccine column was among the features,
# record the train/test metrics pairwise, then close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")
for metric, (train_value, test_value) in {
    "ROC": (seas_rand_forst_unilabel_no_vacc_train_roc,
            seas_rand_forst_unilabel_no_vacc_test_roc),
    "accuracy": (seas_rand_forst_unilabel_no_vacc_train_acc,
                 seas_rand_forst_unilabel_no_vacc_test_acc),
    "recall": (seas_rand_forst_unilabel_no_vacc_train_recall,
               seas_rand_forst_unilabel_no_vacc_test_recall),
    "precision": (seas_rand_forst_unilabel_no_vacc_train_precision,
                  seas_rand_forst_unilabel_no_vacc_test_precision),
    "f1": (seas_rand_forst_unilabel_no_vacc_train_f1,
           seas_rand_forst_unilabel_no_vacc_test_f1),
}.items():
    mlflow.log_metric("train -" + metric, train_value)
    mlflow.log_metric("test -" + metric, test_value)
# Artifact / model logging is currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

#### SVM

In [None]:
# Connect to the MLflow tracking server, select the seasonal experiment and open a run.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_seasonal)
name = 'svm_unilabel_seasonal'  # run name: model used + output type + target vaccine
run = mlflow.start_run(run_name=name)  # start_run hands back the run that is now active

In [None]:
# Show the id of the run opened in the previous cell.
print(f"Active run_id: {run.info.run_id}")

In [None]:
# Run parameters: everything needed to re-run this experiment
# (data cleaning, class balancing, model hyperparameters). Adjust per run.
params = {
    "Data cleaning": "Full dataset",
    "Data balancing": "None",
    "Hyperparameters": "None",
}

In [None]:
# Log parameters, tag which vaccine column was among the features,
# record the train/test metrics pairwise, then close the run.
mlflow.log_params(params)
mlflow.set_tag("Vaccines in features", "None")
for metric, (train_value, test_value) in {
    "ROC": (seas_svm_unilabel_no_vacc_train_roc,
            seas_svm_unilabel_no_vacc_test_roc),
    "accuracy": (seas_svm_unilabel_no_vacc_train_acc,
                 seas_svm_unilabel_no_vacc_test_acc),
    "recall": (seas_svm_unilabel_no_vacc_train_recall,
               seas_svm_unilabel_no_vacc_test_recall),
    "precision": (seas_svm_unilabel_no_vacc_train_precision,
                  seas_svm_unilabel_no_vacc_test_precision),
    "f1": (seas_svm_unilabel_no_vacc_train_f1,
           seas_svm_unilabel_no_vacc_test_f1),
}.items():
    mlflow.log_metric("train -" + metric, train_value)
    mlflow.log_metric("test -" + metric, test_value)
# Artifact / model logging is currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

## Single Label Modelling, output seasonal vaccine -> H1N1 Flu Vaccine is in features

The y_seas_vacc remains the same from the previous model; the X feature and cat_features (for the preprocessor) need to be adjusted:

In [None]:
# Work on a copy so the original cat_features is left untouched by the removal in the next cell.
cat_features_h1n1_vacc = cat_features.copy()

In [None]:
# Take the target column out of the feature list.
# NOTE(review): presumably a list, in which case re-running this cell without
# re-creating the copy raises ValueError because the entry is already gone -- confirm.
cat_features_h1n1_vacc.remove('seasonal_vaccine')

In [None]:
#NB: only the target column 'seasonal_vaccine' is dropped; every other column of df
#(per this section's heading, including 'h1n1_vaccine') stays in X
X_h1n1_vacc = df.drop(columns=['seasonal_vaccine'])

Performing the train-test split (the same split is used for every model in this single-label section):

In [None]:
# Stratified 80/20 split on the seasonal target; random_state=RSEED fixes the shuffle.
# This re-binds y_seas_vacc_train / y_seas_vacc_test; with the same target, test_size
# and seed as the earlier split they are presumably unchanged -- verify if it matters.
(
    X_h1n1_vacc_train,
    X_h1n1_vacc_test,
    y_seas_vacc_train,
    y_seas_vacc_test,
) = train_test_split(
    X_h1n1_vacc,
    y_seas_vacc,
    test_size=0.2,
    random_state=RSEED,
    stratify=y_seas_vacc,
)

In [None]:
# Report the shape of every piece of the split.
for label, part in (
    ('X_h1n1_vacc_train shape:', X_h1n1_vacc_train),
    ('X_h1n1_vacc_test shape:', X_h1n1_vacc_test),
    ('y_seas_vacc_train:', y_seas_vacc_train),
    ('y_seas_vacc_test:', y_seas_vacc_test),
):
    print(label, part.shape)

The preprocessor is adjusted:

In [None]:
# Preprocessor for this section: cat_pipeline applied to the adjusted
# categorical feature list (cat_features without 'seasonal_vaccine').
cat_transformer = ('cat', cat_pipeline, cat_features_h1n1_vacc)
preprocessor_h1n1_vacc = ColumnTransformer([cat_transformer])

The pipeline is adjusted:

In [None]:
# One two-step pipeline (adjusted preprocessor -> estimator) per model, in the order
# logistic regression, KNN, random forest, SVM.
# NOTE(review): the estimator objects are the same ones placed in the earlier
# pipelines of this notebook. Pipeline presumably does not copy its steps, so
# fitting here would re-fit those shared objects -- confirm no earlier fitted
# model is still needed afterwards.
(
    logreg_h1n1_vacc_unilabel_pipeline,
    knn_h1n1_vacc_unilabel_pipeline,
    rand_forst_h1n1_vacc_unilabel_pipeline,
    svm_h1n1_vacc_unilabel_pipeline,
) = (
    Pipeline([
        ("preprocessor", preprocessor_h1n1_vacc),
        ("estimators", estimator),
    ])
    for estimator in (logreg, knn, rand_forst, svm)
)

Training the models:

In [None]:
# Fit each pipeline on the training split (order: logreg, KNN, random forest, SVM)
# and keep whatever .fit() returns under the per-model name.
(
    logreg_unilabel_h1n1_vacc,
    knn_unilabel_h1n1_vacc,
    rand_forst_unilabel_h1n1_vacc,
    svm_unilabel_h1n1_vacc,
) = (
    pipeline.fit(X_h1n1_vacc_train, y_seas_vacc_train)
    for pipeline in (
        logreg_h1n1_vacc_unilabel_pipeline,
        knn_h1n1_vacc_unilabel_pipeline,
        rand_forst_h1n1_vacc_unilabel_pipeline,
        svm_h1n1_vacc_unilabel_pipeline,
    )
)

In [None]:
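# TODO: decide whether to evaluate with cross-validated predictions (cross_val_predict)
# rather than predicting on the same data the models were fitted on.
# The call below is a template; adapt its names to the pipeline and split in use.

#y_train_predicted = cross_val_predict(full_pipeline, X_train, y_train, cv=5)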

Making predictions based on train and test data:

In [None]:
# Class predictions on the train and test features, one (train, test) pair per model.

# Logistic regression
logreg_unilabel_h1n1_vacc_trainpreds, logreg_unilabel_h1n1_vacc_testpreds = (
    logreg_unilabel_h1n1_vacc.predict(features)
    for features in (X_h1n1_vacc_train, X_h1n1_vacc_test)
)

# KNN
knn_unilabel_h1n1_vacc_trainpreds, knn_unilabel_h1n1_vacc_testpreds = (
    knn_unilabel_h1n1_vacc.predict(features)
    for features in (X_h1n1_vacc_train, X_h1n1_vacc_test)
)

# Random forest
rand_forst_unilabel_h1n1_vacc_trainpreds, rand_forst_unilabel_h1n1_vacc_testpreds = (
    rand_forst_unilabel_h1n1_vacc.predict(features)
    for features in (X_h1n1_vacc_train, X_h1n1_vacc_test)
)

# SVM
svm_unilabel_h1n1_vacc_trainpreds, svm_unilabel_h1n1_vacc_testpreds = (
    svm_unilabel_h1n1_vacc.predict(features)
    for features in (X_h1n1_vacc_train, X_h1n1_vacc_test)
)

### Model evaluation

#### Train data

In [None]:
# Logistic regression -- metrics on the training split (accuracy, recall, precision, F1, ROC AUC).
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_logreg_unilabel_h1n1_vacc_train_acc,
    seas_logreg_unilabel_h1n1_vacc_train_recall,
    seas_logreg_unilabel_h1n1_vacc_train_precision,
    seas_logreg_unilabel_h1n1_vacc_train_f1,
    seas_logreg_unilabel_h1n1_vacc_train_roc,
) = (
    scorer(y_seas_vacc_train, logreg_unilabel_h1n1_vacc_trainpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# KNN -- metrics on the training split (accuracy, recall, precision, F1, ROC AUC).
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_knn_unilabel_h1n1_vacc_train_acc,
    seas_knn_unilabel_h1n1_vacc_train_recall,
    seas_knn_unilabel_h1n1_vacc_train_precision,
    seas_knn_unilabel_h1n1_vacc_train_f1,
    seas_knn_unilabel_h1n1_vacc_train_roc,
) = (
    scorer(y_seas_vacc_train, knn_unilabel_h1n1_vacc_trainpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# Random forest -- metrics on the training split (accuracy, recall, precision, F1, ROC AUC).
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_rand_forst_unilabel_h1n1_vacc_train_acc,
    seas_rand_forst_unilabel_h1n1_vacc_train_recall,
    seas_rand_forst_unilabel_h1n1_vacc_train_precision,
    seas_rand_forst_unilabel_h1n1_vacc_train_f1,
    seas_rand_forst_unilabel_h1n1_vacc_train_roc,
) = (
    scorer(y_seas_vacc_train, rand_forst_unilabel_h1n1_vacc_trainpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

In [None]:
# SVM -- metrics on the training split (accuracy, recall, precision, F1, ROC AUC).
# NOTE(review): roc_auc_score receives the hard labels produced by .predict(),
# not probabilities or decision scores -- confirm this is the intended ROC AUC.
(
    seas_svm_unilabel_h1n1_vacc_train_acc,
    seas_svm_unilabel_h1n1_vacc_train_recall,
    seas_svm_unilabel_h1n1_vacc_train_precision,
    seas_svm_unilabel_h1n1_vacc_train_f1,
    seas_svm_unilabel_h1n1_vacc_train_roc,
) = (
    scorer(y_seas_vacc_train, svm_unilabel_h1n1_vacc_trainpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
)

#### Test data

In [None]:
# Logistic regression -- evaluation metrics on the test data.
# Scores y_seas_vacc_test against logreg_unilabel_h1n1_vacc_testpreds with each
# scorer in turn and stores the results under the names used by the MLflow
# logging cells.
# NOTE(review): roc_auc_score receives the same predictions array as the other
# scorers; if that array holds hard class labels rather than scores or
# probabilities, the AUC reflects a single threshold only -- confirm.
(
    seas_logreg_unilabel_h1n1_vacc_test_acc,
    seas_logreg_unilabel_h1n1_vacc_test_recall,
    seas_logreg_unilabel_h1n1_vacc_test_precision,
    seas_logreg_unilabel_h1n1_vacc_test_f1,
    seas_logreg_unilabel_h1n1_vacc_test_roc,
) = [
    scorer(y_seas_vacc_test, logreg_unilabel_h1n1_vacc_testpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# Uncomment to display a score (likewise for _recall, _precision, _f1, _roc):
#print("Accuracy: {:.2f}".format(seas_logreg_unilabel_h1n1_vacc_test_acc))

In [None]:
# KNN -- evaluation metrics on the test data.
# Scores y_seas_vacc_test against knn_unilabel_h1n1_vacc_testpreds with each
# scorer in turn and stores the results under the names used by the MLflow
# logging cells.
# NOTE(review): roc_auc_score receives the same predictions array as the other
# scorers; if that array holds hard class labels rather than scores or
# probabilities, the AUC reflects a single threshold only -- confirm.
(
    seas_knn_unilabel_h1n1_vacc_test_acc,
    seas_knn_unilabel_h1n1_vacc_test_recall,
    seas_knn_unilabel_h1n1_vacc_test_precision,
    seas_knn_unilabel_h1n1_vacc_test_f1,
    seas_knn_unilabel_h1n1_vacc_test_roc,
) = [
    scorer(y_seas_vacc_test, knn_unilabel_h1n1_vacc_testpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# Uncomment to display a score (likewise for _recall, _precision, _f1, _roc):
#print("Accuracy: {:.2f}".format(seas_knn_unilabel_h1n1_vacc_test_acc))

In [None]:
# Random Forest -- evaluation metrics on the test data.
# Scores y_seas_vacc_test against rand_forst_unilabel_h1n1_vacc_testpreds with
# each scorer in turn and stores the results under the names used by the MLflow
# logging cells.
# TODO: check whether these metrics change compared to previous results.
# NOTE(review): roc_auc_score receives the same predictions array as the other
# scorers; if that array holds hard class labels rather than scores or
# probabilities, the AUC reflects a single threshold only -- confirm.
(
    seas_rand_forst_unilabel_h1n1_vacc_test_acc,
    seas_rand_forst_unilabel_h1n1_vacc_test_recall,
    seas_rand_forst_unilabel_h1n1_vacc_test_precision,
    seas_rand_forst_unilabel_h1n1_vacc_test_f1,
    seas_rand_forst_unilabel_h1n1_vacc_test_roc,
) = [
    scorer(y_seas_vacc_test, rand_forst_unilabel_h1n1_vacc_testpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# Uncomment to display a score (likewise for _recall, _precision, _f1, _roc):
#print("Accuracy: {:.2f}".format(seas_rand_forst_unilabel_h1n1_vacc_test_acc))

In [None]:
# SVM -- evaluation metrics on the test data.
# Scores y_seas_vacc_test against svm_unilabel_h1n1_vacc_testpreds with each
# scorer in turn and stores the results under the names used by the MLflow
# logging cells.
# NOTE(review): roc_auc_score receives the same predictions array as the other
# scorers; if that array holds hard class labels rather than scores or
# probabilities, the AUC reflects a single threshold only -- confirm.
(
    seas_svm_unilabel_h1n1_vacc_test_acc,
    seas_svm_unilabel_h1n1_vacc_test_recall,
    seas_svm_unilabel_h1n1_vacc_test_precision,
    seas_svm_unilabel_h1n1_vacc_test_f1,
    seas_svm_unilabel_h1n1_vacc_test_roc,
) = [
    scorer(y_seas_vacc_test, svm_unilabel_h1n1_vacc_testpreds)
    for scorer in (accuracy_score, recall_score, precision_score, f1_score, roc_auc_score)
]

# Uncomment to display a score (likewise for _recall, _precision, _f1, _roc):
#print("Accuracy: {:.2f}".format(seas_svm_unilabel_h1n1_vacc_test_acc))

## Tracking the model with MLFlow

### Seasonal vaccine output (H1N1 vaccine in features)

#### Logistic regression

In [None]:
# Connect to the MLflow tracking server and select the experiment
# (the experiment constant needs to be adjusted for each experiment).
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_seasonal)

# Run name: name it after the model used.
name = "logreg_unilabel_seasonal"

# start_run() hands back the run it has just made active -- the same object
# mlflow.active_run() would report -- so keep it for the following cells.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the ID of the run held in `run` (assigned in the cell that starts the run).
_active_run_id = run.info.run_id
print("Active run_id: {}".format(_active_run_id))

In [None]:
# Run parameters: keep track of everything needed to re-run the experiment
# (e.g. which features are engineered, model hyperparameters, ...).
# Adjust as needed. The values are descriptive strings; "None" is the literal
# text, not Python's None.
params = {}
params["Data cleaning"] = "Full dataset"
params["Data balancing"] = "None"
params["Hyperparameters"] = "None"

In [None]:
# Record the run in MLflow: parameters, then the feature tag, then the
# train/test value of every metric, and finally close the run.
mlflow.log_params(params)

# Tag = data used: more detail on what was done for this run.
mlflow.set_tag("Vaccines in features", "H1N1")

# One (label, train value, test value) triple per metric; each is logged as
# "train -<label>" followed by "test -<label>".
for _label, _train_value, _test_value in (
    ("ROC", seas_logreg_unilabel_h1n1_vacc_train_roc, seas_logreg_unilabel_h1n1_vacc_test_roc),
    ("accuracy", seas_logreg_unilabel_h1n1_vacc_train_acc, seas_logreg_unilabel_h1n1_vacc_test_acc),
    ("recall", seas_logreg_unilabel_h1n1_vacc_train_recall, seas_logreg_unilabel_h1n1_vacc_test_recall),
    ("precision", seas_logreg_unilabel_h1n1_vacc_train_precision, seas_logreg_unilabel_h1n1_vacc_test_precision),
    ("f1", seas_logreg_unilabel_h1n1_vacc_train_f1, seas_logreg_unilabel_h1n1_vacc_test_f1),
):
    mlflow.log_metric("train -" + _label, _train_value)
    mlflow.log_metric("test -" + _label, _test_value)

# Optional artifact/model logging, currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

#### KNN

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_seasonal)

# Run name: name it after the model used.
name = "knn_unilabel_seasonal"

# start_run() hands back the run it has just made active -- the same object
# mlflow.active_run() would report -- so keep it for the following cells.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the ID of the run held in `run` (assigned in the cell that starts the run).
_active_run_id = run.info.run_id
print("Active run_id: {}".format(_active_run_id))

In [None]:
# Run parameters: keep track of everything needed to re-run the experiment
# (e.g. which features are engineered, model hyperparameters, ...).
# Adjust as needed. The values are descriptive strings; "None" is the literal
# text, not Python's None.
params = {}
params["Data cleaning"] = "Full dataset"
params["Data balancing"] = "None"
params["Hyperparameters"] = "None"

In [None]:
# Record the run in MLflow: parameters, then the feature tag, then the
# train/test value of every metric, and finally close the run.
mlflow.log_params(params)

# Tag = data used: more detail on what was done for this run.
mlflow.set_tag("Vaccines in features", "H1N1")

# One (label, train value, test value) triple per metric; each is logged as
# "train -<label>" followed by "test -<label>".
for _label, _train_value, _test_value in (
    ("ROC", seas_knn_unilabel_h1n1_vacc_train_roc, seas_knn_unilabel_h1n1_vacc_test_roc),
    ("accuracy", seas_knn_unilabel_h1n1_vacc_train_acc, seas_knn_unilabel_h1n1_vacc_test_acc),
    ("recall", seas_knn_unilabel_h1n1_vacc_train_recall, seas_knn_unilabel_h1n1_vacc_test_recall),
    ("precision", seas_knn_unilabel_h1n1_vacc_train_precision, seas_knn_unilabel_h1n1_vacc_test_precision),
    ("f1", seas_knn_unilabel_h1n1_vacc_train_f1, seas_knn_unilabel_h1n1_vacc_test_f1),
):
    mlflow.log_metric("train -" + _label, _train_value)
    mlflow.log_metric("test -" + _label, _test_value)

# Optional artifact/model logging, currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

#### Random forest

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_seasonal)

# Run name: name it after the model used.
name = "rand_forst_unilabel_seasonal"

# start_run() hands back the run it has just made active -- the same object
# mlflow.active_run() would report -- so keep it for the following cells.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the ID of the run held in `run` (assigned in the cell that starts the run).
_active_run_id = run.info.run_id
print("Active run_id: {}".format(_active_run_id))

In [None]:
# Run parameters: keep track of everything needed to re-run the experiment
# (e.g. which features are engineered, model hyperparameters, ...).
# Adjust as needed. The values are descriptive strings; "None" is the literal
# text, not Python's None.
params = {}
params["Data cleaning"] = "Full dataset"
params["Data balancing"] = "None"
params["Hyperparameters"] = "None"

In [None]:
# Record the run in MLflow: parameters, then the feature tag, then the
# train/test value of every metric, and finally close the run.
mlflow.log_params(params)

# Tag = data used: more detail on what was done for this run.
mlflow.set_tag("Vaccines in features", "H1N1")

# One (label, train value, test value) triple per metric; each is logged as
# "train -<label>" followed by "test -<label>".
for _label, _train_value, _test_value in (
    ("ROC", seas_rand_forst_unilabel_h1n1_vacc_train_roc, seas_rand_forst_unilabel_h1n1_vacc_test_roc),
    ("accuracy", seas_rand_forst_unilabel_h1n1_vacc_train_acc, seas_rand_forst_unilabel_h1n1_vacc_test_acc),
    ("recall", seas_rand_forst_unilabel_h1n1_vacc_train_recall, seas_rand_forst_unilabel_h1n1_vacc_test_recall),
    ("precision", seas_rand_forst_unilabel_h1n1_vacc_train_precision, seas_rand_forst_unilabel_h1n1_vacc_test_precision),
    ("f1", seas_rand_forst_unilabel_h1n1_vacc_train_f1, seas_rand_forst_unilabel_h1n1_vacc_test_f1),
):
    mlflow.log_metric("train -" + _label, _train_value)
    mlflow.log_metric("test -" + _label, _test_value)

# Optional artifact/model logging, currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)

#### SVM

In [None]:
# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME_seasonal)

# Run name: name it after the model used.
name = "svm_unilabel_seasonal"

# start_run() hands back the run it has just made active -- the same object
# mlflow.active_run() would report -- so keep it for the following cells.
run = mlflow.start_run(run_name=name)

In [None]:
# Show the ID of the run held in `run` (assigned in the cell that starts the run).
_active_run_id = run.info.run_id
print("Active run_id: {}".format(_active_run_id))

In [None]:
# Run parameters: keep track of everything needed to re-run the experiment
# (e.g. which features are engineered, model hyperparameters, ...).
# Adjust as needed. The values are descriptive strings; "None" is the literal
# text, not Python's None.
params = {}
params["Data cleaning"] = "Full dataset"
params["Data balancing"] = "None"
params["Hyperparameters"] = "None"

In [None]:
# Record the run in MLflow: parameters, then the feature tag, then the
# train/test value of every metric, and finally close the run.
mlflow.log_params(params)

# Tag = data used: more detail on what was done for this run.
mlflow.set_tag("Vaccines in features", "H1N1")

# One (label, train value, test value) triple per metric; each is logged as
# "train -<label>" followed by "test -<label>".
for _label, _train_value, _test_value in (
    ("ROC", seas_svm_unilabel_h1n1_vacc_train_roc, seas_svm_unilabel_h1n1_vacc_test_roc),
    ("accuracy", seas_svm_unilabel_h1n1_vacc_train_acc, seas_svm_unilabel_h1n1_vacc_test_acc),
    ("recall", seas_svm_unilabel_h1n1_vacc_train_recall, seas_svm_unilabel_h1n1_vacc_test_recall),
    ("precision", seas_svm_unilabel_h1n1_vacc_train_precision, seas_svm_unilabel_h1n1_vacc_test_precision),
    ("f1", seas_svm_unilabel_h1n1_vacc_train_f1, seas_svm_unilabel_h1n1_vacc_test_f1),
):
    mlflow.log_metric("train -" + _label, _train_value)
    mlflow.log_metric("test -" + _label, _test_value)

# Optional artifact/model logging, currently disabled:
#mlflow.log_artifact("../models")
#mlflow.sklearn.log_model(reg, "model")
mlflow.end_run()

In [None]:
#mlflow.get_run(run_id=run.info.run_id)