### Importing libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import LabelEncoder

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline


### Loading data

In [2]:
# Read the raw train and test CSV files into pandas DataFrames.
# The paths are relative, so they resolve against the kernel's working directory.
data_train = pd.read_csv(filepath_or_buffer='data/raw/train.csv')
data_test = pd.read_csv(filepath_or_buffer='data/raw/test.csv')

In [3]:
# Split each raw frame into a feature matrix (every column except 'Activity')
# and the 'Activity' target column.
X_train = data_train.drop('Activity', axis='columns')
y_train = data_train['Activity']

X_test = data_test.drop('Activity', axis='columns')
y_test = data_test['Activity']

# Fit the label encoder on the training labels only, then reuse that fitted
# mapping for the test labels. Calling fit_transform on the test split would
# rebuild the mapping from whatever labels the test set happens to contain,
# so the integer codes would not be guaranteed to match the training codes
# and `le` would end up fitted on test data.
# Note: transform() raises ValueError if the test split contains a label that
# was not present in the training split.
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

### Setting up Classifiers

In [4]:
# Three base learners used throughout the rest of the notebook.
clf1 = GaussianNB()
clf2 = DecisionTreeClassifier(criterion='entropy', max_depth=4, random_state=0)
clf3 = KNeighborsClassifier(n_neighbors=1, metric='minkowski', p=2)

# GaussianNB and KNN are each wrapped in a pipeline that applies a
# StandardScaler before the classifier; the decision tree is used unwrapped.
pipe1 = Pipeline(steps=[['sc', StandardScaler()], ['clf', clf1]])
pipe3 = Pipeline(steps=[['sc', StandardScaler()], ['clf', clf3]])

# Display names, in the same order the estimators are evaluated below.
clf_labels = [
  'GaussianNB',
  'Decision tree',
  'KNN',
]

### Comparing Classifiers

In [None]:
# Evaluate each base classifier with 10-fold cross-validation on the training
# split and print the mean and standard deviation of the per-fold
# one-vs-rest ROC AUC scores.
print('10-fold cross validation:\n')

report_template = "ROC AUC: %0.2f (+/- %0.2f) [%s]"
base_estimators = [pipe1, clf2, pipe3]

for model, name in zip(base_estimators, clf_labels):
  scores = cross_val_score(model, X_train, y_train_encoded, scoring='roc_auc_ovr', cv=10)
  print(report_template % (scores.mean(), scores.std(), name))

### Creating Majority Vote Classifier

In [6]:
from MajorityVoteClassifier import MajorityVoteClassifier

# Ensemble built from the three base learners defined earlier in the notebook.
mv_clf = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3])

# Register the ensemble's display name only once. An unconditional
# `clf_labels += [...]` appends another copy every time this cell is re-run,
# which leaves clf_labels longer than all_clf.
mv_label = 'Majority voting'
if mv_label not in clf_labels:
  clf_labels.append(mv_label)
all_clf = [pipe1, clf2, pipe3, mv_clf]

# 10-fold cross-validated one-vs-rest ROC AUC of the ensemble on the training
# split, using the integer-encoded labels.
scores = cross_val_score(
  estimator=mv_clf,
  X=X_train,
  y=y_train_encoded,
  cv=10,
  scoring='roc_auc_ovr'
)

# Report the result in the same format as the base-classifier comparison so the
# numbers can be read side by side.
print("ROC AUC: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), mv_label))

NameError: name 'clf_labels' is not defined