### Importing libraries

In [72]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline


### Loading data

In [73]:
# Loading raw datasets

data_train = pd.read_csv('data/raw/train.csv')
data_test = pd.read_csv('data/raw/test.csv')
y_train_encoded = pd.get_dummies(y_train)

In [74]:
X_train = data_train.drop('Activity', axis='columns')
y_train = data_train['Activity']

X_test = data_test.drop('Activity', axis='columns')
y_test = data_test['Activity']
y_test_encoded = pd.get_dummies(y_test)

### Setting up Classifiers

In [75]:

from sklearn.linear_model import LinearRegression


clf1 = LinearRegression()

clf2 = DecisionTreeClassifier(
  max_depth=4,
  criterion='entropy',
  random_state=0
)

clf3 = KNeighborsClassifier(
  n_neighbors=1,
  p=2,
  metric='minkowski'
)

pipe1 = Pipeline([['sc', StandardScaler()], ['clf', clf1]])
pipe3 = Pipeline([['sc', StandardScaler()], ['clf', clf3]])

### Comparing Classifiers

In [76]:
clf_labels = ['Logistic regression', 'Decision tree', 'KNN']

print('10-fold cross validation:\n')

for clf, label in zip([pipe1, clf2, pipe3], clf_labels):
  scores = cross_val_score(
    estimator=clf,
    X=X_train,
    y=y_train_encoded,
    cv=10,
    scoring='roc_auc',
    error_score='raise'
  )
  
  print("ROC AUC: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

10-fold cross validation:

ROC AUC: 0.99 (+/- 0.01) [Logistic regression]
ROC AUC: 0.96 (+/- 0.03) [Decision tree]
ROC AUC: 0.90 (+/- 0.02) [KNN]
