## Preparation

### Importing libraries

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import LabelEncoder

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

from MajorityVoteClassifier import MajorityVoteClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import accuracy_score

### Loading data

In [3]:
# Read the raw train and test CSV files into DataFrames.
data_train, data_test = (
  pd.read_csv('data/raw/train.csv'),
  pd.read_csv('data/raw/test.csv'),
)

In [4]:
# Split each raw frame into the feature matrix (every column except
# 'Activity') and the label column ('Activity').
X_train = data_train.drop('Activity', axis='columns')
y_train = data_train['Activity']

X_test = data_test.drop('Activity', axis='columns')
y_test = data_test['Activity']

# Learn the label -> integer mapping from the training labels only, then
# reuse that same mapping for the test labels. Re-fitting on the test labels
# could assign different integers to the same activity if the two label sets
# differ, making the encoded train and test targets incomparable.
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# MajorityVoteClassifier

## Creating Majority Vote Classifier

### Hyperparameter tuning with Optuna

In [None]:
def objective(trial):
  """Optuna objective: cross-validated score of the majority-vote ensemble.

  Samples one hyperparameter per base learner (GaussianNB ``var_smoothing``,
  decision-tree ``max_depth``, KNN ``n_neighbors``), builds the three-member
  MajorityVoteClassifier and scores it with 5-fold cross-validation on the
  training data. The test set is deliberately not touched here: selecting
  hyperparameters on it would make the final test score optimistically biased.

  Parameters
  ----------
  trial : optuna.trial.Trial
    Trial used to sample the hyperparameter values.

  Returns
  -------
  float
    Mean of the per-fold scores, as computed by the ensemble's own ``score``
    method (the same metric the notebook later reports on the test set).
  """
  clf1 = GaussianNB(
    var_smoothing=trial.suggest_float("var_smoothing", 1e-10, 1, log=True)
  )

  clf2 = DecisionTreeClassifier(
    max_depth=trial.suggest_int("max_depth", 1, 5),
    criterion='entropy',
    random_state=0
  )

  clf3 = KNeighborsClassifier(
    n_neighbors=trial.suggest_int("n_neighbors", 1, 8),
    p=2,
    metric='minkowski'
  )

  # GaussianNB and KNN are wrapped with a StandardScaler step; the decision
  # tree is used on the unscaled features.
  pipe1 = Pipeline([['sc', StandardScaler()], ['clf', clf1]])
  pipe3 = Pipeline([['sc', StandardScaler()], ['clf', clf3]])

  model = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3])

  # Validation happens on held-out folds of the training data only.
  # NOTE(review): cross_val_score clones the estimator, so this assumes
  # MajorityVoteClassifier follows the scikit-learn estimator conventions
  # (get_params / constructor arguments stored unchanged) -- confirm.
  fold_scores = cross_val_score(model, X_train, y_train, cv=5)
  return float(fold_scores.mean())

In [None]:
import optuna

# Seed the sampler so repeated runs propose the same sequence of
# hyperparameters. TPESampler is the sampler Optuna uses by default; only the
# seed is new. The wall-clock timeout below can still stop the search before
# all 30 trials finish, so the number of completed trials may vary by machine.
study = optuna.create_study(
  direction="maximize",
  sampler=optuna.samplers.TPESampler(seed=0),
)
study.optimize(objective, n_trials=30, timeout=300)

In [None]:
# Show the hyperparameter values of the study's best trial as the cell output.
study.best_params

### Setting up Classifiers

In [None]:
# Display names for the three base learners, in clf1 / clf2 / clf3 order.
clf_labels = ['GaussianNB', 'Decision tree', 'KNN']

# Rebuild each base learner with the best hyperparameters found by the study.
clf1 = GaussianNB(var_smoothing=study.best_params['var_smoothing'])
clf2 = DecisionTreeClassifier(criterion='entropy', max_depth=study.best_params['max_depth'], random_state=0)
clf3 = KNeighborsClassifier(metric='minkowski', p=2, n_neighbors=study.best_params['n_neighbors'])

# GaussianNB and KNN each get a standardization step in front of them;
# the decision tree (clf2) is left without one.
pipe1 = Pipeline([
  ['sc', StandardScaler()],
  ['clf', clf1],
])
pipe3 = Pipeline([
  ['sc', StandardScaler()],
  ['clf', clf3],
])

In [None]:
# Combine the scaled GaussianNB pipeline, the decision tree and the scaled
# KNN pipeline into one ensemble.
model = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3])

# Fit on the training split, using the raw 'Activity' labels (y_train),
# not the integer-encoded y_train_encoded.
model.fit(X_train, y_train)

## Evaluating performance of Majority Vote Classifier

In [None]:
# Score the fitted ensemble on the test split.
# NOTE(review): presumably mean accuracy, as for scikit-learn classifiers --
# confirm against MajorityVoteClassifier.score.
score = model.score(X_test, y_test)

print(score)

# AdaBoost Classifier

### Building classifier

In [23]:
# Base learner for boosting: an entropy-criterion tree limited to depth 1.
tree = DecisionTreeClassifier(max_depth=1, criterion='entropy', random_state=1)

# AdaBoost over that base learner: 800 boosting rounds, learning rate 0.1.
ada = AdaBoostClassifier(random_state=1, learning_rate=0.1, n_estimators=800, estimator=tree)

### Evaluating classifier

In [24]:
# Fit the boosted ensemble on the training split (raw 'Activity' labels) and
# rebind `ada` to whatever fit() returns.
ada = ada.fit(X_train, y_train)

In [25]:

# Predict on both splits with the fitted AdaBoost model.
y_train_pred, y_test_pred = ada.predict(X_train), ada.predict(X_test)

# Accuracy of those predictions against the true labels of each split.
ada_train, ada_test = (
  accuracy_score(y_train, y_train_pred),
  accuracy_score(y_test, y_test_pred),
)

print('AdaBoost train/test accuracies %.3f/%.3f' % (ada_train, ada_test))

AdaBoost train/test accuracies 0.615/0.608
