## Praparation

### Importing libraries

In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import LabelEncoder

from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

from MajorityVoteClassifier import MajorityVoteClassifier

### Loading data

In [27]:
# Loading raw datasets

data_train = pd.read_csv('data/raw/train.csv')
data_test = pd.read_csv('data/raw/test.csv')

In [28]:
le = LabelEncoder()
X_train = data_train.drop('Activity', axis='columns')
y_train = data_train['Activity']
y_train_encoded = le.fit_transform(y_train)

X_test = data_test.drop('Activity', axis='columns')
y_test = data_test['Activity']
y_test_encoded = le.fit_transform(y_test)

## Creating Majority Vote Classifier

### Hyperparameter tunning with optuna

In [29]:
def objective(trial):
  clf1 = GaussianNB(var_smoothing=trial.suggest_float("var_smoothing", 1e-10, 1, log=True),)

  clf2 = DecisionTreeClassifier(
    max_depth=trial.suggest_int("max_depth", 1, 5),
    criterion='entropy',
    random_state=0
  )

  clf3 = KNeighborsClassifier(
    n_neighbors=trial.suggest_int("n_neighbors", 1, 8),
    p=2,
    metric='minkowski'
  )

  pipe1 = Pipeline([['sc', StandardScaler()], ['clf', clf1]])
  pipe3 = Pipeline([['sc', StandardScaler()], ['clf', clf3]])

  model = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3])
  
  model.fit(X_train, y_train)

  # Evaluate the model accuracy on the validation set.
  score = model.score(X_test, y_test)
  return score

In [30]:
import optuna

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30, timeout=300)

[I 2023-10-10 19:15:41,055] A new study created in memory with name: no-name-52d5d98a-01e3-44bd-acc3-2fe1f9ab938a
[I 2023-10-10 19:15:44,463] Trial 0 finished with value: 0.9056667797760435 and parameters: {'var_smoothing': 2.9760963889495587e-05, 'max_depth': 5, 'n_neighbors': 3}. Best is trial 0 with value: 0.9056667797760435.
[I 2023-10-10 19:15:48,130] Trial 1 finished with value: 0.8965049202578894 and parameters: {'var_smoothing': 8.411617049677278e-05, 'max_depth': 5, 'n_neighbors': 1}. Best is trial 0 with value: 0.9056667797760435.
[I 2023-10-10 19:15:50,292] Trial 2 finished with value: 0.8642687478791992 and parameters: {'var_smoothing': 4.4593057099572537e-10, 'max_depth': 2, 'n_neighbors': 7}. Best is trial 0 with value: 0.9056667797760435.
[I 2023-10-10 19:15:52,432] Trial 3 finished with value: 0.8639294197488971 and parameters: {'var_smoothing': 2.2183041211795054e-08, 'max_depth': 2, 'n_neighbors': 6}. Best is trial 0 with value: 0.9056667797760435.
[I 2023-10-10 19:15

In [31]:
study.best_params

{'var_smoothing': 1.7610112039937324e-07, 'max_depth': 5, 'n_neighbors': 8}

### Setting up Classifiers

In [32]:
clf1 = GaussianNB(var_smoothing=study.best_params['var_smoothing'])

clf2 = DecisionTreeClassifier(
  max_depth=study.best_params['max_depth'],
  criterion='entropy',
  random_state=0
)

clf3 = KNeighborsClassifier(
  n_neighbors=study.best_params['n_neighbors'],
  p=2,
  metric='minkowski'
)

pipe1 = Pipeline([['sc', StandardScaler()], ['clf', clf1]])
pipe3 = Pipeline([['sc', StandardScaler()], ['clf', clf3]])

clf_labels = ['GaussianNB', 'Decision tree', 'KNN']

In [33]:
model = MajorityVoteClassifier(classifiers=[pipe1, clf2, pipe3])

model.fit(X_train, y_train)

## Evaluating performance of Majority Vote Classifier

In [34]:
score = model.score(X_test, y_test)

print(score)

0.9134713267729895
