In [1]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Loading data

In [2]:
# Loading datasets

# data_train = pd.read_csv('data/processed/train_meaningfull.csv')
# data_test = pd.read_csv('data/processed/test_meaningfull.csv')

In [3]:
# Loading raw datasets

data_train = pd.read_csv('data/raw/train.csv')
data_test = pd.read_csv('data/raw/test.csv')

In [4]:
X_train = data_train.drop('Activity', axis='columns')
y_train = data_train['Activity']

In [5]:
X_test = data_test.drop('Activity', axis='columns')
y_test = data_test['Activity']

### Testing different models

In [6]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

import optuna

  from .autonotebook import tqdm as notebook_tqdm


#### Trying Linear Discriminant Analysis (LDA)

##### Hyperparameter tuning LDA

In [7]:
def objective(trial):
  model = LDA(solver='lsqr', shrinkage=trial.suggest_float("shrinkage", 0, 1))

  model.fit(X_train, y_train)

  # Evaluate the model accuracy on the validation set.
  score = model.score(X_test, y_test)
  return score

In [8]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=400, timeout=3600)

[I 2023-09-24 18:53:29,795] A new study created in memory with name: no-name-023237d4-195f-43fd-9dac-05fc7e47783a
[I 2023-09-24 18:53:30,001] Trial 0 finished with value: 0.9250084832032576 and parameters: {'shrinkage': 0.5152436841654604}. Best is trial 0 with value: 0.9250084832032576.
[I 2023-09-24 18:53:30,190] Trial 1 finished with value: 0.9151679674244995 and parameters: {'shrinkage': 0.6342411449001779}. Best is trial 0 with value: 0.9250084832032576.
[I 2023-09-24 18:53:30,390] Trial 2 finished with value: 0.9036308109942314 and parameters: {'shrinkage': 0.7412242597719868}. Best is trial 0 with value: 0.9250084832032576.
[I 2023-09-24 18:53:30,570] Trial 3 finished with value: 0.8941296233457754 and parameters: {'shrinkage': 0.797194726600662}. Best is trial 0 with value: 0.9250084832032576.
[I 2023-09-24 18:53:30,756] Trial 4 finished with value: 0.9569053274516457 and parameters: {'shrinkage': 0.06480806420720264}. Best is trial 4 with value: 0.9569053274516457.
[I 2023-09-

In [9]:
study.best_params

{'shrinkage': 0.012632869596956803}

We found the aproximately best value of "shrinkage" parameter which is 0.013080256284927826

In [10]:
shrinkage = study.best_params["shrinkage"]

lda = LDA(solver='lsqr', shrinkage=shrinkage)
lda.fit(X_train, y_train)
score = lda.score(X_test, y_test)
score

0.9660671869697998

In [11]:
lda_auto_shrinkage = LDA(solver='lsqr', shrinkage='auto')
lda_auto_shrinkage.fit(X_train, y_train)
score_auto = lda_auto_shrinkage.score(X_test, y_test)
score_auto

0.9640312181879878

As we can see, optuna-tunned parameter gave better result than 'auto' value (96.6% > 96.4%)