In [1]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score  
import optuna
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from joblib import load, dump


X_train_val = load('../../BEST SET/X_train_filtered.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_filtered.joblib')
y_test = load('../../BEST SET/y_Test.joblib') 

def reshape_data(data):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_channels = data.shape[2]  # Assuming the 3rd dimension is the number of channels
    return data.reshape(num_samples, num_timesteps * num_channels)

X_train_val = reshape_data(X_train_val)  
X_test = reshape_data(X_test)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)  

def objective(trial):
    n_neighbors = trial.suggest_int('n_neighbors', 1, 50)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan', 'minkowski'])

    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, metric=metric, n_jobs=-1)

    cv_scores = cross_val_score(knn, X_train_val, y_train_val, cv=skf, scoring='accuracy')
    return cv_scores.mean()

# Hyperparameter tuning with Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200, n_jobs = -1)  

best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train final model with best hyperparameters
best_knn = KNeighborsClassifier(**best_params, n_jobs=-1)
best_knn.fit(X_train_val, y_train_val)

# Save model if needed
dump(best_knn, './KNN-Filtered-2.joblib') 

# Evaluation on test set
predictions_test = best_knn.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

# Evaluation on test set
predictions_test = best_knn.predict(X_train_val)
accuracy_test = accuracy_score(y_train_val, predictions_test)
print("Accuracy on Training Set:", accuracy_test)
print(classification_report(y_train_val, predictions_test))
print(confusion_matrix(y_train_val, predictions_test))

[I 2024-05-16 07:49:24,862] A new study created in memory with name: no-name-439762e8-ae3e-4dbc-9e14-c1ac4f0e9215
[I 2024-05-16 07:49:43,592] Trial 25 finished with value: 0.9346205254966584 and parameters: {'n_neighbors': 6, 'weights': 'distance', 'metric': 'euclidean'}. Best is trial 25 with value: 0.9346205254966584.
[I 2024-05-16 07:49:44,059] Trial 14 finished with value: 0.9001226769202599 and parameters: {'n_neighbors': 40, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 25 with value: 0.9346205254966584.
[I 2024-05-16 07:49:44,452] Trial 18 finished with value: 0.9001226769202599 and parameters: {'n_neighbors': 40, 'weights': 'uniform', 'metric': 'minkowski'}. Best is trial 25 with value: 0.9346205254966584.
[I 2024-05-16 07:49:44,757] Trial 17 finished with value: 0.9097976746315115 and parameters: {'n_neighbors': 44, 'weights': 'distance', 'metric': 'minkowski'}. Best is trial 25 with value: 0.9346205254966584.
[I 2024-05-16 07:49:44,855] Trial 5 finished with val

Best hyperparameters: {'n_neighbors': 3, 'weights': 'distance', 'metric': 'manhattan'}
Accuracy on Test Set: 0.9847826086956522
              precision    recall  f1-score   support

           0       0.98      0.99      0.98       230
           1       0.99      0.98      0.98       230

    accuracy                           0.98       460
   macro avg       0.98      0.98      0.98       460
weighted avg       0.98      0.98      0.98       460

[[227   3]
 [  4 226]]
Accuracy on Training Set: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       826
           1       1.00      1.00      1.00       826

    accuracy                           1.00      1652
   macro avg       1.00      1.00      1.00      1652
weighted avg       1.00      1.00      1.00      1652

[[826   0]
 [  0 826]]


In [5]:
predictions_test = best_knn.predict(X_test)

f1_test = f1_score(y_test, predictions_test)
print("F1 on Test Set:", f1_test)


F1 on Test Set: 0.9847494553376905


In [4]:
# Evaluation on test set
predictions_test = best_knn.predict(X_train_val)
accuracy_test = accuracy_score(y_train_val, predictions_test)
print("Accuracy on Training Set:", accuracy_test)
print(classification_report(y_train_val, predictions_test))
print(confusion_matrix(y_train_val, predictions_test))

Accuracy on Training Set: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       826
           1       1.00      1.00      1.00       826

    accuracy                           1.00      1652
   macro avg       1.00      1.00      1.00      1652
weighted avg       1.00      1.00      1.00      1652

[[826   0]
 [  0 826]]


In [3]:
predictions_test = best_knn.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))

Accuracy on Test Set: 0.7369565217391304
              precision    recall  f1-score   support

           0       0.66      0.97      0.79       230
           1       0.95      0.50      0.66       230

    accuracy                           0.74       460
   macro avg       0.81      0.74      0.72       460
weighted avg       0.81      0.74      0.72       460

[[224   6]
 [115 115]]


# Quarters

In [1]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score  
import optuna
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from joblib import load, dump

X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib') 

quarter = X_train_val[::4]
quarter_labels = y_train_val[::4]
second_qtr = X_train_val[1::4]
second_qtr_labels = y_train_val[1::4]
third_qtr = X_train_val[2::4]
third_qtr_labels = y_train_val[2::4]
fourth_qtr = X_train_val[3::4]
fourth_qtr_labels = y_train_val[3::4]

X_test = np.append(X_test,second_qtr, axis=0)
X_test = np.append(X_test, third_qtr, axis=0)
X_test = np.append(X_test, fourth_qtr, axis=0)
y_test = np.append(y_test,second_qtr_labels, axis=0)
y_test = np.append(y_test, third_qtr_labels, axis=0)
y_test = np.append(y_test, fourth_qtr_labels, axis=0)

X_train_val = quarter
y_train_val = quarter_labels

def reshape_data(data):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_channels = data.shape[2]  # Assuming the 3rd dimension is the number of channels
    return data.reshape(num_samples, num_timesteps * num_channels)

X_train_val = reshape_data(X_train_val)  
X_test = reshape_data(X_test)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)  

def objective(trial):
    n_neighbors = trial.suggest_int('n_neighbors', 1, 50)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan', 'minkowski'])

    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, metric=metric, n_jobs=-1)

    cv_scores = cross_val_score(knn, X_train_val, y_train_val, cv=skf, scoring='accuracy')
    return cv_scores.mean()

# Hyperparameter tuning with Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200, n_jobs = -1)  

best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train final model with best hyperparameters
best_knn = KNeighborsClassifier(**best_params, n_jobs=-1)
best_knn.fit(X_train_val, y_train_val)

# Save model if needed
dump(best_knn, './KNN-Quarter.joblib') 

# Evaluation on test set
predictions_test = best_knn.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))
f1_test = f1_score(y_test, predictions_test)
print("F1 on Test Set:", f1_test)

# Evaluation on test set
predictions_test = best_knn.predict(X_train_val)
accuracy_test = accuracy_score(y_train_val, predictions_test)
print("Accuracy on Training Set:", accuracy_test)
print(classification_report(y_train_val, predictions_test))
print(confusion_matrix(y_train_val, predictions_test))

[I 2024-05-24 06:20:15,170] A new study created in memory with name: no-name-47a6270d-6a5f-4de5-8d9c-0b49413ae1cd
[I 2024-05-24 06:20:17,333] Trial 9 finished with value: 0.7095503967087863 and parameters: {'n_neighbors': 47, 'weights': 'uniform', 'metric': 'minkowski'}. Best is trial 9 with value: 0.7095503967087863.
[I 2024-05-24 06:20:17,348] Trial 12 finished with value: 0.816044666470761 and parameters: {'n_neighbors': 22, 'weights': 'distance', 'metric': 'minkowski'}. Best is trial 12 with value: 0.816044666470761.
[I 2024-05-24 06:20:17,393] Trial 21 finished with value: 0.8257126065236557 and parameters: {'n_neighbors': 18, 'weights': 'distance', 'metric': 'minkowski'}. Best is trial 21 with value: 0.8257126065236557.
[I 2024-05-24 06:20:17,449] Trial 8 finished with value: 0.7459300617102558 and parameters: {'n_neighbors': 45, 'weights': 'distance', 'metric': 'euclidean'}. Best is trial 21 with value: 0.8257126065236557.
[I 2024-05-24 06:20:17,466] Trial 19 finished with value

Best hyperparameters: {'n_neighbors': 1, 'weights': 'distance', 'metric': 'manhattan'}
Accuracy on Test Set: 0.9593878752207181
              precision    recall  f1-score   support

           0       0.94      0.99      0.96       850
           1       0.99      0.93      0.96       849

    accuracy                           0.96      1699
   macro avg       0.96      0.96      0.96      1699
weighted avg       0.96      0.96      0.96      1699

[[839  11]
 [ 58 791]]
F1 on Test Set: 0.9582071471835252
Accuracy on Training Set: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       206
           1       1.00      1.00      1.00       207

    accuracy                           1.00       413
   macro avg       1.00      1.00      1.00       413
weighted avg       1.00      1.00      1.00       413

[[206   0]
 [  0 207]]


In [2]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score  
import optuna
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from joblib import load, dump

X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib') 

quarter = X_train_val[::4]
quarter_labels = y_train_val[::4]
second_qtr = X_train_val[1::4]
second_qtr_labels = y_train_val[1::4]
third_qtr = X_train_val[2::4]
third_qtr_labels = y_train_val[2::4]
fourth_qtr = X_train_val[3::4]
fourth_qtr_labels = y_train_val[3::4]

X_train_val = np.append(quarter, second_qtr, axis=0)
X_train_val = np.append(X_train_val, third_qtr, axis=0)
y_train_val = np.append(quarter_labels, second_qtr_labels, axis=0)
y_train_val = np.append(y_train_val, third_qtr_labels, axis=0)
X_test = np.append(X_test, fourth_qtr, axis=0)
y_test = np.append(y_test, fourth_qtr_labels, axis=0)

def reshape_data(data):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_channels = data.shape[2]  # Assuming the 3rd dimension is the number of channels
    return data.reshape(num_samples, num_timesteps * num_channels)

X_train_val = reshape_data(X_train_val)  
X_test = reshape_data(X_test)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)  

def objective(trial):
    n_neighbors = trial.suggest_int('n_neighbors', 1, 50)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan', 'minkowski'])

    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, metric=metric, n_jobs=-1)

    cv_scores = cross_val_score(knn, X_train_val, y_train_val, cv=skf, scoring='accuracy')
    return cv_scores.mean()

# Hyperparameter tuning with Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200, n_jobs = -1)  

best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train final model with best hyperparameters
best_knn = KNeighborsClassifier(**best_params, n_jobs=-1)
best_knn.fit(X_train_val, y_train_val)

# Save model if needed
dump(best_knn, './KNN-3Quarter.joblib') 

# Evaluation on test set
predictions_test = best_knn.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))
f1_test = f1_score(y_test, predictions_test)
print("F1 on Test Set:", f1_test)

# Evaluation on test set
predictions_test = best_knn.predict(X_train_val)
accuracy_test = accuracy_score(y_train_val, predictions_test)
print("Accuracy on Training Set:", accuracy_test)
print(classification_report(y_train_val, predictions_test))
print(confusion_matrix(y_train_val, predictions_test))

[I 2024-05-24 06:21:39,537] A new study created in memory with name: no-name-160b1021-e97b-489e-bdad-00f6381a60c8
[I 2024-05-24 06:21:47,275] Trial 13 finished with value: 0.8078914718558181 and parameters: {'n_neighbors': 49, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 13 with value: 0.8078914718558181.
[I 2024-05-24 06:21:47,590] Trial 15 finished with value: 0.7982075225284053 and parameters: {'n_neighbors': 50, 'weights': 'uniform', 'metric': 'minkowski'}. Best is trial 13 with value: 0.8078914718558181.
[I 2024-05-24 06:21:47,791] Trial 14 finished with value: 0.8127432414783857 and parameters: {'n_neighbors': 39, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 14 with value: 0.8127432414783857.
[I 2024-05-24 06:21:47,913] Trial 17 finished with value: 0.8329241217186889 and parameters: {'n_neighbors': 29, 'weights': 'uniform', 'metric': 'minkowski'}. Best is trial 17 with value: 0.8329241217186889.
[I 2024-05-24 06:21:47,934] Trial 5 finished with valu

Best hyperparameters: {'n_neighbors': 4, 'weights': 'distance', 'metric': 'manhattan'}
Accuracy on Test Set: 0.981672394043528
              precision    recall  f1-score   support

           0       0.97      1.00      0.98       437
           1       1.00      0.97      0.98       436

    accuracy                           0.98       873
   macro avg       0.98      0.98      0.98       873
weighted avg       0.98      0.98      0.98       873

[[435   2]
 [ 14 422]]
F1 on Test Set: 0.9813953488372092
Accuracy on Training Set: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       619
           1       1.00      1.00      1.00       620

    accuracy                           1.00      1239
   macro avg       1.00      1.00      1.00      1239
weighted avg       1.00      1.00      1.00      1239

[[619   0]
 [  0 620]]


In [3]:
X_train_val = load('../../BEST SET/X_train_fft.joblib')
y_train_val = load('../../BEST SET/y_Train.joblib')
X_test = load('../../BEST SET/X_test_fft.joblib')
y_test = load('../../BEST SET/y_Test.joblib') 
half = X_train_val[::2]
half_labels = y_train_val[::2]
second_half = X_train_val[1::2]
second_half_labels = y_train_val[1::2]
X_test = np.append(X_test, second_half, axis=0)
y_test = np.append(y_test, second_half_labels, axis=0)
X_train_val = half
y_train_val = half_labels

def reshape_data(data):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_channels = data.shape[2]  # Assuming the 3rd dimension is the number of channels
    return data.reshape(num_samples, num_timesteps * num_channels)

X_train_val = reshape_data(X_train_val)  
X_test = reshape_data(X_test)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)  

def objective(trial):
    n_neighbors = trial.suggest_int('n_neighbors', 1, 50)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    metric = trial.suggest_categorical('metric', ['euclidean', 'manhattan', 'minkowski'])

    knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, metric=metric, n_jobs=-1)

    cv_scores = cross_val_score(knn, X_train_val, y_train_val, cv=skf, scoring='accuracy')
    return cv_scores.mean()

# Hyperparameter tuning with Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200, n_jobs = -1)  

best_params = study.best_params
print("Best hyperparameters:", best_params)

# Train final model with best hyperparameters
best_knn = KNeighborsClassifier(**best_params, n_jobs=-1)
best_knn.fit(X_train_val, y_train_val)

# Save model if needed
dump(best_knn, './KNN-3Quarter.joblib') 

# Evaluation on test set
predictions_test = best_knn.predict(X_test)
accuracy_test = accuracy_score(y_test, predictions_test)
print("Accuracy on Test Set:", accuracy_test)
print(classification_report(y_test, predictions_test))
print(confusion_matrix(y_test, predictions_test))
f1_test = f1_score(y_test, predictions_test)
print("F1 on Test Set:", f1_test)

# Evaluation on test set
predictions_test = best_knn.predict(X_train_val)
accuracy_test = accuracy_score(y_train_val, predictions_test)
print("Accuracy on Training Set:", accuracy_test)
print(classification_report(y_train_val, predictions_test))
print(confusion_matrix(y_train_val, predictions_test))

[I 2024-05-24 06:32:55,868] A new study created in memory with name: no-name-b98e9ad0-936d-4307-b35b-63004a375c98
[I 2024-05-24 06:33:00,204] Trial 17 finished with value: 0.8753705732018986 and parameters: {'n_neighbors': 4, 'weights': 'uniform', 'metric': 'minkowski'}. Best is trial 17 with value: 0.8753705732018986.
[I 2024-05-24 06:33:00,475] Trial 13 finished with value: 0.8015115005476451 and parameters: {'n_neighbors': 39, 'weights': 'distance', 'metric': 'minkowski'}. Best is trial 17 with value: 0.8753705732018986.
[I 2024-05-24 06:33:00,641] Trial 21 finished with value: 0.8862504563709382 and parameters: {'n_neighbors': 7, 'weights': 'uniform', 'metric': 'euclidean'}. Best is trial 21 with value: 0.8862504563709382.
[I 2024-05-24 06:33:00,691] Trial 26 finished with value: 0.8099963490324937 and parameters: {'n_neighbors': 29, 'weights': 'uniform', 'metric': 'minkowski'}. Best is trial 21 with value: 0.8862504563709382.
[I 2024-05-24 06:33:00,800] Trial 16 finished with valu

Best hyperparameters: {'n_neighbors': 2, 'weights': 'distance', 'metric': 'manhattan'}
Accuracy on Test Set: 0.968895800933126
              precision    recall  f1-score   support

           0       0.95      0.99      0.97       643
           1       0.99      0.95      0.97       643

    accuracy                           0.97      1286
   macro avg       0.97      0.97      0.97      1286
weighted avg       0.97      0.97      0.97      1286

[[637   6]
 [ 34 609]]
F1 on Test Set: 0.9682034976152624
Accuracy on Training Set: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       413
           1       1.00      1.00      1.00       413

    accuracy                           1.00       826
   macro avg       1.00      1.00      1.00       826
weighted avg       1.00      1.00      1.00       826

[[413   0]
 [  0 413]]
