In [1]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from joblib import load
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the feature dump (presumably FFT features, going by the file name) and
# the matching label dump. The later tuning cells load these same two files
# again, so this cell is not required for them to run.
# NOTE(review): joblib.load unpickles the file - only load dumps from a trusted source.
combined_data = load('../DataDumps/data_fft.joblib')
combined_labels = load('../DataDumps/labels.joblib')

# Tuning

In [3]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from joblib import load
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load data: the feature dump (presumably FFT features, going by the file name)
# and its labels. Reloading here means this cell starts from the files on disk
# rather than from variables left behind by an earlier cell.
# NOTE(review): joblib.load unpickles the file - only load dumps from a trusted source.
combined_data = load('../DataDumps/data_fft.joblib')
combined_labels = load('../DataDumps/labels.joblib')

# Define a function to reshape the data
def reshape_data(data):
    """Flatten every axis after the first so each sample becomes one feature row.

    Parameters
    ----------
    data : array exposing ``.shape`` and ``.reshape``
        The first axis indexes samples. Any number of trailing axes is
        accepted (for example ``(samples, timesteps, channels)``); a 1-D
        input is treated as one feature per sample.

    Returns
    -------
    array
        ``data`` reshaped to ``(num_samples, num_features)``, where
        ``num_features`` is the product of all trailing axis lengths.
    """
    num_samples = data.shape[0]
    # Multiply the trailing axis lengths explicitly instead of passing -1 to
    # reshape: -1 cannot be inferred when num_samples is 0, and the product of
    # an empty tuple is 1, which covers 1-D input.
    num_features = int(np.prod(data.shape[1:], dtype=np.int64))
    return data.reshape(num_samples, num_features)

# Flatten each sample into a single feature vector, as KNN expects 2-D input.
combined_data1 = reshape_data(combined_data)

# Split data into training/validation and test sets (80/20, stratified by label).
X_train_val, X_test, y_train_val, y_test = train_test_split(
    combined_data1,
    combined_labels,
    test_size=0.2,
    stratify=combined_labels,
    random_state=0,
)

# Define K-Fold cross-validation. The same splitter drives both the outer loop
# below and the inner search inside GridSearchCV.
# NOTE(review): the hold-out split above is stratified but KFold is not;
# consider StratifiedKFold if per-fold class balance matters.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

# Define parameter grid for hyperparameter tuning
param_grid = {
    'n_neighbors': list(range(1, 10)),  # Every k from 1 to 9 (even values included)
    'weights': ['uniform', 'distance'],   # Weighting schemes
}

# Initialize KNN classifier (GridSearchCV sets the grid parameters on it)
knn = KNeighborsClassifier()

# Per-fold results: winning hyperparameters, accuracy on the held-out
# validation fold, and accuracy on the fixed test set.
best_hyperparams = []
val_accuracies = []
best_accuracies = []

# Perform hyperparameter tuning and K-Fold cross-validation
for train_index, val_index in kf.split(X_train_val):
    X_train, X_val = X_train_val[train_index], X_train_val[val_index]
    y_train, y_val = y_train_val[train_index], y_train_val[val_index]

    # Search the grid using only this fold's training portion
    grid_search = GridSearchCV(knn, param_grid, cv=kf)
    grid_search.fit(X_train, y_train)

    # Get the best KNN model from GridSearchCV
    best_knn = grid_search.best_estimator_

    # Store best hyperparameters
    best_hyperparams.append(grid_search.best_params_)

    # Score the fold's held-out validation samples and keep the result so it
    # can be summarised after the loop (previously computed and discarded).
    predictions_val = best_knn.predict(X_val)
    accuracy_val = accuracy_score(y_val, predictions_val)
    val_accuracies.append(accuracy_val)

    # Print confusion matrix and classification report for each fold
    cm = confusion_matrix(y_val, predictions_val)
    print("Confusion Matrix for validation set:\n", cm)
    print(classification_report(y_val, predictions_val))

    # Score the same fixed test set with this fold's model
    predictions_test = best_knn.predict(X_test)
    accuracy_test = accuracy_score(y_test, predictions_test)
    best_accuracies.append(accuracy_test)

    # Print confusion matrix and classification report for test set
    cm_test = confusion_matrix(y_test, predictions_test)
    print("Confusion Matrix for test set:\n", cm_test)
    print(classification_report(y_test, predictions_test))

# Print average accuracy across all folds for the validation folds and the test set
print("Average Accuracy on Validation Folds:", np.mean(val_accuracies))
print("Average Accuracy on Test Set:", np.mean(best_accuracies))

# Print the best hyperparameters found during each fold
print("Best hyperparameters for each fold:")
for i, params in enumerate(best_hyperparams):
    print("Fold {}: {}".format(i+1, params))


Confusion Matrix for validation set:
 [[68  2  3]
 [ 8 26  1]
 [27 12  7]]
              precision    recall  f1-score   support

           0       0.66      0.93      0.77        73
           1       0.65      0.74      0.69        35
           2       0.64      0.15      0.25        46

    accuracy                           0.66       154
   macro avg       0.65      0.61      0.57       154
weighted avg       0.65      0.66      0.60       154

Confusion Matrix for test set:
 [[91  0  5]
 [12 34  3]
 [33 10  4]]
              precision    recall  f1-score   support

           0       0.67      0.95      0.78        96
           1       0.77      0.69      0.73        49
           2       0.33      0.09      0.14        47

    accuracy                           0.67       192
   macro avg       0.59      0.58      0.55       192
weighted avg       0.61      0.67      0.61       192

Confusion Matrix for validation set:
 [[72  1  8]
 [ 8 26  3]
 [15  8 13]]
              preci

In [4]:
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from joblib import load
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load data: the feature dump (presumably FFT features, going by the file name)
# and its labels. Reloading matters here because the labels are modified in
# place just below.
# NOTE(review): joblib.load unpickles the file - only load dumps from a trusted source.
combined_data = load('../DataDumps/data_fft.joblib')
combined_labels = load('../DataDumps/labels.joblib')

# Relabel every sample of class 2 as class 1, overwriting the loaded array in
# place, so the rest of this cell works with labels 0 and 1 only.
# assumes combined_labels is a NumPy array (boolean-mask assignment) - TODO confirm
combined_labels[combined_labels == 2] = 1


# Define a function to reshape the data
# NOTE(review): this function is also defined in the previous tuning cell;
# the later definition silently replaces the earlier one.
def reshape_data(data):
    """Flatten every axis after the first so each sample becomes one feature row.

    Parameters
    ----------
    data : array exposing ``.shape`` and ``.reshape``
        The first axis indexes samples. Any number of trailing axes is
        accepted (for example ``(samples, timesteps, channels)``); a 1-D
        input is treated as one feature per sample.

    Returns
    -------
    array
        ``data`` reshaped to ``(num_samples, num_features)``, where
        ``num_features`` is the product of all trailing axis lengths.
    """
    num_samples = data.shape[0]
    # Multiply the trailing axis lengths explicitly instead of passing -1 to
    # reshape: -1 cannot be inferred when num_samples is 0, and the product of
    # an empty tuple is 1, which covers 1-D input.
    num_features = int(np.prod(data.shape[1:], dtype=np.int64))
    return data.reshape(num_samples, num_features)

# Flatten each sample into a single feature vector, as KNN expects 2-D input.
combined_data1 = reshape_data(combined_data)

# Split data into training/validation and test sets (80/20, stratified by label).
X_train_val, X_test, y_train_val, y_test = train_test_split(
    combined_data1,
    combined_labels,
    test_size=0.2,
    stratify=combined_labels,
    random_state=0,
)

# Define K-Fold cross-validation. The same splitter drives both the outer loop
# below and the inner search inside GridSearchCV.
# NOTE(review): the hold-out split above is stratified but KFold is not;
# consider StratifiedKFold if per-fold class balance matters.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

# Define parameter grid for hyperparameter tuning
# NOTE(review): class 2 was merged into class 1 earlier in this cell, so the
# labels are binary; with 'uniform' weights an even k can produce tied votes.
# Restrict to range(1, 10, 2) if only odd k is wanted.
param_grid = {
    'n_neighbors': list(range(1, 10)),  # Every k from 1 to 9 (even values included)
    'weights': ['uniform', 'distance'],   # Weighting schemes
}

# Initialize KNN classifier (GridSearchCV sets the grid parameters on it)
knn = KNeighborsClassifier()

# Per-fold results: winning hyperparameters, accuracy on the held-out
# validation fold, and accuracy on the fixed test set.
best_hyperparams = []
val_accuracies = []
best_accuracies = []

# Perform hyperparameter tuning and K-Fold cross-validation
for train_index, val_index in kf.split(X_train_val):
    X_train, X_val = X_train_val[train_index], X_train_val[val_index]
    y_train, y_val = y_train_val[train_index], y_train_val[val_index]

    # Search the grid using only this fold's training portion
    grid_search = GridSearchCV(knn, param_grid, cv=kf)
    grid_search.fit(X_train, y_train)

    # Get the best KNN model from GridSearchCV
    best_knn = grid_search.best_estimator_

    # Store best hyperparameters
    best_hyperparams.append(grid_search.best_params_)

    # Score the fold's held-out validation samples and keep the result so it
    # can be summarised after the loop (previously computed and discarded).
    predictions_val = best_knn.predict(X_val)
    accuracy_val = accuracy_score(y_val, predictions_val)
    val_accuracies.append(accuracy_val)

    # Print confusion matrix and classification report for each fold
    cm = confusion_matrix(y_val, predictions_val)
    print("Confusion Matrix for validation set:\n", cm)
    print(classification_report(y_val, predictions_val))

    # Score the same fixed test set with this fold's model
    predictions_test = best_knn.predict(X_test)
    accuracy_test = accuracy_score(y_test, predictions_test)
    best_accuracies.append(accuracy_test)

    # Print confusion matrix and classification report for test set
    cm_test = confusion_matrix(y_test, predictions_test)
    print("Confusion Matrix for test set:\n", cm_test)
    print(classification_report(y_test, predictions_test))

# Print average accuracy across all folds for the validation folds and the test set
print("Average Accuracy on Validation Folds:", np.mean(val_accuracies))
print("Average Accuracy on Test Set:", np.mean(best_accuracies))

# Print the best hyperparameters found during each fold
print("Best hyperparameters for each fold:")
for i, params in enumerate(best_hyperparams):
    print("Fold {}: {}".format(i+1, params))


Confusion Matrix for validation set:
 [[64  9]
 [33 48]]
              precision    recall  f1-score   support

           0       0.66      0.88      0.75        73
           1       0.84      0.59      0.70        81

    accuracy                           0.73       154
   macro avg       0.75      0.73      0.72       154
weighted avg       0.76      0.73      0.72       154

Confusion Matrix for test set:
 [[85 11]
 [32 64]]
              precision    recall  f1-score   support

           0       0.73      0.89      0.80        96
           1       0.85      0.67      0.75        96

    accuracy                           0.78       192
   macro avg       0.79      0.78      0.77       192
weighted avg       0.79      0.78      0.77       192

Confusion Matrix for validation set:
 [[70 11]
 [25 48]]
              precision    recall  f1-score   support

           0       0.74      0.86      0.80        81
           1       0.81      0.66      0.73        73

    accuracy     