<a href="https://colab.research.google.com/github/Faisal-Manchester/ATMEGA328P-Bare-metal/blob/main/Multiclass_kNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import seaborn as sns
import matplotlib.pyplot as plt

# Read the three pairwise datasets. The last column of every CSV is compared
# against a class name below to derive the numeric label.
# Training splits
hp_vs_mp_train = pd.read_csv('hp_vs_mp_train.csv')
hp_vs_lp_train = pd.read_csv('hp_vs_lp_train.csv')
mp_vs_lp_train = pd.read_csv('mp_vs_lp_train.csv')

# Testing splits
hp_vs_mp_test = pd.read_csv('hp_vs_mp_test.csv')
hp_vs_lp_test = pd.read_csv('hp_vs_lp_test.csv')
mp_vs_lp_test = pd.read_csv('mp_vs_lp_test.csv')

# Numeric class codes: 0 = hp, 1 = mp, 2 = lp.
# Each entry is (frame, class name matched in the last column,
#                code when it matches, code for every other row).
for frame, matched_name, matched_code, other_code in (
    (hp_vs_mp_train, 'hp', 0, 1),
    (hp_vs_mp_test, 'hp', 0, 1),
    (hp_vs_lp_train, 'hp', 0, 2),
    (hp_vs_lp_test, 'hp', 0, 2),
    (mp_vs_lp_train, 'mp', 1, 2),
    (mp_vs_lp_test, 'mp', 1, 2),
):
    # The right-hand side is evaluated before 'label' is appended, so
    # iloc[:, -1] still refers to the original class column here.
    frame['label'] = np.where(frame.iloc[:, -1] == matched_name, matched_code, other_code)

# Stack the pairwise frames and remove the original class column, which is now
# second-to-last because 'label' was appended after it.
train_class_column = hp_vs_mp_train.columns[-2]
data_train = pd.concat([hp_vs_mp_train, hp_vs_lp_train, mp_vs_lp_train], axis=0)
data_train = data_train.drop(columns=train_class_column)

test_class_column = hp_vs_mp_test.columns[-2]
data_test = pd.concat([hp_vs_mp_test, hp_vs_lp_test, mp_vs_lp_test], axis=0)
data_test = data_test.drop(columns=test_class_column)

# Feature matrix = every column except the trailing 'label'; target = 'label'.
data_train_X = data_train.iloc[:, :-1].values
data_train_y = data_train['label'].values
data_test_X = data_test.iloc[:, :-1].values
data_test_y = data_test['label'].values

# Sanity-check the array shapes
print("Training Data shape:", data_train_X.shape, "Training Labels shape:", data_train_y.shape)
print("Testing Data shape:", data_test_X.shape, "Testing Labels shape:", data_test_y.shape)

# These arrays are the inputs to the multiclass k-NN experiments in the cells below.


Training Data shape: (270, 16384) Training Labels shape: (270,)
Testing Data shape: (72, 16384) Testing Labels shape: (72,)


In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier  # Import KNeighborsClassifier
from sklearn.metrics import (accuracy_score, confusion_matrix,
                             f1_score, precision_score, recall_score,
                             ConfusionMatrixDisplay)

def calculate_classification(data_train_X, data_train_y, data_test_X, data_test_y, random_state=None):
    """Tune, train and evaluate a k-NN classifier on the given train/test data.

    A stratified 20% subset of the training data is drawn and used to pick
    ``n_neighbors`` (1..30) with a 7-fold grid search. A fresh classifier with
    the selected value is then fitted on the complete training set and scored
    on the test set.

    Parameters
    ----------
    data_train_X, data_train_y : array-like
        Training features and labels. Labels are flattened to 1-D.
    data_test_X, data_test_y : array-like
        Test features and labels. Labels are flattened to 1-D.
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``train_test_split``. ``None`` keeps the original
        behaviour (a different tuning subset on every call); pass an int to
        make a run reproducible.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, y_pred, data_test_y)`` where
        precision, recall and F1 are weighted averages over the classes.
    """
    # Ensure the label arrays are 1-D
    data_train_y = np.ravel(data_train_y)
    data_test_y = np.ravel(data_test_y)

    # Draw the tuning subset. Only the 20% "test" side of the split is used;
    # the 80% side is intentionally discarded (hence the underscores).
    # NOTE(review): the hyperparameter is therefore chosen from a fifth of the
    # training data only -- consider fitting the grid on the full training set.
    _, x_best_prm, _, y_best_prm = train_test_split(
        data_train_X, data_train_y,
        test_size=0.2, stratify=data_train_y, random_state=random_state)

    # Candidate neighbourhood sizes for the grid search
    param_grid = {'n_neighbors': np.arange(1, 31)}

    # 7-fold grid search over n_neighbors on the tuning subset
    grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=7)
    grid.fit(x_best_prm, y_best_prm)
    best_n_neighbors = grid.best_params_.get('n_neighbors')

    # Fit the final model on the entire training set with the selected k
    clf = KNeighborsClassifier(n_neighbors=best_n_neighbors)
    clf.fit(data_train_X, data_train_y)

    # Predict and score on the test set
    y_pred = clf.predict(data_test_X)
    accuracy = accuracy_score(data_test_y, y_pred)

    # Weighted multiclass metrics. zero_division=0 is passed to all three so a
    # class that is never predicted contributes 0 without emitting warnings.
    precision = precision_score(data_test_y, y_pred, average='weighted', zero_division=0)
    recall = recall_score(data_test_y, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(data_test_y, y_pred, average='weighted', zero_division=0)

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")

    return accuracy, precision, recall, f1, y_pred, data_test_y

def plot_confusion_matrix(data_test_y, y_pred):
    """Display the confusion matrix of ``y_pred`` against ``data_test_y``.

    Builds the matrix with ``confusion_matrix``, renders it through
    ``ConfusionMatrixDisplay`` and shows the figure. Returns nothing.
    """
    matrix = confusion_matrix(data_test_y, y_pred)
    display = ConfusionMatrixDisplay(confusion_matrix=matrix)
    display.plot()
    plt.title('Confusion Matrix')
    plt.show()

# Repeat the experiment and collect the four scalar metrics of every run
num_run = 100
acc_list, prec_list, recall_list, f1_list = [], [], [], []

for i in range(num_run):
    b = calculate_classification(data_train_X, data_train_y, data_test_X, data_test_y)
    # b = (accuracy, precision, recall, f1, y_pred, y_true); zip stops after the
    # four metric lists, so the trailing arrays are ignored.
    for metric_values, value in zip((acc_list, prec_list, recall_list, f1_list), b):
        metric_values.append(value)
    print(f'Run {i+1} completed.')

# Mean and standard deviation of each metric across all runs, printed on one line
summary = ["Mean result: "]
for title, metric_values in (("Accuracy:", acc_list),
                             ("Precision:", prec_list),
                             ("Recall:", recall_list),
                             ("F1-Score:", f1_list)):
    summary += [title, np.mean(metric_values), " std: ", np.std(metric_values)]
print(*summary)


Accuracy: 0.6666666666666666
Precision: 0.6142857142857143
Recall: 0.6666666666666666
F1-Score: 0.5567144719687093
Run 1 completed.
Accuracy: 0.6666666666666666
Precision: 0.625
Recall: 0.6666666666666666
F1-Score: 0.6071428571428572
Run 2 completed.
Accuracy: 0.6388888888888888
Precision: 0.6026272577996716
Recall: 0.6388888888888888
F1-Score: 0.6084078119827873
Run 3 completed.
Accuracy: 0.6666666666666666
Precision: 0.625
Recall: 0.6666666666666666
F1-Score: 0.6071428571428572
Run 4 completed.
Accuracy: 0.5555555555555556
Precision: 0.6333333333333333
Recall: 0.5555555555555556
F1-Score: 0.5666666666666667
Run 5 completed.
Accuracy: 0.6666666666666666
Precision: 0.625
Recall: 0.6666666666666666
F1-Score: 0.6071428571428572
Run 6 completed.
Accuracy: 0.6666666666666666
Precision: 0.6176470588235294
Recall: 0.6666666666666666
F1-Score: 0.5763546798029557
Run 7 completed.
Accuracy: 0.6666666666666666
Precision: 0.625
Recall: 0.6666666666666666
F1-Score: 0.6071428571428572
Run 8 complet

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score, precision_score, recall_score)
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

def calculate_classification(data_train_X, data_train_y, data_test_X, data_test_y, random_state=None):
    """Standardize, reduce with PCA, then tune/train/evaluate a k-NN classifier.

    The features are standardized and projected with PCA (both fitted on the
    training data and applied to the test data). A stratified 20% subset of the
    projected training data is used for a 5-fold grid search over
    ``n_neighbors`` (1..30), ``weights`` and ``metric``. A fresh classifier
    with the selected settings is fitted on the full projected training set
    and scored on the projected test set.

    Parameters
    ----------
    data_train_X, data_train_y : array-like
        Training features and labels. Labels are flattened to 1-D.
    data_test_X, data_test_y : array-like
        Test features and labels. Labels are flattened to 1-D.
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``train_test_split``. ``None`` keeps the original
        behaviour (a different tuning subset on every call); pass an int to
        make a run reproducible.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, y_pred, data_test_y)`` where
        precision, recall and F1 are weighted averages over the classes.
    """
    # Ensure the label arrays are 1-D
    data_train_y = np.ravel(data_train_y)
    data_test_y = np.ravel(data_test_y)

    # Step 1: standardize features; statistics come from the training data only
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(data_train_X)
    test_scaled = scaler.transform(data_test_X)

    # Step 2: PCA keeping enough components to explain 95% of the variance
    pca = PCA(n_components=0.95)
    data_train_X_pca = pca.fit_transform(train_scaled)
    data_test_X_pca = pca.transform(test_scaled)

    # Step 3: draw the tuning subset. Only the 20% "test" side of the split is
    # used; the 80% side is intentionally discarded (hence the underscores).
    # NOTE(review): the scaler and PCA above were fitted on the full training
    # set, so the cross-validation folds below share preprocessing statistics.
    # Wrap the preprocessing in a Pipeline if unbiased CV scores are required.
    _, x_best_prm, _, y_best_prm = train_test_split(
        data_train_X_pca, data_train_y,
        test_size=0.2, stratify=data_train_y, random_state=random_state)

    # Step 4: hyperparameter grid for k-NN
    param_grid = {
        'n_neighbors': np.arange(1, 31),      # 1 to 30 neighbours
        'weights': ['uniform', 'distance'],   # neighbour weighting schemes
        'metric': ['euclidean', 'manhattan']  # distance metrics
    }

    # Step 5: 5-fold grid search on the tuning subset, scored by accuracy
    grid = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5, scoring='accuracy')
    grid.fit(x_best_prm, y_best_prm)

    best_params = grid.best_params_
    print(f"Best Hyperparameters: {best_params}")

    # Step 6: fit the final model on the full projected training set.
    # best_params holds exactly the grid keys (n_neighbors, weights, metric).
    knn = KNeighborsClassifier(**best_params)
    knn.fit(data_train_X_pca, data_train_y)

    # Step 7: predict on the projected test set
    y_pred = knn.predict(data_test_X_pca)

    # Step 8: accuracy plus weighted multiclass metrics. zero_division=0 is
    # passed to all three so a class that is never predicted contributes 0
    # without emitting warnings.
    accuracy = accuracy_score(data_test_y, y_pred)
    precision = precision_score(data_test_y, y_pred, average='weighted', zero_division=0)
    recall = recall_score(data_test_y, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(data_test_y, y_pred, average='weighted', zero_division=0)

    print(f"Accuracy: {accuracy}")
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")

    return accuracy, precision, recall, f1, y_pred, data_test_y

def plot_confusion_matrix(data_test_y, y_pred, show=False):
    """Compute the confusion matrix for the predictions and optionally plot it.

    Parameters
    ----------
    data_test_y : array-like
        True labels.
    y_pred : array-like
        Predicted labels.
    show : bool, default=False
        When True, render the matrix with ``ConfusionMatrixDisplay`` and show
        the figure. The default draws nothing, matching the previous behaviour
        where the plotting code was commented out.

    Returns
    -------
    The matrix produced by ``confusion_matrix`` (previously it was computed
    and discarded).
    """
    cm = confusion_matrix(data_test_y, y_pred)
    if show:
        # Imported locally: this cell's import list does not bring
        # ConfusionMatrixDisplay into scope.
        from sklearn.metrics import ConfusionMatrixDisplay
        ConfusionMatrixDisplay(confusion_matrix=cm).plot()
        plt.title('Confusion Matrix')
        plt.show()
    return cm

# Repeat the experiment and collect the four scalar metrics of every run
num_run = 10
acc_list, prec_list, recall_list, f1_list = [], [], [], []

for i in range(num_run):
    b = calculate_classification(data_train_X, data_train_y, data_test_X, data_test_y)
    # b = (accuracy, precision, recall, f1, y_pred, y_true); zip stops after the
    # four metric lists, so the trailing arrays are ignored.
    for metric_values, value in zip((acc_list, prec_list, recall_list, f1_list), b):
        metric_values.append(value)
    print(f'Run {i+1} completed.')

# Mean and standard deviation of each metric across all runs, printed on one line
summary = ["Mean result: "]
for title, metric_values in (("Accuracy:", acc_list),
                             ("Precision:", prec_list),
                             ("Recall:", recall_list),
                             ("F1-Score:", f1_list)):
    summary += [title, np.mean(metric_values), " std: ", np.std(metric_values)]
print(*summary)


Best Hyperparameters: {'metric': 'euclidean', 'n_neighbors': 4, 'weights': 'uniform'}
Accuracy: 0.5833333333333334
Precision: 0.6915584415584415
Recall: 0.5833333333333334
F1-Score: 0.5897832817337462
Run 1 completed.
Best Hyperparameters: {'metric': 'euclidean', 'n_neighbors': 17, 'weights': 'uniform'}
Accuracy: 0.6666666666666666
Precision: 0.6333333333333333
Recall: 0.6666666666666666
F1-Score: 0.6296296296296297
Run 2 completed.
Best Hyperparameters: {'metric': 'euclidean', 'n_neighbors': 15, 'weights': 'uniform'}
Accuracy: 0.6666666666666666
Precision: 0.6212121212121212
Recall: 0.6666666666666666
F1-Score: 0.5929824561403508
Run 3 completed.
Best Hyperparameters: {'metric': 'euclidean', 'n_neighbors': 17, 'weights': 'uniform'}
Accuracy: 0.6666666666666666
Precision: 0.6333333333333333
Recall: 0.6666666666666666
F1-Score: 0.6296296296296297
Run 4 completed.
Best Hyperparameters: {'metric': 'manhattan', 'n_neighbors': 16, 'weights': 'uniform'}
Accuracy: 0.6666666666666666
Precision