In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [2]:
from knn_class import KNNModel

In [3]:
# Read the audio feature table; the path is resolved relative to the kernel's working directory.
df = pd.read_csv("../audio_features.csv")

# Target: the "Label" column.
Y = df["Label"]

# Features: every remaining column once the sample identifier and the target are removed.
X = df.drop(["Sample ID", "Label"], axis="columns")

I will analyse each of the numbered states in the table below. <br>
Results are stored in a 2D list laid out like the table; each element is a list of 4 items: <br>
[best k, best accuracy, best f, relative path to the image of the k-accuracy graph] <br>

| |w/o feature selection| Filter method | Wrapper method | Embedded method | Intersection |
|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
|  **w/o normalisation**   |  (1)  |  (3)   | (5) | (7) | (9) |
|  **w/ normalisation**   |  (2)  |  (4)   | (6) | (8) | (10) |

In [4]:
# Results grid laid out like the markdown table above: row 0 = without normalisation,
# row 1 = with normalisation; one column per feature-selection variant.
# Each cell is filled with [best k, best accuracy, best f, relative path to the plot].
knn_table = [
    [[], [], [], [], []],
    [[], [], [], [], []]
]

# (1) without normalisation, without feature selection
# For every candidate f the model reports its own best k and accuracy; the f with the
# highest accuracy is kept (the first one wins on ties).
f_candidates = range(10, 21)  # f = 10..20 inclusive
k_limit = 30  # forwarded to plot_k_accuracy; presumably the largest k tried — TODO confirm
plot_path = "k_accuracy_graphs/1_knn_plot.png"

# One (best k, best accuracy, f, figure) record per candidate f.
f_search = []
for f in f_candidates:
    # NOTE(review): the two False flags and the empty string presumably switch off
    # normalisation and feature selection for state (1) — confirm against knn_class.
    knn_model = KNNModel(X, Y, False, False, "", f)
    # The plot is requested before best_k / best_accuracy are read, as in the original order.
    fig = knn_model.plot_k_accuracy(k_limit)
    f_search.append((knn_model.best_k, knn_model.best_accuracy, f, fig))

# max() returns the first record with the highest accuracy (index 1 of each record).
best_k, max_accuracy, best_f, fig = max(f_search, key=lambda record: record[1])

# Create the output folder if needed so savefig does not fail on a fresh checkout.
Path(plot_path).parent.mkdir(parents=True, exist_ok=True)
fig.savefig(plot_path)

# Record the saved plot's path as the fourth item, as the table description specifies.
knn_table[0][0] = [best_k, max_accuracy, best_f, plot_path]
print(f"Best k: {best_k} \nMax accuracy: {max_accuracy} \nBest f: {best_f}")

Best k: 3 
Max accuracy: 0.5661764705882353 
Best f: 10
