# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix  
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

#Feature Classification
def feature_classification(path, num_ngh, max_neighbors=56):
    """Evaluate k-NN classifiers on a labelled CSV and report one in detail.

    The CSV is read with its first column as the row index. The 'label'
    column is the target; every other column is used as a feature. The data
    is split 80/20 into train/test sets (stratified on the label, fixed
    random seed), then:

    1. a k-NN classifier is fitted for every k in 1..max_neighbors and its
       train/test accuracy recorded, and
    2. a classifier with ``num_ngh`` neighbors is fitted, and its
       confusion matrix and test accuracy are printed.

    Args:
        path: Path to the CSV file; it must contain a 'label' column.
        num_ngh: Number of neighbors for the classifier whose confusion
            matrix and accuracy are printed.
        max_neighbors: Largest k tried in the sweep. It is capped at the
            number of training samples, because k-NN cannot query more
            neighbors than it was fitted on.

    Returns:
        A tuple ``(neighbors, test_accuracy, train_accuracy)`` of equal-length
        NumPy arrays: the k values tried and the accuracy obtained for each k
        on the test and training sets respectively.
    """
    # The first CSV column is treated as the row index, not as a feature.
    df = pd.read_csv(path, index_col=0)

    # Features: every column except the target.
    X = df.drop('label', axis=1).values

    # Target labels.
    y = df['label'].values

    # Stratified 80/20 split; the fixed seed keeps runs reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    # Cap the sweep so k never exceeds the number of fitted samples; scoring
    # with a larger k raises a ValueError inside scikit-learn.
    largest_k = min(max_neighbors, len(X_train))
    neighbors = np.arange(1, largest_k + 1)
    train_accuracy = np.empty(len(neighbors))
    test_accuracy = np.empty(len(neighbors))

    for i, k in enumerate(neighbors):
        knn = KNeighborsClassifier(n_neighbors=int(k))
        knn.fit(X_train, y_train)

        # score() is the mean accuracy on the given data and labels.
        train_accuracy[i] = knn.score(X_train, y_train)
        test_accuracy[i] = knn.score(X_test, y_test)

    # Final classifier with the caller-requested number of neighbors.
    knn1 = KNeighborsClassifier(n_neighbors=num_ngh)
    knn1.fit(X_train, y_train)
    # Print the fitted estimator itself (not its bound ``fit`` method).
    print(knn1)

    y_pred = knn1.predict(X_test)

    # Compute and print the confusion matrix.
    cm = confusion_matrix(y_test, y_pred)
    print(cm)

    # Compute and print the test accuracy.
    print('Accuracy: ' + str(accuracy_score(y_test, y_pred)))
    return neighbors, test_accuracy, train_accuracy

if __name__ == '__main__':
    # Input CSV and the k used for the classifier that gets reported.
    path = 'data/stress.csv'
    num_ngh = 7

    # Run the evaluation; the result is (k values, test scores, train scores).
    clf = feature_classification(path, num_ngh)
    neighbors, test_accuracy, train_accuracy = clf

    # Plot test and train accuracy against k on the same axes.
    plt.title('k-NN Varying number of neighbors')
    curves = (
        (test_accuracy, 'Testing Accuracy'),
        (train_accuracy, 'Training accuracy'),
    )
    for scores, curve_label in curves:
        plt.plot(neighbors, scores, label=curve_label)
    plt.legend()
    plt.xlabel('Number of neighbors')
    plt.ylabel('Accuracy')
    plt.show()