In [1]:
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import math as math
import sklearn as sklearn
import random as random
import time
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.model_selection import LeaveOneOut
from scipy import stats
from sklearn.decomposition import PCA


In [2]:
# Path of the CSV file that the next cell loads with pd.read_csv
# (a relative path, so it is resolved against the working directory).
data_path = "DataFrame_01_12.csv"

In [3]:
# Read the CSV at data_path into a DataFrame.
data = pd.read_csv(data_path)
# Uncomment to preview the first 15 rows.
#data.head(15)

In [4]:
def getSamplesAndLabels(data,variables,label):
    """Build feature rows and labels from ``data``, skipping incomplete rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the feature columns and the label column.
    variables : sequence of str
        Feature column names, in the order they appear in every sample.
    label : str
        Name of the column that provides the target value.

    Returns
    -------
    x : list of list
        One inner list per kept row, with values in ``variables`` order.
    y : list
        Label of each kept row, aligned with ``x``.

    Notes
    -----
    Rows are addressed by position, so the frame may carry any index
    (for example after filtering) without rows being missed or mismatched.
    A row is dropped when any of its feature values is missing; the label
    value itself is not checked.
    """
    # Pull each column out once instead of doing a scalar lookup per cell.
    feature_columns = [data[variable].to_numpy() for variable in variables]
    label_column = data[label].to_numpy()

    x=[]
    y=[]
    for row in range(len(data)):
        sample = [column[row] for column in feature_columns]
        # pd.isna also recognises None / pd.NA, which math.isnan rejects.
        if any(pd.isna(point) for point in sample):
            continue
        x.append(sample)
        y.append(label_column[row])

    return x,y

In [5]:
# Feature columns used to build each sample: every feature is listed as a
# raw / "_filt" pair, and this order is the order of values inside a sample.
variables = [
    "RMS", "RMS_filt",
    "Zero-Cross", "Zero-Cross_filt",
    "Spectral_centroid", "Spectral_centroid_filt",
    "Spectral_slope", "Spectral_slope_filt",
    "Spectral_spread", "Spectral_spread_filt",
    "STD", "STD_filt",
]

In [6]:
# Extract the feature rows and their "Surface" labels from the loaded table.
X, y = getSamplesAndLabels(data, variables, label="Surface")

In [7]:
# Hold out 20% of the samples for testing. The split is shuffled with a fixed
# seed and stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=42,
    shuffle=True,
    stratify=y,
)

In [8]:
# Distinct class labels in order of first appearance in y; this order is what
# the confusion matrix and its heatmap tick labels use.
# dict.fromkeys de-duplicates in a single pass while keeping insertion order,
# replacing the quadratic "if label not in labels" list scan.
labels = list(dict.fromkeys(y))

In [9]:
# Fit a 3-nearest-neighbours classifier on the training split.
knn=KNeighborsClassifier(n_neighbors=3)
# fit() is the cell's last expression, so the notebook echoes the fitted estimator.
knn.fit(X_train,y_train)

KNeighborsClassifier(n_neighbors=3)

In [10]:
# Classify the held-out split, then tabulate true labels against predictions.
# Passing `labels` fixes the order of the matrix rows and columns.
predictions = knn.predict(X_test)
c_matrix = metrics.confusion_matrix(
    y_true=y_test,
    y_pred=predictions,
    labels=labels,
)
# Uncomment to inspect the raw matrix:
# print(c_matrix)

In [11]:
# Number of test samples whose prediction equals the true label.
corrects = sum(
    1
    for predicted, expected in zip(predictions, y_test)
    if predicted == expected
)
# Uncomment to report the accuracy on the test split:
# print("accuracy: ", corrects / len(y_test))

In [12]:
# Confusion-matrix heatmap, currently disabled by wrapping the code in a
# string literal. Because that string is the cell's last expression, the
# notebook echoes it as the cell output instead of drawing a figure.
# Remove the surrounding triple quotes to render the plot.
"""
plt.figure(figsize=(10, 8))
sb.heatmap(c_matrix, xticklabels=labels, yticklabels=labels, 
            annot=True, fmt='g')
plt.xlabel('Prediction')
plt.ylabel('Label')
plt.show()
"""

"\nplt.figure(figsize=(10, 8))\nsb.heatmap(c_matrix, xticklabels=labels, yticklabels=labels, \n            annot=True, fmt='g')\nplt.xlabel('Prediction')\nplt.ylabel('Label')\nplt.show()\n"

In [31]:
def benchmark(classifier,X,iterations=10):
    """Time repeated ``classifier.predict(X)`` calls.

    This is a computational benchmark only: the predictions are discarded and
    say nothing about classification quality.

    Parameters
    ----------
    classifier : object
        Any object exposing ``predict(X)``.
    X : sized collection
        Samples passed unchanged to every ``predict`` call.
    iterations : int, default 10
        Number of times ``predict`` is called.

    Returns
    -------
    time_it : float
        Elapsed seconds for all iterations together.
    number_of_samples : int
        ``len(X) * iterations``, the total number of samples predicted.
    """
    number_of_samples=len(X)*iterations
    # perf_counter is the monotonic, highest-resolution clock meant for
    # measuring short durations; time.time() can tick too coarsely for that.
    start_time = time.perf_counter()
    for _ in range(iterations):
        classifier.predict(X)
    time_it = time.perf_counter()-start_time
    return time_it, number_of_samples
# Baseline run: time the classifier currently bound to `knn` over all samples,
# then print the elapsed seconds and the throughput in samples per second.
first_time, samples = benchmark(knn, X, iterations=10)
print(first_time, samples / first_time)

0.15104293823242188 38267.26404849097


In [35]:
# One fitted k-NN classifier for every k from 1 to 30, all trained on the same
# training split. Note that `knn` is rebound on each pass, so after this cell
# it refers to the k=30 model.
classifiers = []
for n_neighbors in range(1, 31):
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    # fit() returns the estimator itself, so the fitted model can be stored directly.
    classifiers.append(knn.fit(X_train, y_train))

In [36]:
# Benchmark settings; `iterations` and `x_len` are read again when the speeds
# are printed.
iterations = 10
x_len = len(X)
# Elapsed prediction time of each classifier, in the same order as `classifiers`.
# The sample count returned by benchmark() is not needed here.
times = [benchmark(model, X, iterations)[0] for model in classifiers]

In [37]:
# Report throughput (samples predicted per second) for every k.
ks = range(1, 31)
# The number of predicted samples is the same for every classifier.
total_samples = iterations * x_len
for k in ks:
    # times is 0-based while k starts at 1.
    speed = total_samples / times[k - 1]
    print("Speed for k =", k, ":", speed)

30 30
Speed for k = 1 : 43449.33368222209
Speed for k = 2 : 45862.801967461215
Speed for k = 3 : 45146.151648724095
Speed for k = 4 : 44796.11875377642
Speed for k = 5 : 44451.39043316269
Speed for k = 6 : 43777.68870863182
Speed for k = 7 : 44112.32863152685
Speed for k = 8 : 42805.21705352445
Speed for k = 9 : 42805.065894484076
Speed for k = 10 : 42490.56548745776
Speed for k = 11 : 42490.78890682483
Speed for k = 12 : 42490.34207044019
Speed for k = 13 : 41276.40464130352
Speed for k = 14 : 41874.79422809193
Speed for k = 15 : 40983.335085836006
Speed for k = 16 : 40983.61221981604
Speed for k = 17 : 40983.88935754412
Speed for k = 18 : 40983.54293596965
Speed for k = 19 : 40983.82007276072
Speed for k = 20 : 41276.40464130352
Speed for k = 21 : 40694.944362098744
Speed for k = 22 : 40983.75078821157
Speed for k = 23 : 40410.65132234739
Speed for k = 24 : 40129.704991408966
Speed for k = 25 : 40130.103556956325
Speed for k = 26 : 39852.96569364784
Speed for k = 27 : 39853.096721907