In [1]:
import nbimporter
import helper_methods as hm
import preprocessing as pp
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from collections import Counter
import matplotlib.pyplot as plt

Importing Jupyter notebook from helper_methods.ipynb
Importing Jupyter notebook from preprocessing.ipynb


### K-NN Classifier Implementation 

In [2]:
# This function was meant to train the data - but no explicit training in K-NN
def train(x, y):
    return

In [3]:
# This function takes in the training data and the feature values to the data sample to be predicted on. 
# Using the specified number of nearest neighbours, it returns the predicted classification of the specificed data sample
def predict_one(x_train, y_train, x_test_point, k):
    distances = []
    for i in range(len(x_train)):
        # sum of square of distance of each feature - minkowski Distance with p = 2
        distance = ((x_train[i, :] - x_test_point)**2).sum()
        # appending the list of the distance for the point and its index to the list
        distances.append([distance, i])

    # Sorting using the distance from the sample point
    distances = sorted(distances)
    
    targets = []
    # Finding the classification of the elements using the first 'k' elemets in the distances list i.e 'k' nearest neighbours
    for i in range(k):
        # list of the indices of the 'k' nearest neighbours
        index_of_training_data = distances[i][1]
        
        # adding the nearest neighbours to the targets list
        targets.append(y_train[index_of_training_data])
    
    # returns the most common entry among the targets
    return Counter(targets).most_common(1)[0][0]

In [4]:
# This function takes in the training data, the data samples to be predicted upon and the value of k
# It return the predicted classification of the data samples given to it for prediction
def predict(x_train, y_train, x_test_data, k):
    predictions = []
    
    # making prediction for the testing data samples
    for x_test in x_test_data:
        predictions.append(predict_one(x_train, y_train, x_test, k))
    return predictions

### Using CV to find Optimal parameters

Sorting list: https://stackoverflow.com/questions/17555218/python-how-to-sort-a-list-of-lists-by-the-fourth-element-in-each-list  

In [5]:
def print_parameters_accuracy(accuracies):
    print('#Features \t #Neighbours \t Accuracy')
    for i in range(len(accuracies)):
        print(accuracies[i][0], '\t\t', accuracies[i][1], '\t\t', accuracies[i][2])
    print()

In [6]:
def find_optimal_values(max_features, max_neighbours, num_folds = 10, symbol_name = 'AAPL'):
    accuracies = list()
    for num_features in range(2, max_features + 1, 2):
        print('Features:', num_features)
        
        X_train, X_test, Y_train, Y_test = hm.prepare_data(num_features, symbol_name, is_binary_ouput=True)
        X_train, X_test, Y_train, Y_test = X_train.values, X_test.values, Y_train.values, Y_test.values
        
        for k_neighbours in range(2, max_neighbours + 1, 2):
            neighbour_accuracy = hm.timeSeriesCV(X_train, Y_train, num_folds, predict, [k_neighbours], is_classification=True)
            accuracies.append([num_features, k_neighbours, neighbour_accuracy])
    
    print_parameters_accuracy(accuracies)
    
    # Sorting the accuracies
    accuracies.sort(reverse=True, key=lambda x: x[2])
    print_parameters_accuracy(accuracies)
    
    return accuracies[0][0], accuracies[0][1]

In [7]:
num_features, k_neighbours = find_optimal_values(max_features=50, max_neighbours=100, num_folds=10, symbol_name = 'MSFT')

Features: 2
Parameters ------------------------> [2]
Parameters ------------------------> [4]
Parameters ------------------------> [6]
Parameters ------------------------> [8]
Parameters ------------------------> [10]
Parameters ------------------------> [12]
Parameters ------------------------> [14]
Parameters ------------------------> [16]
Parameters ------------------------> [18]
Parameters ------------------------> [20]
Parameters ------------------------> [22]
Parameters ------------------------> [24]
Parameters ------------------------> [26]
Parameters ------------------------> [28]
Parameters ------------------------> [30]
Parameters ------------------------> [32]
Parameters ------------------------> [34]
Parameters ------------------------> [36]
Parameters ------------------------> [38]
Parameters ------------------------> [40]
Parameters ------------------------> [42]
Parameters ------------------------> [44]
Parameters ------------------------> [46]
Parameters ---------------

Parameters ------------------------> [92]
Parameters ------------------------> [94]
Parameters ------------------------> [96]
Parameters ------------------------> [98]
Parameters ------------------------> [100]
Features: 10
Parameters ------------------------> [2]
Parameters ------------------------> [4]
Parameters ------------------------> [6]
Parameters ------------------------> [8]
Parameters ------------------------> [10]
Parameters ------------------------> [12]
Parameters ------------------------> [14]
Parameters ------------------------> [16]
Parameters ------------------------> [18]
Parameters ------------------------> [20]
Parameters ------------------------> [22]
Parameters ------------------------> [24]
Parameters ------------------------> [26]
Parameters ------------------------> [28]
Parameters ------------------------> [30]
Parameters ------------------------> [32]
Parameters ------------------------> [34]
Parameters ------------------------> [36]
Parameters -------------

Parameters ------------------------> [82]
Parameters ------------------------> [84]
Parameters ------------------------> [86]
Parameters ------------------------> [88]
Parameters ------------------------> [90]
Parameters ------------------------> [92]
Parameters ------------------------> [94]
Parameters ------------------------> [96]
Parameters ------------------------> [98]
Parameters ------------------------> [100]
Features: 18
Parameters ------------------------> [2]
Parameters ------------------------> [4]
Parameters ------------------------> [6]
Parameters ------------------------> [8]
Parameters ------------------------> [10]
Parameters ------------------------> [12]
Parameters ------------------------> [14]
Parameters ------------------------> [16]
Parameters ------------------------> [18]
Parameters ------------------------> [20]
Parameters ------------------------> [22]
Parameters ------------------------> [24]
Parameters ------------------------> [26]
Parameters -------------

Parameters ------------------------> [72]
Parameters ------------------------> [74]
Parameters ------------------------> [76]
Parameters ------------------------> [78]
Parameters ------------------------> [80]
Parameters ------------------------> [82]
Parameters ------------------------> [84]
Parameters ------------------------> [86]
Parameters ------------------------> [88]
Parameters ------------------------> [90]
Parameters ------------------------> [92]
Parameters ------------------------> [94]
Parameters ------------------------> [96]
Parameters ------------------------> [98]
Parameters ------------------------> [100]
Features: 26
Parameters ------------------------> [2]
Parameters ------------------------> [4]
Parameters ------------------------> [6]
Parameters ------------------------> [8]
Parameters ------------------------> [10]
Parameters ------------------------> [12]
Parameters ------------------------> [14]
Parameters ------------------------> [16]
Parameters -------------

Parameters ------------------------> [62]
Parameters ------------------------> [64]
Parameters ------------------------> [66]
Parameters ------------------------> [68]
Parameters ------------------------> [70]
Parameters ------------------------> [72]
Parameters ------------------------> [74]
Parameters ------------------------> [76]
Parameters ------------------------> [78]
Parameters ------------------------> [80]
Parameters ------------------------> [82]
Parameters ------------------------> [84]
Parameters ------------------------> [86]
Parameters ------------------------> [88]
Parameters ------------------------> [90]
Parameters ------------------------> [92]
Parameters ------------------------> [94]
Parameters ------------------------> [96]
Parameters ------------------------> [98]
Parameters ------------------------> [100]
Features: 34
Parameters ------------------------> [2]
Parameters ------------------------> [4]
Parameters ------------------------> [6]
Parameters ------------

Parameters ------------------------> [52]
Parameters ------------------------> [54]
Parameters ------------------------> [56]
Parameters ------------------------> [58]
Parameters ------------------------> [60]
Parameters ------------------------> [62]
Parameters ------------------------> [64]
Parameters ------------------------> [66]
Parameters ------------------------> [68]
Parameters ------------------------> [70]
Parameters ------------------------> [72]
Parameters ------------------------> [74]
Parameters ------------------------> [76]
Parameters ------------------------> [78]
Parameters ------------------------> [80]
Parameters ------------------------> [82]
Parameters ------------------------> [84]
Parameters ------------------------> [86]
Parameters ------------------------> [88]
Parameters ------------------------> [90]
Parameters ------------------------> [92]
Parameters ------------------------> [94]
Parameters ------------------------> [96]
Parameters -----------------------

Parameters ------------------------> [42]
Parameters ------------------------> [44]
Parameters ------------------------> [46]
Parameters ------------------------> [48]
Parameters ------------------------> [50]
Parameters ------------------------> [52]
Parameters ------------------------> [54]
Parameters ------------------------> [56]
Parameters ------------------------> [58]
Parameters ------------------------> [60]
Parameters ------------------------> [62]
Parameters ------------------------> [64]
Parameters ------------------------> [66]
Parameters ------------------------> [68]
Parameters ------------------------> [70]
Parameters ------------------------> [72]
Parameters ------------------------> [74]
Parameters ------------------------> [76]
Parameters ------------------------> [78]
Parameters ------------------------> [80]
Parameters ------------------------> [82]
Parameters ------------------------> [84]
Parameters ------------------------> [86]
Parameters -----------------------

20 		 6 		 0.5061096902529127
20 		 8 		 0.5080988917306053
20 		 10 		 0.5035521454958796
20 		 12 		 0.5041204887752202
20 		 14 		 0.5061096902529127
20 		 16 		 0.49900539926115367
20 		 18 		 0.4970161977834612
20 		 20 		 0.49587951122477975
20 		 22 		 0.49502699630576863
20 		 24 		 0.49815288434214267
20 		 26 		 0.5001420858198352
20 		 28 		 0.5080988917306053
20 		 30 		 0.5044046604148906
20 		 32 		 0.5063938618925832
20 		 34 		 0.5055413469735721
20 		 36 		 0.5115089514066496
20 		 38 		 0.5083830633702757
20 		 40 		 0.5092355782892867
20 		 42 		 0.5078147200909349
20 		 44 		 0.5072463768115942
20 		 46 		 0.5046888320545609
20 		 48 		 0.5009946007388463
20 		 50 		 0.5063938618925832
20 		 52 		 0.5075305484512646
20 		 54 		 0.5103722648479682
20 		 56 		 0.508667235009946
20 		 58 		 0.5041204887752202
20 		 60 		 0.5024154589371981
20 		 62 		 0.5032679738562091
20 		 64 		 0.5049730036942314
20 		 66 		 0.5055413469735721
20 		 68 		 0.5055413469735721
20 		 7

36 		 6 		 0.488717509283062
36 		 8 		 0.49471579548700373
36 		 10 		 0.5021422450728363
36 		 12 		 0.5058554698657527
36 		 14 		 0.5049985718366181
36 		 16 		 0.49700085689802914
36 		 18 		 0.49700085689802914
36 		 20 		 0.4987146529562982
36 		 22 		 0.5007140816909454
36 		 24 		 0.4992859183090546
36 		 26 		 0.5004284490145673
36 		 28 		 0.49071693801770916
36 		 30 		 0.4975721222507855
36 		 32 		 0.49328763210511284
36 		 34 		 0.4910025706940875
36 		 36 		 0.49528706083976004
36 		 38 		 0.49471579548700373
36 		 40 		 0.5038560411311054
36 		 42 		 0.4975721222507854
36 		 44 		 0.49842902027992003
36 		 46 		 0.5009997143673236
36 		 48 		 0.4995715509854327
36 		 50 		 0.49642959154527283
36 		 52 		 0.49842902027992003
36 		 54 		 0.4972864895744073
36 		 56 		 0.5041416738074836
36 		 58 		 0.5004284490145672
36 		 60 		 0.4987146529562982
36 		 62 		 0.4961439588688946
36 		 64 		 0.49557269351613825
36 		 66 		 0.49271636675235647
36 		 68 		 0.4938588974578691

12 		 98 		 0.5206024438761012
6 		 82 		 0.5204081632653063
10 		 68 		 0.5203182722364308
10 		 64 		 0.5191815856777495
10 		 66 		 0.5191815856777493
10 		 76 		 0.518897414038079
12 		 96 		 0.5188974140380789
6 		 88 		 0.518140589569161
6 		 84 		 0.5181405895691609
6 		 86 		 0.5181405895691609
12 		 92 		 0.5180448991190679
16 		 48 		 0.5177607274793975
4 		 90 		 0.5175736961451247
6 		 80 		 0.5175736961451247
8 		 70 		 0.5174765558397271
10 		 78 		 0.5171923842000568
2 		 24 		 0.5170068027210886
8 		 44 		 0.5169082125603865
12 		 84 		 0.5169082125603865
12 		 100 		 0.5169082125603865
10 		 80 		 0.5166240409207161
12 		 46 		 0.5166240409207161
8 		 72 		 0.5163398692810458
10 		 70 		 0.5163398692810458
10 		 74 		 0.5163398692810458
12 		 88 		 0.5163398692810458
12 		 94 		 0.5163398692810458
30 		 14 		 0.5162393162393163
32 		 10 		 0.5162393162393162
46 		 92 		 0.5161382462153671
12 		 64 		 0.5160556976413755
16 		 24 		 0.5160556976413754
14 		 90 		 0.51605

22 		 74 		 0.505982905982906
24 		 14 		 0.505982905982906
26 		 12 		 0.505982905982906
26 		 60 		 0.505982905982906
28 		 4 		 0.505982905982906
28 		 44 		 0.505982905982906
32 		 94 		 0.505982905982906
34 		 32 		 0.505982905982906
34 		 98 		 0.505982905982906
6 		 52 		 0.505952380952381
4 		 72 		 0.5059523809523809
6 		 30 		 0.5059523809523809
36 		 12 		 0.5058554698657527
40 		 80 		 0.5058554698657527
40 		 92 		 0.5058554698657526
42 		 20 		 0.5058554698657526
46 		 44 		 0.5058554698657526
16 		 68 		 0.5058255186132424
8 		 80 		 0.5058255186132423
12 		 36 		 0.5058255186132423
16 		 72 		 0.5058255186132423
16 		 84 		 0.5058255186132423
20 		 88 		 0.5058255186132423
22 		 68 		 0.5056980056980057
24 		 24 		 0.5056980056980057
26 		 66 		 0.5056980056980057
28 		 74 		 0.5056980056980057
32 		 72 		 0.5056980056980057
2 		 100 		 0.5056689342403629
4 		 10 		 0.5056689342403629
6 		 58 		 0.5056689342403629
42 		 88 		 0.5055698371893745
46 		 42 		 0.50556983718

26 		 28 		 0.5002849002849002
28 		 32 		 0.5002849002849002
36 		 80 		 0.500142816338189
38 		 90 		 0.500142816338189
40 		 16 		 0.500142816338189
16 		 6 		 0.5001420858198352
18 		 74 		 0.5001420858198352
20 		 26 		 0.5001420858198352
6 		 10 		 0.5
22 		 84 		 0.5
22 		 86 		 0.5
24 		 70 		 0.5
24 		 94 		 0.5
26 		 56 		 0.5
26 		 86 		 0.5
32 		 48 		 0.5
40 		 26 		 0.4998571836618109
40 		 28 		 0.4998571836618109
40 		 42 		 0.4998571836618109
40 		 52 		 0.4998571836618109
40 		 56 		 0.4998571836618109
44 		 12 		 0.4998571836618109
44 		 40 		 0.4998571836618109
46 		 82 		 0.4998571836618109
40 		 34 		 0.49985718366181087
42 		 64 		 0.49985718366181087
6 		 26 		 0.4997165532879818
22 		 20 		 0.4997150997150997
24 		 72 		 0.4997150997150997
32 		 60 		 0.4997150997150997
48 		 46 		 0.4997136311569301
48 		 68 		 0.4997136311569301
48 		 76 		 0.4997136311569301
48 		 80 		 0.4997136311569301
48 		 98 		 0.4997136311569301
10 		 20 		 0.49957374254049447
16 		 9

In [8]:
X_train, X_test, Y_train, Y_test = hm.prepare_data(num_features)
X_train, X_test, Y_train, Y_test = X_train.values, X_test.values, Y_train.values, Y_test.values

### In-built KNN Classifier

In [9]:
clf = KNeighborsClassifier(n_neighbors=k_neighbours)
clf.fit(X_train, Y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=72, p=2,
           weights='uniform')

### Training and Predicting

In [10]:
print('In-Built KNN (Accuracy) score - k =', k_neighbours, '--', clf.score(X_test, Y_test))
# Y_pred = rgr.predict(X_test)

In-Built KNN (Accuracy) score - k = 72 -- 0.47473200612557426


In [11]:
Y_pred = predict(X_train, Y_train, X_test, k_neighbours)
print('Implementaion Accuracy Score --', accuracy_score(Y_test, Y_pred))

Implementaion Accuracy Score -- 0.48162327718223585
