In [1]:
# !pip install Rtree
# !pip install pyts

In [1]:
import numpy as np
# import pandas as pd
from importlib import reload
import matplotlib.pyplot as plt
import h5py
from pyts.transformation import BagOfPatterns, BOSS, WEASEL
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from rtree import index
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_classif
from knnClassifier import kNNClassifier
from logisticRegression import LogisticRegression

# **utils**

In [2]:
def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

def f1_score(actual_y, pred_y):
    tp = np.sum((actual_y + pred_y) == 2)
    fp = np.sum((actual_y - pred_y) == -1)
    fn = np.sum((actual_y - pred_y) == 1)

    print("True Positives (tp):", tp)
    print("False Positives (fp):", fp)
    print("False Negatives (fn):", fn)

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    print("precision: ", precision)
    print("recall: ", recall)

    return 2 * precision * recall / (precision + recall)

# **Load data**

In [3]:
path = 'D:/Documents-D/Downloads/'

# train data
with h5py.File(path + 'train.h5', 'r') as f:
   print(f.keys())
   x_train_raw = f['x']
   y_train_raw = f['y']

   x_train = np.array(x_train_raw)
   y_train = np.array(y_train_raw)

# test data
# with h5py.File(path + 'test.h5', 'r') as f:
#    print(f.keys())
#    x_test_raw = f['x']

#    x_test = np.array(x_test_raw)

<KeysViewHDF5 ['x', 'y']>


In [4]:
x_train = x_train.reshape(x_train.shape[0], x_train.shape[2])
y_train = y_train.reshape(y_train.shape[0], 1)

# x_train1 = x_train[0:150]
# x_train2 = x_train[150:]
# y_train1 = y_train[0:150]
# y_train2 = y_train[150:]

x_train1, x_train2, y_train1, y_train2 = train_test_split(
    x_train, y_train, test_size=0.3, random_state=56
)

del y_train
del x_train

# **Feature Extraction**

### **Bag Of Patterns**

In [5]:
bop = BagOfPatterns(
    window_size=40,
    word_size=30,
    n_bins=4,
    strategy='quantile'
)

x_train1_transformed = bop.fit_transform(x_train1)
x_train2_transformed = bop.transform(x_train2)

In [None]:
del bop
del x_train1_transformed
del x_train2_transformed

### **BOSS**

In [5]:
boss = BOSS(
    window_size=30,
    word_size=20,
    n_bins=4,
    strategy='entropy'
)

x_train1_transformed = boss.fit_transform(x_train1,y=y_train1)
x_train2_transformed = boss.transform(x_train2)

# del boss
# del x_train1_transformed
# del x_train2_transformed

### **WEASEL**

In [41]:
weasel = WEASEL(
    word_size=15, 
    window_sizes=np.arange(20, 50),
    n_bins=4,
    anova=True
)

x_train1_transformed = weasel.fit_transform(x_train1,y=y_train1.reshape((y_train1.shape[0],)))
x_train2_transformed = weasel.transform(x_train2)

# **Tune regression parameters**

In [6]:
kbest = [ 15, 20, 25, 35, 40, 50, 55, 70, 100, 120, 150]    # k mejores features
lrates = [0.005, 0.01, 0.05]                                # learning rates

def test_regression(kbest, lrates):
    d = {}
    for lrate in lrates:
        accuracies = []
        f1_scores = []
        print("- - - - - - - - LEARNING RATE: ", lrate, "- - - - - - - - - - -")
        for k in kbest:
            selector = SelectKBest(chi2,k=k)
            X_train_new = selector.fit_transform(x_train1_transformed, y_train1.reshape(y_train1.shape[0],))
            X_test_new = selector.transform(x_train2_transformed)
            X_test_new = X_test_new.toarray().astype(float)
            X_train_new = X_train_new.toarray()
 
            lr = LogisticRegression(n_iter=250, d=k, threshold=0.6, learning_rate=lrate)
            lr.fit(X_train_new, y_train1)

            print("k.best: ", k, '------')
            y_pred = lr.predict(X_test_new)
            acc = accuracy(y_train2, y_pred)
            f1_ = f1_score(y_train2, y_pred)
            print("accuracy: ", acc, "f1 score: ", f1_)
            accuracies.append(acc)
            f1_scores.append(f1_)
            print()
            del lr
        d[lrate] = (accuracies, f1_scores)

    
measures = test_regression(kbest=kbest, lrates=lrates)

- - - - - - - - LEARNING RATE:  0.005 - - - - - - - - - - -


  return 1.0 / (1.0 + np.exp(-z))
  precision = tp / (tp + fp)


k.best:  15 ------
True Positives (tp): 0
False Positives (fp): 0
False Negatives (fn): 17
precision:  nan
recall:  0.0
accuracy:  0.7258064516129032 f1 score:  nan



  return 1.0 / (1.0 + np.exp(-z))


k.best:  20 ------
True Positives (tp): 2
False Positives (fp): 3
False Negatives (fn): 15
precision:  0.4
recall:  0.11764705882352941
accuracy:  0.7096774193548387 f1 score:  0.1818181818181818



  return 1.0 / (1.0 + np.exp(-z))


k.best:  25 ------
True Positives (tp): 1
False Positives (fp): 1
False Negatives (fn): 16
precision:  0.5
recall:  0.058823529411764705
accuracy:  0.7258064516129032 f1 score:  0.10526315789473684



  return 1.0 / (1.0 + np.exp(-z))


k.best:  35 ------
True Positives (tp): 1
False Positives (fp): 2
False Negatives (fn): 16
precision:  0.3333333333333333
recall:  0.058823529411764705
accuracy:  0.7096774193548387 f1 score:  0.1



  return 1.0 / (1.0 + np.exp(-z))


k.best:  40 ------
True Positives (tp): 13
False Positives (fp): 26
False Negatives (fn): 4
precision:  0.3333333333333333
recall:  0.7647058823529411
accuracy:  0.5161290322580645 f1 score:  0.4642857142857143



  return 1.0 / (1.0 + np.exp(-z))


k.best:  50 ------
True Positives (tp): 14
False Positives (fp): 19
False Negatives (fn): 3
precision:  0.42424242424242425
recall:  0.8235294117647058
accuracy:  0.6451612903225806 f1 score:  0.5599999999999999



  return 1.0 / (1.0 + np.exp(-z))


k.best:  55 ------
True Positives (tp): 6
False Positives (fp): 12
False Negatives (fn): 11
precision:  0.3333333333333333
recall:  0.35294117647058826
accuracy:  0.6290322580645161 f1 score:  0.34285714285714286



  return 1.0 / (1.0 + np.exp(-z))


k.best:  70 ------
True Positives (tp): 6
False Positives (fp): 9
False Negatives (fn): 11
precision:  0.4
recall:  0.35294117647058826
accuracy:  0.6774193548387096 f1 score:  0.37500000000000006



  return 1.0 / (1.0 + np.exp(-z))


k.best:  100 ------
True Positives (tp): 3
False Positives (fp): 4
False Negatives (fn): 14
precision:  0.42857142857142855
recall:  0.17647058823529413
accuracy:  0.7096774193548387 f1 score:  0.25



  return 1.0 / (1.0 + np.exp(-z))


k.best:  120 ------
True Positives (tp): 7
False Positives (fp): 12
False Negatives (fn): 10
precision:  0.3684210526315789
recall:  0.4117647058823529
accuracy:  0.6451612903225806 f1 score:  0.3888888888888889



  return 1.0 / (1.0 + np.exp(-z))


k.best:  150 ------
True Positives (tp): 5
False Positives (fp): 9
False Negatives (fn): 12
precision:  0.35714285714285715
recall:  0.29411764705882354
accuracy:  0.6612903225806451 f1 score:  0.3225806451612903

- - - - - - - - LEARNING RATE:  0.01 - - - - - - - - - - -


  return 1.0 / (1.0 + np.exp(-z))


k.best:  15 ------
True Positives (tp): 12
False Positives (fp): 17
False Negatives (fn): 5
precision:  0.41379310344827586
recall:  0.7058823529411765
accuracy:  0.6451612903225806 f1 score:  0.5217391304347826



  return 1.0 / (1.0 + np.exp(-z))


k.best:  20 ------
True Positives (tp): 4
False Positives (fp): 6
False Negatives (fn): 13
precision:  0.4
recall:  0.23529411764705882
accuracy:  0.6935483870967742 f1 score:  0.29629629629629634



  return 1.0 / (1.0 + np.exp(-z))


k.best:  25 ------
True Positives (tp): 6
False Positives (fp): 15
False Negatives (fn): 11
precision:  0.2857142857142857
recall:  0.35294117647058826
accuracy:  0.5806451612903226 f1 score:  0.31578947368421056



  return 1.0 / (1.0 + np.exp(-z))


k.best:  35 ------
True Positives (tp): 9
False Positives (fp): 17
False Negatives (fn): 8
precision:  0.34615384615384615
recall:  0.5294117647058824
accuracy:  0.5967741935483871 f1 score:  0.41860465116279066



  return 1.0 / (1.0 + np.exp(-z))


k.best:  40 ------
True Positives (tp): 15
False Positives (fp): 21
False Negatives (fn): 2
precision:  0.4166666666666667
recall:  0.8823529411764706
accuracy:  0.6290322580645161 f1 score:  0.5660377358490566



  return 1.0 / (1.0 + np.exp(-z))


k.best:  50 ------
True Positives (tp): 7
False Positives (fp): 14
False Negatives (fn): 10
precision:  0.3333333333333333
recall:  0.4117647058823529
accuracy:  0.6129032258064516 f1 score:  0.36842105263157887



  return 1.0 / (1.0 + np.exp(-z))


k.best:  55 ------
True Positives (tp): 5
False Positives (fp): 11
False Negatives (fn): 12
precision:  0.3125
recall:  0.29411764705882354
accuracy:  0.6290322580645161 f1 score:  0.30303030303030304



  return 1.0 / (1.0 + np.exp(-z))
  precision = tp / (tp + fp)


k.best:  70 ------
True Positives (tp): 0
False Positives (fp): 0
False Negatives (fn): 17
precision:  nan
recall:  0.0
accuracy:  0.7258064516129032 f1 score:  nan



  return 1.0 / (1.0 + np.exp(-z))


k.best:  100 ------
True Positives (tp): 3
False Positives (fp): 6
False Negatives (fn): 14
precision:  0.3333333333333333
recall:  0.17647058823529413
accuracy:  0.6774193548387096 f1 score:  0.23076923076923078



  return 1.0 / (1.0 + np.exp(-z))


k.best:  120 ------
True Positives (tp): 9
False Positives (fp): 13
False Negatives (fn): 8
precision:  0.4090909090909091
recall:  0.5294117647058824
accuracy:  0.6612903225806451 f1 score:  0.46153846153846156



  return 1.0 / (1.0 + np.exp(-z))
  precision = tp / (tp + fp)


k.best:  150 ------
True Positives (tp): 0
False Positives (fp): 0
False Negatives (fn): 17
precision:  nan
recall:  0.0
accuracy:  0.7258064516129032 f1 score:  nan

- - - - - - - - LEARNING RATE:  0.05 - - - - - - - - - - -


  return 1.0 / (1.0 + np.exp(-z))


k.best:  15 ------
True Positives (tp): 12
False Positives (fp): 15
False Negatives (fn): 5
precision:  0.4444444444444444
recall:  0.7058823529411765
accuracy:  0.6774193548387096 f1 score:  0.5454545454545455



  return 1.0 / (1.0 + np.exp(-z))


k.best:  20 ------
True Positives (tp): 6
False Positives (fp): 12
False Negatives (fn): 11
precision:  0.3333333333333333
recall:  0.35294117647058826
accuracy:  0.6290322580645161 f1 score:  0.34285714285714286



  return 1.0 / (1.0 + np.exp(-z))


k.best:  25 ------
True Positives (tp): 14
False Positives (fp): 20
False Negatives (fn): 3
precision:  0.4117647058823529
recall:  0.8235294117647058
accuracy:  0.6290322580645161 f1 score:  0.5490196078431372



  return 1.0 / (1.0 + np.exp(-z))
  precision = tp / (tp + fp)


k.best:  35 ------
True Positives (tp): 0
False Positives (fp): 0
False Negatives (fn): 17
precision:  nan
recall:  0.0
accuracy:  0.7258064516129032 f1 score:  nan



  return 1.0 / (1.0 + np.exp(-z))


k.best:  40 ------
True Positives (tp): 8
False Positives (fp): 16
False Negatives (fn): 9
precision:  0.3333333333333333
recall:  0.47058823529411764
accuracy:  0.5967741935483871 f1 score:  0.39024390243902435



  return 1.0 / (1.0 + np.exp(-z))


k.best:  50 ------
True Positives (tp): 13
False Positives (fp): 18
False Negatives (fn): 4
precision:  0.41935483870967744
recall:  0.7647058823529411
accuracy:  0.6451612903225806 f1 score:  0.5416666666666666



  return 1.0 / (1.0 + np.exp(-z))


k.best:  55 ------
True Positives (tp): 7
False Positives (fp): 17
False Negatives (fn): 10
precision:  0.2916666666666667
recall:  0.4117647058823529
accuracy:  0.5645161290322581 f1 score:  0.34146341463414637



  return 1.0 / (1.0 + np.exp(-z))


k.best:  70 ------
True Positives (tp): 6
False Positives (fp): 10
False Negatives (fn): 11
precision:  0.375
recall:  0.35294117647058826
accuracy:  0.6612903225806451 f1 score:  0.3636363636363636



  return 1.0 / (1.0 + np.exp(-z))


k.best:  100 ------
True Positives (tp): 6
False Positives (fp): 13
False Negatives (fn): 11
precision:  0.3157894736842105
recall:  0.35294117647058826
accuracy:  0.6129032258064516 f1 score:  0.33333333333333337



  return 1.0 / (1.0 + np.exp(-z))


k.best:  120 ------
True Positives (tp): 3
False Positives (fp): 4
False Negatives (fn): 14
precision:  0.42857142857142855
recall:  0.17647058823529413
accuracy:  0.7096774193548387 f1 score:  0.25

k.best:  150 ------
True Positives (tp): 1
False Positives (fp): 0
False Negatives (fn): 16
precision:  1.0
recall:  0.058823529411764705
accuracy:  0.7419354838709677 f1 score:  0.1111111111111111



  return 1.0 / (1.0 + np.exp(-z))


# **Tune knn hyperparameters**

In [6]:
ks = [3, 5, 10, 20, 30, 40, 50, 60] # k de knn
kbest = [5, 10, 15, 18, 20, 25, 30] # k mejores features

def test_hyperparameters(ks, kbest):
  d = {}
  for k_best in kbest:
    # select k_best best attributes
    selector = SelectKBest(chi2, k=k_best)
    X_train_new = selector.fit_transform(x_train1_transformed, y_train1.reshape(y_train1.shape[0],))
    X_test_new = selector.transform(x_train2_transformed)
    X_test_new = X_test_new.toarray().astype(float)
    X_train_new = X_train_new.toarray().astype(float)
    print(X_train_new.shape)
    # create r-tree index of dimension k_best
    knn = kNNClassifier(d=k_best)
    knn.insert_data(X_train_new, y_train1)

    accuracies = []
    f1_scores = []

    # predict based on knn
    for k in ks:
      print("k: ", k, "kBEST:", k_best, " ------------------------------")
      y_pred = knn.predict(X=X_test_new, k=k).reshape(X_test_new.shape[0],1)
      acc = accuracy(y_train2, y_pred)
      f1_ = f1_score(y_train2, y_pred)
      print("accuracy: ", acc, "f1 score: ", f1_)
      accuracies.append(acc)
      f1_scores.append(f1_)
      print()

    d[k_best] = (accuracies, f1_scores)
    del knn

  return d

def plot_performance(tup):
  accuracies = tup[0]
  f1_scores = tup[1]
  plt.plot(ks, accuracies, label='accuracy')
  plt.plot(ks, f1_scores, label='f1 score')

measures = test_hyperparameters(ks=ks, kbest=kbest)


(142, 5)
k:  3 kBEST: 5  ------------------------------
True Positives (tp): 6
False Positives (fp): 10
False Negatives (fn): 11
precision:  0.375
recall:  0.35294117647058826
accuracy:  0.6612903225806451 f1 score:  0.3636363636363636

k:  5 kBEST: 5  ------------------------------
True Positives (tp): 4
False Positives (fp): 5
False Negatives (fn): 13
precision:  0.4444444444444444
recall:  0.23529411764705882
accuracy:  0.7096774193548387 f1 score:  0.30769230769230765

k:  10 kBEST: 5  ------------------------------
True Positives (tp): 2
False Positives (fp): 2
False Negatives (fn): 15
precision:  0.5
recall:  0.11764705882352941
accuracy:  0.7258064516129032 f1 score:  0.19047619047619047

k:  20 kBEST: 5  ------------------------------
True Positives (tp): 2
False Positives (fp): 2
False Negatives (fn): 15
precision:  0.5
recall:  0.11764705882352941
accuracy:  0.7258064516129032 f1 score:  0.19047619047619047

k:  30 kBEST: 5  ------------------------------
True Positives (tp):

  precision = tp / (tp + fp)


(142, 10)
k:  3 kBEST: 10  ------------------------------
True Positives (tp): 4
False Positives (fp): 3
False Negatives (fn): 13
precision:  0.5714285714285714
recall:  0.23529411764705882
accuracy:  0.7419354838709677 f1 score:  0.3333333333333333

k:  5 kBEST: 10  ------------------------------
True Positives (tp): 4
False Positives (fp): 2
False Negatives (fn): 13
precision:  0.6666666666666666
recall:  0.23529411764705882
accuracy:  0.7580645161290323 f1 score:  0.3478260869565218

k:  10 kBEST: 10  ------------------------------
True Positives (tp): 2
False Positives (fp): 2
False Negatives (fn): 15
precision:  0.5
recall:  0.11764705882352941
accuracy:  0.7258064516129032 f1 score:  0.19047619047619047

k:  20 kBEST: 10  ------------------------------
True Positives (tp): 0
False Positives (fp): 0
False Negatives (fn): 17
precision:  nan
recall:  0.0
accuracy:  0.7258064516129032 f1 score:  nan

k:  30 kBEST: 10  ------------------------------
True Positives (tp): 0
False Positi

  precision = tp / (tp + fp)


(142, 15)
k:  3 kBEST: 15  ------------------------------
True Positives (tp): 3
False Positives (fp): 3
False Negatives (fn): 14
precision:  0.5
recall:  0.17647058823529413
accuracy:  0.7258064516129032 f1 score:  0.2608695652173913

k:  5 kBEST: 15  ------------------------------
True Positives (tp): 4
False Positives (fp): 5
False Negatives (fn): 13
precision:  0.4444444444444444
recall:  0.23529411764705882
accuracy:  0.7096774193548387 f1 score:  0.30769230769230765

k:  10 kBEST: 15  ------------------------------
True Positives (tp): 2
False Positives (fp): 2
False Negatives (fn): 15
precision:  0.5
recall:  0.11764705882352941
accuracy:  0.7258064516129032 f1 score:  0.19047619047619047

k:  20 kBEST: 15  ------------------------------
True Positives (tp): 0
False Positives (fp): 0
False Negatives (fn): 17
precision:  nan
recall:  0.0
accuracy:  0.7258064516129032 f1 score:  nan

k:  30 kBEST: 15  ------------------------------
True Positives (tp): 0
False Positives (fp): 0
Fa

  precision = tp / (tp + fp)


(142, 18)
k:  3 kBEST: 18  ------------------------------
True Positives (tp): 3
False Positives (fp): 5
False Negatives (fn): 14
precision:  0.375
recall:  0.17647058823529413
accuracy:  0.6935483870967742 f1 score:  0.24

k:  5 kBEST: 18  ------------------------------
True Positives (tp): 4
False Positives (fp): 3
False Negatives (fn): 13
precision:  0.5714285714285714
recall:  0.23529411764705882
accuracy:  0.7419354838709677 f1 score:  0.3333333333333333

k:  10 kBEST: 18  ------------------------------
True Positives (tp): 2
False Positives (fp): 1
False Negatives (fn): 15
precision:  0.6666666666666666
recall:  0.11764705882352941
accuracy:  0.7419354838709677 f1 score:  0.2

k:  20 kBEST: 18  ------------------------------
True Positives (tp): 0
False Positives (fp): 0
False Negatives (fn): 17
precision:  nan
recall:  0.0
accuracy:  0.7258064516129032 f1 score:  nan

k:  30 kBEST: 18  ------------------------------
True Positives (tp): 0
False Positives (fp): 0
False Negatives 

  precision = tp / (tp + fp)


(142, 20)
k:  3 kBEST: 20  ------------------------------
True Positives (tp): 3
False Positives (fp): 5
False Negatives (fn): 14
precision:  0.375
recall:  0.17647058823529413
accuracy:  0.6935483870967742 f1 score:  0.24

k:  5 kBEST: 20  ------------------------------
True Positives (tp): 2
False Positives (fp): 4
False Negatives (fn): 15
precision:  0.3333333333333333
recall:  0.11764705882352941
accuracy:  0.6935483870967742 f1 score:  0.1739130434782609

k:  10 kBEST: 20  ------------------------------
True Positives (tp): 1
False Positives (fp): 2
False Negatives (fn): 16
precision:  0.3333333333333333
recall:  0.058823529411764705
accuracy:  0.7096774193548387 f1 score:  0.1

k:  20 kBEST: 20  ------------------------------
True Positives (tp): 0
False Positives (fp): 1
False Negatives (fn): 17
precision:  0.0
recall:  0.0
accuracy:  0.7096774193548387 f1 score:  nan

k:  30 kBEST: 20  ------------------------------
True Positives (tp): 0
False Positives (fp): 0
False Negatives

  return 2 * precision * recall / (precision + recall)
  precision = tp / (tp + fp)


(142, 25)
k:  3 kBEST: 25  ------------------------------
True Positives (tp): 3
False Positives (fp): 7
False Negatives (fn): 14
precision:  0.3
recall:  0.17647058823529413
accuracy:  0.6612903225806451 f1 score:  0.22222222222222224

k:  5 kBEST: 25  ------------------------------
True Positives (tp): 3
False Positives (fp): 4
False Negatives (fn): 14
precision:  0.42857142857142855
recall:  0.17647058823529413
accuracy:  0.7096774193548387 f1 score:  0.25

k:  10 kBEST: 25  ------------------------------
True Positives (tp): 2
False Positives (fp): 3
False Negatives (fn): 15
precision:  0.4
recall:  0.11764705882352941
accuracy:  0.7096774193548387 f1 score:  0.1818181818181818

k:  20 kBEST: 25  ------------------------------
True Positives (tp): 0
False Positives (fp): 1
False Negatives (fn): 17
precision:  0.0
recall:  0.0
accuracy:  0.7096774193548387 f1 score:  nan

k:  30 kBEST: 25  ------------------------------
True Positives (tp): 0
False Positives (fp): 0
False Negatives 

  return 2 * precision * recall / (precision + recall)
  precision = tp / (tp + fp)


(142, 30)
k:  3 kBEST: 30  ------------------------------
True Positives (tp): 3
False Positives (fp): 8
False Negatives (fn): 14
precision:  0.2727272727272727
recall:  0.17647058823529413
accuracy:  0.6451612903225806 f1 score:  0.21428571428571427

k:  5 kBEST: 30  ------------------------------
True Positives (tp): 3
False Positives (fp): 7
False Negatives (fn): 14
precision:  0.3
recall:  0.17647058823529413
accuracy:  0.6612903225806451 f1 score:  0.22222222222222224

k:  10 kBEST: 30  ------------------------------
True Positives (tp): 4
False Positives (fp): 2
False Negatives (fn): 13
precision:  0.6666666666666666
recall:  0.23529411764705882
accuracy:  0.7580645161290323 f1 score:  0.3478260869565218

k:  20 kBEST: 30  ------------------------------
True Positives (tp): 0
False Positives (fp): 2
False Negatives (fn): 17
precision:  0.0
recall:  0.0
accuracy:  0.6935483870967742 f1 score:  nan

k:  30 kBEST: 30  ------------------------------
True Positives (tp): 0
False Posit

  return 2 * precision * recall / (precision + recall)
  precision = tp / (tp + fp)


# **Comparación con Scikit**

In [7]:
from sklearn.neighbors import KNeighborsClassifier

In [8]:
selector = SelectKBest(chi2, k=25)
X_train_new = selector.fit_transform(x_train1_transformed, y_train1.reshape(y_train1.shape[0],)).astype(float)
X_test_new = selector.transform(x_train2_transformed).astype(float)
X_test_new = X_test_new.toarray()
X_train_new = X_train_new.toarray()

In [9]:
neigh = KNeighborsClassifier(n_neighbors=15)
neigh.fit(X_train_new, y_train1.reshape(y_train1.shape[0],))

In [10]:
y_pred  = neigh.predict(X_test_new)
yt = y_train2.reshape(y_train2.shape[0],)

In [11]:
np.sum((y_pred + yt) == 2) / np.sum(yt)

np.float64(0.23529411764705882)

In [13]:
knn = kNNClassifier(d=25)
knn.insert_data(X_train_new, y_train1)
y_pred3 = knn.predict(X_test_new, k=15)
y_pred3 == y_pred

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True])

In [73]:
from sklearn.svm import SVC

In [106]:
svc = SVC(kernel='rbf')
svc.fit(X_train_new,  y_train1.reshape(y_train1.shape[0],))

In [107]:
y_pred2  = svc.predict(X_test_new)
y_pred2

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0])

In [139]:
y_pred3

array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0])