# Evaluation for the Advanced Analysis

In this notebook, we use the Dynamic Time Warping (DTW) distance metric in Brute Force k-NN classifiers that are trained on case-bases produced by different case-base editing algorithms.

In [1]:
#importing the necessary packages
import import_ipynb
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import confusion_matrix
from sklearn.neighbors import KNeighborsClassifier  
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set()
import dataset_loader as dl
import time
from scipy.spatial import distance
import sklearn
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
import timeit
from sklearn.neighbors import KNeighborsClassifier
import time
import Conservative_RR
from Conservative_RR import crr2

import Dynamic_tw
from Dynamic_tw import dtw
from Dynamic_tw import DTW

importing Jupyter notebook from Conservative_RR.ipynb
importing Jupyter notebook from Dynamic_tw.ipynb


The following function takes the training and test sets and runs the Brute Force k-NN method on the original dataset, on the case-base edited by the CNN algorithm, and on the case-base edited by the CRR algorithm. It records each model's speed and accuracy along the way. The results are then scaled w.r.t. the Brute Force k-NN on the original dataset, whose speed and accuracy are assigned the value 1.

In [2]:
def _timed_dtw_knn(X_fit, y_fit, X_test, y_test, n_neighbors=5):
    """Fit a brute-force k-NN classifier using the DTW metric and time its predictions.

    Only the call to ``predict`` is timed; fitting is excluded.

    Returns:
        (accuracy, seconds): accuracy of the predictions on ``X_test`` against
        ``y_test`` and the wall-clock seconds spent predicting.
    """
    classifier = KNeighborsClassifier(n_neighbors=n_neighbors, algorithm="brute", metric=DTW)
    classifier.fit(X_fit, y_fit)
    start = time.perf_counter()
    y_pred = classifier.predict(X_test)
    end = time.perf_counter()
    return accuracy_score(y_test, y_pred), (end - start)


def _class_counts(labels):
    """Count how many labels equal the strings '1' and '2' (class-balance check)."""
    frame = pd.DataFrame()
    frame["Class"] = labels
    ones = frame[frame["Class"] == '1'].shape[0]
    twos = frame[frame["Class"] == '2'].shape[0]
    return ones, twos


def _record_row(results, row, algorithm_time, accuracy, predict_time, dataset_size):
    """Write one algorithm's measurements into row ``row`` of the results table."""
    results.loc[row, "Algorithm time"] = algorithm_time
    results.loc[row, "Brute force accuracy"] = accuracy
    results.loc[row, "Brute force time"] = predict_time
    results.loc[row, "Dataset Size"] = dataset_size


def eval_advance(X_train,y_train,X_test,y_test):
    """Compare brute-force DTW k-NN on the original, CNN-edited and CRR-edited case-bases.

    The features are standardised with a scaler fitted on ``X_train``. A
    brute-force 5-NN classifier with the DTW metric is then evaluated on
    ``X_test`` three times: trained on the full training set, on the case-base
    produced by ``CondensedNearestNeighbour`` and on the case-base returned by
    ``crr2``.

    Args:
        X_train: training features.
        y_train: training labels.
        X_test: test features.
        y_test: test labels.

    Returns:
        A tuple ``(results_adv1, results_adv1_chart, cnn_shape, crr_shape, p, q, p2, q2)``:
        * ``results_adv1`` - one row per algorithm ("None", "CNN", "CRR") with
          the editing time, prediction time, accuracy and case-base size.
        * ``results_adv1_chart`` - prediction time, accuracy and case-base size
          of each row divided by the corresponding value of row 0.
        * ``cnn_shape`` / ``crr_shape`` - shapes of the edited feature matrices.
        * ``p``, ``q`` - number of labels equal to '1' and '2' in the CNN case-base.
        * ``p2``, ``q2`` - the same counts for the CRR case-base.
    """
    # Kept local: the notebook's import cell does not provide these two names.
    from sklearn.preprocessing import StandardScaler
    from imblearn.under_sampling import CondensedNearestNeighbour

    results_adv1=pd.DataFrame(columns=['Algorithms','Algorithm time','Brute force time','Brute force accuracy'])
    results_adv1['Algorithms']=["None","CNN","CRR"]
    results_adv1_chart=pd.DataFrame()

    # Fit the scaler on the training data only, then apply it to both splits.
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    #Brute Force on original dataset (no editing, so the algorithm time is 0)
    accuracy, predict_time = _timed_dtw_knn(X_train, y_train, X_test, y_test)
    _record_row(results_adv1, 0, 0, accuracy, predict_time, X_train.shape[0])

    #Brute Force on the case-base edited by the CNN algorithm
    start = time.perf_counter()
    cnn=CondensedNearestNeighbour(random_state=0)
    X_cnn, y_cnn=cnn.fit_resample(X_train, y_train)
    end = time.perf_counter()
    cnn_time=(end-start)
    #Recording the representation of classes in the edited case base, and checking for an imbalanced dataset
    p, q = _class_counts(y_cnn)
    accuracy, predict_time = _timed_dtw_knn(X_cnn, y_cnn, X_test, y_test)
    _record_row(results_adv1, 1, cnn_time, accuracy, predict_time, X_cnn.shape[0])

    #Brute Force k-NN on the edited case-base from the CRR algorithm
    eset, crr_time = crr2(X_train,y_train) #Calling the crr2 function to form the edited case-base
    # The last column of eset is treated as the label column; the rest are features.
    X_crr=eset.iloc[:, :-1].values
    y_crr=eset["Class"].values
    accuracy, predict_time = _timed_dtw_knn(X_crr, y_crr, X_test, y_test)
    _record_row(results_adv1, 2, crr_time, accuracy, predict_time, X_crr.shape[0])
    #Recording the representation of classes in the edited case base, and checking for an imbalanced dataset
    p2, q2 = _class_counts(y_crr)

    #Scaling the results w.r.t Brute Force k-NN (row 0 is the baseline, so its ratios are 1)
    chart_columns = (
        ('Time wrt BF', 'Brute force time'),
        ('Accuracy wrt BF', 'Brute force accuracy'),
        ('Dataset size wrt BF', 'Dataset Size'),
    )
    for chart_name, source_name in chart_columns:
        baseline = results_adv1[source_name][0]
        results_adv1_chart[chart_name] = [results_adv1[source_name][row] / baseline for row in range(3)]
    return results_adv1,results_adv1_chart,X_cnn.shape,X_crr.shape,p,q,p2,q2