In [246]:
import pandas as pd
import numpy as np


#preprocessing glass data
glass_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data', header=None)
glass_data.columns = ['Id', 'RI', 'Na', 'Mg','Al', 'Si', 'K', 'Ca','Ba', 'Fe','Class']
#removes ID and replaces ? with NaN to check for missing values and there are none
glass_data = glass_data.drop(['Id'],axis=1)
glass_data = glass_data.replace('?',np.NaN)

#preprocessing segmentation data
segmentation_data = pd.read_csv('/Users/reza/Downloads/segmentation.data', error_bad_lines=False,sep=',')
segmentation_data.columns = ['Class','region-centroid-col', 'region-centroid-row', 'region-pixel-count', 'short-line-density-5','short-line-density-2', 'vedge-mean', 'vegde-sd', 'hedge-mean','hedge-sd', 'intensity-mean','rawred-mean','rawblue-mean','rawgreen-mean','exred-mean','exblue-mean','exgreen-mean','value-mean','saturatoin-mean','hue-mean']
segmentation_data = segmentation_data.replace('?',np.NaN)
class_col= segmentation_data.pop('Class')
segmentation_data.insert(19,"Class",class_col)
dataset = segmentation_data.values.tolist()

#converts the string class name column to numerical values
class_values = [row[19] for row in dataset]
unique = set(class_values)
lookup = dict()
for i, value in enumerate(unique):
    lookup[value] = i
for row in dataset:
    row[19] = lookup[row[19]]
segmentation_data=pd.DataFrame(dataset)

#preprocessing vote data
vote_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/voting-records/house-votes-84.data', error_bad_lines=False,sep=',',header=None)
vote_data.columns = ['Class Name','handicapped-infants','water-project-cost-sharing','adoption-of-the-budget-resolution','physician-fee-freeze',
                    'el-salvador-aid','religious-groups-in-schools','anti-satellite-test-ban','aid-to-nicaraguan-contras','mx-missile',
                    'immigration','synfuels-corporation-cutback','education-spending','superfund-right-to-sue','crime','duty-free-exports','export-administration-act-south-africa']

#y is changed to 1 and n is changed to 0 to make the data numerical
ynmap = {'y':1,'n':0,'?':np.nan}
partymap = {'republican':0,'democrat':1}
vote_data['republican'] = vote_data['Class Name'].map(partymap)
vote_data.drop('Class Name',axis=1,inplace=True)
for column in vote_data.columns.drop('republican'):
    vote_data[column+'1'] = vote_data[column].map(ynmap)
    vote_data.drop(column,axis=1,inplace=True)
partymap = {'republican':1,'democrat':0}
data_col = vote_data.columns


class_col= vote_data.pop('republican')
vote_data.insert(16,"class",class_col)

#preprocessing abalone data
abalone_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data', header=None, sep=',')
abalone_data.columns = ['Sex', 'Length', 'Diameter','Height', 'Whole weight', 'Shucked weight', 'Viscera weight','Shell weight', 'Rings']
abalone_data = abalone_data.replace('?',np.NaN)
dataset = abalone_data.values.tolist()
#converts the string class name column to numerical values
class_values = [row[0] for row in dataset]
unique = set(class_values)
lookup = dict()
for i, value in enumerate(unique):
    lookup[value] = i
for row in dataset:
    row[0] = lookup[row[0]]
abalone_data=pd.DataFrame(dataset)

#preprocessing computer hardware data
cpu_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/cpu-performance/machine.data', header=None, sep=',')
cpu_data.columns = ['vendor name', 'Model Name', 'MYCT','MMIN', 'MMAX', 'CACH', 'CHMIN','CHMAX', 'PRP','ERP']
#deleted_field = cpu_data.pop('ERP')
cpu_data = cpu_data.replace('?',np.NaN)
dataset = cpu_data.values.tolist()
#converts the string class name column to numerical values
class_values = [row[0] for row in dataset]
unique = set(class_values)
lookup = dict()
for i, value in enumerate(unique):
    lookup[value] = i
for row in dataset:
    row[0] = lookup[row[0]]
cpu_data=pd.DataFrame(dataset)
dataset = cpu_data.values.tolist()
class_values = [row[1] for row in dataset]
unique = set(class_values)
lookup = dict()
for i, value in enumerate(unique):
    lookup[value] = i
for row in dataset:
    row[1] = lookup[row[1]]
cpu_data=pd.DataFrame(dataset)


#preprocessing forest fires data
ff_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csv', header=None, sep=',')
ff_data.columns = ['X', 'Y', 'month','day', 'FFMC', 'DMC', 'DC','ISI', 'temp','RH','wind','rain','area burned']
ff_data = ff_data.replace('?',np.NaN)


new_header = ff_data.iloc[0] #grab the first row for the header
ff_data = ff_data[1:] #take the data less the header row
ff_data.columns = new_header #set the header row as the df header
dataset = ff_data.values.tolist()
#converts the string class name column to numerical values
class_values = [row[2] for row in dataset]
unique = set(class_values)
lookup = dict()
for i, value in enumerate(unique):
    lookup[value] = i
for row in dataset:
    row[2] = lookup[row[2]]
class_values = [row[3] for row in dataset]
unique = set(class_values)
lookup = dict()
for i, value in enumerate(unique):
    lookup[value] = i
for row in dataset:
    row[3] = lookup[row[3]]
ff_data=pd.DataFrame(dataset)


In [88]:
import numpy as np 
 
class FiveFoldStratCv:
 
    #Constructor
    #problemType: 'r' for regression and 'c' for classification 
    def __init__(self, np_dataset, problemType):
        self.__np_dataset = np_dataset
        self.__problemType = problemType
   
    #Returns: 
    #Fold0, fold1, fold2, fold3, fold4
    #Five folds 
    def get_five_folds(self):
 
        #Dimensions of the dataset the data set
        num_columns = np.size(self.__np_dataset,1)
        num_rows = np.size(self.__np_dataset,0)
 
        #Create empty folds and shuffle data
        fold0 = np.arange(1)
        fold1 = np.arange(1)
        fold2 = np.arange(1)
        fold3 = np.arange(1)
        fold4 = np.arange(1)
        np.random.shuffle(self.__np_dataset)
 
        #for classification problem
        if self.__problemType == "c":
            #Record the column of the Actual Class
            actual_class = num_columns - 1
            #Array containing the unique Actual Class values
            unique_class = np.unique(self.__np_dataset[:,actual_class])
            unique_class_size = unique_class.size
 
            # For each unique class in the unique Actual Class array
            for unique_class_idx in range(0, unique_class_size):
                # Initialize the count to 0
                count = 0
                # Iterating through each row of the data
                for row in range(0, num_rows):
                    #If the unique class and actual class are the same
                    if unique_class[unique_class_idx] == (self.__np_dataset[row,actual_class]):
                            if count == 0:
                                #If fold has not yet been created
                                if np.size(fold0) == 1:
                                    fold0 = self.__np_dataset[row,:]
                                    count += 1
                                else:
                                    # Append this instance to the fold
                                    new_row = self.__np_dataset[row,:]
                                    fold0 = np.vstack([fold0,new_row])
                                    count += 1
                            #fold1
                            elif count == 1:
                                if np.size(fold1) == 1:
                                    fold1 = self.__np_dataset[row,:]
                                    count += 1
                                else:
                                    new_row = self.__np_dataset[row,:]
                                    fold1 = np.vstack([fold1,new_row])
                                    count += 1
                            #fold2
                            elif count == 2:
                                if np.size(fold2) == 1:
                                    fold2 = self.__np_dataset[row,:]
                                    count += 1
                                else:
                                    new_row = self.__np_dataset[row,:]
                                    fold2 = np.vstack([fold2,new_row])
                                    count += 1
                            #fold3
                            elif count == 3:
                                if np.size(fold3) == 1:
                                    fold3 = self.__np_dataset[row,:]
                                    count += 1
                                else:
                                    new_row = self.__np_dataset[row,:]
                                    fold3 = np.vstack([fold3,new_row])
                                    count += 1
                            #fold4
                            else:
                                if np.size(fold4) == 1:
                                    fold4 = self.__np_dataset[row,:]
                                    count = 0
                                else:
                                    new_row = self.__np_dataset[row,:]
                                    fold4 = np.vstack([fold4,new_row])
                                    count = 0
 
        #for regression problem
        else:
            # Record the column of the Stratification Bin
            strat_bin = num_columns - 1
 
            #array containing the unique strat bin values
            unique_bin = np.unique(self.__np_dataset[:,strat_bin])
            unique_bin_size = unique_bin.size
 
            #For each unique bin in the unique Stratification Bin array
            for unique_bin_idx in range(0, unique_bin_size):
                #Initialize the count to 0
                count = 0
                #iterate through each row
                for row in range(0, num_rows):
                    #If the actual bin and unique bin are the same
                    if unique_bin[unique_bin_idx] == (self.__np_dataset[row,strat_bin]):
                            #fold0
                            if count == 0:
                                # If fold has not yet been created
                                if np.size(fold0) == 1:
                                    fold0 = self.__np_dataset[row,:]
                                    count += 1
                                else:
                                    #Append this instance to the fold                        
                                    new_row = self.__np_dataset[row,:]
                                    fold0 = np.vstack([fold0,new_row])
                                    count += 1
                            #fold1
                            elif count == 1:
                                if np.size(fold1) == 1:
                                    fold1 = self.__np_dataset[row,:]
                                    count += 1
                                else:
                                    new_row = self.__np_dataset[row,:]
                                    fold1 = np.vstack([fold1,new_row])
                                    count += 1
                            #fold2
                            elif count == 2:
                                if np.size(fold2) == 1:
                                    fold2 = self.__np_dataset[row,:]
                                    count += 1
                                else:
                                    new_row = self.__np_dataset[row,:]
                                    fold2 = np.vstack([fold2,new_row])
                                    count += 1
                            #fold3
                            elif count == 3:
                                if np.size(fold3) == 1:
                                    fold3 = self.__np_dataset[row,:]
                                    count += 1
                                else:
                                    new_row = self.__np_dataset[row,:]
                                    fold3 = np.vstack([fold3,new_row])
                                    count += 1                            
                            #fold4
                            else:
                                if np.size(fold4) == 1:
                                    fold4 = self.__np_dataset[row,:]
                                    count = 0
                                else:
                                    new_row = self.__np_dataset[row,:]
                                    fold4 = np.vstack([fold4,new_row])
                                    count = 0
         
        return fold0, fold1, fold2, fold3, fold4

In [239]:
import math
class Knn:
    global distance
    def __init__(self, k, problemType):
        self.__k = k
        self.__problemType = problemType
 

    #The k most similar neighbors from the training set for a given test point. 
    def get_neighbors(self, training_set, test_point): 
        #dimensions of points in the training set
        num_training_points = np.size(training_set,0)
        num_training_columns = np.size(training_set,1)
 
        #Record the column index of the actual class of the training_set
        actual_class = None
        #If classification problem
        if self.__problemType == "c":
            actual_class = num_training_columns - 1
        #If regression problem
        else:
            actual_class = num_training_columns - 1
 
        #2D array for the result in the end
        actual_class_and_distance = np.zeros((num_training_points, 2))
        neighbors = None
 
        #For each row (training point) in the training set
        for row in range(0, num_training_points):
 
            #Record the actual class value and initialize temp values
            actual_class_and_distance[row,0] = training_set[row,actual_class]
            temp_training_point = np.copy(training_set[row,:])
            temp_test_point = np.copy(test_point)
 
            #If classification problem
            if self.__problemType == "c":           
                #Update temporary points
                temp_training_point = np.delete(temp_training_point,[0,actual_class])
                temp_test_point = np.delete(temp_test_point,[0,actual_class])
 
            # If this is a regression problem
            else:
                #Update temporary points
                temp_training_point = np.delete(temp_training_point,[0,actual_class,actual_class+1])
                temp_test_point = np.delete(temp_test_point,[0,actual_class,actual_class+1])
 
            #Calculate the euclidean distance and record it
            distance = np.linalg.norm(temp_test_point.astype(np.float) - temp_training_point.astype(np.float))
            actual_class_and_distance[row,1] = distance
 
        # Sort the actual_class_and_distance 2D array by distance
        actual_class_and_distance = actual_class_and_distance[actual_class_and_distance[:,1].argsort()]
        k = self.__k
        neighbors = actual_class_and_distance[:k,:]
        return neighbors
    #Used to make predictions mainly for classification problems. The Gaussian Kernel for regression is used in another method
    def make_prediction(self, neighbors): 
        prediction = None
        #If classification problem
        if self.__problemType == "c":
            neighborsint = neighbors.astype(int)
            prediction = np.bincount(neighborsint).argmax()             
        #If regression problem
        else:
            prediction = np.mean(neighbors)
        return prediction

    #Either classification accuracy (for classification problems) or 
    #mean squared error (for regression problems)
    def get_accuracy(self, actual_class_array, predicted_class_array):
 
        #Initialize variables
        accuracy = None
        decision = None
        counter = None
        actual_class_array_size = actual_class_array.size
 
        # If classification problem
        if self.__problemType == "c":             
            counter = 0
            #For each element in the actual class array
            for row in range(0,actual_class_array_size):
                #retuirns whether the prediction is correct or not
                if actual_class_array[row] == predicted_class_array[row]:
                    decision = "correct"
                    counter += 1
                else:
                    decision = "incorrect"
            #accuracy calculation
            classification_accuracy = counter/(actual_class_array_size)
            accuracy = classification_accuracy
 
        #If regression problem
        else:
            squared_error_array = np.empty(actual_class_array_size)
            squared_error = None
 
            #For each element in the actual class array
            for row in range(0,actual_class_array_size):
                # Calculate the squared error
                squared_error = (abs((actual_class_array[row] - predicted_class_array[row]))) 
                squared_error *= squared_error
                squared_error_array[row] = squared_error
 
            #takes average of the squared error array as accuracy
            mean_squared_error = np.mean(squared_error_array)             
            accuracy = mean_squared_error
         
        return accuracy

In [195]:
class EditedKnn:

    def __init__(self, training_set, problemType="c"):
        self.__training_set = training_set
        self.__problemType = problemType

    def get_trainingset(self):
         
        # Initialize a edited set
        edited_training_set = np.copy(self.__training_set)
        #dimensions of the training points
        num_training_points = np.size(edited_training_set,0)
        num_training_columns = np.size(edited_training_set,1)
 
        print("\nBefore editting: " + str(num_training_points) + " training points\n")

        actual_class = num_training_columns - 1
 
        #Initialize an array named z
        z = np.copy(edited_training_set[0,:])

        new_row = np.copy(edited_training_set[0,:])
        z = np.vstack([z,new_row])
        row = 1
        while row < num_training_points:
            #Record the actual class value
            actual_class_value = edited_training_set[row,actual_class]
            #Knn
            knn1 = Knn(1,self.__problemType) 
            #nearest neighbor for each point
            this_point = edited_training_set[row,:]
            neighbor_array = knn1.get_neighbors(z,this_point)
            #prediction from neighbors
            neighbors_arr = neighbor_array[:,0]
            prediction = knn1.make_prediction(neighbors_arr)
            #Check Prediction
            if actual_class_value == prediction:
                new_row = np.copy(this_point)
                z = np.vstack([z,new_row])
                edited_training_set = np.delete(edited_training_set, row, 0)
                num_training_points -= 1      
            row += 1
            
        zero_transfers_to_z = False
        zero_points_left = None
   
        # Update the number of points in the edited training_set
        num_training_points = np.size(edited_training_set,0)   
        if num_training_points > 0:
            zero_points_left = False
        else:
            zero_points_left = True
        while not(zero_transfers_to_z) and not(zero_points_left):
            transfers_made = 0
            row = 0
            while row < num_training_points:
                #Record the actual class value
                actual_class_value = edited_training_set[row,actual_class]
                #Knn
                knn1 = Knn(1,self.__problemType) 
                #nearest neighbor for each point
                this_point = edited_training_set[row,:]
                neighbor_array = knn1.get_neighbors(z,this_point)
                #prediction from neighbors
                neighbors_arr = neighbor_array[:,0]
                prediction = knn1.make_prediction(neighbors_arr)
                #Check Prediction
                if actual_class_value == prediction:
                    new_row = np.copy(this_point)
                    z = np.vstack([z,new_row])
                    edited_training_set = np.delete(edited_training_set, row, 0)
                    num_training_points -= 1      
                row += 1
         
            #Update the number of points
            num_training_points = np.size(edited_training_set,0)
            if num_training_points > 0:
                zero_points_left = False
            else:
                zero_points_left = True
            if transfers_made > 0:
                zero_transfers_to_z = False
            else: 
                zero_transfers_to_z = True
 
        #Delete row 0 from the z
        z = np.delete(z,0,0)
        # Print the final number of points in the z
        print("After editting: " + str(np.size(z,0)) + " training points\n")
        return z

In [196]:
class CondensedKnn:

    def __init__(self, training_set, problemType="c"):
        self.__training_set = training_set
        self.__problemType = problemType

    def get_trainingset(self):
         
        # Initialize a Condensed set
        edited_training_set = np.copy(self.__training_set)
        #dimensions of the training points
        num_training_points = np.size(edited_training_set,0)
        num_training_columns = np.size(edited_training_set,1)
 
        print("\nBefore condensing: " + str(num_training_points) + " training points\n")

        actual_class = num_training_columns - 1
 
        #Initialize an array named z
        z = np.copy(edited_training_set[0,:])

        new_row = np.copy(edited_training_set[0,:])
        z = np.vstack([z,new_row])
        row = 1
        while row < num_training_points:
            #Record the actual class value
            actual_class_value = edited_training_set[row,actual_class]
            #Knn
            knn1 = Knn(1,self.__problemType) 
            #nearest neighbor for each point
            this_point = edited_training_set[row,:]
            neighbor_array = knn1.get_neighbors(z,this_point)
            #prediction from neighbors
            neighbors_arr = neighbor_array[:,0]
            prediction = knn1.make_prediction(neighbors_arr)
            #Check Prediction, if it doesnt match, add that point to z
            if actual_class_value != prediction:
                new_row = np.copy(this_point)
                z = np.vstack([z,new_row])
                edited_training_set = np.delete(edited_training_set, row, 0)
                num_training_points -= 1      
            row += 1
            
        zero_transfers_to_z = False
        zero_points_left = None
   
        # Update the number of points in the edited training_set
        num_training_points = np.size(edited_training_set,0)   
        if num_training_points > 0:
            zero_points_left = False
        else:
            zero_points_left = True
        while not(zero_transfers_to_z) and not(zero_points_left):
            transfers_made = 0
            row = 0
            while row < num_training_points:
                #Record the actual class value
                actual_class_value = edited_training_set[row,actual_class]
                #Knn
                knn1 = Knn(1,self.__problemType) 
                #nearest neighbor for each point
                this_point = edited_training_set[row,:]
                neighbor_array = knn1.get_neighbors(z,this_point)
                #prediction from neighbors
                neighbors_arr = neighbor_array[:,0]
                prediction = knn1.make_prediction(neighbors_arr)
                #Check Prediction, if it does not match, add that point to z
                if actual_class_value != prediction:
                    new_row = np.copy(this_point)
                    z = np.vstack([z,new_row])
                    edited_training_set = np.delete(edited_training_set, row, 0)
                    num_training_points -= 1      
                row += 1
         
            #Update the number of points
            num_training_points = np.size(edited_training_set,0)
            if num_training_points > 0:
                zero_points_left = False
            else:
                zero_points_left = True
            if transfers_made > 0:
                zero_transfers_to_z = False
            else: 
                zero_transfers_to_z = True
 
        #Delete row 0 from the z
        z = np.delete(z,0,0)
        # Print the final number of points in the z
        print("After condensing: " + str(np.size(z,0)) + " training points\n")
        return z

In [264]:
import pandas as pd
import numpy as np
def implementAlgo(k,problemType,data,algo):
    global accuracy
    # "c" for classification or "r" for regression
    problem = problemType
    pd_data_set = data
    #Convert dataframe into a Numpy array
    np_data_set = pd_data_set.to_numpy(copy=True)
    #Create fivefolds object
    fivefolds1 = FiveFoldStratCv(np_data_set,problem)

    #number of folds
    num_folds = 5
    knn1 = Knn(k,problem) 

    #create 5 folds
    fold0, fold1, fold2, fold3, fold4 = fivefolds1.get_five_folds()
    ##FOR DEMO
    print("size of fold0 is")
    print(fold0.size)
    print("size of fold1 is")
    print(fold1.size)
    print("size of fold2 is")
    print(fold2.size)
    print("size of fold3 is")
    print(fold3.size)
    print("size of fold4 is")
    print(fold4.size)
    
    training_dataset = None
    test_dataset = None

    #initialize array of accuracy stats
    accuracy_statistics = np.zeros(num_folds)

    if algo=="knn":
        algo_name = "K-Nearest Neighbor"   
        #5 different splits depending on fold
        for testfold in range(0, num_folds):
            #Each fold will have a chance to be the test data set
            if testfold == 0:
                test_dataset = fold0
                training_dataset = np.concatenate((fold1, fold2, fold3, fold4), axis=0)
            elif testfold == 1:
                test_dataset = fold1
                training_dataset = np.concatenate((fold0, fold2, fold3, fold4), axis=0)
            elif testfold == 2:
                test_dataset = fold2
                training_dataset = np.concatenate((fold0, fold1, fold3, fold4), axis=0)
            elif testfold == 3:
                test_dataset = fold3
                training_dataset = np.concatenate((fold0, fold1, fold2, fold4), axis=0)
            else:
                test_dataset = fold4
                training_dataset = np.concatenate((fold0, fold1, fold2, fold3), axis=0)

            
            actual_class = None          

            #If classification problem
            if problem == "c":
                actual_class = np.size(test_dataset,1) - 1
            #If regression problem
            else:
                actual_class = np.size(test_dataset,1) - 1

            #array of class values for test points
            actual_class_values = test_dataset[:,actual_class]
            num_test_points = np.size(test_dataset,0)

            predicted_class_values = np.zeros(num_test_points)
            #weights = np.zeros(num_test_points)

            #iterate through each row
            for row in range(0, num_test_points):
                this_point = test_dataset[row,:]
                neighbor_array = knn1.get_neighbors(training_dataset,this_point)
                if row==0:
                    print("test point:")
                    print(this_point)
                    print("Neighbors:")
                    print(neighbor_array)
                neighbors_arr = neighbor_array[:,0]

                prediction=0
                weights=np.zeros(len(neighbor_array[:,1]))
                #For classification, employ a plurality vote to determine the class
                #For regression, apply a Gaussian kernel to make your prediction
                if problem == "c":
                    prediction = knn1.make_prediction(neighbors_arr)
                elif problem == "r":
                    mean = knn1.make_prediction(neighbors_arr)
                    for i in range(0,np.size(neighbor_array[:,1])):
                        weights[i] = math.exp((-1/(2*(.5)))*neighbor_array[i,1])
                        prediction = prediction + np.mean(neighbors_arr)*weights[i]
                #Record the prediction in the predicted_class_values array
                predicted_class_values[row] = prediction
   
            #Calculate accuracy
            accuracy = knn1.get_accuracy(actual_class_values,predicted_class_values)
            accuracy_statistics[testfold] = accuracy

    elif algo=="cknn":
        algo_name = "Condensed K-Nearest Neighbor"        
        for testfold in range(0, num_folds):
            # Each fold will have a chance to be the test data set
            if testfold == 0:
                test_dataset = fold0
                training_dataset = np.concatenate((fold1, fold2, fold3, fold4), axis=0)
            elif testfold == 1:
                test_dataset = fold1
                training_dataset = np.concatenate((fold0, fold2, fold3, fold4), axis=0)
            elif testfold == 2:
                test_dataset = fold2
                training_dataset = np.concatenate((fold0, fold1, fold3, fold4), axis=0)
            elif testfold == 3:
                test_dataset = fold3
                training_dataset = np.concatenate((fold0, fold1, fold2, fold4), axis=0)
            else:
                test_dataset = fold4
                training_dataset = np.concatenate((fold0, fold1, fold2, fold3), axis=0)

            #class CondensedKnn
            cknn1 = CondensedKnn(training_dataset,problem)

            #get condensed trainingset
            training_dataset = cknn1.get_trainingset()

            actual_class = np.size(test_dataset,1) - 1
            actual_class_values = test_dataset[:,actual_class]

            num_test_points = np.size(test_dataset,0)
            predicted_class_values = np.zeros(num_test_points)

            #For each row in the test data
            for row in range(0, num_test_points):
                #get neighbors of each point
                this_point = test_dataset[row,:]
                neighbor_array = knn1.get_neighbors(training_dataset,this_point)
                #actual class values
                neighbors_arr = neighbor_array[:,0]
                #make prediction
                prediction = knn1.make_prediction(neighbors_arr)
                #Record the prediction
                predicted_class_values[row] = prediction

            #Calculate accuracy
            accuracy = knn1.get_accuracy(actual_class_values,predicted_class_values)
            accuracy_statistics[testfold] = accuracy
            print("Classification Accuracy: " + str(accuracy * 100) + "%\n")
    elif algo=="eknn":
        algo_name = "Edited K-Nearest Neighbor"
        for testfold in range(0, num_folds):
            # Each fold will have a chance to be the test data set
            if testfold == 0:
                test_dataset = fold0
                training_dataset = np.concatenate((
                    fold1, fold2, fold3, fold4), axis=0)
            elif testfold == 1:
                test_dataset = fold1
                training_dataset = np.concatenate((
                    fold0, fold2, fold3, fold4), axis=0)
            elif testfold == 2:
                test_dataset = fold2
                training_dataset = np.concatenate((
                    fold0, fold1, fold3, fold4), axis=0)
            elif testfold == 3:
                test_dataset = fold3
                training_dataset = np.concatenate((
                    fold0, fold1, fold2, fold4), axis=0)
            else:
                test_dataset = fold4
                training_dataset = np.concatenate((
                    fold0, fold1, fold2, fold3), axis=0)

            #class EditedKnn
            eknn1 = EditedKnn(training_dataset,problem)

            #get condensed trainingset
            training_dataset = eknn1.get_trainingset()

            actual_class = np.size(test_dataset,1) - 1
            actual_class_values = test_dataset[:,actual_class]

            num_test_points = np.size(test_dataset,0)
            predicted_class_values = np.zeros(num_test_points)

            #For each row in the test data
            for row in range(0, num_test_points):
                #get neighbors of each point
                this_point = test_dataset[row,:]
                neighbor_array = knn1.get_neighbors(training_dataset,this_point)
                #actual class values
                neighbors_arr = neighbor_array[:,0]
                #make prediction
                prediction = knn1.make_prediction(neighbors_arr)
                #Record the prediction
                predicted_class_values[row] = prediction

            #Calculate accuracy
            accuracy = knn1.get_accuracy(actual_class_values,predicted_class_values)
            accuracy_statistics[testfold] = accuracy
            print("Classification Accuracy: " + str(accuracy * 100) + "%\n")
 
    accuracy = np.mean(accuracy_statistics)
    accuracy *= 100
    
    
    print("----------------------------------------------------------")
    print("Algorithm name: " + algo_name)
    print("Accuracy Statistics for All 5 testfolds:")
    print(accuracy_statistics)
   
 
    if problem == "c":
        print("Problem Type: Classification")
    else:
        print("Problem Type: Regression")
 
    print()
    print("Value for k : " + str(k))
    print()
    if problem == "c":
        print("Classification Accuracy : " + str(accuracy) + "%")
    else: 
        print("Mean Squared Error : " + str(accuracy/100))
    print("----------------------------------------------------------")
 


In [244]:
#Tune K and find best accuracy for knn and Vote data 
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"c",vote_data,"knn")
    if bestAccuracy<accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.57954545 0.625      0.62068966 0.62790698 0.72093023]
Problem Type : Classification

Value for k : 1

Classification Accuracy : 63.481446380403874%
----------------------------------------------------------
----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.61363636 0.60227273 0.65517241 0.65116279 0.6744186 ]
Problem Type : Classification

Value for k : 2

Classification Accuracy : 63.93325800102063%
----------------------------------------------------------
----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.85227273 0.82954545 0.81609195 0.81395349 0.80232558]
Problem Type : Classification

Value for k : 3

Classification Accuracy : 82.28378411217226%
--------------------

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.85227273 0.80681818 0.75862069 0.80232558 0.86046512]
Problem Type : Classification

Value for k : 25

Classification Accuracy : 81.61004592841002%
----------------------------------------------------------
----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.80681818 0.78409091 0.79310345 0.84883721 0.8255814 ]
Problem Type : Classification

Value for k : 26

Classification Accuracy : 81.16862287672231%
----------------------------------------------------------
----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.85227273 0.71590909 0.83908046 0.80232558 0.84883721]
Problem Type : Classification

Value for k : 27

Classification Accuracy : 81.16850137299215%
------------------

In [97]:
#Tune K and find best accuracy for cknn and Vote data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"c",vote_data,"cknn")
    if bestAccuracy<accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))


Before condensing: 347 training instances

After condensing: 146 training instances

Classification Accuracy: 59.09090909090909%


Before condensing: 347 training instances

After condensing: 154 training instances

Classification Accuracy: 62.5%


Before condensing: 348 training instances

After condensing: 152 training instances

Classification Accuracy: 65.51724137931035%


Before condensing: 349 training instances

After condensing: 144 training instances

Classification Accuracy: 61.627906976744185%


Before condensing: 349 training instances

After condensing: 132 training instances

Classification Accuracy: 61.627906976744185%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.59090909 0.625      0.65517241 0.61627907 0.61627907]
Problem Type : Classification

Value for k : 1

Classification Accuracy : 62.07279288474157%

Before condensing: 347 training instances

After condensi

Classification Accuracy: 80.23255813953489%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.875      0.80681818 0.8045977  0.8372093  0.80232558]
Problem Type : Classification

Value for k : 9

Classification Accuracy : 82.51901533377074%

Before condensing: 347 training instances

After condensing: 144 training instances

Classification Accuracy: 86.36363636363636%


Before condensing: 347 training instances

After condensing: 160 training instances

Classification Accuracy: 82.95454545454545%


Before condensing: 348 training instances

After condensing: 146 training instances

Classification Accuracy: 75.86206896551724%


Before condensing: 349 training instances

After condensing: 161 training instances

Classification Accuracy: 81.3953488372093%


Before condensing: 349 training instances

After condensing: 145 training instances

Classification Accuracy: 81.3953488372093%


---

After condensing: 162 training instances

Classification Accuracy: 82.75862068965517%


Before condensing: 349 training instances

After condensing: 148 training instances

Classification Accuracy: 74.4186046511628%


Before condensing: 349 training instances

After condensing: 152 training instances

Classification Accuracy: 86.04651162790698%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.82954545 0.81818182 0.82758621 0.74418605 0.86046512]
Problem Type : Classification

Value for k : 18

Classification Accuracy : 81.59929284829045%

Before condensing: 347 training instances

After condensing: 144 training instances

Classification Accuracy: 76.13636363636364%


Before condensing: 347 training instances

After condensing: 159 training instances

Classification Accuracy: 88.63636363636364%


Before condensing: 348 training instances

After condensing: 147 training instances

Class

After condensing: 162 training instances

Classification Accuracy: 79.54545454545455%


Before condensing: 347 training instances

After condensing: 147 training instances

Classification Accuracy: 61.36363636363637%


Before condensing: 348 training instances

After condensing: 154 training instances

Classification Accuracy: 73.5632183908046%


Before condensing: 349 training instances

After condensing: 156 training instances

Classification Accuracy: 82.55813953488372%


Before condensing: 349 training instances

After condensing: 145 training instances

Classification Accuracy: 74.4186046511628%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.79545455 0.61363636 0.73563218 0.8255814  0.74418605]
Problem Type : Classification

Value for k : 27

Classification Accuracy : 74.28981069718841%

Before condensing: 347 training instances

After condensing: 148 training instances

Classi

In [135]:
#Tune K and find best accuracy for eknn and Vote data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"c",vote_data,"eknn")
    if bestAccuracy<accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))


Before editting: 347 training instances

After editting: 213 training instances

Classification Accuracy: 62.5%


Before editting: 347 training instances

After editting: 209 training instances

Classification Accuracy: 54.54545454545454%


Before editting: 348 training instances

After editting: 210 training instances

Classification Accuracy: 64.36781609195403%


Before editting: 349 training instances

After editting: 207 training instances

Classification Accuracy: 62.7906976744186%


Before editting: 349 training instances

After editting: 203 training instances

Classification Accuracy: 60.46511627906976%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.625      0.54545455 0.64367816 0.62790698 0.60465116]
Problem Type : Classification

Value for k : 1

Classification Accuracy : 60.93381691817938%

Before editting: 347 training instances

After editting: 204 training instances

Cl

After editting: 212 training instances

Classification Accuracy: 80.68181818181817%


Before editting: 347 training instances

After editting: 195 training instances

Classification Accuracy: 78.4090909090909%


Before editting: 348 training instances

After editting: 201 training instances

Classification Accuracy: 73.5632183908046%


Before editting: 349 training instances

After editting: 210 training instances

Classification Accuracy: 79.06976744186046%


Before editting: 349 training instances

After editting: 212 training instances

Classification Accuracy: 83.72093023255815%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.80681818 0.78409091 0.73563218 0.79069767 0.8372093 ]
Problem Type : Classification

Value for k : 10

Classification Accuracy : 79.08896503122646%

Before editting: 347 training instances

After editting: 202 training instances

Classification Accuracy: 82.954

After editting: 209 training instances

Classification Accuracy: 84.0909090909091%


Before editting: 347 training instances

After editting: 214 training instances

Classification Accuracy: 78.4090909090909%


Before editting: 348 training instances

After editting: 216 training instances

Classification Accuracy: 80.45977011494253%


Before editting: 349 training instances

After editting: 215 training instances

Classification Accuracy: 83.72093023255815%


Before editting: 349 training instances

After editting: 206 training instances

Classification Accuracy: 72.09302325581395%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.84090909 0.78409091 0.8045977  0.8372093  0.72093023]
Problem Type : Classification

Value for k : 19

Classification Accuracy : 79.75474472066293%

Before editting: 347 training instances

After editting: 203 training instances

Classification Accuracy: 62.5%


After editting: 211 training instances

Classification Accuracy: 78.4090909090909%


Before editting: 347 training instances

After editting: 208 training instances

Classification Accuracy: 68.18181818181817%


Before editting: 348 training instances

After editting: 203 training instances

Classification Accuracy: 80.45977011494253%


Before editting: 349 training instances

After editting: 211 training instances

Classification Accuracy: 82.55813953488372%


Before editting: 349 training instances

After editting: 206 training instances

Classification Accuracy: 77.90697674418605%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.78409091 0.68181818 0.8045977  0.8255814  0.77906977]
Problem Type : Classification

Value for k : 28

Classification Accuracy : 77.50315909698429%

Before editting: 347 training instances

After editting: 209 training instances

Classification Accuracy: 85.22

In [98]:
#Tune K and find best accuracy for knn and glass data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"c",glass_data,"knn")
    if bestAccuracy<accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))


----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.75555556 0.68181818 0.81395349 0.69047619 0.775     ]
Problem Type : Classification

Value for k : 1

Classification Accuracy : 74.3360683244404%

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.6        0.68181818 0.62790698 0.66666667 0.7       ]
Problem Type : Classification

Value for k : 2

Classification Accuracy : 65.52783650458069%

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.68888889 0.70454545 0.58139535 0.73809524 0.725     ]
Problem Type : Classification

Value for k : 3

Classification Accuracy : 68.75849860733581%

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:


----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.66666667 0.52272727 0.65116279 0.52380952 0.625     ]
Problem Type : Classification

Value for k : 30

Classification Accuracy : 59.78732507802276%
The Best Accuracy was: 74.3360683244404
The Best K was: 1


In [99]:
#Tune K and find best accuracy for cknn and glass data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"c",glass_data,"cknn")
    if bestAccuracy<accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))


Before condensing: 169 training instances

After condensing: 81 training instances

Classification Accuracy: 68.88888888888889%


Before condensing: 170 training instances

After condensing: 87 training instances

Classification Accuracy: 68.18181818181817%


Before condensing: 171 training instances

After condensing: 88 training instances

Classification Accuracy: 79.06976744186046%


Before condensing: 172 training instances

After condensing: 86 training instances

Classification Accuracy: 71.42857142857143%


Before condensing: 174 training instances

After condensing: 81 training instances

Classification Accuracy: 57.49999999999999%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.68888889 0.68181818 0.79069767 0.71428571 0.575     ]
Problem Type : Classification

Value for k : 1

Classification Accuracy : 69.0138091882278%

Before condensing: 169 training instances

After con

After condensing: 81 training instances

Classification Accuracy: 51.11111111111111%


Before condensing: 170 training instances

After condensing: 81 training instances

Classification Accuracy: 50.0%


Before condensing: 171 training instances

After condensing: 80 training instances

Classification Accuracy: 53.48837209302325%


Before condensing: 172 training instances

After condensing: 81 training instances

Classification Accuracy: 50.0%


Before condensing: 174 training instances

After condensing: 88 training instances

Classification Accuracy: 55.00000000000001%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.51111111 0.5        0.53488372 0.5        0.55      ]
Problem Type : Classification

Value for k : 10

Classification Accuracy : 51.91989664082687%

Before condensing: 169 training instances

After condensing: 85 training instances

Classification Accuracy: 46.66666666

After condensing: 84 training instances

Classification Accuracy: 40.0%


Before condensing: 170 training instances

After condensing: 76 training instances

Classification Accuracy: 47.72727272727273%


Before condensing: 171 training instances

After condensing: 86 training instances

Classification Accuracy: 46.51162790697674%


Before condensing: 172 training instances

After condensing: 83 training instances

Classification Accuracy: 50.0%


Before condensing: 174 training instances

After condensing: 79 training instances

Classification Accuracy: 60.0%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.4        0.47727273 0.46511628 0.5        0.6       ]
Problem Type : Classification

Value for k : 19

Classification Accuracy : 48.8477801268499%

Before condensing: 169 training instances

After condensing: 83 training instances

Classification Accuracy: 37.77777777777778%


Befo

After condensing: 79 training instances

Classification Accuracy: 44.44444444444444%


Before condensing: 170 training instances

After condensing: 80 training instances

Classification Accuracy: 52.27272727272727%


Before condensing: 171 training instances

After condensing: 76 training instances

Classification Accuracy: 37.2093023255814%


Before condensing: 172 training instances

After condensing: 85 training instances

Classification Accuracy: 42.857142857142854%


Before condensing: 174 training instances

After condensing: 72 training instances

Classification Accuracy: 42.5%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.44444444 0.52272727 0.37209302 0.42857143 0.425     ]
Problem Type : Classification

Value for k : 28

Classification Accuracy : 43.85672337997919%

Before condensing: 169 training instances

After condensing: 86 training instances

Classification Accuracy

In [136]:
#Tune K and find best accuracy for enn and glass data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"c",glass_data,"eknn")
    if bestAccuracy<accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))


Before editting: 169 training instances

After editting: 93 training instances

Classification Accuracy: 68.88888888888889%


Before editting: 170 training instances

After editting: 92 training instances

Classification Accuracy: 63.63636363636363%


Before editting: 171 training instances

After editting: 91 training instances

Classification Accuracy: 69.76744186046511%


Before editting: 172 training instances

After editting: 93 training instances

Classification Accuracy: 64.28571428571429%


Before editting: 174 training instances

After editting: 99 training instances

Classification Accuracy: 80.0%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.68888889 0.63636364 0.69767442 0.64285714 0.8       ]
Problem Type : Classification

Value for k : 1

Classification Accuracy : 69.3156817342864%

Before editting: 169 training instances

After editting: 100 training instances

Classif

After editting: 94 training instances

Classification Accuracy: 51.11111111111111%


Before editting: 170 training instances

After editting: 91 training instances

Classification Accuracy: 56.81818181818182%


Before editting: 171 training instances

After editting: 89 training instances

Classification Accuracy: 46.51162790697674%


Before editting: 172 training instances

After editting: 93 training instances

Classification Accuracy: 50.0%


Before editting: 174 training instances

After editting: 101 training instances

Classification Accuracy: 62.5%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.51111111 0.56818182 0.46511628 0.5        0.625     ]
Problem Type : Classification

Value for k : 10

Classification Accuracy : 53.38818416725394%

Before editting: 169 training instances

After editting: 94 training instances

Classification Accuracy: 51.11111111111111%


Before edittin

After editting: 95 training instances

Classification Accuracy: 40.0%


Before editting: 170 training instances

After editting: 91 training instances

Classification Accuracy: 43.18181818181818%


Before editting: 171 training instances

After editting: 95 training instances

Classification Accuracy: 48.837209302325576%


Before editting: 172 training instances

After editting: 90 training instances

Classification Accuracy: 42.857142857142854%


Before editting: 174 training instances

After editting: 98 training instances

Classification Accuracy: 52.5%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.4        0.43181818 0.48837209 0.42857143 0.525     ]
Problem Type : Classification

Value for k : 19

Classification Accuracy : 45.47523406825732%

Before editting: 169 training instances

After editting: 94 training instances

Classification Accuracy: 46.666666666666664%


Before editt

After editting: 95 training instances

Classification Accuracy: 36.36363636363637%


Before editting: 171 training instances

After editting: 89 training instances

Classification Accuracy: 51.162790697674424%


Before editting: 172 training instances

After editting: 99 training instances

Classification Accuracy: 42.857142857142854%


Before editting: 174 training instances

After editting: 97 training instances

Classification Accuracy: 42.5%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.4        0.36363636 0.51162791 0.42857143 0.425     ]
Problem Type : Classification

Value for k : 28

Classification Accuracy : 42.57671398369073%

Before editting: 169 training instances

After editting: 90 training instances

Classification Accuracy: 35.55555555555556%


Before editting: 170 training instances

After editting: 93 training instances

Classification Accuracy: 38.63636363636363%




In [123]:
#Tune K and find best accuracy for knn and segmentation data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"c",segmentation_data,"knn")
    if bestAccuracy<accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))

[SKY] => 0
[WINDOW] => 1
[BRICKFACE] => 2
[GRASS] => 3
[FOLIAGE] => 4
[PATH] => 5
[CEMENT] => 6

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.80952381 0.95238095 0.95238095 0.9047619  0.88095238]
Problem Type : Classification

Value for k : 1

Classification Accuracy : 90.0%

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.80952381 0.9047619  0.88095238 0.92857143 0.85714286]
Problem Type : Classification

Value for k : 2

Classification Accuracy : 87.6190476190476%

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.95238095 0.92857143 0.85714286 0.88095238 0.83333333]
Problem Type : Classification

Value for k : 3

Classification Accuracy : 89.04761904761905%

-----------------------------------------------------


----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.88095238 0.78571429 0.69047619 0.78571429 0.71428571]
Problem Type : Classification

Value for k : 30

Classification Accuracy : 77.14285714285714%
The Best Accuracy was: 90.0
The Best K was: 1


In [125]:
#Tune K and find best accuracy for cknn and segmentation data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"c",segmentation_data,"cknn")
    if bestAccuracy<accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))


Before condensing: 168 training instances

After condensing: 46 training instances

Classification Accuracy: 88.09523809523809%


Before condensing: 168 training instances

After condensing: 43 training instances

Classification Accuracy: 88.09523809523809%


Before condensing: 168 training instances

After condensing: 45 training instances

Classification Accuracy: 88.09523809523809%


Before condensing: 168 training instances

After condensing: 45 training instances

Classification Accuracy: 83.33333333333334%


Before condensing: 168 training instances

After condensing: 47 training instances

Classification Accuracy: 88.09523809523809%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.88095238 0.88095238 0.88095238 0.83333333 0.88095238]
Problem Type : Classification

Value for k : 1

Classification Accuracy : 87.14285714285715%

Before condensing: 168 training instances

After co

After condensing: 46 training instances

Classification Accuracy: 35.714285714285715%


Before condensing: 168 training instances

After condensing: 43 training instances

Classification Accuracy: 28.57142857142857%


Before condensing: 168 training instances

After condensing: 45 training instances

Classification Accuracy: 33.33333333333333%


Before condensing: 168 training instances

After condensing: 38 training instances

Classification Accuracy: 30.952380952380953%


Before condensing: 168 training instances

After condensing: 40 training instances

Classification Accuracy: 26.190476190476193%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.35714286 0.28571429 0.33333333 0.30952381 0.26190476]
Problem Type : Classification

Value for k : 10

Classification Accuracy : 30.95238095238095%

Before condensing: 168 training instances

After condensing: 44 training instances

Classif

After condensing: 47 training instances

Classification Accuracy: 21.428571428571427%


Before condensing: 168 training instances

After condensing: 49 training instances

Classification Accuracy: 23.809523809523807%


Before condensing: 168 training instances

After condensing: 40 training instances

Classification Accuracy: 28.57142857142857%


Before condensing: 168 training instances

After condensing: 44 training instances

Classification Accuracy: 19.047619047619047%


Before condensing: 168 training instances

After condensing: 49 training instances

Classification Accuracy: 21.428571428571427%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.21428571 0.23809524 0.28571429 0.19047619 0.21428571]
Problem Type : Classification

Value for k : 19

Classification Accuracy : 22.857142857142858%

Before condensing: 168 training instances

After condensing: 49 training instances

Class

After condensing: 46 training instances

Classification Accuracy: 28.57142857142857%


Before condensing: 168 training instances

After condensing: 42 training instances

Classification Accuracy: 11.904761904761903%


Before condensing: 168 training instances

After condensing: 43 training instances

Classification Accuracy: 16.666666666666664%


Before condensing: 168 training instances

After condensing: 40 training instances

Classification Accuracy: 11.904761904761903%


Before condensing: 168 training instances

After condensing: 43 training instances

Classification Accuracy: 26.190476190476193%


----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.28571429 0.11904762 0.16666667 0.11904762 0.26190476]
Problem Type : Classification

Value for k : 28

Classification Accuracy : 19.047619047619047%

Before condensing: 168 training instances

After condensing: 46 training instances

Class

In [137]:
#Tune K and find best accuracy for eknn and segmentation data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"c",segmentation_data,"eknn")
    if bestAccuracy<accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))


Before editting: 168 training instances

After editting: 55 training instances

Classification Accuracy: 88.09523809523809%


Before editting: 168 training instances

After editting: 57 training instances

Classification Accuracy: 88.09523809523809%


Before editting: 168 training instances

After editting: 56 training instances

Classification Accuracy: 90.47619047619048%


Before editting: 168 training instances

After editting: 55 training instances

Classification Accuracy: 88.09523809523809%


Before editting: 168 training instances

After editting: 54 training instances

Classification Accuracy: 85.71428571428571%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.88095238 0.88095238 0.9047619  0.88095238 0.85714286]
Problem Type : Classification

Value for k : 1

Classification Accuracy : 88.09523809523809%

Before editting: 168 training instances

After editting: 62 training insta

After editting: 57 training instances

Classification Accuracy: 45.23809523809524%


Before editting: 168 training instances

After editting: 58 training instances

Classification Accuracy: 38.095238095238095%


Before editting: 168 training instances

After editting: 52 training instances

Classification Accuracy: 40.476190476190474%


Before editting: 168 training instances

After editting: 55 training instances

Classification Accuracy: 47.61904761904761%


Before editting: 168 training instances

After editting: 58 training instances

Classification Accuracy: 50.0%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.45238095 0.38095238 0.4047619  0.47619048 0.5       ]
Problem Type : Classification

Value for k : 10

Classification Accuracy : 44.28571428571429%

Before editting: 168 training instances

After editting: 51 training instances

Classification Accuracy: 42.857142857142854%



After editting: 63 training instances

Classification Accuracy: 40.476190476190474%


Before editting: 168 training instances

After editting: 56 training instances

Classification Accuracy: 30.952380952380953%


Before editting: 168 training instances

After editting: 51 training instances

Classification Accuracy: 38.095238095238095%


Before editting: 168 training instances

After editting: 55 training instances

Classification Accuracy: 35.714285714285715%


Before editting: 168 training instances

After editting: 49 training instances

Classification Accuracy: 42.857142857142854%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.4047619  0.30952381 0.38095238 0.35714286 0.42857143]
Problem Type : Classification

Value for k : 19

Classification Accuracy : 37.61904761904763%

Before editting: 168 training instances

After editting: 58 training instances

Classification Accuracy: 35.71

After editting: 58 training instances

Classification Accuracy: 28.57142857142857%


Before editting: 168 training instances

After editting: 62 training instances

Classification Accuracy: 30.952380952380953%


Before editting: 168 training instances

After editting: 53 training instances

Classification Accuracy: 30.952380952380953%


Before editting: 168 training instances

After editting: 50 training instances

Classification Accuracy: 26.190476190476193%


Before editting: 168 training instances

After editting: 59 training instances

Classification Accuracy: 38.095238095238095%


----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[0.28571429 0.30952381 0.30952381 0.26190476 0.38095238]
Problem Type : Classification

Value for k : 28

Classification Accuracy : 30.952380952380953%

Before editting: 168 training instances

After editting: 55 training instances

Classification Accuracy: 28.57

In [216]:
#Tune K and find best accuracy for knn and cpu data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"r",cpu_data,"knn")
    if i == 1:
        bestAccuracy = accuracy
        bestk = i
    if bestAccuracy>accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))




----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[59510.54607882  7667.34042543  4031.77497547  1243.49999999
  1238.73233732]
Problem Type : Regression

Value for k : 1

Mean Squared Error : 1473837.876340601

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[59510.58061758  7667.34041669  4031.80769218  1243.20602829
  1238.72185891]
Problem Type : Regression

Value for k : 2

Mean Squared Error : 1473833.132272975

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[59510.5818144   7667.2567831   4031.77499012  1243.49172359
  1238.7296127 ]
Problem Type : Regression

Value for k : 3

Mean Squared Error : 1473836.6984781425

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accurac


----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[59510.58042091  7667.32024463  4031.57568876  1243.49999999
  1238.70637426]
Problem Type : Regression

Value for k : 29

Mean Squared Error : 1473833.6545708892

----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[59510.58048071  7667.34024507  4031.77098825  1243.11966672
  1238.70247854]
Problem Type : Regression

Value for k : 30

Mean Squared Error : 1473830.2771857672
The Best Accuracy was: 1473830.2771857672
The Best K was: 30


In [243]:
#Tune K and find best accuracy for knn and abalone data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"r",abalone_data,"knn")
    if i == 1:
        bestAccuracy = accuracy
        bestk = i
    if bestAccuracy>accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))



----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[9.53793593 8.28553048 8.35182829 8.00396521 8.12933175]
Problem Type : Regression

Value for k : 1

Mean Squared Error : 8.461718331981777
----------------------------------------------------------
----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[95.13224478 92.28906817 95.02918601 94.40639915 99.67091959]
Problem Type : Regression

Value for k : 2

Mean Squared Error : 95.30556354022742
----------------------------------------------------------
----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 Experiments:
[362.22984735 365.58027348 352.87593954 381.1029079  373.40052597]
Problem Type : Regression

Value for k : 3

Mean Squared Error : 367.0378988463776
------------------------------------

KeyboardInterrupt: 

In [None]:
#Tune K and find best accuracy for knn and forest fires data
bestAccuracy=0
bestk=0
for i in range(1,31):
    implementAlgo(i,"r",ff_data,"knn")
    if i == 1:
        bestAccuracy = accuracy
        bestk = i
    if bestAccuracy>accuracy:
        bestAccuracy = accuracy
        bestk = i
print("The Best Accuracy was: "+ str(bestAccuracy))
print("The Best K was: "+ str(bestk))

In [269]:
##FOR DEMO
implementAlgo(2,"c",segmentation_data,"knn")

size of fold0 is
840
size of fold1 is
840
size of fold2 is
840
size of fold3 is
840
size of fold4 is
840
test point:
[174.         50.          9.          0.          0.          1.0000013
   0.7601153   0.9444453   0.9525792 107.44444    94.666664  125.77778
 101.888885  -38.333332   55.        -16.666666  125.77778     0.2473368
  -2.3372955   0.       ]
Neighbors:
[[ 0.         10.47992634]
 [ 0.         12.55216416]]
test point:
[112.          30.           9.           0.           0.
   0.55555725   0.2721644    1.2222227    0.7200825  113.25926
 100.77778    130.11111    108.888885   -37.444443    50.555557
 -13.111111   130.11111      0.22538376  -2.3853076    0.        ]
Neighbors:
[[ 0.          8.55317356]
 [ 0.         13.84409324]]
test point:
[ 1.25000000e+02  4.60000000e+01  9.00000000e+00  1.11111110e-01
  0.00000000e+00  6.11109440e-01  6.11615840e-01  2.16666800e+00
  7.81735200e-01  1.24555560e+02  1.12777780e+02  1.41000000e+02
  1.19888885e+02 -3.53333320e+01  4.9

In [266]:
implementAlgo(1,"c",segmentation_data,"eknn")

size of fold0 is
840
size of fold1 is
840
size of fold2 is
840
size of fold3 is
840
size of fold4 is
840

Before editting: 168 training instances

After editting: 60 training instances

Classification Accuracy: 92.85714285714286%


Before editting: 168 training instances

After editting: 51 training instances

Classification Accuracy: 85.71428571428571%


Before editting: 168 training instances

After editting: 55 training instances

Classification Accuracy: 88.09523809523809%


Before editting: 168 training instances

After editting: 62 training instances

Classification Accuracy: 88.09523809523809%


Before editting: 168 training instances

After editting: 52 training instances

Classification Accuracy: 80.95238095238095%

----------------------------------------------------------
Algorithm name: Edited K-Nearest Neighbor
Accuracy Statistics for All 5 testfolds:
[0.92857143 0.85714286 0.88095238 0.88095238 0.80952381]
Problem Type: Classification

Value for k : 1

Classification Accu

In [267]:
implementAlgo(1,"c",segmentation_data,"cknn")

size of fold0 is
840
size of fold1 is
840
size of fold2 is
840
size of fold3 is
840
size of fold4 is
840

Before condensing: 168 training instances

After condensing: 50 training instances

Classification Accuracy: 92.85714285714286%


Before condensing: 168 training instances

After condensing: 44 training instances

Classification Accuracy: 83.33333333333334%


Before condensing: 168 training instances

After condensing: 42 training instances

Classification Accuracy: 83.33333333333334%


Before condensing: 168 training instances

After condensing: 44 training instances

Classification Accuracy: 92.85714285714286%


Before condensing: 168 training instances

After condensing: 39 training instances

Classification Accuracy: 83.33333333333334%

----------------------------------------------------------
Algorithm name: Condensed K-Nearest Neighbor
Accuracy Statistics for All 5 testfolds:
[0.92857143 0.83333333 0.83333333 0.92857143 0.83333333]
Problem Type: Classification

Value for k :

In [268]:
implementAlgo(1,"r",abalone_data,"knn")

size of fold0 is
7650
size of fold1 is
7569
size of fold2 is
7497
size of fold3 is
7461
size of fold4 is
7416
test point:
[2.0e+00 7.5e-02 5.5e-02 1.0e-02 2.0e-03 1.0e-03 5.0e-04 1.5e-03 1.0e+00]
Neighbors:
[[3.         0.05378429]]




test point:
[2.00e+00 1.40e-01 1.05e-01 3.50e-02 1.40e-02 5.50e-03 2.50e-03 4.00e-03
 3.00e+00]
Neighbors:
[[4.00000000e+00 1.58113883e-03]]
test point:
[2.     0.18   0.13   0.045  0.0275 0.0125 0.01   0.009  3.    ]
Neighbors:
[[4.         0.00926013]]
test point:
[2.     0.165  0.12   0.05   0.021  0.0075 0.0045 0.014  3.    ]
Neighbors:
[[6.         0.01097725]]
test point:
[0.     0.155  0.11   0.04   0.0155 0.0065 0.003  0.005  3.    ]
Neighbors:
[[4.         0.01146734]]
----------------------------------------------------------
Algorithm name: K-Nearest Neighbor
Accuracy Statistics for All 5 testfolds:
[9.92009753 8.4596507  8.10827595 8.82847117 7.40801129]
Problem Type: Regression

Value for k : 1

Mean Squared Error : 8.54490132788571
----------------------------------------------------------
