# KNN Algorithm

In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('datasets/Classification/KNNDataset.csv')

In [3]:
data.rename(columns = {"diagnosis": "Class"}, inplace=True)
data.dropna(axis=1, inplace=True)

In [4]:
clss = data["Class"].unique()
print(clss)

['M' 'B']


In [5]:
print(data.columns)

Index(['id', 'Class', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst'],
      dtype='object')


In [6]:
trainData = data.sample(frac=0.8, random_state=0)

In [7]:
print(trainData)

           id Class  radius_mean  texture_mean  perimeter_mean  area_mean  \
512    915691     M        13.40         20.52           88.64      556.7   
457   9112367     B        13.21         25.25           84.10      537.9   
439    909410     B        14.02         15.66           89.59      606.5   
298    892214     B        14.26         18.17           91.22      633.1   
37     854941     B        13.03         18.42           82.61      523.8   
..        ...   ...          ...           ...             ...        ...   
86   86135501     M        14.48         21.46           94.25      648.2   
266   8910251     B        10.60         18.95           69.28      346.4   
36     854268     M        14.25         21.72           93.63      633.0   
193    875263     M        12.34         26.86           81.15      477.4   
58     857810     B        13.05         19.31           82.61      527.2   

     smoothness_mean  compactness_mean  concavity_mean  concave points_mean

In [8]:
testData = data.drop(trainData.index)

In [9]:
def k_nn(x,k):
    l = []
    keys = list(testData.columns)
    if "id" in keys:
        keys.remove("id")
    keys.remove("Class")
    n = len(keys)
    for d in data.iterrows():
        dist = 0
        for i in range(n):
            dist += (x[i] - d[1][keys[i]])**2
        l.append((dist,d[0]))
    l.sort()
    d = {}
    for i in range(k):
        val = data.iloc[l[i][1]]['Class']
        d[val] = d.get(val,0) + 1
    predClass = None
    maxval = 0
    for i in d.keys():
        if d[i] > maxval:
            maxval = d[i]
            predClass = i
    return predClass

In [10]:
def confusion(kValue):
    col = list(data.columns)
    if "id" in col:
        col.remove("id")
    target = "Class"
    col.remove("Class")
    classes = {}
    clss = data["Class"].unique()
    i = 0
    for clas in clss:
        classes[clas] = i
        i += 1
    n = len(classes)
    confmat = np.zeros(n*n)
    confmat = confmat.reshape(n,n)
    for d in testData.iterrows():
        x = [d[1][i] for i in col]
        y = d[1][target]
        predy = k_nn(x, kValue)
        confmat[classes[predy]][classes[y]] += 1
    return confmat

### Performance Evaluation

In [11]:
def accuracy(mat):
    n = len(mat)
    num = 0
    den = 0
    for i in range(n):
        num += mat[i][i]
        den += sum(mat[i])
    return num/den

In [12]:
def precision(mat, key):
    n = len(mat)
    num = mat[key][key]
    den = 0
    for i in range(n):
        den += mat[i][key]
    return num/den

In [13]:
def recall(mat, key):
    n = len(mat)
    num = mat[key][key]
    den = 0
    for i in range(n):
        den += mat[key][i]
    return num/den

In [14]:
def F1Score(p,r):
    return (2*(p*r))/(p + r)

### For K == 12

In [15]:
mat = confusion(12) 
acc = accuracy(mat)
pre = precision(mat,0)
rec = recall(mat,0)
fscore = F1Score(pre, rec)
print(acc, pre, rec, fscore)

0.9385964912280702 0.8863636363636364 0.9512195121951219 0.9176470588235294


### For K == 15

In [16]:
mat = confusion(15) 
acc = accuracy(mat)
pre = precision(mat,0)
rec = recall(mat,0)
fscore = F1Score(pre, rec)
print(acc, pre, rec, fscore)

0.9298245614035088 0.8409090909090909 0.9736842105263158 0.9024390243902439


### For K == 18

In [17]:
mat = confusion(18) 
acc = accuracy(mat)
pre = precision(mat,0)
rec = recall(mat,0)
fscore = F1Score(pre, rec)
print(acc, pre, rec, fscore)

0.9298245614035088 0.8409090909090909 0.9736842105263158 0.9024390243902439


### For K == 21

In [18]:
mat = confusion(21) 
acc = accuracy(mat)
pre = precision(mat,0)
rec = recall(mat,0)
fscore = F1Score(pre, rec)
print(acc, pre, rec, fscore)

0.9210526315789473 0.8409090909090909 0.9487179487179487 0.891566265060241
