In [21]:
import pandas as pd

### Dataset Loading

In [22]:
data= pd.read_csv("anemia_prediction.csv")
# 0 means not present
# 1 means present
data

Unnamed: 0,Hemoglobin,MCH,Result
0,14.9,22.7,0
1,15.9,25.4,0
2,9.0,21.5,1
3,14.9,16.0,0
4,14.7,22.0,0
...,...,...,...
245,14.1,29.7,0
246,14.9,25.8,0
247,14.0,18.3,0
248,10.8,27.5,1


### Main Code

In [23]:
'''
    1- Put self in every func in class
    2- Try to build phase vise.
    3- See the structure of obj, variable, or any other data type clearly 

'''


class knn:
    def __init__(self, k, mode):
        self.k= k
        self.m= mode
    
    def manhattan(self, v1, v2):
        total= 0
        for i in range(len(v1)):
            diff= v1[i]- v2[i]
            if diff<0:
                diff= -diff
            total+= diff
        return total                    # return total (mistake)

    def euclidean(self, v1, v2):
        total= 0
        for i in range(len(v1)):
            diff= (v1[i])**0.5 - (v2[i])**0.5
            total+= (diff)**2
        return total                    # return total (mistake)
    
    def minkowski(self, v1, v2, p=3):
        total= 0
        for i in range(len(v1)):
            diff= (v1[i])**(1/p) - (v2[i])**(1/p)
            if diff<0:
                diff= -diff
            total+= (diff)**p
        return total                    # return total (mistake)
    
    def fit(self, X_train, y_train):
        self.X_train= X_train
        self.y_train= y_train
    
    def predict(self, X_test):
        preds=[]
        for i in range(len(X_test)):
            test_row= X_test[i]
            neighb= self.get_neighbour(test_row)
            maj_class= self.maj_class(neighb)
            print(f"Predicted Class for {test_row}: {maj_class}\n")
            preds.append(maj_class)
        return preds
    
    def get_neighbour(self, test_row):
        distance= []
        for train_row, train_class in zip(self.X_train, self.y_train):
            if self.m== 1:
                dist= self.manhattan(train_row, test_row)
            elif self.m== 2:
                dist= self.euclidean(train_row, test_row)
            else:
                dist= self.minkowski(train_row, test_row)
            # Calculate distance and append to the list
            distance.append([dist, train_class])
        for i in range(1, len(distance)):
            key= distance[i]
            j= i-1
            while j>=0 and distance[j][0]>key[0]:
                distance[j+1]= distance[j]
                j-= 1
            distance[j+1]= key
        
        # Now distance is sorted based on the first element (distance)  
        k_neighb= []
        count= 0
        print(f"\nDistances and Classes of Nearest Neighbours for Test Row: {test_row}: ")
        for j in range(len(distance)):
            if count< self.k:
                k_neighb.append(distance[j][1])
                print(f"Distance: {distance[j]}, Class: {distance[j][1]}")
                count+= 1
            else:   break
        return k_neighb
    
    def maj_class(self, neighb):
        classes= {}
        for i in neighb:
            if i in classes.keys():
                classes[i]+= 1
            else:   classes[i]= 1
        max= -1
        for i in classes:
            if max< classes[i]:
                max= classes[i]
                maj= i
        return maj
    
    def acc(self, pred, y_test):
        corr= 0
        for i,j in zip(pred, y_test):
            if i==j:
                corr+= 1
        return print("Accuracy: ",(corr/len(y_test)))

In [24]:
X= data.drop('Result', axis=1).values
y= data['Result'].values

X_train, y_train= X[:80], y[:80]
X_test, y_test= X[80:], y[80:]

#### 1. KNN for Manhattan Distance

In [25]:
KNN= knn(3, mode=1)
KNN.fit(X_train, y_train)
pred= KNN.predict(X_test)
KNN.acc(pred, y_test)


Distances and Classes of Nearest Neighbours for Test Row: [13.2 23.5]: 
Distance: [np.float64(0.6000000000000014), np.int64(1)], Class: 1
Distance: [np.float64(0.6999999999999993), np.int64(0)], Class: 0
Distance: [np.float64(1.299999999999999), np.int64(0)], Class: 0
Predicted Class for [13.2 23.5]: 0


Distances and Classes of Nearest Neighbours for Test Row: [16.8 19.2]: 
Distance: [np.float64(0.40000000000000213), np.int64(0)], Class: 0
Distance: [np.float64(0.9000000000000021), np.int64(0)], Class: 0
Distance: [np.float64(1.7000000000000028), np.int64(0)], Class: 0
Predicted Class for [16.8 19.2]: 0


Distances and Classes of Nearest Neighbours for Test Row: [16.5 26.4]: 
Distance: [np.float64(1.2999999999999972), np.int64(0)], Class: 0
Distance: [np.float64(1.3000000000000007), np.int64(0)], Class: 0
Distance: [np.float64(1.5999999999999996), np.int64(0)], Class: 0
Predicted Class for [16.5 26.4]: 0


Distances and Classes of Nearest Neighbours for Test Row: [11.8 18.6]: 
Distan

#### 2. KNN for Euclidean Distance

In [26]:
KNN= knn(3, mode=2)
KNN.fit(X_train, y_train)
pred= KNN.predict(X_test)
KNN.acc(pred, y_test)


Distances and Classes of Nearest Neighbours for Test Row: [13.2 23.5]: 
Distance: [np.float64(0.004932861808936196), np.int64(1)], Class: 1
Distance: [np.float64(0.009534842669741913), np.int64(0)], Class: 0
Distance: [np.float64(0.015191302998158298), np.int64(0)], Class: 0
Predicted Class for [13.2 23.5]: 0


Distances and Classes of Nearest Neighbours for Test Row: [16.8 19.2]: 
Distance: [np.float64(0.0014812491210540377), np.int64(0)], Class: 0
Distance: [np.float64(0.005708022559278273), np.int64(0)], Class: 0
Distance: [np.float64(0.03216206431408417), np.int64(0)], Class: 0
Predicted Class for [16.8 19.2]: 0


Distances and Classes of Nearest Neighbours for Test Row: [16.5 26.4]: 
Distance: [np.float64(0.010199350415496754), np.int64(0)], Class: 0
Distance: [np.float64(0.011828073874057106), np.int64(0)], Class: 0
Distance: [np.float64(0.015209441090142616), np.int64(0)], Class: 0
Predicted Class for [16.5 26.4]: 0


Distances and Classes of Nearest Neighbours for Test Row: [1

#### 3. KNN for Minkowski Distance

In [27]:
KNN= knn(3, mode=3)
KNN.fit(X_train, y_train)
pred= KNN.predict(X_test)
KNN.acc(pred, y_test)


Distances and Classes of Nearest Neighbours for Test Row: [13.2 23.5]: 
Distance: [np.float64(2.7678703815028723e-05), np.int64(1)], Class: 1
Distance: [np.float64(7.696675001507813e-05), np.int64(0)], Class: 0
Distance: [np.float64(0.00012436859252861157), np.int64(0)], Class: 0
Predicted Class for [13.2 23.5]: 0


Distances and Classes of Nearest Neighbours for Test Row: [16.8 19.2]: 
Distance: [np.float64(3.707321999449591e-06), np.int64(0)], Class: 0
Distance: [np.float64(2.1496175613299844e-05), np.int64(0)], Class: 0
Distance: [np.float64(0.0003797287532098551), np.int64(0)], Class: 0
Predicted Class for [16.8 19.2]: 0


Distances and Classes of Nearest Neighbours for Test Row: [16.5 26.4]: 
Distance: [np.float64(4.86015703609019e-05), np.int64(0)], Class: 0
Distance: [np.float64(6.896393652216716e-05), np.int64(0)], Class: 0
Distance: [np.float64(8.571281187715971e-05), np.int64(0)], Class: 0
Predicted Class for [16.5 26.4]: 0


Distances and Classes of Nearest Neighbours for T