### KNN Classification (Real)

In [10]:
import pandas as pd
import numpy as np

#####  Read Training Data

In [11]:
Training_df = pd.read_csv('KNN_Real.csv')
Training_df

Unnamed: 0,F1,F2,F3,F4,Actual
0,1.0,0.5,0.44,0.0,0.3
1,0.8,0.6,0.0,0.22,0.9
2,0.7,0.4,0.0,0.0,0.7
3,0.0,0.1,2.64,4.4,0.5
4,0.1,0.0,1.76,12.32,0.2


##### Split Features and Target

In [12]:
Target = Training_df['Actual']
Features = Training_df.drop(columns = 'Actual')
Target,Features

(0    0.3
 1    0.9
 2    0.7
 3    0.5
 4    0.2
 Name: Actual, dtype: float64,
     F1   F2    F3     F4
 0  1.0  0.5  0.44   0.00
 1  0.8  0.6  0.00   0.22
 2  0.7  0.4  0.00   0.00
 3  0.0  0.1  2.64   4.40
 4  0.1  0.0  1.76  12.32)

In [13]:
Test_df = pd.read_csv('KNN_Real_query.csv')
Test_df

Unnamed: 0,F1,F2,F3,F4
0,0.5,0.6,0.22,0.0
1,0.1,0.0,2.64,0.88


####  Class Definition

In [14]:
    class KNNReal:
        def fit(self,Features: pd.DataFrame,Target: pd.Series,K) -> pd.DataFrame:
            self.Features = Features
            self.Target = Target
            self.K = K

        def eucDist(self,_feature: pd.Series,_test: pd.Series):
            euc_dist = np.sqrt(np.sum(np.square(_feature-_test)))
            return euc_dist

        def predict(self,Test):
            self.target_arr = []
            self.Test = Test
            for i in range(len(Test)):
                self.euc_dist_dict = {}
                for j in range(len(Features)):
                  #Assigning the corresponding class to the found eucledian distance
                    self.euc_dist_dict[self.eucDist(self.Features.iloc[j],self.Test.iloc[i])] = self.Target[j]

                #Appending the corresponding real predicted value for the query point i 
                self.target_arr.append(self.getActual(self.euc_dist_dict))    

            self.Test['Target'] = self.Target
            self.Test['Predicted'] = self.target_arr
            return self.Test
      
        def predict_weight(self,Test):
            self.Test = Test
            self.weight_arr = []
            self.target_arr = []
            for i in range(len(Test)):
                #Weight dictionary
                self.euc_weight = {}
                for j in range(len(Features)):
                    self.euc_weight[self.eucWeight(self.eucDist(self.Features.iloc[j],self.Test.iloc[i]))] = self.Target[j]

                self.target_arr.append(self.getActual_weight(self.euc_weight))

            self.Test['Target'] = self.Target
            self.Test['Predicted'] = self.target_arr
            return self.Test
        
        def eucWeight(self,dist: float):
            return (1/np.square(dist))
        
        def slice_k_weight(self,euc_weight: dict)-> dict:
            #Sorting dictionary with key
            euc_weight = dict(sorted(euc_weight.items()))
            #Converting to list and slicing and then converting to dict
            #Weight is inversely proportional to distance
            return dict(list(euc_weight.items())[-self.K:])
        
        def getActual_weight(self,euc_dist_dict: dict):
            #Slicing is the only difference for weight and dist
            euc_dist_dict = self.slice_k_weight(euc_dist_dict)
            
            return np.mean(np.array(list(euc_dist_dict.values())))

        def getActual(self,euc_dist_dict: dict):
            euc_dist_dict = self.slice_k_weight(euc_dist_dict)
            # Convert values of dictionary to list to easily find the mean..
            # Return the mean - which is the predicted output..
            return np.mean(np.array(list(euc_dist_dict.values())))

        def slice_k(self,euc_dist_dict: dict):
            #Sorting dictionary with key
            euc_dist_dict = dict(sorted(euc_dist_dict.items()))
            #Converting to list and slicing and then converting to dict..
            return dict(list(euc_dist_dict.items())[:self.K])
        

#####  Fitting Model

In [15]:
model = KNNReal()
model.fit(Features,Target,3)

#####  Predicting (Dist)

In [16]:
predicted_df = model.predict(Test_df)
predicted_df

Unnamed: 0,F1,F2,F3,F4,Target,Predicted
0,0.5,0.6,0.22,0.0,0.3,0.333333
1,0.1,0.0,2.64,0.88,0.9,0.466667


#### Predicting (Weight)

In [17]:
predicted_df_weight = model.predict_weight(Test_df)
predicted_df_weight

Unnamed: 0,F1,F2,F3,F4,Target,Predicted
0,0.5,0.6,0.22,0.0,0.3,0.633333
1,0.1,0.0,2.64,0.88,0.9,0.633333
