### Perform Classification Using KNN, Bayes Classifier, and Naive Bayes Classifier

## Import libraries

In [2]:
import pandas as pd
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score


## Step 1: Load Train, Validation, and Test Data

In [3]:
# Read the train / validation / test splits from their CSV files (in that order)
train_df, valid_df, test_df = map(
    pd.read_csv,
    (
        'DiabeticRetinopathy_train.csv',
        'DiabeticRetinopathy_validation.csv',
        'DiabeticRetinopathy_test.csv',
    ),
)

# Preview the first rows of the training split as the cell's rich output
train_df.head()

Unnamed: 0,quality_assessment,Pre-screening,MA_detection_Confidence0.5,MA_detection_Confidence0.6,MA_detection_Confidence0.7,MA_detection_Confidence0.8,MA_detection_Confidence0.9,MA_detection_Confidence1.0,Exudates_Confidence0.3,Exudates_Confidence0.4,Exudates_Confidence0.5,Exudates_Confidence0.6,Exudates_Confidence0.7,Exudates_Confidence0.8,Exudates_Confidence0.9,Exudates_Confidence1.0,Distance_Between_macula_and_optic_disk,Diameter_optic_disc,AM/FM,Class
0,1,1,29,29,28,26,24,18,189.752152,46.437986,22.668862,4.696562,0.730099,0.287336,0.106345,0.068511,0.494069,0.121683,1,1
1,1,1,73,70,61,51,40,24,286.341351,49.691422,27.729469,5.687412,1.987205,0.517598,0.05443,0.007189,0.530448,0.11913,0,1
2,1,1,26,26,26,25,23,19,18.197432,8.463779,0.347027,0.007678,0.0,0.0,0.0,0.0,0.514848,0.110557,0,0
3,1,0,75,67,56,43,33,24,24.609135,10.657996,3.456785,2.159855,1.668933,1.252208,0.814138,0.294757,0.515086,0.087411,0,1
4,1,1,21,20,19,17,13,6,27.481596,5.579173,1.98435,1.110242,0.65351,0.508516,0.238205,0.053855,0.519491,0.109781,0,1


## Step 2: Prepare Input Feature Vectors and Target Labels

In [4]:
# Features are every column except the last one; the last column is the target label
X_train, X_valid, X_test = (
    frame.iloc[:, :-1] for frame in (train_df, valid_df, test_df)
)
y_train, y_valid, y_test = (
    frame.iloc[:, -1] for frame in (train_df, valid_df, test_df)
)

# Report the (rows, feature columns) shape of each split
for caption, features in (
    ('Train shape:', X_train),
    ('Validation shape:', X_valid),
    ('Test shape:', X_test),
):
    print(caption, features.shape)

Train shape: (690, 19)
Validation shape: (230, 19)
Test shape: (231, 19)


## Step 3: K-Nearest Neighbors (KNN) Classification (K=7)

In [5]:
# Fit a 7-nearest-neighbour classifier on the training split
# (fit() returns the estimator itself, so construction and fitting can be chained)
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)

# Predicted labels for the two held-out splits
y_valid_pred, y_test_pred = knn.predict(X_valid), knn.predict(X_test)

# Report the accuracy obtained on each split
for caption, y_true, y_pred in (
    ('KNN Accuracy on Validation:', y_valid, y_valid_pred),
    ('KNN Accuracy on Test:', y_test, y_test_pred),
):
    print(caption, accuracy_score(y_true, y_pred))

KNN Accuracy on Validation: 0.6217391304347826
KNN Accuracy on Test: 0.645021645021645


## Step 4: Bayes Classifier Implementation Steps

### Compute mean and covariance per class
### Compute the prior probability per class
### Find the likelihood; use allow_singular=True in case a class covariance matrix is singular. Here x is the feature vectors from the validation or test set
likelihood = multivariate_normal(mean=classwise_Mean, cov=classwise_CovarianceMatrix, allow_singular=True).pdf(x)
### Find posterior probability
posterior = likelihood * prior
### Get the prediction label, for which class posterior is maximum


## Step 5: Naive Bayes Classification (Using GaussianNB)

In [7]:
# Train the Gaussian Naive Bayes model on the training split.
# GaussianNB treats the features as conditionally independent given the class
# and fits a per-class Gaussian to each feature.
nb = GaussianNB()
nb.fit(X_train, y_train)

# Predict on BOTH held-out splits so this cell is evaluated the same way as the
# KNN cell (previously only the test split was predicted and nothing was reported).
y_valid_pred_nb = nb.predict(X_valid)
y_test_pred_nb = nb.predict(X_test)

# Report accuracy on each split, mirroring the KNN output format
print('Naive Bayes Accuracy on Validation:', accuracy_score(y_valid, y_valid_pred_nb))
print('Naive Bayes Accuracy on Test:', accuracy_score(y_test, y_test_pred_nb))


## Step 6: Performance Metrics Evaluation

### Find the confusion matrix, accuracy, precision, recall, and F1-score on the validation and test data for all classification algorithms.