# Diabetic Retinopathy detection

### 1.Importing liabraries

In [117]:
import pandas as pd
import numpy as np
import cv2
import os
import glob
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from skimage.feature import local_binary_pattern
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import jaccard_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix, accuracy_score
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import seaborn as sns

### 2. defining functions and data extraction.

In [106]:
def load_and_preprocess_data(data_dir, csv_file):
    images = []
    labels = []
    image_filenames = []  # To store the corresponding image file names

    # Load the CSV file
    data_df = pd.read_csv(csv_file)

    for index, row in data_df.iterrows():
        image_filename = row['id_code']
        class_label = row['diagnosis']

        # Build the full path to the image file
        image_path = os.path.join(data_dir, f'{image_filename}.png')  # Assuming images have a .png extension

        if os.path.isfile(image_path):
            # Load the image and preprocess it
            image = cv2.imread(image_path)
            if image is not None:  # Check if image loading was successful
                image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
                image = cv2.resize(image, (128, 128))  # Resize to a consistent size

                images.append(image)
                labels.append(class_label)
                image_filenames.append(image_filename)
            else:
                print(f"Failed to load image: {image_path}")

    return images, labels, image_filenames

In [107]:
def extract_lbp_feature(images):
    lbp_features = []
    for image in images:
        lbp = local_binary_pattern(image, P=8, R=1, method="uniform")
        lbp_hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 59), range=(0, 58))
        lbp_features.append(lbp_hist)

    return lbp_features


In [111]:
data_dir = 'F:/data/Diabetic retinopathy/gaussian_filtered_images/gaussian_filtered_images/all'
csv_file = "C:/Users/Chirayu/Downloads/train.csv"  # Replace with the correct path to your CSV file
images, labels, image_filenames = load_and_preprocess_data(data_dir, csv_file)
lbp_features = extract_lbp_feature(images)
# Create a DataFrame with the required data
data_dict = {
    'id_code': image_filenames,
    'diagnosis': labels,
}
for i in range(len(lbp_features[0])):
    data_dict[f'feature_{i+1}'] = [feat[i] for feat in lbp_features]

output_df = pd.DataFrame(data_dict)

# Save the data to a new CSV file
output_df.to_csv('output_data.csv', index=False)

### 3. Splitting data into traning and testing dataset.

In [112]:
X_train, X_test, y_train, y_test = train_test_split(lbp_features, labels, test_size=0.2, random_state=42)


### 4. prediction and evaluation using Random forest classifier

In [123]:
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [124]:
y_pred = classifier.predict(X_test)

In [125]:
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print the evaluation results
print("Accuracy:", accuracy)
print("Classification Report:")
print(report)

Accuracy: 0.7285129604365621
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.95      0.92       351
           1       0.40      0.25      0.31        68
           2       0.59      0.80      0.68       213
           3       0.40      0.11      0.17        36
           4       0.47      0.12      0.20        65

    accuracy                           0.73       733
   macro avg       0.55      0.45      0.46       733
weighted avg       0.70      0.73      0.69       733



### 5. prediction and evaluation using KNN classifier

In [137]:
k=5
KNN = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
KNN

KNeighborsClassifier()

In [138]:
predictions = KNN.predict(X_test)

In [140]:
KNN_Accuracy_Score = metrics.accuracy_score(y_test, predictions)
KNN_JaccardIndex = metrics.jaccard_score(y_test, predictions,average='weighted')
KNN_F1_Score = metrics.f1_score(y_test, predictions,average='weighted')
#KNN_Log_Loss = metrics.log_loss(y_test, predictions,labels=[0,1,2,3,4])
print("KNN_Accuracy_Score :", KNN_Accuracy_Score)
print("KNN_JaccardIndex :" , KNN_JaccardIndex)
print("KNN_F1_Score :", KNN_F1_Score)
#print("KNN_Log_Loss :", KNN_Log_Loss)

KNN_Accuracy_Score : 0.7094133697135061
KNN_JaccardIndex : 0.5756134266201822
KNN_F1_Score : 0.6759179815692242


### 6.prediction and evaluation using Decision Tree classifier

In [142]:
Tree = DecisionTreeClassifier(criterion ="entropy",max_depth = 5)
Tree.fit(X_train, y_train)

DecisionTreeClassifier(criterion='entropy', max_depth=5)

In [143]:
predictions_tree = Tree.predict(X_test)

In [152]:
Tree_Accuracy_Score = metrics.accuracy_score(y_test, predictions_tree)
Tree_JaccardIndex = metrics.jaccard_score(y_test, predictions_tree, average='weighted')
Tree_F1_Score = metrics.f1_score(y_test, predictions_tree,average= 'weighted')
#Tree_Log_Loss = metrics.log_loss(y_test, predictions_tree, labels =[0 1 2 3 4])
print("Tree_Accuracy_Score :", Tree_Accuracy_Score)
print("Tree_JaccardIndex :" , Tree_JaccardIndex)
print("Tree_F1_Score :", Tree_F1_Score)
#print("Tree_Log_Loss :", Tree_Log_Loss)

Tree_Accuracy_Score : 0.6998635743519782
Tree_JaccardIndex : 0.5403765622769512
Tree_F1_Score : 0.6362770124827999


### 7. prediction and evaluation using Logistic regression

In [154]:
LR = LogisticRegression(C=0.01, solver ="liblinear").fit(X_train,y_train)
LR

LogisticRegression(C=0.01, solver='liblinear')

In [155]:
predictions_LR = LR.predict(X_test)

In [157]:
LR_Accuracy_Score = metrics.accuracy_score(y_test, predictions_LR)
LR_JaccardIndex = metrics.jaccard_score(y_test, predictions_LR,average='weighted')
LR_F1_Score = metrics.f1_score(y_test, predictions_LR,average='weighted')
#LR_Log_Loss = metrics.log_loss(y_test, predictions_LR)
print("LR_Accuracy_Score :", LR_Accuracy_Score)
print("LR_JaccardIndex :" , LR_JaccardIndex)
print("LR_F1_Score :", LR_F1_Score)
#print("LR_Log_Loss :", LR_Log_Loss)

LR_Accuracy_Score : 0.7121418826739427
LR_JaccardIndex : 0.5489806279601199
LR_F1_Score : 0.6464728592076198


### 8.prediction and evaluation using SVM classifier.

In [159]:
from sklearn.svm import SVC
SVM = SVC(kernel= 'linear')
SVM.fit(X_train, y_train)

SVC(kernel='linear')

In [161]:
predictions_SVM = SVM.predict(X_test)

In [163]:
SVM_Accuracy_Score = metrics.accuracy_score(y_test, predictions_SVM)
SVM_JaccardIndex = metrics.jaccard_score(y_test, predictions_SVM, average='weighted')
SVM_F1_Score = metrics.f1_score(y_test, predictions_SVM,average='weighted')
#SVM_Log_Loss = metrics.log_loss(y_test, predictions_SVM)
print("SVM_Accuracy_Score :", SVM_Accuracy_Score)
print("SVM_JaccardIndex :" , SVM_JaccardIndex)
print("SVM_F1_Score :", SVM_F1_Score)
#print("SVM_Log_Loss :", SVM_Log_Loss)

SVM_Accuracy_Score : 0.7121418826739427
SVM_JaccardIndex : 0.5619719945294624
SVM_F1_Score : 0.6529937230009057


In [164]:
err = {'KNN':[KNN_Accuracy_Score,KNN_JaccardIndex,KNN_F1_Score],
        'Tree':[Tree_Accuracy_Score,Tree_JaccardIndex,Tree_F1_Score],
        'LR': [LR_Accuracy_Score,LR_JaccardIndex,LR_F1_Score],
        'SVM':[SVM_Accuracy_Score,SVM_JaccardIndex,SVM_F1_Score]}
errors = pd.DataFrame(data=err, index=['Accuracy_score','Jaccard_index', 'F1_score'])

In [165]:
errors

Unnamed: 0,KNN,Tree,LR,SVM
Accuracy_score,0.709413,0.699864,0.712142,0.712142
Jaccard_index,0.575613,0.540377,0.548981,0.561972
F1_score,0.675918,0.636277,0.646473,0.652994


## Therefore here we can see that Random forest classifier fits best for diabetic retinopathy.