<a href="https://colab.research.google.com/github/Harshkotkar/Deep-Learning/blob/main/Early_Detection_of_Diabetic_Retinopathy_from_Retinal_Fundus_Images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Early Detection of Diabetic Retinopathy from Retinal Fundus Images

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, applications
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, cohen_kappa_score
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Define the dataset class
class DRDataset:
    """Label table for retinal fundus images plus image loading helpers.

    The CSV is read into ``self.data`` and must contain a ``diagnosis``
    column; per-class weights are derived from its value counts so that
    rare classes can be up-weighted during training.

    Args:
        csv_file: Path to the CSV file holding the labels.
        img_dir: Directory containing the ``<name>.png`` image files.
        img_size: Size passed to ``PIL.Image.resize`` in
            ``preprocess_image``.
        num_classes: Number of classes used in the weight formula
            ``total_samples / (num_classes * class_count)``.
    """

    def __init__(self, csv_file, img_dir, img_size=(256, 256), num_classes=5):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.img_size = img_size

        # Calculate class weights for handling imbalance.
        # Key by the actual diagnosis label (not its position in the sorted
        # counts) so a label missing from the CSV cannot shift the weights
        # of the remaining classes onto the wrong ids.
        class_counts = self.data['diagnosis'].value_counts().sort_index()
        total_samples = len(self.data)
        self.class_weights = {
            int(label): total_samples / (num_classes * count)
            for label, count in class_counts.items()
        }

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.data)

    def load_image(self, img_name):
        """Open ``<img_dir>/<img_name>.png`` and return it as an RGB PIL image."""
        img_path = os.path.join(self.img_dir, img_name + '.png')
        # The context manager closes the underlying file; convert() returns
        # a fully loaded copy that stays valid afterwards.
        with Image.open(img_path) as raw:
            return raw.convert('RGB')

    def preprocess_image(self, image):
        """Crop black borders, resize to ``self.img_size`` and scale by 1/255.

        Args:
            image: RGB image (PIL image or array accepted by ``np.array``).

        Returns:
            The resized image as a NumPy array divided by 255.0.
        """
        # Crop black borders
        image = np.array(image)
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            # Use the largest foreground region: the first contour returned
            # is arbitrary and may be a tiny bright speck in the border.
            cnt = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(cnt)
            image = image[y:y+h, x:x+w]

        # Resize and normalize
        image = Image.fromarray(image)
        image = image.resize(self.img_size)
        image = np.array(image) / 255.0
        return image

    def get_class_weights(self):
        """Return the ``{diagnosis_label: weight}`` dict built in ``__init__``."""
        return self.class_weights

# Create data generators
def create_data_generators(csv_path, img_dir, batch_size=32, validation_split=0.2, img_size=(256, 256)):
    """Build stratified train/validation Keras image generators.

    Args:
        csv_path: CSV file with ``id_code`` and ``diagnosis`` columns.
        img_dir: Directory holding the ``<id_code>.png`` images.
        batch_size: Batch size for both generators.
        validation_split: Fraction of rows held out for validation.
        img_size: Target size images are resized to by the generators.

    Returns:
        ``(train_generator, val_generator, class_weights)`` where
        ``class_weights`` comes from ``DRDataset.get_class_weights()``.

    Note:
        Neither generator sets ``rescale``, so the pixel values they yield
        are not divided by 255. NOTE(review): confirm that the downstream
        model expects unscaled input.
    """
    # Load dataset
    dataset = DRDataset(csv_path, img_dir, img_size)

    # Get class weights
    class_weights = dataset.get_class_weights()

    # flow_from_dataframe treats x_col values as literal file names inside
    # `directory`. DRDataset.load_image shows the files are "<id_code>.png",
    # so build a file-name column instead of passing the bare id (which
    # would match no file). Ids that already carry the extension are kept.
    frame = dataset.data.copy()
    ids = frame['id_code'].astype(str)
    has_ext = ids.str.lower().str.endswith('.png')
    frame['image_file'] = ids.where(has_ext, ids + '.png')

    # Split data
    train_df, val_df = train_test_split(
        frame,
        test_size=validation_split,
        stratify=frame['diagnosis'],
        random_state=42
    )

    # Create data generators (augmentation is applied to training data only)
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=None,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        brightness_range=[0.8, 1.2],
        zoom_range=0.2,
        fill_mode='constant',
        cval=0
    )

    val_datagen = tf.keras.preprocessing.image.ImageDataGenerator()

    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        directory=img_dir,
        x_col='image_file',
        y_col='diagnosis',
        target_size=img_size,
        batch_size=batch_size,
        class_mode='raw',
        shuffle=True,
        seed=42
    )

    # Validation order is kept fixed so predictions line up with val_df rows.
    val_generator = val_datagen.flow_from_dataframe(
        dataframe=val_df,
        directory=img_dir,
        x_col='image_file',
        y_col='diagnosis',
        target_size=img_size,
        batch_size=batch_size,
        class_mode='raw',
        shuffle=False
    )

    return train_generator, val_generator, class_weights

# Load dataset and analyze class distribution
csv_path = 'train.csv'  # Update with your path
img_dir = 'train_images'  # Update with your path

dataset = DRDataset(csv_path, img_dir)
print(f"Dataset size: {len(dataset)}")

# Analyze class distribution
class_counts = dataset.data['diagnosis'].value_counts().sort_index()
print("Class distribution:")
# Iterate (label, count) pairs so the printed class id is the real diagnosis
# label rather than a positional index, matching the x axis of the bar plot.
for label, count in class_counts.items():
    print(f"Class {label}: {count} samples ({count/len(dataset)*100:.2f}%)")

# Bar chart of the number of images per diagnosis label
plt.figure(figsize=(10, 6))
sns.barplot(x=class_counts.index, y=class_counts.values)
plt.title('Class Distribution in APTOS 2019 Dataset')
plt.xlabel('DR Severity Level')
plt.ylabel('Number of Images')
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'train.csv'