In [1]:
import kagglehub

# Download Dataset
dataset_path = kagglehub.dataset_download("kushagratandon12/diabetic-retinopathy-balanced")
print("Dataset downloaded to:", dataset_path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/kushagratandon12/diabetic-retinopathy-balanced?dataset_version_number=1...


100%|██████████| 1.90G/1.90G [00:27<00:00, 73.6MB/s]

Extracting files...





Dataset downloaded to: /root/.cache/kagglehub/datasets/kushagratandon12/diabetic-retinopathy-balanced/versions/1


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# This code is used to navigate the dataset folder and correcting real path
import os

dataset_folder = "/root/.cache/kagglehub/datasets/kushagratandon12/diabetic-retinopathy-balanced/versions/1/content/Diabetic_Balanced_Data"

# List and print folder names
folder_names = [f.name for f in os.scandir(dataset_folder) if f.is_dir()]
print("Folders inside the dataset folder:", folder_names)



Folders inside the dataset folder: ['train', 'val', 'test']


In [4]:
import os
from collections import Counter

# Dataset Path
DATASET_DIR = "/root/.cache/kagglehub/datasets/kushagratandon12/diabetic-retinopathy-balanced/versions/1/content/Diabetic_Balanced_Data"

# Function to count images in each class (subfolder)
def count_images_in_directory(directory):
    class_counts = Counter()
    for class_name in os.listdir(directory):
        class_path = os.path.join(directory, class_name)
        if os.path.isdir(class_path):
            # Count the number of images in the class folder
            class_counts[class_name] = len([f for f in os.listdir(class_path) if f.endswith(('.jpg', '.jpeg', '.png'))])
    return class_counts

# Counting images in train, test, and valid directories
train_dir = os.path.join(DATASET_DIR, 'train')
valid_dir = os.path.join(DATASET_DIR, 'val')
test_dir = os.path.join(DATASET_DIR, 'test')

train_class_counts = count_images_in_directory(train_dir)
valid_class_counts = count_images_in_directory(valid_dir)
test_class_counts = count_images_in_directory(test_dir)

# Print counts
print("Class counts in Train directory:")
for class_name, count in train_class_counts.items():
    print(f"{class_name}: {count} images")

print("\nClass counts in Valid directory:")
for class_name, count in valid_class_counts.items():
    print(f"{class_name}: {count} images")

print("\nClass counts in Test directory:")
for class_name, count in test_class_counts.items():
    print(f"{class_name}: {count} images")


Class counts in Train directory:
3: 7000 images
4: 7000 images
1: 6792 images
2: 7000 images
0: 7000 images

Class counts in Valid directory:
3: 2000 images
4: 2000 images
1: 1940 images
2: 2000 images
0: 2000 images

Class counts in Test directory:
3: 1000 images
4: 1000 images
1: 971 images
2: 1000 images
0: 1000 images


In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Fix random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Dataset Paths
DATASET_DIR = "/root/.cache/kagglehub/datasets/kushagratandon12/diabetic-retinopathy-balanced/versions/1/content/Diabetic_Balanced_Data"
TRAIN_DIR = f"{DATASET_DIR}/train"
VAL_DIR = f"{DATASET_DIR}/val"
TEST_DIR = f"{DATASET_DIR}/test"

# Hyperparameters
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS = 20
LEARNING_RATE = 0.0001

# Data Augmentation for Training and Normalization for Validation/Test
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

val_test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Load Data
train_data = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    seed=42
)

val_data = val_test_datagen.flow_from_directory(
    VAL_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    seed=42
)

test_data = val_test_datagen.flow_from_directory(
    TEST_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    seed=42
)


# Model Definition
base_model = EfficientNetB0(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3), weights="imagenet")
base_model.trainable = True

model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(train_data.num_classes, activation='softmax')
])

# Compile Model
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the Model
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=EPOCHS,
)

# Evaluate the Model
val_loss, val_accuracy = model.evaluate(test_data)
print(f"Validation Accuracy: {val_accuracy:.2f}")

# Save the Model
model.save('/content/drive/MyDrive/final_model_20_epoch.h5')


Found 34792 images belonging to 5 classes.
Found 9940 images belonging to 5 classes.
Found 4971 images belonging to 5 classes.
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/20


  self._warn_if_super_not_called()


[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m710s[0m 571ms/step - accuracy: 0.4198 - loss: 1.3115 - val_accuracy: 0.5819 - val_loss: 0.9970
Epoch 2/20
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m559s[0m 506ms/step - accuracy: 0.5550 - loss: 1.0252 - val_accuracy: 0.6032 - val_loss: 0.9414
Epoch 3/20
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m551s[0m 504ms/step - accuracy: 0.5965 - loss: 0.9362 - val_accuracy: 0.6130 - val_loss: 0.8883
Epoch 4/20
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m563s[0m 506ms/step - accuracy: 0.6326 - loss: 0.8635 - val_accuracy: 0.6736 - val_loss: 0.7845
Epoch 5/20
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m565s[0m 517ms/step - accuracy: 0.6621 - loss: 0.7952 - val_accuracy: 0.6910 - val_loss: 0.7272
Epoch 6/20
[1m1088/1088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m603s[0m 499ms/step - accuracy: 0.6808 - loss: 0.7374 - val_accuracy: 0.6886 - val_loss: 0.7350
Epo



Validation Accuracy: 0.80
