First, install the Roboflow SDK
!pip install roboflow

In [10]:

from roboflow import Roboflow

# Initialize Roboflow with your API key
rf = Roboflow(api_key="79dhzfFvs8UNcpV3EWLU")

# Access the specific project and version in the Roboflow workspace
project = rf.workspace("diabetic-retinopathy-efigv").project("diabetic-retinopathy-hvhiu")

# Access the specific version of the project
version = project.version(1)

# Download the dataset in "multiclass" format
dataset = version.download("multiclass")


loading Roboflow workspace...
loading Roboflow project...


 importing all the necessary libraries and modules to build, train, and optimize a deep learning model for image classification, diabetic retinopathy detection.

In [14]:
import os  # Used for interacting with the operating system, like file paths and directories.
import pandas as pd  # Used for reading and handling data in tabular form (CSV, Excel, etc.).
import numpy as np  # Used for numerical operations and handling arrays.
import cv2  # OpenCV library used for image processing tasks.
import tensorflow as tf  # Imports TensorFlow, the deep learning framework.
from tensorflow.keras.applications import EfficientNetB0  # Imports a pre-trained EfficientNetB0 model for transfer learning.
from tensorflow.keras.models import Model, load_model  # Used to define custom models and load saved models.
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D  # Layers used to modify and extend the pre-trained model.
from tensorflow.keras.optimizers import Adam  # Imports the Adam optimizer for training the model.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau  # Callbacks to improve training (stop early, save best model, adjust learning rate).
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # Used for real-time data augmentation and preprocessing.

# Constants
IMG_SIZE = (224, 224)  # Target size of input images
BATCH_SIZE = 32  # Number of samples processed before model update
EPOCHS = 50  # Number of complete passes through the training dataset
LEARNING_RATE = 0.0001  # Learning rate for optimizer
MODEL_SAVE_PATH = 'C:\\Users\\narra\\OneDrive\\Desktop\\My personal Files\\MY projects\\diabetic_retinopathy_app\\diabetic_retinopathy_app\\diabetic_retinopathy_model.h5'  # Path to save the trained model

# 1. Data Preparation
def load_data():
    """Load data from your exact folder structure"""
    base_path = r'C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic\DIABETIC-RETINOPATHY-1'  # Root directory for dataset

    # Load CSV Files
    train_df = pd.read_csv(os.path.join(base_path, 'train', '_classes.csv'))  # Load training data CSV
    test_df = pd.read_csv(os.path.join(base_path, 'test', '_classes.csv'))  # Load testing data CSV
    valid_df = pd.read_csv(os.path.join(base_path, 'valid', '_classes.csv'))  # Load validation data CSV

    # Create full image paths (images are in same folder as CSVs)
    train_df['full_path'] = train_df['filename'].apply(lambda x: os.path.join(base_path, 'train', x))  # Add full image path for training data
    test_df['full_path'] = test_df['filename'].apply(lambda x: os.path.join(base_path, 'test', x))  # Add full image path for testing data
    valid_df['full_path'] = valid_df['filename'].apply(lambda x: os.path.join(base_path, 'valid', x))  # Add full image path for validation data

    # Verify all images exist
    for name, df in [('Train', train_df), ('Test', test_df), ('Valid', valid_df)]:
        df['exists'] = df['full_path'].apply(lambda x: os.path.exists(x))  # Check if image file exists
        missing = len(df[~df['exists']])  # Count missing images
        if missing > 0:
            print(f"{name}: Missing {missing} images. First missing: {df[~df['exists']]['full_path'].iloc[0]}")  # Print warning if missing
        df = df[df['exists']].drop(columns=['exists'])  # Drop non-existing entries

    classes = [col for col in train_df.columns if col not in ['filename', 'full_path', 'exists']]  # Extract class labels

    return train_df, test_df, valid_df, classes  # Return dataframes and class names

# 2. Data Generators
def create_generators(train_df, valid_df, test_df, classes):
    """Create data generators using your exact paths"""
    train_datagen = ImageDataGenerator(  # Define training data augmentation
        rescale=1./255,  # Normalize pixel values
        rotation_range=20,  # Randomly rotate images
        width_shift_range=0.2,  # Random horizontal shift
        height_shift_range=0.2,  # Random vertical shift
        shear_range=0.2,  # Apply shearing
        zoom_range=0.2,  # Random zoom
        horizontal_flip=True,  # Random horizontal flip
        fill_mode='nearest'  # Fill missing pixels
    )
    valtest_datagen = ImageDataGenerator(rescale=1./255)  # Validation/test data rescaling
    train_generator = train_datagen.flow_from_dataframe(  # Create train generator
        dataframe=train_df,
        x_col='full_path',
        y_col=classes,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='raw',  # Use raw labels (multi-label)
        shuffle=True
    )

    valid_generator = valtest_datagen.flow_from_dataframe(  # Create validation generator
        dataframe=valid_df,
        x_col='full_path',
        y_col=classes,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='raw',
        shuffle=False
    )
    test_generator = valtest_datagen.flow_from_dataframe(  # Create test generator
        dataframe=test_df,
        x_col='full_path',
        y_col=classes,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='raw',
        shuffle=False
    )
    return train_generator, valid_generator, test_generator  # Return all generators

# 3. Model Creation
def create_model(num_classes):
    """Create and compile the model"""
    base_model = EfficientNetB0(  # Load EfficientNetB0 base model
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)
    )
    # Freeze base model layers
    base_model.trainable = False  # Prevent the base model layers from being updated during training

    inputs = tf.keras.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))  # Define the input layer with the image size
    x = base_model(inputs, training=False)  # Pass input through the base model (set training=False to keep base model frozen)

    x = GlobalAveragePooling2D()(x)  # Global average pooling layer
    x = Dense(1024, activation='relu')(x)  # Dense layer with ReLU activation
    outputs = Dense(num_classes, activation='sigmoid')(x)  # Output layer for multi-label classification
    model = Model(inputs, outputs)  # Create model
    model.compile(  # Compile the model
        optimizer=Adam(learning_rate=LEARNING_RATE),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
    )
    return model  # Return compiled model

# 4. Training
def train_model(model, train_generator, valid_generator):
    """Train the model with callbacks"""
    callbacks = [  # Define training callbacks
        EarlyStopping(monitor='val_loss', patience=5, verbose=1),  # Stop training early if no improvement
        ModelCheckpoint(  # Save best model based on validation loss
            MODEL_SAVE_PATH,
            monitor='val_loss',
            save_best_only=True,
            verbose=1
        ),
        ReduceLROnPlateau(  # Reduce learning rate if validation loss plateaus
            monitor='val_loss',
            factor=0.1,
            patience=3,
            verbose=1,
            min_lr=1e-6
        )
    ]

    history = model.fit(  # Train the model
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=EPOCHS,
        validation_data=valid_generator,
        validation_steps=len(valid_generator),
        callbacks=callbacks,
        verbose=1
    )

    return history  # Return training history

# 5. Evaluation
def evaluate_model(model, test_generator):
    """Evaluate model on test set"""
    results = model.evaluate(test_generator, steps=len(test_generator))  # Evaluate on test data

    print(f"\nTest Evaluation:")
    print(f"Loss: {results[0]:.4f}")  # Print loss
    print(f"Accuracy: {results[1]:.4f}")  # Print accuracy
    print(f"AUC: {results[2]:.4f}")  # Print AUC

    return results  # Return results

# 6. Prediction Function
def predict_image(model, image_path, classes, threshold=0.5):
    """Make prediction for a single image"""
    img = cv2.imread(image_path)  # Read image from file

    if img is None:
        raise ValueError(f"Could not read image at {image_path}")  # Error if image can't be read
    img = cv2.resize(img, IMG_SIZE)  # Resize image to input size
    img = img / 255.0  # Normalize image
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    predictions = model.predict(img)[0]  # Get model predictions
    output = {"predictions": {}}  # Initialize output
    for i, class_name in enumerate(classes):  # Loop through classes
        confidence = float(predictions[i])  # Get confidence
        output["predictions"][class_name] = {  # Store prediction
            "confidence": confidence,
            "class_id": i,
            "present": confidence > threshold
        }
    return output  # Return dictionary of predictions

# Main Execution
if __name__ == "__main__":
    print("Loading data...")  # Notify data loading start

    train_df, test_df, valid_df, classes = load_data()  # Load dataset

    print("\nData Summary:")
    print(f"Classes: {classes}")  # Print class labels
    print(f"Train samples: {len(train_df)}")  # Number of train samples
    print(f"Validation samples: {len(valid_df)}")  # Number of validation samples
    print(f"Test samples: {len(test_df)}")  # Number of test samples

    print("\nCreating data generators...")  # Notify generator creation
    train_gen, valid_gen, test_gen = create_generators(train_df, valid_df, test_df, classes)  # Create generators

    print("\nCreating model...")  # Notify model creation
    model = create_model(len(classes))  # Create model

    model.summary()  # Print model summary

    print("\nStarting training...")  # Notify training start
    history = train_model(model, train_gen, valid_gen)  # Train model

    print("\nEvaluating on test set...")  # Notify test start
    evaluate_model(model, test_gen)  # Evaluate model

    print(f"\nSaving model to {MODEL_SAVE_PATH}")  # Notify model saving
    model.save(MODEL_SAVE_PATH)  # Save the model

    sample_image = test_df.iloc[0]['full_path']  # Pick a sample image
    print(f"\nMaking sample prediction for: {sample_image}")  # Notify prediction

    prediction = predict_image(model, sample_image, classes)  # Predict sample

    print("Prediction results:")  # Show prediction results
    for class_name, pred in prediction["predictions"].items():  # Loop through predictions
        print(f"{class_name}: {pred['confidence']:.2f} ({'Present' if pred['present'] else 'Absent'})")  # Display prediction


Loading data...

Data Summary:
Classes: [' Advanced', ' Cotton Wool Spot', ' Hard Exudates', ' Hemorrhages', ' Micro-aneurysms', ' Moderate', ' Moderate DR', ' NPDR', ' No DR', ' PDR', ' Severe', ' Severe  DR', ' Severe DR', ' Soft Cotton Wool', ' Soft Exudates', ' Very', ' hemorrhages']
Train samples: 241
Validation samples: 71
Test samples: 36

Creating data generators...
Found 241 validated image filenames.
Found 71 validated image filenames.
Found 36 validated image filenames.

Creating model...



Starting training...


Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.


Epoch 1/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.0524 - auc: 0.5782 - loss: 0.6365
Epoch 1: val_loss improved from inf to 0.48914, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 3s/step - accuracy: 0.0577 - auc: 0.5850 - loss: 0.6313 - val_accuracy: 0.1268 - val_auc: 0.7880 - val_loss: 0.4891 - learning_rate: 1.0000e-04
Epoch 2/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1557 - auc: 0.7897 - loss: 0.4707
Epoch 2: val_loss improved from 0.48914 to 0.39358, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.1578 - auc: 0.7909 - loss: 0.4680 - val_accuracy: 0.1268 - val_auc: 0.8269 - val_loss: 0.3936 - learning_rate: 1.0000e-04
Epoch 3/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1690 - auc: 0.8303 - loss: 0.3869
Epoch 3: val_loss improved from 0.39358 to 0.34825, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.1696 - auc: 0.8303 - loss: 0.3856 - val_accuracy: 0.1268 - val_auc: 0.8352 - val_loss: 0.3483 - learning_rate: 1.0000e-04
Epoch 4/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1707 - auc: 0.8330 - loss: 0.3427
Epoch 4: val_loss improved from 0.34825 to 0.32919, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1s/step - accuracy: 0.1711 - auc: 0.8337 - loss: 0.3426 - val_accuracy: 0.1268 - val_auc: 0.8408 - val_loss: 0.3292 - learning_rate: 1.0000e-04
Epoch 5/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1676 - auc: 0.8580 - loss: 0.3248
Epoch 5: val_loss improved from 0.32919 to 0.32021, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.1683 - auc: 0.8564 - loss: 0.3250 - val_accuracy: 0.1268 - val_auc: 0.8408 - val_loss: 0.3202 - learning_rate: 1.0000e-04
Epoch 6/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1597 - auc: 0.8410 - loss: 0.3265
Epoch 6: val_loss improved from 0.32021 to 0.31471, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.1609 - auc: 0.8419 - loss: 0.3257 - val_accuracy: 0.1268 - val_auc: 0.8417 - val_loss: 0.3147 - learning_rate: 1.0000e-04
Epoch 7/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2013 - auc: 0.8307 - loss: 0.3159
Epoch 7: val_loss improved from 0.31471 to 0.31195, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.1983 - auc: 0.8322 - loss: 0.3160 - val_accuracy: 0.1268 - val_auc: 0.8430 - val_loss: 0.3119 - learning_rate: 1.0000e-04
Epoch 8/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1529 - auc: 0.8549 - loss: 0.3095
Epoch 8: val_loss improved from 0.31195 to 0.31095, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1s/step - accuracy: 0.1543 - auc: 0.8541 - loss: 0.3101 - val_accuracy: 0.1268 - val_auc: 0.8414 - val_loss: 0.3110 - learning_rate: 1.0000e-04
Epoch 9/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1940 - auc: 0.8477 - loss: 0.3083
Epoch 9: val_loss improved from 0.31095 to 0.30945, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.1918 - auc: 0.8477 - loss: 0.3090 - val_accuracy: 0.1268 - val_auc: 0.8418 - val_loss: 0.3095 - learning_rate: 1.0000e-04
Epoch 10/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2153 - auc: 0.8548 - loss: 0.3132
Epoch 10: val_loss improved from 0.30945 to 0.30907, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.2107 - auc: 0.8540 - loss: 0.3132 - val_accuracy: 0.1268 - val_auc: 0.8426 - val_loss: 0.3091 - learning_rate: 1.0000e-04
Epoch 11/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1829 - auc: 0.8389 - loss: 0.3154
Epoch 11: val_loss improved from 0.30907 to 0.30856, saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.1820 - auc: 0.8399 - loss: 0.3151 - val_accuracy: 0.1268 - val_auc: 0.8413 - val_loss: 0.3086 - learning_rate: 1.0000e-04
Epoch 12/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1977 - auc: 0.8582 - loss: 0.3102
Epoch 12: val_loss did not improve from 0.30856
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1s/step - accuracy: 0.1946 - auc: 0.8569 - loss: 0.3105 - val_accuracy: 0.1268 - val_auc: 0.8412 - val_loss: 0.3096 - learning_rate: 1.0000e-04
Epoch 13/50
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.1544 - auc: 0.8526 - loss: 0.3143 
Epoch 13: val_loss did not improve from 0.30856
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2s/step - accuracy: 0.1566 - auc: 0.8521 - loss: 0.3142 - val_accuracy: 0.1268 - val_auc: 0.8411 - val_loss: 0.3086 - learning_rate: 1.0000e-04
Epoch 14/50
[1m8/8[0




Test Evaluation:
Loss: 0.2929
Accuracy: 0.1389
AUC: 0.9015

Saving model to C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\diabetic_retinopathy_model.h5

Making sample prediction for: C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic\DIABETIC-RETINOPATHY-1\test\313_jpg.rf.39db4e4d483f6f3d39c344a8d395210b.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
Prediction results:
 Advanced: 0.14 (Absent)
 Cotton Wool Spot: 0.01 (Absent)
 Hard Exudates: 0.26 (Absent)
 Hemorrhages: 0.20 (Absent)
 Micro-aneurysms: 0.50 (Absent)
 Moderate: 0.01 (Absent)
 Moderate DR: 0.10 (Absent)
 NPDR: 0.03 (Absent)
 No DR: 0.13 (Absent)
 PDR: 0.24 (Absent)
 Severe: 0.03 (Absent)
 Severe  DR: 0.01 (Absent)
 Severe DR: 0.49 (Absent)
 Soft Cotton Wool: 0.44 (Absent)
 Soft Exudates: 0.01 (Absent)
 Very: 0.01 (Absent)
 hemorrhages: 0.01 (Absent)


**Model Summary Review**

• **Base Model**: I am using EfficientNetB0, which is a solid pre-trained architecture. It's frozen (non-trainable), which is good for transfer learning at initial stages.

• **Trainable Params**: Only the last layers (dense, dense_1) are trainable — 1.3M params.

• **Output**: Our final layer has 17 units, implying a multi-class classification problem with 17 classes.

**Key Training Observations**
| Metric         | Observation                                                       |
| -------------- | ----------------------------------------------------------------- |
| `accuracy`     | Very low — stays around 0.01–0.17 during first 6 epochs.          |
| `val_accuracy` | Even worse — stuck around 0.12 or even drops to 0.02.             |
| `auc`          | Improving well from 0.58 → 0.84, both in training and validation. |
| `loss`         | Decreasing nicely — from 0.65 → 0.31 across epochs.               |
| `val_loss`     | Also decreasing — a positive sign for generalization.             |


**Interpretation**

• The model is learning: Both loss and AUC are improving significantly.

• However, accuracy is very low, which may be caused by:

**1. Label imbalance:** Some classes might dominate; accuracy fails in such cases.

**2. Multi-label confusion:** Are our labels multi-class (only one label per image) or multi-label (multiple labels per image)? If it's multi-label, you shouldn't use accuracy as-is.

**3. Incorrect label encoding:** One-hot vs. integer labels — this must match your loss function.

**4. Wrong loss:** Are you using categorical_crossentropy or sparse_categorical_crossentropy? It must match your label format.

**5. Small dataset:** Only 8 training steps per epoch hints that your dataset may be very small.


**Suggestions**

1. **Check our label format:**

     • If labels are integers: Use sparse_categorical_crossentropy.

     • If labels are one-hot vectors: Use categorical_crossentropy.

2. **Use appropriate metrics:**

     • For imbalanced data, use AUC, F1-score, or balanced_accuracy, not just accuracy.

3. **Unfreeze EfficientNetB0 later:**

     • After a few epochs, consider unfreezing part of EfficientNet to fine-tune on your data.

4. **Use class weights:**

     • If class imbalance exists, compute and pass class_weight to model.fit(...).

5. **Try data augmentation:**

     • Add rotation, zoom, flip, etc., to help generalize better.

6. **Track confusion matrix:**

     • To debug misclassifications, especially for underperforming classes.



**Diabetic Retinopathy Prediction Script**

In [33]:
# Import required libraries
import os  # For interacting with the file system (checking file existence, extracting file name)
import cv2  # OpenCV library for image processing (reading, resizing images)
import numpy as np  # NumPy for numerical operations (like image normalization, reshaping)
from tensorflow.keras.models import load_model  # TensorFlow Keras to load the trained model

# 1. Load your saved model
model_path = r"C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic_retinopathy_app\diabetic_retinopathy_app\model\diabetic_retinopathy_model.h5"  # Path to the trained model file
model = load_model(model_path)  # Load the trained Keras model from disk into the model variable

# 2. Define your classes (must match the class order used during model training)
classes = [
    'Advanced', 'Cotton Wool Spot', 'Hard Exudates', 'Hemorrhages',
    'Micro-aneurysms', 'Moderate', 'Moderate DR', 'NPDR',
    'No DR', 'PDR', 'Severe', 'Severe DR', 'Soft Cotton Wool',
    'Soft Exudates', 'Very', 'hemorrhages'
]  # The class labels are defined in the same order as in the model training

# 3. Set the image size used during model training
IMG_SIZE = (224, 224)  # The input size expected by the model (e.g., 224x224)

# 4. Define the prediction function for diabetic retinopathy prediction
def predict_diabetic_retinopathy(image_path, confidence_threshold=0.5):
    """
    Predict diabetic retinopathy conditions from an image.
    Args:
        image_path: Path to the retinal image
        confidence_threshold: Minimum confidence threshold for labeling a condition as present (0-1)
    Returns:
        Dictionary with prediction results for each condition
    """

    # Check if the image file exists
    if not os.path.exists(image_path):  # Ensure the image path is valid and the file exists
        raise FileNotFoundError(f"Image not found: {image_path}")  # Raise an error if the image is not found

    # Load the image using OpenCV
    img = cv2.imread(image_path)  # Read the image at the specified path
    if img is None:  # Check if OpenCV was able to load the image
        raise ValueError(f"Could not read image at {image_path}")  # Raise an error if the image could not be loaded

    # Resize and preprocess the image to match model's input size
    img = cv2.resize(img, IMG_SIZE)  # Resize the image to the model's required input size
    img = img / 255.0  # Normalize pixel values to the range [0, 1] (scaling)
    img = np.expand_dims(img, axis=0)  # Add a batch dimension to match the model's input shape (e.g., [1, height, width, channels])

    # Make predictions using the trained model
    predictions = model.predict(img)[0]  # Get predictions for the single image (since batch size is 1)

    # Prepare a dictionary to store results
    results = {
        "image": os.path.basename(image_path),  # Extracts the image file name from the full path
        "predictions": {},  # Dictionary to store predictions for each condition
        "summary": {  # Summary of the most likely condition and highest confidence
            "most_likely_condition": None,
            "highest_confidence": 0.0
        }
    }

    # Iterate over the classes and store prediction results for each
    for class_name, confidence in zip(classes, predictions):  # Zip pairs class names with corresponding confidence values
        is_present = confidence > confidence_threshold  # Determine if the condition is present based on the threshold
        results["predictions"][class_name] = {  # Store confidence and presence status for each condition
            "confidence": float(confidence),  # Store confidence value as a float
            "present": is_present  # Boolean indicating if the condition is present
        }
        # Track the most likely condition (highest confidence)
        if confidence > results["summary"]["highest_confidence"]:
            results["summary"]["highest_confidence"] = confidence  # Update highest confidence score
            results["summary"]["most_likely_condition"] = class_name  # Update the most likely condition

    return results  # Return the final results dictionary with predictions

# 5. Example usage with an image path
test_image_path = r"C:\Users\narra\OneDrive\Desktop\My personal Files\MY projects\diabetic\DIABETIC-RETINOPATHY-1\train\25_jpg.rf.087bb47fe927b1ea14a8d4cc64c4f90c.jpg"  # Path to the test image for prediction

# Get prediction results for the image
results = predict_diabetic_retinopathy(test_image_path)  # Call the prediction function with the test image path

# Display the summary of the most likely condition
print(f"\nPrediction results for {results['image']}:")  # Print the image file name
print(f"Most likely condition: {results['summary']['most_likely_condition']} "
      f"(confidence: {results['summary']['highest_confidence'] * 100:.2f}%)")  # Print the most likely condition and its confidence percentage

# Display detailed predictions for all conditions
print("\nDetailed predictions:")
for condition, data in results["predictions"].items():  # Iterate over each condition and its associated prediction data
    status = "present" if data["present"] else "absent"  # Determine if the condition is present based on confidence threshold
    print(f"- {condition:20}: {data['confidence'] * 100:.2f}% ({status})")  # Print each condition with confidence and presence status




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step

Prediction results for 25_jpg.rf.087bb47fe927b1ea14a8d4cc64c4f90c.jpg:
Most likely condition: Soft Cotton Wool (confidence: 49.56%)

Detailed predictions:
- Advanced            : 45.36% (absent)
- Cotton Wool Spot    : 35.88% (absent)
- Hard Exudates       : 47.16% (absent)
- Hemorrhages         : 45.63% (absent)
- Micro-aneurysms     : 49.31% (absent)
- Moderate            : 37.16% (absent)
- Moderate DR         : 43.66% (absent)
- NPDR                : 41.16% (absent)
- No DR               : 43.67% (absent)
- PDR                 : 46.58% (absent)
- Severe              : 39.53% (absent)
- Severe DR           : 36.03% (absent)
- Soft Cotton Wool    : 49.56% (absent)
- Soft Exudates       : 48.83% (absent)
- Very                : 34.55% (absent)
- hemorrhages         : 36.54% (absent)
