In [2]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Image folders for the three dataset splits
train_dir = "./retina_dataset/Training_Set/Training_Set/Training"
validation_dir = "./retina_dataset/Evaluation_Set/Evaluation_Set/Validation"
test_dir = "./retina_dataset/Test_Set/Test_Set/Test"

# Read the label table of each split (one CSV per split), in train /
# validation / test order
train_df, validation_df, test_df = (
    pd.read_csv(labels_csv)
    for labels_csv in (
        "./retina_dataset/Training_Set/Training_Set/RFMiD_Training_Labels.csv",
        "./retina_dataset/Evaluation_Set/Evaluation_Set/RFMiD_Validation_Labels.csv",
        "./retina_dataset/Test_Set/Test_Set/RFMiD_Testing_Labels.csv",
    )
)

# Square target image size fed to the network, and images per batch
img_height = img_width = 224
batch_size = 32


In [18]:
# Preview the first rows of each label table; every preview is followed by
# a blank line so the three printouts stay visually separated.
for heading, labels in (
    ("Train DataFrame:", train_df),
    ("Validation DataFrame:", validation_df),
    ("Test DataFrame:", test_df),
):
    print(heading)
    print(labels.head())
    print()

Train DataFrame:
      ID Disease_Risk  DR  ARMD  MH  DN  MYA  BRVO  TSLN  ERM  ...  CME  PTCR  \
0  1.png            1   1     0   0   0    0     0     0    0  ...    0     0   
1  2.png            1   1     0   0   0    0     0     0    0  ...    0     0   
2  3.png            1   1     0   0   0    0     0     0    0  ...    0     0   
3  4.png            1   0     0   1   0    0     0     0    0  ...    0     0   
4  5.png            1   1     0   0   0    0     0     0    0  ...    0     0   

   CF  VH  MCA  VS  BRAO  PLQ  HPED  CL  
0   0   0    0   0     0    0     0   0  
1   0   0    0   0     0    0     0   0  
2   0   0    0   0     0    0     0   0  
3   0   0    0   0     0    0     0   0  
4   0   0    0   0     0    0     0   0  

[5 rows x 47 columns]

Validation DataFrame:
      ID Disease_Risk  DR  ARMD  MH  DN  MYA  BRVO  TSLN  ERM  ...  CME  PTCR  \
0  1.png            1   1     0   0   0    0     0     1    0  ...    0     0   
1  2.png            1   0     0   0 

In [9]:
# Cast the filename column ("ID") and the target column ("Disease_Risk") to
# str in every split; both are later handed to flow_from_dataframe as
# x_col / y_col.
for labels in (train_df, validation_df, test_df):
    for column in ("ID", "Disease_Risk"):
        labels[column] = labels[column].astype(str)

In [17]:
def _with_png_suffix(ids):
    """Return ``ids`` as strings that end in ".png" exactly once.

    Values that already carry the suffix are left untouched, so this cell can
    be re-run (or run after the frames were already patched) without turning
    "1.png" into "1.png.png".

    Args:
        ids: pandas Series of image identifiers (any dtype; cast to str here).

    Returns:
        A new Series of str file names ending in ".png".
    """
    ids = ids.astype(str)
    # Series.where keeps entries where the condition holds and substitutes the
    # suffixed value everywhere else.
    return ids.where(ids.str.endswith(".png"), ids + ".png")


# Turn the ID column of every split into an image file name.
train_df['ID'] = _with_png_suffix(train_df['ID'])
validation_df['ID'] = _with_png_suffix(validation_df['ID'])
test_df['ID'] = _with_png_suffix(test_df['ID'])

In [19]:

# Data augmentation and normalization for training: pixel values are scaled
# to [0, 1] and random geometric transforms are applied on the fly.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation and test images are only rescaled -- no augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)


def _flow_from_labels(datagen, dataframe, directory, shuffle):
    """Create a binary-label batch iterator for one dataset split.

    Args:
        datagen: ImageDataGenerator that preprocesses the images.
        dataframe: label table with an "ID" column (image file name) and a
            "Disease_Risk" column (target).
        directory: folder the file names in "ID" are resolved against.
        shuffle: whether batch order is randomized between epochs.

    Returns:
        The iterator returned by ``datagen.flow_from_dataframe``, yielding
        batches of ``batch_size`` images resized to
        ``(img_height, img_width)`` with binary labels.
    """
    return datagen.flow_from_dataframe(
        dataframe=dataframe,
        directory=directory,
        x_col="ID",
        y_col="Disease_Risk",
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=shuffle,
    )


# Only the training split is shuffled. Validation and test keep the row order
# of their label tables so metrics are reproducible and any later predictions
# line up with the generator's filenames / labels.
train_generator = _flow_from_labels(train_datagen, train_df, train_dir, shuffle=True)
validation_generator = _flow_from_labels(
    validation_datagen, validation_df, validation_dir, shuffle=False
)
test_generator = _flow_from_labels(test_datagen, test_df, test_dir, shuffle=False)


Found 1920 validated image filenames belonging to 2 classes.
Found 640 validated image filenames belonging to 2 classes.
Found 640 validated image filenames belonging to 2 classes.


In [20]:

# Build the CNN: four conv/max-pool stages, then a dropout-regularized dense
# head ending in a single unit for the binary "Disease_Risk" target.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    # Sigmoid, not softmax: softmax over a single unit always outputs 1.0,
    # so the network could never predict the negative class.
    Dense(1, activation='sigmoid')
])

# Compile the model. binary_crossentropy matches the single sigmoid output
# and the 0/1 labels produced by class_mode='binary'; categorical
# cross-entropy expects one-hot targets over several output units.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [21]:

# Train the model.
# No explicit step counts are passed: the generators have a known length, so
# Keras runs through every batch of each split per epoch. The previous
# `samples // batch_size` silently dropped the final partial batch whenever
# the number of images was not a multiple of batch_size.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10
)

# Evaluate the model on the complete test set (including any partial batch)
test_loss, test_acc = model.evaluate(test_generator)
print('Test accuracy:', test_acc)


Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 