In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB7
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

In [3]:
# Load CSV data
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('test.csv')

In [4]:
train_df.head()

Unnamed: 0,id_code,diagnosis
0,000c1434d8d7,2
1,001639a390f0,4
2,0024cdab0c1e,1
3,002c21358ce6,0
4,005b95c28852,0


In [5]:
test_df.head()

Unnamed: 0,id_code
0,0005cfc8afb6
1,003f0afdcd15
2,006efc72b638
3,00836aaacf06
4,009245722fa4


In [6]:
train_df.describe()

Unnamed: 0,diagnosis
count,3662.0
mean,1.12698
std,1.298409
min,0.0
25%,0.0
50%,1.0
75%,2.0
max,4.0


In [7]:
test_df.describe()

Unnamed: 0,id_code
count,1928
unique,1928
top,0005cfc8afb6
freq,1


In [8]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3662 entries, 0 to 3661
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id_code    3662 non-null   object
 1   diagnosis  3662 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 57.3+ KB


In [9]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1928 entries, 0 to 1927
Data columns (total 1 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   id_code  1928 non-null   object
dtypes: object(1)
memory usage: 15.2+ KB


In [10]:
train_df.dtypes

id_code      object
diagnosis     int64
dtype: object

In [11]:
test_df.dtypes

id_code    object
dtype: object

In [12]:
# Load and preprocess images with data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [13]:
# Convert 'diagnosis' column to string format
train_df['diagnosis'] = train_df['diagnosis'].astype(str)

In [14]:
# Update the 'id_code' column to include the '.png' extension
train_df['id_code'] = train_df['id_code'] + '.png'

In [15]:
# Split the data into training and validation sets
train_data, valid_data = train_test_split(train_df, test_size=0.2, random_state=42)

In [16]:
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='train_images',
    x_col='id_code',
    y_col='diagnosis',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',  
    subset='training'
)


Found 2930 validated image filenames belonging to 5 classes.


In [17]:
valid_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory='train_images',
    x_col='id_code',
    y_col='diagnosis',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',  
    subset='validation'
)

Found 732 validated image filenames belonging to 5 classes.


In [18]:
# Create a more advanced deep neural network
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    BatchNormalization(),
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(128, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    
    Conv2D(256, (3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(256, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(2, 2),
    
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(5, activation='softmax')  # Five output classes for multi-class classification
])

In [19]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [20]:
# Train the model
history = model.fit(train_generator, validation_data=valid_generator, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [21]:
# Update the 'id_code' column to include the '.png' extension
test_df['id_code'] = test_df['id_code'] + '.png'

In [22]:
# Predict diagnosis for the test data and add it to test_df
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='test_images',
    x_col='id_code',
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False
)

Found 1928 validated image filenames.


In [23]:
predictions = model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=1)  # Convert softmax outputs to class labels



In [24]:
# Map class labels back to their respective diagnoses
class_labels = {0: 'No DR', 1: 'Mild', 2: 'Moderate', 3: 'Severe', 4: 'Proliferative DR'}
predicted_diagnosis = [class_labels[label] for label in predicted_classes]

In [25]:
# Add the predicted diagnosis to test_df
test_df['diagnosis'] = predicted_diagnosis

# Save the updated test_df with predicted diagnoses to a CSV file
test_df.to_csv('test_with_diagnosis.csv', index=False)

In [26]:
# Evaluate the model on the validation set
validation_datagen = ImageDataGenerator(rescale=1./255)

In [27]:
# Calculate the validation loss and accuracy
validation_loss, validation_accuracy = model.evaluate(valid_generator)



In [28]:
print(f"Validation Loss: {validation_loss:.4f}")
print(f"Validation Accuracy: {validation_accuracy * 100:.2f}%")

Validation Loss: 0.8078
Validation Accuracy: 71.17%
