In [1]:
import os
import random
import numpy as np
import pandas as pd
import seaborn as sns
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix, accuracy_score
from mlxtend.plotting import plot_confusion_matrix
import matplotlib.pyplot as plt
from tqdm import tqdm
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.optimizers import Adam
from sklearn.utils import shuffle
from tensorflow.keras import regularizers

In [2]:
# Load data
df = pd.read_csv('full_df_diabetic2.csv')

In [3]:
df

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,...,target,filename,normal,diabetic,glaucoma,cataract,macular degeneration,hypertensive,myopia,other
0,0,69,Female,0_left.jpg,0_right.jpg,['cataract'],['normal fundus'],0,0,0,...,"[1, 0, 0, 0, 0, 0, 0, 0]",0_right.jpg,1,0,0,0,0,0,0,0
1,1,57,Male,1_left.jpg,1_right.jpg,['normal fundus'],['normal fundus'],1,0,0,...,"[1, 0, 0, 0, 0, 0, 0, 0]",1_right.jpg,1,0,0,0,0,0,0,0
2,2,42,Male,2_left.jpg,2_right.jpg,"['laser spot', 'moderate non diabetic retinopa...",['moderate non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",2_right.jpg,0,1,0,0,0,0,0,0
3,4,53,Male,4_left.jpg,4_right.jpg,['macular epiretinal membrane'],['mild non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",4_right.jpg,0,1,0,0,0,0,0,0
4,5,50,Female,5_left.jpg,5_right.jpg,['moderate non diabetic retinopathy'],['moderate non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",5_right.jpg,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6671,4435,67,Male,4435_left.jpg,4435_right.jpg,"['post retinal laser surgery', 'moderate non d...","['post retinal laser surgery', 'diabetic diabe...",0,1,0,...,,4435_left.jpg,0,1,0,0,0,0,0,0
6672,4485,49,Female,4485_left.jpg,4485_right.jpg,"['post retinal laser surgery', 'moderate non d...","['post retinal laser surgery', 'moderate non d...",0,1,0,...,,4485_left.jpg,0,1,0,0,0,0,0,0
6673,4501,62,Female,4501_left.jpg,4501_right.jpg,"['post retinal laser surgery', 'diabetic diabe...","['post retinal laser surgery', 'diabetic diabe...",0,1,0,...,,4501_left.jpg,0,1,0,0,0,0,0,0
6674,4598,64,Male,4598_left.jpg,4598_right.jpg,"['post retinal laser surgery', 'severe non dia...","['post retinal laser surgery', 'severe non dia...",0,1,0,...,,4598_left.jpg,0,1,0,0,0,0,0,0


In [4]:
df.iloc[0]

ID                                                                           0
Patient Age                                                                 69
Patient Sex                                                             Female
Left-Fundus                                                         0_left.jpg
Right-Fundus                                                       0_right.jpg
Left-Diagnostic Keywords                                          ['cataract']
Right-Diagnostic Keywords                                    ['normal fundus']
N                                                                            0
D                                                                            0
G                                                                            0
C                                                                            1
A                                                                            0
H                                                   

In [5]:
counts = df[["normal", "diabetic", "glaucoma", "cataract", "macular degeneration", "hypertensive", "myopia", "other"]].apply(pd.Series.value_counts)
counts

Unnamed: 0,normal,diabetic,glaucoma,cataract,macular degeneration,hypertensive,myopia,other
0,3661,4917,6353,6369,6396,6484,6421,5948
1,3015,1759,323,307,280,192,255,728


In [24]:
# Generator for augmentations
augment_datagen = ImageDataGenerator(
    #preprocessing_function=preprocess_input,
    #rescale=1./255,
    rotation_range=20,
    brightness_range=[0.7,1.3],
    horizontal_flip=True,
    #vertical_flip=True,
    zoom_range=[0.95, 1.05],
    fill_mode='nearest'
)

In [25]:
# Folder for augmented images
generated_folder = 'generated_alldataset_images3'
if not os.path.exists(generated_folder):
    os.makedirs(generated_folder)

# Determine the maximum count of images any class has
max_count = 3000

# Start the new IDs after the last ID in the existing DataFrame
last_id = df['ID'].max()

# List to hold dictionaries of all augmented rows
augmented_rows = []

conditions = [ "normal", "diabetic", "glaucoma", "cataract", "macular degeneration", "hypertensive", "myopia", "other"]
# Create equal number of augmented images per condition to match max_count
for condition in conditions:
    #current_count = df[condition].sum()
    current_count = df[df[condition] == 1].shape[0]

    # Skip augmentation if the current count is already sufficient
    if current_count >= max_count:
        continue

    class_rows = df[df[condition] == 1]
    num_originals = len(class_rows)

    # Calculate how many augmentations are needed per original image
    augmentations_needed_per_image = (max_count - num_originals) // num_originals
    additional_augmentations_needed = (max_count - num_originals) % num_originals

    # Iterate over all rows in class_rows
    for index, row in class_rows.iterrows():
        original_filename = row['filename']
        original_filepath = os.path.join("Training Images", original_filename)
        image_data = cv2.imread(original_filepath)  # Load the image

        if image_data is None:
            print(f"Image at path {original_filepath} could not be loaded.")
            continue
        
        # Prepare to augment the image
        num_augmentations_for_this_image = augmentations_needed_per_image
        if additional_augmentations_needed > 0:
            num_augmentations_for_this_image += 1
            additional_augmentations_needed -= 1
        
        # Generate required augmentations
        image = image_data.reshape((1,) + image_data.shape)
        aug_count = 0
        for x in augment_datagen.flow(image, batch_size=1):
            augmented_image = x[0].astype('uint8')
            
            last_id += 1
            
            # Create a new filename and filepath for the augmented image
            augmented_filename = f"{os.path.splitext(original_filename)[0]}_{condition}{aug_count+1}_{last_id}.jpg"
            augmented_filepath = os.path.join(generated_folder, augmented_filename)
            #cv2.imwrite(augmented_filepath, cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR))
            cv2.imwrite(augmented_filepath,augmented_image)

            # Update the row dictionary for the augmented row
            augmented_row = row.copy()
            augmented_row['filename'] = augmented_filename
            augmented_row['filepath'] = f"Training Images/{augmented_filename}"
            augmented_row['ID'] = last_id
            
            # Add the dictionary to the list
            augmented_rows.append(augmented_row)

            aug_count += 1
            if aug_count >= num_augmentations_for_this_image:
                break  # Stop after generating the required number of augmentations

# Convert the list of dictionaries to a DataFrame
augmented_df = pd.DataFrame(augmented_rows)


In [26]:
augmented_df

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,...,target,filename,normal,diabetic,glaucoma,cataract,macular degeneration,hypertensive,myopia,other
2,4785,42,Male,2_left.jpg,2_right.jpg,"['laser spot', 'moderate non diabetic retinopa...",['moderate non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",2_right_diabetic1_4785.jpg,0,1,0,0,0,0,0,0
3,4786,53,Male,4_left.jpg,4_right.jpg,['macular epiretinal membrane'],['mild non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",4_right_diabetic1_4786.jpg,0,1,0,0,0,0,0,0
4,4787,50,Female,5_left.jpg,5_right.jpg,['moderate non diabetic retinopathy'],['moderate non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",5_right_diabetic1_4787.jpg,0,1,0,0,0,0,0,0
5,4788,60,Male,6_left.jpg,6_right.jpg,['macular epiretinal membrane'],"['moderate non diabetic retinopathy', 'epireti...",0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",6_right_diabetic1_4788.jpg,0,1,0,0,0,0,0,0
6,4789,60,Female,7_left.jpg,7_right.jpg,['drusen'],['mild non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",7_right_diabetic1_4789.jpg,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6666,22454,55,Female,3402_left.jpg,3402_right.jpg,"['lens dust', 'lens dust']","['lens dust', 'normal fundus']",1,0,0,...,,3402_left_other2_22454.jpg,0,0,0,0,0,0,0,1
6666,22455,55,Female,3402_left.jpg,3402_right.jpg,"['lens dust', 'lens dust']","['lens dust', 'normal fundus']",1,0,0,...,,3402_left_other3_22455.jpg,0,0,0,0,0,0,0,1
6668,22456,55,Male,4149_left.jpg,4149_right.jpg,['low image quality'],['low image quality'],1,0,0,...,,4149_left_other1_22456.jpg,0,0,0,0,0,0,0,1
6668,22457,55,Male,4149_left.jpg,4149_right.jpg,['low image quality'],['low image quality'],1,0,0,...,,4149_left_other2_22457.jpg,0,0,0,0,0,0,0,1


In [27]:
augmented_df.iloc[0]

ID                                                                        4785
Patient Age                                                                 42
Patient Sex                                                               Male
Left-Fundus                                                         2_left.jpg
Right-Fundus                                                       2_right.jpg
Left-Diagnostic Keywords     ['laser spot', 'moderate non diabetic retinopa...
Right-Diagnostic Keywords                ['moderate non diabetic retinopathy']
N                                                                            0
D                                                                            1
G                                                                            0
C                                                                            0
A                                                                            0
H                                                   

In [28]:
counts = augmented_df[["normal", "diabetic", "glaucoma", "cataract", "macular degeneration", "hypertensive", "myopia", "other"]].apply(pd.Series.value_counts)
counts

Unnamed: 0,normal,diabetic,glaucoma,cataract,macular degeneration,hypertensive,myopia,other
0,17674.0,14278,14604,14905,14725,14656,14834,15402
1,,3396,3070,2769,2949,3018,2840,2272


In [34]:
augmented_df.to_csv("full_df_diabetic2_augmented2.csv")

In [30]:
# Concatenate the original DataFrame with the augmented DataFrame
Alldf = pd.concat([df, augmented_df], ignore_index=True)

In [31]:
Alldf

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,...,target,filename,normal,diabetic,glaucoma,cataract,macular degeneration,hypertensive,myopia,other
0,0,69,Female,0_left.jpg,0_right.jpg,['cataract'],['normal fundus'],0,0,0,...,"[1, 0, 0, 0, 0, 0, 0, 0]",0_right.jpg,1,0,0,0,0,0,0,0
1,1,57,Male,1_left.jpg,1_right.jpg,['normal fundus'],['normal fundus'],1,0,0,...,"[1, 0, 0, 0, 0, 0, 0, 0]",1_right.jpg,1,0,0,0,0,0,0,0
2,2,42,Male,2_left.jpg,2_right.jpg,"['laser spot', 'moderate non diabetic retinopa...",['moderate non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",2_right.jpg,0,1,0,0,0,0,0,0
3,4,53,Male,4_left.jpg,4_right.jpg,['macular epiretinal membrane'],['mild non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",4_right.jpg,0,1,0,0,0,0,0,0
4,5,50,Female,5_left.jpg,5_right.jpg,['moderate non diabetic retinopathy'],['moderate non diabetic retinopathy'],0,1,0,...,"[0, 1, 0, 0, 0, 0, 0, 0]",5_right.jpg,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24345,22454,55,Female,3402_left.jpg,3402_right.jpg,"['lens dust', 'lens dust']","['lens dust', 'normal fundus']",1,0,0,...,,3402_left_other2_22454.jpg,0,0,0,0,0,0,0,1
24346,22455,55,Female,3402_left.jpg,3402_right.jpg,"['lens dust', 'lens dust']","['lens dust', 'normal fundus']",1,0,0,...,,3402_left_other3_22455.jpg,0,0,0,0,0,0,0,1
24347,22456,55,Male,4149_left.jpg,4149_right.jpg,['low image quality'],['low image quality'],1,0,0,...,,4149_left_other1_22456.jpg,0,0,0,0,0,0,0,1
24348,22457,55,Male,4149_left.jpg,4149_right.jpg,['low image quality'],['low image quality'],1,0,0,...,,4149_left_other2_22457.jpg,0,0,0,0,0,0,0,1


In [32]:
counts = Alldf[["normal", "diabetic", "glaucoma", "cataract", "macular degeneration", "hypertensive", "myopia", "other"]].apply(pd.Series.value_counts)
counts

Unnamed: 0,normal,diabetic,glaucoma,cataract,macular degeneration,hypertensive,myopia,other
0,21335,19195,20957,21274,21121,21140,21255,21350
1,3015,5155,3393,3076,3229,3210,3095,3000


In [33]:
Alldf.to_csv("full_df_diabetic2_aug.csv")