In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras.preprocessing import image
import os
import pandas as pd

# Cataract dataset

In [5]:
# Root folder of the cataract dataset and its two per-class sub-folders.
folder_path = 'cataract_data'  # Update this to your actual path
cataract_dir, normal_dir = (
    os.path.join(folder_path, class_folder) for class_folder in ('cataract', 'normal')
)

In [6]:
# Two separate, initially empty accumulators: one for image paths, one for labels.
image_paths, labels = [], []

In [7]:
# Record every file in the cataract folder with the label 'Cataract'.
# Paths are stored relative to the dataset root ('cataract/<file name>').
for image_name in os.listdir(cataract_dir):
    # os.listdir also returns sub-directories (for example Jupyter's
    # `.ipynb_checkpoints`); skip anything that is not a regular file so it is
    # not recorded as an image.
    if not os.path.isfile(os.path.join(cataract_dir, image_name)):
        continue
    image_paths.append(os.path.join('cataract', image_name))
    labels.append('Cataract')

In [8]:
# Record every file in the normal folder with the label 'Normal'.
# Paths are stored relative to the dataset root ('normal/<file name>').
for image_name in os.listdir(normal_dir):
    # os.listdir also returns sub-directories (for example Jupyter's
    # `.ipynb_checkpoints`); skip anything that is not a regular file so it is
    # not recorded as an image.
    if not os.path.isfile(os.path.join(normal_dir, image_name)):
        continue
    image_paths.append(os.path.join('normal', image_name))
    labels.append('Normal')

In [9]:
# Build a two-column table: one row per collected image, path next to its label.
df_cataract = pd.DataFrame(dict(image_path=image_paths, label=labels))

In [10]:
# Write the path/label table to disk; the row index is left out of the file.
csv_path = 'cataract_data/cataract_dataset.csv'  # Update this path
df_cataract.to_csv(path_or_buf=csv_path, index=False)

In [11]:
# Confirm where the CSV was written.
print(f"CSV file has been generated and saved to {csv_path}")

CSV file has been generated and saved to cataract_data/cataract_dataset.csv


In [14]:
# Read the cataract path/label CSV back from disk (same path it was saved to above).
csv_cataract=pd.read_csv('cataract_data/cataract_dataset.csv')

In [15]:
# Show the column names of the reloaded cataract table.
print(csv_cataract.columns)

Index(['image_path', 'label'], dtype='object')


In [16]:
# Preview the first five rows of the reloaded cataract table.
print(csv_cataract.head())

                  image_path     label
0     cataract/2124_left.jpg  Cataract
1   cataract/_393_746807.jpg  Cataract
2     cataract/2125_left.jpg  Cataract
3  cataract/_182_9646286.jpg  Cataract
4   cataract/_245_498355.jpg  Cataract


In [17]:
# Report the total cell count (rows x columns), then the (rows, columns) shape.
print('csv_cataract size', csv_cataract.size, sep='\n')
print('csv_cataract shape', csv_cataract.shape, sep='\n')

csv_cataract size
4224
csv_cataract shape
(2112, 2)


**cataract data columns**

['image_path', 'label']

# *Base classifier*


In [18]:
# Load the base-classifier metadata table.
# NOTE(review): the variable name is spelled "classsifier" (three s's); the
# inspection cells that follow use the same spelling, so rename them together.
df_base_classsifier=pd.read_csv(r"classifier_one_data/full_df.csv")


In [19]:
# Show the column names of the base-classifier table.
print(df_base_classsifier.columns)

Index(['ID', 'Patient Age', 'Patient Sex', 'Left-Fundus', 'Right-Fundus',
       'Left-Diagnostic Keywords', 'Right-Diagnostic Keywords', 'N', 'D', 'G',
       'C', 'A', 'H', 'M', 'O', 'filepath', 'labels', 'target', 'filename'],
      dtype='object')


In [20]:
# Print the first entry of the `filepath` column to see how image paths are stored.
print(df_base_classsifier.filepath[0])

../input/ocular-disease-recognition-odir5k/ODIR-5K/Training Images/0_right.jpg


In [21]:
# Preview the first five rows of the base-classifier table.
print(df_base_classsifier.head())

   ID  Patient Age Patient Sex Left-Fundus Right-Fundus  \
0   0           69      Female  0_left.jpg  0_right.jpg   
1   1           57        Male  1_left.jpg  1_right.jpg   
2   2           42        Male  2_left.jpg  2_right.jpg   
3   4           53        Male  4_left.jpg  4_right.jpg   
4   5           50      Female  5_left.jpg  5_right.jpg   

                            Left-Diagnostic Keywords  \
0                                           cataract   
1                                      normal fundus   
2  laser spotï¼Œmoderate non proliferative retinopathy   
3                        macular epiretinal membrane   
4             moderate non proliferative retinopathy   

                Right-Diagnostic Keywords  N  D  G  C  A  H  M  O  \
0                           normal fundus  0  0  0  1  0  0  0  0   
1                           normal fundus  1  0  0  0  0  0  0  0   
2  moderate non proliferative retinopathy  0  1  0  0  0  0  0  1   
3       mild nonproliferative 

In [None]:
# print(df_base_classifier.head())

**BASE CLASSIFIER**

['ID', 'Patient Age', 'Patient Sex', 'Left-Fundus', 'Right-Fundus',
       'Left-Diagnostic Keywords', 'Right-Diagnostic Keywords', 'N', 'D', 'G',
       'C', 'A', 'H', 'M', 'O', 'filepath', 'labels', 'target', 'filename']

# CONCATENATION

In [22]:
# Step 1: Read the CSV files
# The first is the base-classifier metadata table; the second is the cataract
# path/label table written earlier in this notebook.
df_base_classifier = pd.read_csv(r"classifier_one_data/full_df.csv")
df_cataract_classifier = pd.read_csv(r"cataract_data/cataract_dataset.csv")

In [23]:
# Step 2: Ensure consistent column names
# Both frames should expose 'image_path' and 'label'. The base table stores them
# as 'filepath' / 'labels'; the cataract CSV already has 'image_path' and
# 'label', so its mapping leaves the columns as they are.
df_base_classifier = df_base_classifier.rename(
    columns={'filepath': 'image_path', 'labels': 'label'}
)
df_cataract_classifier = df_cataract_classifier.rename(
    columns={'image_path': 'image_path', 'labels': 'label'}
)


In [24]:
# Step 3: Tag every row with the dataset it came from, so the origin is still
# known after the two tables are stacked together.
df_base_classifier = df_base_classifier.assign(source='classifier_one')
df_cataract_classifier = df_cataract_classifier.assign(source='cataract')


In [25]:
# Keep only the three columns the cataract table also has.
# `.copy()` makes this an independent DataFrame, so the later in-place update of
# its 'image_path' column does not raise SettingWithCopyWarning or depend on
# pandas' view-versus-copy behaviour.
df_base_classifier_final = df_base_classifier[['image_path', 'label', 'source']].copy()

In [26]:
# Show the (rows, columns) shape of each table, base table first.
print(df_base_classifier_final.shape, df_cataract_classifier.shape, sep='\n')

(6392, 3)
(2112, 3)


In [27]:
# Prefix every stored image path with its dataset folder.
# The prefix is added only when it is missing, so re-running this cell does not
# stack it twice (e.g. 'cataract_data/cataract_data/...').
# NOTE(review): the base-classifier paths start with '../input/...', so the
# prefixed value contains a '..' segment — confirm the images really live there.
def _add_dir_prefix(path, prefix):
    """Return `path` with `prefix` prepended unless it already starts with it."""
    return path if path.startswith(prefix) else prefix + path


df_base_classifier_final.loc[:, 'image_path'] = df_base_classifier_final['image_path'].apply(
    lambda path: _add_dir_prefix(path, 'classifier_one_data/')
)
df_cataract_classifier.loc[:, 'image_path'] = df_cataract_classifier['image_path'].apply(
    lambda path: _add_dir_prefix(path, 'cataract_data/')
)

In [29]:
# Display the first base-classifier path after prefixing (row label 0).
df_base_classifier_final.loc[0, 'image_path']

'classifier_one_data/../input/ocular-disease-recognition-odir5k/ODIR-5K/Training Images/0_right.jpg'

In [30]:
# Display the first cataract path after prefixing (row label 0).
df_cataract_classifier.loc[0, 'image_path']

'cataract_data/cataract/2124_left.jpg'

In [32]:
# Step 4: Concatenate the two DataFrames
df_combined = pd.concat([df_base_classifier_final, df_cataract_classifier], ignore_index=True)

In [35]:
# Show the last five rows of each source table, cataract table first.
print(df_cataract_classifier.tail(), df_base_classifier_final.tail(), sep='\n')

                               image_path   label    source
2107  cataract_data/normal/2500_right.jpg  Normal  cataract
2108   cataract_data/normal/3353_left.jpg  Normal  cataract
2109   cataract_data/normal/3352_left.jpg  Normal  cataract
2110   cataract_data/normal/2554_left.jpg  Normal  cataract
2111   cataract_data/normal/2732_left.jpg  Normal  cataract
                                             image_path  label          source
6387  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one
6388  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one
6389  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one
6390  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one
6391  classifier_one_data/../input/ocular-disease-re...  ['H']  classifier_one


In [34]:
# Preview the first five rows of the combined table.
print(df_combined.head())

                                          image_path  label          source
0  classifier_one_data/../input/ocular-disease-re...  ['N']  classifier_one
1  classifier_one_data/../input/ocular-disease-re...  ['N']  classifier_one
2  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one
3  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one
4  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one


In [36]:
# Expected row count after concatenation, taken from the two source frames
# rather than hard-coded numbers so it stays correct if either dataset changes.
print(len(df_base_classifier_final) + len(df_cataract_classifier))

8504


In [37]:
# Actual (rows, columns) shape of the combined table, to compare with the expected row count.
print(df_combined.shape)

(8504, 3)


In [38]:
# Distinct label strings in the combined table, in order of first appearance.
unique_labels = pd.unique(df_combined['label'])
print("Unique label values:", unique_labels)


Unique label values: ["['N']" "['D']" "['O']" "['M']" "['H']" "['C']" "['A']" "['G']"
 'Cataract' 'Normal']


In [39]:
# Translate the base classifier's bracketed single-letter codes into readable
# class names in one pass. 'Cataract' and 'Normal' are the same strings the
# cataract dataset already uses, so both sources end up sharing those labels.
df_combined['label'] = df_combined['label'].replace({
    "['C']": 'Cataract',
    "['N']": 'Normal',
    "['D']": 'Diabetic Retinopathy',
    "['O']": 'Other',
    "['M']": 'Pathological Myopia',
    "['H']": 'Hypertension',
    "['G']": 'Glaucoma',
    "['A']": 'Age related Macular Degeneration',
})



In [40]:
# Re-list the distinct labels to check that only readable class names remain.
unique_labels = pd.unique(df_combined['label'])
print("Unique label values:", unique_labels)


Unique label values: ['Normal' 'Diabetic Retinopathy' 'Other' 'Pathological Myopia'
 'Hypertension' 'Cataract' 'Age related Macular Degeneration' 'Glaucoma']


In [None]:
# # Step 5: Add the folder path to the 'image_path' column for each dataset
# df_base_classifier_final['image_path'] = df_base_classifier_final['image_path'].apply(lambda x: '/content/drive/My Drive/data/classifier_one_data/' + x)
# df_cataract_classifier['image_path'] = df_cataract_classifier['image_path'].apply(lambda x: '/content/drive/My Drive/data/cataract_data/' + x)


In [41]:
# Preview the first five rows of the cataract table (paths now carry the dataset-folder prefix).
print(df_cataract_classifier.head())

                                image_path     label    source
0     cataract_data/cataract/2124_left.jpg  Cataract  cataract
1   cataract_data/cataract/_393_746807.jpg  Cataract  cataract
2     cataract_data/cataract/2125_left.jpg  Cataract  cataract
3  cataract_data/cataract/_182_9646286.jpg  Cataract  cataract
4   cataract_data/cataract/_245_498355.jpg  Cataract  cataract


In [42]:
# Preview the first five rows of the base table; its labels keep the raw
# bracketed codes because only df_combined was relabelled.
print(df_base_classifier_final.head())

                                          image_path  label          source
0  classifier_one_data/../input/ocular-disease-re...  ['N']  classifier_one
1  classifier_one_data/../input/ocular-disease-re...  ['N']  classifier_one
2  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one
3  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one
4  classifier_one_data/../input/ocular-disease-re...  ['D']  classifier_one


In [43]:
# Check the concatenated DataFrame: first five rows, labels now human-readable.
print(df_combined.head())


                                          image_path                 label  \
0  classifier_one_data/../input/ocular-disease-re...                Normal   
1  classifier_one_data/../input/ocular-disease-re...                Normal   
2  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
3  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
4  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   

           source  
0  classifier_one  
1  classifier_one  
2  classifier_one  
3  classifier_one  
4  classifier_one  


In [44]:
# Last five rows of the combined table; these come from the cataract dataset.
print(df_combined.tail())

                               image_path   label    source
8499  cataract_data/normal/2500_right.jpg  Normal  cataract
8500   cataract_data/normal/3353_left.jpg  Normal  cataract
8501   cataract_data/normal/3352_left.jpg  Normal  cataract
8502   cataract_data/normal/2554_left.jpg  Normal  cataract
8503   cataract_data/normal/2732_left.jpg  Normal  cataract


In [46]:
# Step 6: Write the combined table to a new CSV file (row index omitted),
# then confirm on stdout.
df_combined.to_csv(path_or_buf=r"base_cat_df_combined.csv", index=False)
print("Combined CSV file saved.")


Combined CSV file saved.


In [48]:
# Write the combined table a second time, under the name 'concat1.csv'.
# NOTE(review): this rebinds `csv_path`, which earlier held the cataract CSV
# path, and repeats the 'base_cat_df_combined.csv' export with identical
# content — confirm both files are needed.
csv_path = 'concat1.csv'
df_combined.to_csv(csv_path, index=False)
