In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras.preprocessing import image
import os
import pandas as pd


# Glaucoma dataset

# CONCATENATION

In [3]:
# Step 1: load the table produced by an earlier combination step. The file name
# suggests it already holds the base, cataract and DR datasets — TODO confirm
# against the notebook that wrote it.
df_base_cat_dr = pd.read_csv(r"base_cat_dr_df_combined.csv")

In [5]:
# Spot-check the first stored path with a single label-based lookup.
df_base_cat_dr.loc[0, 'image_path']

'classifier_one_data/../input/ocular-disease-recognition-odir5k/ODIR-5K/Training Images/0_right.jpg'

In [4]:
# Spot-check a row from further down the table.
# NOTE(review): the value shown for this row has no file extension — verify
# these paths resolve before images are loaded from them.
df_base_cat_dr.loc[36000, 'image_path']

'DR_dataset_archive/resized_train_cropped/resized_train_cropped/34830_left'

In [6]:
# Load the glaucoma annotation table (path is relative to the working directory).
df_g_classifier = pd.read_csv(r"glaucoma_data/archive/glaucoma.csv")

In [7]:
# Step 2: align the glaucoma column names with the combined table:
# 'Filename' becomes 'image_path' and 'Glaucoma' becomes 'label'.
df_g_classifier = df_g_classifier.rename(
    columns={'Filename': 'image_path', 'Glaucoma': 'label'}
)

In [8]:
# Preview the first five rows after the rename.
df_g_classifier.iloc[:5]

Unnamed: 0,image_path,ExpCDR,Eye,Set,label
0,001.jpg,0.7097,OD,A,0
1,002.jpg,0.6953,OS,A,0
2,003.jpg,0.9629,OS,A,0
3,004.jpg,0.7246,OD,A,0
4,005.jpg,0.6138,OS,A,0


In [9]:
# Discard the glaucoma-only metadata columns that the combined table does not carry.
# Reassigning instead of using inplace=True, together with errors='ignore', keeps
# the cell re-runnable: a second execution no longer raises KeyError because the
# columns are already gone.
df_g_classifier = df_g_classifier.drop(columns=['ExpCDR', 'Eye', 'Set'], errors='ignore')

In [10]:
# Preview the first five rows once the metadata columns are removed.
df_g_classifier.iloc[:5]

Unnamed: 0,image_path,label
0,001.jpg,0
1,002.jpg,0
2,003.jpg,0
3,004.jpg,0
4,005.jpg,0


In [11]:
# Step 3: tag every row with the dataset it came from, so rows remain
# traceable to their origin after concatenation.
df_g_classifier = df_g_classifier.assign(source='glaucoma')

In [12]:
# Keep only the three columns shared with the other datasets, in a fixed order.
# `.copy()` makes this an independent frame, so later assignments to it (such as
# rewriting `image_path`) cannot trigger pandas' SettingWithCopyWarning.
df_g_classifier_final = df_g_classifier[['image_path', 'label', 'source']].copy()

In [13]:
# Preview the first five rows of the trimmed glaucoma frame.
df_g_classifier_final.iloc[:5]

Unnamed: 0,image_path,label,source
0,001.jpg,0,glaucoma
1,002.jpg,0,glaucoma
2,003.jpg,0,glaucoma
3,004.jpg,0,glaucoma
4,005.jpg,0,glaucoma


In [14]:
# Report (rows, columns) of the glaucoma frame.
print((len(df_g_classifier_final.index), len(df_g_classifier_final.columns)))

(650, 3)


In [15]:
# Inspect the raw file names before the directory prefix is added.
df_g_classifier_final.loc[:, 'image_path']

0      001.jpg
1      002.jpg
2      003.jpg
3      004.jpg
4      005.jpg
        ...   
645    646.jpg
646    647.jpg
647    648.jpg
648    649.jpg
649    650.jpg
Name: image_path, Length: 650, dtype: object

In [16]:
# Turn the bare image file names into paths that include the image directory.
# Only rows that do not already start with the directory are rewritten, so
# re-running this cell no longer prepends the prefix a second time. The
# vectorised string concatenation replaces the per-row lambda.
GLAUCOMA_IMAGE_DIR = 'glaucoma_data/archive/ORIGA/ORIGA/Images/'
needs_prefix = ~df_g_classifier_final['image_path'].str.startswith(GLAUCOMA_IMAGE_DIR)
df_g_classifier_final.loc[needs_prefix, 'image_path'] = (
    GLAUCOMA_IMAGE_DIR + df_g_classifier_final.loc[needs_prefix, 'image_path']
)


In [17]:
# Confirm the directory prefix is now present on the first five rows.
df_g_classifier_final.iloc[:5]

Unnamed: 0,image_path,label,source
0,glaucoma_data/archive/ORIGA/ORIGA/Images/001.jpg,0,glaucoma
1,glaucoma_data/archive/ORIGA/ORIGA/Images/002.jpg,0,glaucoma
2,glaucoma_data/archive/ORIGA/ORIGA/Images/003.jpg,0,glaucoma
3,glaucoma_data/archive/ORIGA/ORIGA/Images/004.jpg,0,glaucoma
4,glaucoma_data/archive/ORIGA/ORIGA/Images/005.jpg,0,glaucoma


In [18]:
# Show the first full path untruncated.
df_g_classifier_final.loc[0, 'image_path']

'glaucoma_data/archive/ORIGA/ORIGA/Images/001.jpg'

In [19]:
# Step 4: stack the glaucoma rows underneath the existing combined table and
# renumber the index from zero so row labels stay unique.
df_combined = pd.concat(
    objs=[df_base_cat_dr, df_g_classifier_final],
    axis=0,
    ignore_index=True,
)

In [20]:
# Last five rows of the pre-existing combined table.
print(df_base_cat_dr.iloc[-5:])

                                              image_path    label source
43607  DR_dataset_archive/resized_train_cropped/resiz...   Normal     dr
43608  DR_dataset_archive/resized_train_cropped/resiz...   Normal     dr
43609  DR_dataset_archive/resized_train_cropped/resiz...   Normal     dr
43610  DR_dataset_archive/resized_train_cropped/resiz...   Normal     dr
43611  DR_dataset_archive/resized_train_cropped/resiz...  Mild DR     dr


In [21]:
# Last five rows of the glaucoma frame (labels are still numeric here).
print(df_g_classifier_final.iloc[-5:])

                                           image_path  label    source
645  glaucoma_data/archive/ORIGA/ORIGA/Images/646.jpg      1  glaucoma
646  glaucoma_data/archive/ORIGA/ORIGA/Images/647.jpg      1  glaucoma
647  glaucoma_data/archive/ORIGA/ORIGA/Images/648.jpg      1  glaucoma
648  glaucoma_data/archive/ORIGA/ORIGA/Images/649.jpg      0  glaucoma
649  glaucoma_data/archive/ORIGA/ORIGA/Images/650.jpg      1  glaucoma


In [22]:
# First five rows of the concatenated table.
print(df_combined.iloc[:5])

                                          image_path                 label  \
0  classifier_one_data/../input/ocular-disease-re...                Normal   
1  classifier_one_data/../input/ocular-disease-re...                Normal   
2  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
3  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
4  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   

           source  
0  classifier_one  
1  classifier_one  
2  classifier_one  
3  classifier_one  
4  classifier_one  


In [23]:
# Last five rows of the concatenated table; these are the appended glaucoma rows.
print(df_combined.iloc[-5:])

                                             image_path label    source
44257  glaucoma_data/archive/ORIGA/ORIGA/Images/646.jpg     1  glaucoma
44258  glaucoma_data/archive/ORIGA/ORIGA/Images/647.jpg     1  glaucoma
44259  glaucoma_data/archive/ORIGA/ORIGA/Images/648.jpg     1  glaucoma
44260  glaucoma_data/archive/ORIGA/ORIGA/Images/649.jpg     0  glaucoma
44261  glaucoma_data/archive/ORIGA/ORIGA/Images/650.jpg     1  glaucoma


In [24]:
# Manual row-count check using hand-typed totals (presumably the sizes of two
# of the source datasets — TODO confirm which ones).
print(6392+2112)

8504


In [25]:
# Running total: the previous sum plus a hand-typed 35108, which the inline note
# attributes to the DR data.
print(8504+35108) # combined + dr

43612


In [26]:
# Expected size of the concatenation: rows of the existing combined table plus
# rows of the glaucoma frame. The value is derived from the frames themselves
# rather than hand-typed totals, so it stays correct if an input changes.
print(len(df_base_cat_dr) + len(df_g_classifier_final))  # (combined + dr) + glaucoma

44262


In [27]:
# Actual (rows, columns) of the concatenated table, to compare with the expected total.
print((len(df_combined.index), len(df_combined.columns)))

(44262, 3)


In [28]:
# List the distinct label values in order of first appearance; this exposes any
# labels that are not yet in string form.
unique_labels = pd.unique(df_combined['label'])
print("Unique label values:", unique_labels)


Unique label values: ['Normal' 'Diabetic Retinopathy' 'Other' 'Pathological Myopia'
 'Hypertension' 'Cataract' 'Age related Macular Degeneration' 'Glaucoma'
 'Mild DR' 'Moderate DR' 'Proliferative DR' 'Severe DR' 0 1]


In [29]:
# The glaucoma rows carry numeric labels (0/1); translate them to the string
# classes used by the rest of the table. The mapping is limited to rows whose
# source is 'glaucoma', so a numeric label from any other source can never be
# rewritten by accident, and one dict replaces the two sequential passes.
is_glaucoma_row = df_combined['source'] == 'glaucoma'
df_combined.loc[is_glaucoma_row, 'label'] = (
    df_combined.loc[is_glaucoma_row, 'label'].replace({0: 'Normal', 1: 'Glaucoma'})
)

In [30]:
# Re-list the distinct labels to confirm the numeric values are gone.
unique_labels = df_combined.loc[:, 'label'].unique()
print(f"Unique label values: {unique_labels}")


Unique label values: ['Normal' 'Diabetic Retinopathy' 'Other' 'Pathological Myopia'
 'Hypertension' 'Cataract' 'Age related Macular Degeneration' 'Glaucoma'
 'Mild DR' 'Moderate DR' 'Proliferative DR' 'Severe DR']


In [None]:
# # Step 5: Add the folder path to the 'image_path' column for each dataset
# df_base_classifier_final['image_path'] = df_base_classifier_final['image_path'].apply(lambda x: '/content/drive/My Drive/data/classifier_one_data/' + x)
# df_cataract_classifier['image_path'] = df_cataract_classifier['image_path'].apply(lambda x: '/content/drive/My Drive/data/cataract_data/' + x)


In [31]:
# First five rows of the glaucoma frame.
print(df_g_classifier_final.iloc[:5])

                                         image_path  label    source
0  glaucoma_data/archive/ORIGA/ORIGA/Images/001.jpg      0  glaucoma
1  glaucoma_data/archive/ORIGA/ORIGA/Images/002.jpg      0  glaucoma
2  glaucoma_data/archive/ORIGA/ORIGA/Images/003.jpg      0  glaucoma
3  glaucoma_data/archive/ORIGA/ORIGA/Images/004.jpg      0  glaucoma
4  glaucoma_data/archive/ORIGA/ORIGA/Images/005.jpg      0  glaucoma


In [32]:
# First five rows of the pre-existing combined table.
print(df_base_cat_dr.iloc[:5])

                                          image_path                 label  \
0  classifier_one_data/../input/ocular-disease-re...                Normal   
1  classifier_one_data/../input/ocular-disease-re...                Normal   
2  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
3  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
4  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   

           source  
0  classifier_one  
1  classifier_one  
2  classifier_one  
3  classifier_one  
4  classifier_one  


In [33]:
# Sanity-check the top of the concatenated table.
print(df_combined.iloc[:5])


                                          image_path                 label  \
0  classifier_one_data/../input/ocular-disease-re...                Normal   
1  classifier_one_data/../input/ocular-disease-re...                Normal   
2  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
3  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
4  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   

           source  
0  classifier_one  
1  classifier_one  
2  classifier_one  
3  classifier_one  
4  classifier_one  


In [34]:
# Sanity-check the bottom of the concatenated table, where the glaucoma rows sit.
print(df_combined.iloc[-5:])

                                             image_path     label    source
44257  glaucoma_data/archive/ORIGA/ORIGA/Images/646.jpg  Glaucoma  glaucoma
44258  glaucoma_data/archive/ORIGA/ORIGA/Images/647.jpg  Glaucoma  glaucoma
44259  glaucoma_data/archive/ORIGA/ORIGA/Images/648.jpg  Glaucoma  glaucoma
44260  glaucoma_data/archive/ORIGA/ORIGA/Images/649.jpg    Normal  glaucoma
44261  glaucoma_data/archive/ORIGA/ORIGA/Images/650.jpg  Glaucoma  glaucoma


In [36]:
# Step 6: persist the combined table as CSV in the working directory.
# index=False keeps the row index out of the file so it is not written as an
# extra column.
df_combined.to_csv(r"final_df_combined.csv", index=False)
print("Combined CSV file saved.")

Combined CSV file saved.


In [38]:
# NOTE(review): this writes the same frame a second time under a different
# name ('final_df_combined.csv' is written by an earlier cell). Confirm whether
# 'concat3.csv' is still consumed anywhere before removing this duplicate.
csv_path = 'concat3.csv'
df_combined.to_csv(csv_path, index=False)

In [39]:
# Read the saved file back so the checks below run against what is actually on
# disk, not the in-memory frame.
df1 = pd.read_csv(r"final_df_combined.csv")

In [40]:
# Class distribution of the reloaded table.
# NOTE(review): the counts are heavily skewed towards 'Normal', and both a generic
# 'Diabetic Retinopathy' label and graded '* DR' labels are present — decide
# whether these should be merged before training.
df1.loc[:, 'label'].value_counts()

label
Normal                              30231
Moderate DR                          5288
Mild DR                              2438
Diabetic Retinopathy                 1608
Cataract                             1331
Severe DR                             872
Other                                 708
Proliferative DR                      708
Glaucoma                              452
Age related Macular Degeneration      266
Pathological Myopia                   232
Hypertension                          128
Name: count, dtype: int64

In [41]:
# Non-null entries per column; equal counts mean no column lost values in the round trip.
df1.notna().sum()

image_path    44262
label         44262
source        44262
dtype: int64

In [42]:
# First path in the reloaded table.
df1.loc[0, 'image_path']

'classifier_one_data/../input/ocular-disease-recognition-odir5k/ODIR-5K/Training Images/0_right.jpg'

In [43]:
# Last path in the reloaded table (row label 44261).
df1.loc[44261, 'image_path']

'glaucoma_data/archive/ORIGA/ORIGA/Images/650.jpg'

In [44]:
# A path from the middle of the reloaded table.
# NOTE(review): the value shown for this row has no file extension — verify
# these paths resolve before images are loaded from them.
df1.loc[36000, 'image_path']

'DR_dataset_archive/resized_train_cropped/resized_train_cropped/34830_left'