In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Recall, Precision
from tensorflow.keras.preprocessing import image
import os


# DR dataset

# CONCATENATION

In [2]:
# Step 1: load the already-merged base-classifier + cataract table.
# Its rows carry 'image_path', 'label' and 'source' columns; the separate
# per-dataset CSVs are not read in this notebook.
df_base_cat = pd.read_csv("base_cat_df_combined.csv")

In [3]:
# Load the DR (diabetic retinopathy) training-label CSV.
df_dr_classifier = pd.read_csv("DR_dataset_archive/trainLabels_cropped.csv")

In [4]:
# Step 2: align the DR column names with the base/cataract table, which
# stores image locations under 'image_path' and classes under 'label'.
df_dr_classifier = df_dr_classifier.rename(
    columns={'image': 'image_path', 'level': 'label'}
)

In [5]:
# Preview after renaming: 'image_path' and 'label' are in place, but the
# stray 'Unnamed: ...' columns are still present at this point.
df_dr_classifier.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,image_path,label
0,0,0,10_left,0
1,1,1,10_right,0
2,2,2,13_left,0
3,3,3,13_right,0
4,4,4,15_left,1


In [6]:
# Discard the leftover 'Unnamed: ...' columns.
# errors='ignore' keeps this cell safe to re-run: without it a second
# execution raises KeyError because the columns are already gone. The result
# is reassigned instead of mutating the frame with inplace=True.
df_dr_classifier = df_dr_classifier.drop(
    columns=['Unnamed: 0.1', 'Unnamed: 0'], errors='ignore'
)

In [7]:
# Re-check the frame after dropping the stray columns.
df_dr_classifier.head()

Unnamed: 0,image_path,label
0,10_left,0
1,10_right,0
2,13_left,0
3,13_right,0
4,15_left,1


In [8]:
# Step 3: tag every DR row with its origin so the rows stay identifiable
# once they are merged with rows from the other sources.
df_dr_classifier = df_dr_classifier.assign(source='dr')

In [9]:
# Preview with the new 'source' column.
df_dr_classifier.head()

Unnamed: 0,image_path,label,source
0,10_left,0,dr
1,10_right,0,dr
2,13_left,0,dr
3,13_right,0,dr
4,15_left,1,dr


In [10]:
# Row/column count of the DR frame before merging.
print(df_dr_classifier.shape)

(35108, 3)


In [11]:
# Raw image identifiers (no directory, no file extension) before the folder
# prefix is added.
print(df_dr_classifier['image_path'])

0            10_left
1           10_right
2            13_left
3           13_right
4            15_left
            ...     
35103    44347_right
35104     44348_left
35105    44348_right
35106     44349_left
35107    44349_right
Name: image_path, Length: 35108, dtype: object


In [12]:
# Turn the bare DR image identifiers into folder-relative paths.
# Only rows that do not already start with the folder are touched, so running
# this cell twice does not stack the prefix twice.
# NOTE(review): the resulting paths have no file extension
# (e.g. '.../10_left'), whereas the cataract rows end in '.jpg' -- confirm
# that whatever loads these images appends the extension.
DR_IMAGE_DIR = 'DR_dataset_archive/resized_train_cropped/resized_train_cropped/'
needs_prefix = ~df_dr_classifier['image_path'].str.startswith(DR_IMAGE_DIR, na=False)
df_dr_classifier.loc[needs_prefix, 'image_path'] = (
    DR_IMAGE_DIR + df_dr_classifier.loc[needs_prefix, 'image_path']
)


In [13]:
# Preview after prefixing; the long paths are truncated in the rendered table.
df_dr_classifier.head()

Unnamed: 0,image_path,label,source
0,DR_dataset_archive/resized_train_cropped/resiz...,0,dr
1,DR_dataset_archive/resized_train_cropped/resiz...,0,dr
2,DR_dataset_archive/resized_train_cropped/resiz...,0,dr
3,DR_dataset_archive/resized_train_cropped/resiz...,0,dr
4,DR_dataset_archive/resized_train_cropped/resiz...,1,dr


In [14]:
# Show the complete path stored under index label 0, since the table preview
# elides long strings.
df_dr_classifier.at[0, 'image_path']

'DR_dataset_archive/resized_train_cropped/resized_train_cropped/10_left'

In [15]:
# Step 4: stack the base + cataract rows and the DR rows into one frame,
# renumbering the index from 0 so it is continuous across both inputs.
df_combined = pd.concat(
    objs=[df_base_cat, df_dr_classifier],
    axis=0,
    ignore_index=True,
)

In [16]:
# Last rows of the base + cataract table; its final index (8503) implies
# 8504 rows.
print(df_base_cat.tail())

                               image_path   label    source
8499  cataract_data/normal/2500_right.jpg  Normal  cataract
8500   cataract_data/normal/3353_left.jpg  Normal  cataract
8501   cataract_data/normal/3352_left.jpg  Normal  cataract
8502   cataract_data/normal/2554_left.jpg  Normal  cataract
8503   cataract_data/normal/2732_left.jpg  Normal  cataract


In [17]:
# Last rows of the DR table; labels are still numeric at this point.
print(df_dr_classifier.tail())

                                              image_path  label source
35103  DR_dataset_archive/resized_train_cropped/resiz...      0     dr
35104  DR_dataset_archive/resized_train_cropped/resiz...      0     dr
35105  DR_dataset_archive/resized_train_cropped/resiz...      0     dr
35106  DR_dataset_archive/resized_train_cropped/resiz...      0     dr
35107  DR_dataset_archive/resized_train_cropped/resiz...      1     dr


In [18]:
# First rows of the merged frame; they come from the 'classifier_one' source.
print(df_combined.head())

                                          image_path                 label  \
0  classifier_one_data/../input/ocular-disease-re...                Normal   
1  classifier_one_data/../input/ocular-disease-re...                Normal   
2  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
3  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
4  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   

           source  
0  classifier_one  
1  classifier_one  
2  classifier_one  
3  classifier_one  
4  classifier_one  


In [19]:
# Last rows of the merged frame are the DR rows; their labels are still
# numeric here.
print(df_combined.tail())

                                              image_path label source
43607  DR_dataset_archive/resized_train_cropped/resiz...     0     dr
43608  DR_dataset_archive/resized_train_cropped/resiz...     0     dr
43609  DR_dataset_archive/resized_train_cropped/resiz...     0     dr
43610  DR_dataset_archive/resized_train_cropped/resiz...     0     dr
43611  DR_dataset_archive/resized_train_cropped/resiz...     1     dr


In [20]:
# Hand-computed row count of the base + cataract table; 8504 agrees with the
# last index (8503) printed for df_base_cat.
# NOTE(review): 6392 and 2112 are presumably the sizes of the two datasets
# that were merged into df_base_cat -- confirm.
print(6392+2112)

8504


In [21]:
# Expected size of the merged frame: 8504 base + cataract rows plus the
# 35108 DR rows.
print(8504+35108) # combined + dr

43612


In [22]:
# Actual size of the merged frame; the row count should equal the
# hand-computed 43612.
print(df_combined.shape)

(43612, 3)


In [23]:
# List the distinct label values of the merged frame. Before relabelling,
# this mixes the text classes with the numeric DR values.
unique_labels = df_combined['label'].unique()
print(f"Unique label values: {unique_labels}")


Unique label values: ['Normal' 'Diabetic Retinopathy' 'Other' 'Pathological Myopia'
 'Hypertension' 'Cataract' 'Age related Macular Degeneration' 'Glaucoma' 0
 1 2 4 3]


In [24]:
# Translate the numeric DR values (0-4) into text labels in a single pass.
# Value 0 is folded into the existing 'Normal' class; the text labels from
# the other sources match none of the keys and are left as they are.
# NOTE(review): the other sources already use a generic
# 'Diabetic Retinopathy' label, which stays separate from the graded
# '... DR' classes -- confirm that this is intended.
dr_grade_names = {
    0: 'Normal',
    1: 'Mild DR',
    2: 'Moderate DR',
    3: 'Severe DR',
    4: 'Proliferative DR',
}
df_combined['label'] = df_combined['label'].replace(dr_grade_names)

In [25]:
# Re-list the distinct labels to verify that no numeric values remain after
# the relabelling.
unique_labels = df_combined['label'].unique()
print(f"Unique label values: {unique_labels}")

Unique label values: ['Normal' 'Diabetic Retinopathy' 'Other' 'Pathological Myopia'
 'Hypertension' 'Cataract' 'Age related Macular Degeneration' 'Glaucoma'
 'Mild DR' 'Moderate DR' 'Proliferative DR' 'Severe DR']


In [None]:
# Step 5 (disabled, kept for reference): prefix the image paths of the base and cataract frames with their data folders.
# Not executed in this notebook -- df_base_cat is loaded with folder-prefixed paths already.
# df_base_classifier_final['image_path'] = df_base_classifier_final['image_path'].apply(lambda x: '/content/drive/My Drive/data/classifier_one_data/' + x)
# df_cataract_classifier['image_path'] = df_cataract_classifier['image_path'].apply(lambda x: '/content/drive/My Drive/data/cataract_data/' + x)


In [28]:
# First rows of the base + cataract input table, for comparison with the
# merged frame.
print(df_base_cat.head())

                                          image_path                 label  \
0  classifier_one_data/../input/ocular-disease-re...                Normal   
1  classifier_one_data/../input/ocular-disease-re...                Normal   
2  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
3  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
4  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   

           source  
0  classifier_one  
1  classifier_one  
2  classifier_one  
3  classifier_one  
4  classifier_one  


In [29]:
# Check the concatenated DataFrame: its leading rows are the
# 'classifier_one' rows, which already had text labels.
print(df_combined.head())


                                          image_path                 label  \
0  classifier_one_data/../input/ocular-disease-re...                Normal   
1  classifier_one_data/../input/ocular-disease-re...                Normal   
2  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
3  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   
4  classifier_one_data/../input/ocular-disease-re...  Diabetic Retinopathy   

           source  
0  classifier_one  
1  classifier_one  
2  classifier_one  
3  classifier_one  
4  classifier_one  


In [30]:
# The trailing DR rows now show text labels ('Normal', 'Mild DR') instead of
# the numeric values.
print(df_combined.tail())

                                              image_path    label source
43607  DR_dataset_archive/resized_train_cropped/resiz...   Normal     dr
43608  DR_dataset_archive/resized_train_cropped/resiz...   Normal     dr
43609  DR_dataset_archive/resized_train_cropped/resiz...   Normal     dr
43610  DR_dataset_archive/resized_train_cropped/resiz...   Normal     dr
43611  DR_dataset_archive/resized_train_cropped/resiz...  Mild DR     dr


In [31]:
# Step 6: persist the merged base + cataract + DR table next to the notebook.
# The positional index is left out of the file.
df_combined.to_csv("base_cat_dr_df_combined.csv", index=False)
print("Combined CSV file saved.")


Combined CSV file saved.


In [32]:
# Export the same merged frame a second time under another file name.
# NOTE(review): the content is identical to base_cat_dr_df_combined.csv --
# confirm that both files are actually needed.
csv_path = 'concat2.csv'
df_combined.to_csv(path_or_buf=csv_path, index=False)
