## Import libraries

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import shutil
import glob

In [4]:
def match_string_with_patterns(patterns, target_string):
    """
    Checks if the target string matches any pattern in the given list.

    Args:
        patterns (list): List of string patterns.
        target_string (str): The string to check against the patterns.

    Returns:
        bool: True if the target string matches any pattern, False otherwise.
    """
    for pattern in patterns:
        if pattern in target_string:
            return True
    return False

# Normal

In [9]:
IRFundus_records = "/home/ahmad/ahmad_experiments/retinal_data/IRFundus/irfundusset_master_catalogue.csv"

data_path_dict = {
    "stare": '/home/ahmad/ahmad_experiments/retinal_data/Stare',
    "idrid" : '/home/ahmad/ahmad_experiments/retinal_data/idrid',
    "kaggle_dr_train" : "/home/ahmad/ahmad_experiments/retinal_data/Diabetic_Retinopathy/diabetic-retinopathy-detection/train",
    "kaggle_dr_test" : "/home/ahmad/ahmad_experiments/retinal_data/Diabetic_Retinopathy/diabetic-retinopathy-detection/test",
    "hrf_path" : "/home/ahmad/ahmad_experiments/retinal_data/HRF/images",
    "odir" : "/home/ahmad/ahmad_experiments/retinal_data/ODIRD/archive/ODIR-5K/ODIR-5K/",
    "kaggle_1000" : "/home/ahmad/ahmad_experiments/retinal_data/kaggle_1000",
    "papila" : "/home/ahmad/ahmad_experiments/retinal_data/Papila/FundusImages",
    "chase" : "/home/ahmad/ahmad_experiments/retinal_data/chase",
    "five_path_train" : "/home/ahmad/ahmad_experiments/retinal_data/FIVES/train",
    "five_path_test" : "/home/ahmad/ahmad_experiments/retinal_data/FIVES/test",
    "cataracts_path" : "/home/ahmad/ahmad_experiments/retinal_data/Cataract/dataset"

}
spie_normal_path = '/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed'

In [10]:
df = pd.read_csv(IRFundus_records)

In [11]:
df.head(5)

Unnamed: 0,image_id,image_name,cohort,is_normal,src_is_normal,src_condition,is_left_or_right_eye,is_onh_or_macula_centered,image_fpath,src_train_test_split
0,deee356ae2c41b92534189365e3c905e3aa78345e177b9...,im0001,STARE,Not normal,Not Normal,BDR/NPDR,Left Eye,MAC,im0001.ppm,
1,5ef74a8950819adac1307a4ce5dd6e4836f99445fd5705...,im0002,STARE,Not normal,Not Normal,"ASR,CNV",Left Eye,MAC,im0002.ppm,
2,c901d29e1b92d6cbaf693ce951877cccbc9075b7c1b412...,im0003,STARE,Not normal,Not Normal,UNDEF,Left Eye,MAC,im0003.ppm,
3,cbf2ed574244168dc417ab00b2144347946dfeaea55cf5...,im0004,STARE,Not normal,Not Normal,"CRAO,UNDEF",Right Eye,MAC,im0004.ppm,
4,f268e641544e94a3e5fde96dfecf7e9f055eb673469139...,im0005,STARE,Not normal,Not Normal,"CRAO,CRVO",Right Eye,MAC,im0005.ppm,


In [12]:
normal_df = df.loc[(df['is_normal'] == "Normal") & (df['is_onh_or_macula_centered'] != 'ONH')]

In [13]:
normal_df

Unnamed: 0,image_id,image_name,cohort,is_normal,src_is_normal,src_condition,is_left_or_right_eye,is_onh_or_macula_centered,image_fpath,src_train_test_split
31,400583b1ac5d9449a6b099a54d2530a9e27bc4264609da...,im0032,STARE,Normal,Normal,NORM,Left Eye,MAC,im0032.ppm,
74,ac059e6893f58e4e56fb4d327917b2d03df62f2c83a44b...,im0076,STARE,Normal,Normal,NORM,Right Eye,MAC,im0076.ppm,
79,677a106d5c447f55280a77073c6abced9b58625d2b77f1...,im0081,STARE,Normal,Normal,NORM,Left Eye,MAC,im0081.ppm,
80,255e4c9be0ed9b02ed34fa496fcec1601767ed4748a9d3...,im0082,STARE,Normal,Normal,NORM,Right Eye,MAC,im0082.ppm,
115,7eae9aac806fcb9d96fcc1c9773ed877b02ca910e1ea7e...,im0119,STARE,Normal,Normal,NORM,Left Eye,MAC,im0119.ppm,
...,...,...,...,...,...,...,...,...,...,...
45752,4386d7e172510a57e0bf5225196f627c2ccb5c04ca5f3b...,NL_290,Retina Cataracts,Normal,Normal,normal,Left Eye,MAC,1_normal/NL_290.png,
45753,1eea595f2f87a1be12b8fe94c0e8463ae731b27733ec84...,NL_291,Retina Cataracts,Normal,Normal,normal,Right Eye,MAC,1_normal/NL_291.png,
45754,484da8c89ab15ccb602f4444d05e43c1d4dbebbbf49a35...,NL_292,Retina Cataracts,Normal,Normal,normal,Left Eye,MAC,1_normal/NL_292.png,
45755,a7c04557d2ad96bc5274ff3d719d38eddc712b3d810b8d...,NL_293,Retina Cataracts,Normal,Normal,normal,Right Eye,MAC,1_normal/NL_293.png,


In [14]:
for index, rows in normal_df.iterrows():
    if rows['cohort'] == 'STARE':
        image_path = os.path.join(data_path_dict['stare'], rows['image_fpath'])
        shutil.copy2(image_path, spie_normal_path)
    elif rows['cohort'] == 'IDRiD':
        image_path = os.path.join(data_path_dict['idrid'], rows['image_fpath'])
        shutil.copy2(image_path, spie_normal_path)
    elif rows['cohort'] == 'Kaggle1000':
        image_path = os.path.join(data_path_dict['kaggle_1000'], rows['image_fpath'])
        shutil.copy2(image_path, spie_normal_path)
    elif rows['cohort'] == 'PAPILA':
        image_path = os.path.join(data_path_dict['papila'], rows['image_fpath'])
        shutil.copy2(image_path, spie_normal_path)
    elif rows['cohort'] == 'HRF':
        image_path = os.path.join(data_path_dict['hrf_path'], rows['image_fpath'])
        shutil.copy2(image_path, spie_normal_path)
    elif rows['cohort'] == 'FIVE':
        if rows['src_train_test_split'] == 'train':
            image_path = os.path.join(data_path_dict['five_path_train'], rows['image_fpath'])
        else:
            image_path = os.path.join(data_path_dict['five_path_test'], rows['image_fpath'])
        shutil.copy2(image_path, spie_normal_path)
    elif rows['cohort'] == 'EyePACS':
        image_path = os.path.join(data_path_dict['kaggle_dr_train'], rows['image_fpath'])
        if os.path.exists(image_path):
            shutil.copy2(image_path, spie_normal_path)
        else: 
            image_path = os.path.join(data_path_dict['kaggle_dr_test'], rows['image_fpath'])
            shutil.copy2(image_path, spie_normal_path)
    elif rows['cohort'] == 'ODIR':
        image_path = os.path.join(data_path_dict['odir'], rows['image_fpath'])
        shutil.copy2(image_path, spie_normal_path)
    elif rows['cohort'] == 'Retina Cataracts':
        image_path = os.path.join(data_path_dict['cataracts_path'], rows['image_fpath'])
        shutil.copy2(image_path, spie_normal_path)
    
    print(f"{index}: Copied image {rows['image_fpath']} from {rows['cohort']} to {spie_normal_path}")

31: Copied image im0032.ppm from STARE to /home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed
74: Copied image im0076.ppm from STARE to /home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed
79: Copied image im0081.ppm from STARE to /home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed
80: Copied image im0082.ppm from STARE to /home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed
115: Copied image im0119.ppm from STARE to /home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed
116: Copied image im0120.ppm from STARE to /home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed
157: Copied image im0162.ppm from STARE to /home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed
164: Copied image im0170.ppm from STARE to /home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed
184: Copied image im0190.ppm from STARE to /home/ahmad/ahmad

# JustRAIGS

In [2]:
just_raigs_dir_path = '/home/ahmad/ahmad_experiments/retinal_data/JustRAIGS'
just_raigs_csv_path = "/home/ahmad/ahmad_experiments/retinal_data/JustRAIGS/JustRAIGS_Train_labels.csv"
glaucoma_destination_path = "/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Glaucoma"

glaucoma_pattern = f"{just_raigs_dir_path}/*/*"

In [11]:
raigs_csv = pd.read_csv(just_raigs_csv_path)

In [12]:
raigs_csv.head()

Unnamed: 0,Eye ID,Final_label,Fellow Eye ID,Age,Label G1,Label G2,Label G3,G1 ANRS,G1 ANRI,G1 RNFLDS,...,G3 ANRS,G3 ANRI,G3 RNFLDS,G3 RNFLDI,G3 BCLVS,G3 BCLVI,G3 NVT,G3 DH,G3 LD,G3 LC
0,TRAIN000000,NRG,TRAIN081848,48.0,NRG,NRG,,0.0,0.0,0.0,...,,,,,,,,,,
1,TRAIN000001,NRG,TRAIN018312,29.0,NRG,NRG,,0.0,0.0,0.0,...,,,,,,,,,,
2,TRAIN000002,NRG,TRAIN083822,61.0,NRG,NRG,,0.0,0.0,0.0,...,,,,,,,,,,
3,TRAIN000003,NRG,TRAIN055721,70.0,NRG,NRG,,0.0,0.0,0.0,...,,,,,,,,,,
4,TRAIN000004,NRG,TRAIN099283,48.0,NRG,NRG,,0.0,0.0,0.0,...,,,,,,,,,,


In [13]:
rg_glaucoma = raigs_csv.loc[raigs_csv['Final_label'] == 'RG']

In [18]:
rg_glaucoma

Unnamed: 0,Eye ID,Final_label,Fellow Eye ID,Age,Label G1,Label G2,Label G3,G1 ANRS,G1 ANRI,G1 RNFLDS,...,G3 ANRS,G3 ANRI,G3 RNFLDS,G3 RNFLDI,G3 BCLVS,G3 BCLVI,G3 NVT,G3 DH,G3 LD,G3 LC
34,TRAIN000034,RG,,32.0,RG,RG,,1.0,1.0,0.0,...,,,,,,,,,,
60,TRAIN000060,RG,,74.0,RG,RG,,1.0,1.0,0.0,...,,,,,,,,,,
68,TRAIN000068,RG,TRAIN012581,64.0,RG,NRG,RG,1.0,1.0,0.0,...,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
100,TRAIN000100,RG,,77.0,RG,NRG,RG,1.0,1.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
106,TRAIN000106,RG,TRAIN014144,81.0,RG,RG,,1.0,1.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101296,TRAIN101315,RG,TRAIN032604,55.0,RG,RG,,1.0,1.0,0.0,...,,,,,,,,,,
101339,TRAIN101358,RG,TRAIN008388,84.0,RG,RG,,1.0,1.0,0.0,...,,,,,,,,,,
101343,TRAIN101362,RG,TRAIN060851,67.0,RG,RG,,0.0,1.0,0.0,...,,,,,,,,,,
101348,TRAIN101367,RG,,57.0,RG,RG,,0.0,0.0,0.0,...,,,,,,,,,,


In [33]:
eye_ids = rg_glaucoma['Eye ID'].values
print(type(eye_ids))

<class 'numpy.ndarray'>


In [None]:
for count, img in enumerate(glob.glob(glaucoma_pattern)):
    if match_string_with_patterns(eye_ids,img):
        shutil.copy2(img, glaucoma_destination_path)
        print(f'Copied image:{count} {img}')
    else:
        continue



# DR: done


In [3]:
dr_destination_path = '/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/DR'
dr_train_csv = "/home/ahmad/ahmad_experiments/retinal_data/Diabetic_Retinopathy/diabetic-retinopathy-detection/trainLabels.csv"
dr_train_images = "/home/ahmad/ahmad_experiments/retinal_data/Diabetic_Retinopathy/diabetic-retinopathy-detection/train"

dr_csv = pd.read_csv(dr_train_csv)

In [4]:
count = 0
for index, rows in dr_csv.iterrows():
    if rows['level'] != 0:
        image_path = os.path.join(dr_train_images, (rows['image'] + '.jpeg'))
        shutil.copy2(image_path, dr_destination_path)
        count += 1

print(count)


9316


# RFMID


In [45]:
rfmid_train_dir = '/home/ahmad/ahmad_experiments/retinal_data/RFMID/Training_Set/Training'
rfmid_train_csv_path = '/home/ahmad/ahmad_experiments/retinal_data/RFMID/Training_Set/RFMiD_Training_Labels.csv'
normal_destination_path = "/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal_unprocessed"
myopia_destination_path = "/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Myopia"
amd_destination_path = '/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/AMD'

rfmid_csv = pd.read_csv(rfmid_train_csv_path)

# Disease_risk, ARMD : Age related Macular Degeneration, MYA: Myopia 

In [46]:
rfmid_csv.columns

Index(['ID', 'Disease_Risk', 'DR', 'ARMD', 'MH', 'DN', 'MYA', 'BRVO', 'TSLN',
       'ERM', 'LS', 'MS', 'CSR', 'ODC', 'CRVO', 'TV', 'AH', 'ODP', 'ODE', 'ST',
       'AION', 'PT', 'RT', 'RS', 'CRS', 'EDN', 'RPEC', 'MHL', 'RP', 'CWS',
       'CB', 'ODPM', 'PRH', 'MNF', 'HR', 'CRAO', 'TD', 'CME', 'PTCR', 'CF',
       'VH', 'MCA', 'VS', 'BRAO', 'PLQ', 'HPED', 'CL'],
      dtype='object')

## Normal, AMD, MYA

In [48]:
amd = 0
for index, rows in rfmid_csv.iterrows():
    img_file = str(rows['ID']) + '.png'
    img = os.path.join(rfmid_train_dir, img_file)
    if rows['Disease_Risk'] == 0:
        shutil.copy2(img, normal_destination_path)
    else:
        if rows['ARMD'] == 1:
            shutil.copy2(img, amd_destination_path)
            amd +=1
        if rows['MYA'] == 1:
            shutil.copy2(img, myopia_destination_path)


In [49]:
amd

100

# PALM

In [3]:
## Validation
validation_dir = '/home/ahmad/ahmad_experiments/retinal_data/PALM/PALM/Validation/Images'
validation_csv_path = '/home/ahmad/ahmad_experiments/retinal_data/PALM/PALM/Validation/Classification_Labels.xlsx'
validation_csv = pd.read_excel(validation_csv_path)


## Training
train_dir = '/home/ahmad/ahmad_experiments/retinal_data/PALM/PALM/Training/Images'
train_csv_path = '/home/ahmad/ahmad_experiments/retinal_data/PALM/PALM/Training/Classification_Labels.xlsx'
train_csv = pd.read_excel(train_csv_path)

## Testing
test_dir = '/home/ahmad/ahmad_experiments/retinal_data/PALM/PALM/Testing/Images'
test_csv_path = '/home/ahmad/ahmad_experiments/retinal_data/PALM/PALM/Testing/Classification_Labels.xlsx'
test_csv = pd.read_excel(test_csv_path)



In [8]:
train_csv.columns, validation_csv.columns, test_csv.columns

(Index(['imgName', 'Label'], dtype='object'),
 Index(['imgName', 'Label'], dtype='object'),
 Index(['imgName', 'Label'], dtype='object'))

In [12]:
train_csv.head()

Unnamed: 0,imgName,Label
0,H0001.jpg,0
1,H0002.jpg,0
2,H0003.jpg,0
3,H0004.jpg,0
4,H0005.jpg,0


In [16]:
count = 0
for index, rows in train_csv.iterrows():
    img_path = os.path.join(train_dir, rows['imgName'])
    if rows['Label'] == 1:
        shutil.copy2(img_path, myopia_destination_path)
        count += 1

print(f"Copied: {count} images")

for index, rows in validation_csv.iterrows():
    img_path = os.path.join(validation_dir, rows['imgName'])
    if rows['Label'] == 1:
        shutil.copy2(img_path, myopia_destination_path)
        count += 1

print(f"Copied: {count} images")


for index, rows in test_csv.iterrows():
    img_path = os.path.join(test_dir, rows['imgName'])
    if rows['Label'] == 1:
        shutil.copy2(img_path, myopia_destination_path)
        count += 1

print(f"Copied: {count} images")

Copied: 213 images
Copied: 424 images
Copied: 637 images


# Cataracts: Done

# ODIR

In [5]:
odir_csv_path = "/home/ahmad/ahmad_experiments/retinal_data/ODIRD/archive/ODIR-5K/ODIR-5K/ODIR-5K_Training_Annotations(Updated)_V2.xlsx"
odir_images_train = "/home/ahmad/ahmad_experiments/retinal_data/ODIRD/archive/ODIR-5K/ODIR-5K/ODIR-5K_Training_Dataset"
odir_images_test = "/home/ahmad/ahmad_experiments/retinal_data/ODIRD/archive/ODIR-5K/ODIR-5K/testing_images"

normal_destination_path = "/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Normal"
cataract_destination_path = "/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/Cataract"
amd_destination_path = '/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/AMD'

odir_csv = pd.read_excel(odir_csv_path)

In [6]:
odir_csv.head()

Unnamed: 0,ID,Patient Age,Patient Sex,Left-Fundus,Right-Fundus,Left-Diagnostic Keywords,Right-Diagnostic Keywords,N,D,G,C,A,H,M,O
0,0,69,Female,0_left.jpg,0_right.jpg,cataract,normal fundus,0,0,0,1,0,0,0,0
1,1,57,Male,1_left.jpg,1_right.jpg,normal fundus,normal fundus,1,0,0,0,0,0,0,0
2,2,42,Male,2_left.jpg,2_right.jpg,laser spot，moderate non proliferative retinopathy,moderate non proliferative retinopathy,0,1,0,0,0,0,0,1
3,3,66,Male,3_left.jpg,3_right.jpg,normal fundus,branch retinal artery occlusion,0,0,0,0,0,0,0,1
4,4,53,Male,4_left.jpg,4_right.jpg,macular epiretinal membrane,mild nonproliferative retinopathy,0,1,0,0,0,0,0,1


In [7]:
cataract_count = 0
amd_count = 0
train_images = os.listdir(odir_images_train)
test_images = os.listdir(odir_images_test)

for index, rows in odir_csv.iterrows():
    if  'cataract' in rows['Left-Diagnostic Keywords'] :
        left_image = rows['Left-Fundus'] 
        if left_image in train_images:
            shutil.copy2(os.path.join(odir_images_train, left_image), cataract_destination_path)
        elif left_image in test_images:
            shutil.copy2(os.path.join(odir_images_test, left_image), cataract_destination_path)
        cataract_count += 1

    # elif 'age-related macular degeneration' in rows['Left-Diagnostic Keywords']:
    #     left_image = rows['Left-Fundus'] 
    #     if left_image in train_images:
    #         shutil.copy2(os.path.join(odir_images_train, left_image), amd_destination_path)
    #     elif left_image in test_images:
    #         shutil.copy2(os.path.join(odir_images_test, left_image), amd_destination_path)
    #     amd_count += 1

    if  'cataract' in rows['Right-Diagnostic Keywords']:
        right_image = rows['Right-Fundus'] 
        if right_image in train_images:
            shutil.copy2(os.path.join(odir_images_train, right_image), cataract_destination_path)
        elif right_image in test_images:
            shutil.copy2(os.path.join(odir_images_test, right_image), cataract_destination_path)
        cataract_count += 1

    # elif  'age-related macular degeneration' in rows['Right-Diagnostic Keywords']:
    #     right_image = rows['Right-Fundus'] 
    #     if right_image in train_images:
    #         shutil.copy2(os.path.join(odir_images_train, right_image), amd_destination_path)
    #     elif right_image in test_images:
    #         shutil.copy2(os.path.join(odir_images_test, right_image), amd_destination_path)
    #     amd_count += 1

print(f"Cataract: {cataract_count}, AMD: {amd_count}")    
   


Cataract: 313, AMD: 0


# ADAM


In [37]:
adam_test_image_dir = "/home/ahmad/ahmad_experiments/retinal_data/ADAM/ADAM/Test/Test-image-400"
adam_test_gt = "/home/ahmad/ahmad_experiments/retinal_data/ADAM/ADAM/Test/test_classification_GT.txt"

adam_validation_image_dir = "/home/ahmad/ahmad_experiments/retinal_data/ADAM/ADAM/Validation/image"
adam_validation_gt = "/home/ahmad/ahmad_experiments/retinal_data/ADAM/ADAM/Validation/validation_classification_GT.txt"

destination = "/home/ahmad/ahmad_experiments/retinal_data/Dataset_SPIE/AMD"

In [31]:
test_csv = pd.read_csv(adam_test_gt, sep="  ", header=None)
val_csv = pd.read_csv(adam_validation_gt,sep=" ", header=None)

  test_csv = pd.read_csv(adam_test_gt, sep="  ", header=None)


In [32]:
test_csv.head()

Unnamed: 0,0,1
0,T0307.jpg,1
1,T0129.jpg,1
2,T0315.jpg,0
3,T0323.jpg,1
4,T0226.jpg,1


In [34]:
val_csv.head()

Unnamed: 0,0,1
0,V0001.jpg,0
1,V0002.jpg,0
2,V0003.jpg,0
3,V0004.jpg,0
4,V0005.jpg,0


In [38]:
t = 0
for index, rows in test_csv.iterrows():
    if rows[1] == 1:
        image_path = os.path.join(adam_test_image_dir, rows[0])
        shutil.copy2(image_path, destination)
        t += 1
print(t)


v = 0 
for index, rows in val_csv.iterrows():
    if rows[1] == 1:
        image_path = os.path.join(adam_validation_image_dir, rows[0])
        shutil.copy2(image_path, destination)
        v += 1


print(v)

89
89
