## Data organisation example - IDRiD

In [34]:
import os
import shutil
import pickle
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# replace with your own data path to save IDRiD datasets
DATAPATH = './datasets/IDRiD/B_Disease_Grading/1__Original_Images/'

### Split val set from train data
- Download dataset from [official website](https://ieee-dataport.org/open-access/indian-diabetic-retinopathy-image-dataset-idrid) 

In [35]:
list_ = pd.read_csv('a__IDRiD_Disease_Grading_Training_Labels_csv')

In [36]:
noDR = list_.loc[list_['Retinopathy grade']==0, 'Image name']
mildDR = list_.loc[list_['Retinopathy grade']==1, 'Image name']
moderateDR = list_.loc[list_['Retinopathy grade']==2, 'Image name']
severeDR = list_.loc[list_['Retinopathy grade']==3, 'Image name']
proDR = list_.loc[list_['Retinopathy grade']==4, 'Image name']

In [37]:
noDR_train, noDR_val = train_test_split(noDR, test_size=0.2,random_state=1)
mildDR_train, mildDR_val = train_test_split(mildDR, test_size=0.2,random_state=1)
moderateDR_train, moderateDR_val = train_test_split(moderateDR, test_size=0.2,random_state=1)
severeDR_train, severeDR_val = train_test_split(severeDR, test_size=0.2,random_state=1)
proDR_train, proDR_val = train_test_split(proDR, test_size=0.2,random_state=1)


In [38]:
train_list = [noDR_train, mildDR_train, moderateDR_train, severeDR_train, proDR_train]
for idx, disease in enumerate(train_list):
    data = [{'img_root': os.path.join(DATAPATH, 'a__Training_Set', value+'.jpg'), 'label': idx} for value in disease]
print(data[0])
save_path = 'data/IDRiD'
os.makedirs(save_path, exist_ok=True)
with open(os.path.join(save_path, 'train.pkl') , 'wb') as file:
    pickle.dump(np.array(data), file)

{'img_root': './datasets/IDRiD/B_Disease_Grading/1__Original_Images/a__Training_Set\\IDRiD_178.jpg', 'label': 4}


In [39]:
val_list = [noDR_val, mildDR_val, moderateDR_val, severeDR_val, proDR_val]
for idx, disease in enumerate(val_list):
    data = [{'img_root': os.path.join(DATAPATH, 'a__Training_Set', value+'.jpg'), 'label': idx} for value in disease]
print(data[0])
with open(os.path.join(save_path, 'val.pkl') , 'wb') as file:
    pickle.dump(np.array(data), file)

{'img_root': './datasets/IDRiD/B_Disease_Grading/1__Original_Images/a__Training_Set\\IDRiD_100.jpg', 'label': 4}


### Organise test set

In [40]:
list_test = pd.read_csv('b__IDRiD_Disease_Grading_Testing_Labels_csv')

In [41]:
noDR_test = list_test.loc[list_test['Retinopathy grade']==0, 'Image name']
mildDR_test = list_test.loc[list_test['Retinopathy grade']==1, 'Image name']
moderateDR_test = list_test.loc[list_test['Retinopathy grade']==2, 'Image name']
severeDR_test = list_test.loc[list_test['Retinopathy grade']==3, 'Image name']
proDR_test = list_test.loc[list_test['Retinopathy grade']==4, 'Image name']

In [42]:
test_list = [noDR_test, mildDR_test, moderateDR_test, severeDR_test, proDR_test]
for idx, disease in enumerate(test_list):
    data = [{'img_root': os.path.join(DATAPATH, 'b__Testing_Set', value+'.jpg'), 'label': idx} for value in disease]
print(data[0])
with open(os.path.join(save_path, 'test.pkl') , 'wb') as file:
    pickle.dump(np.array(data), file)

{'img_root': './datasets/IDRiD/B_Disease_Grading/1__Original_Images/b__Testing_Set\\IDRiD_001.jpg', 'label': 4}
