In [1]:
import pandas as pd
from tensorflow.keras.preprocessing import image
from PIL import Image 
import PIL 

import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tqdm import tqdm
import numpy as np

In [2]:
df = pd.read_csv('./data/HAM10000_Metadata')
df.tail()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen,vidir_modern
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen,vidir_modern
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen,vidir_modern
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern
10014,HAM_0003521,ISIC_0032258,mel,histo,70.0,female,back,vidir_modern


In [3]:
risk = []

for i in range(0, 10015):
    if df.iloc[i]['dx'] in ['bkl', 'df', 'vasc']:
        risk.append(0.0)
    elif df.iloc[i]['dx'] == 'nv':
        risk.append(1.0)
    elif df.iloc[i]['dx'] == 'akiec':
        risk.append(2.0)
    elif df.iloc[i]['dx'] in ['mel', 'bcc']:
        risk.append(3.0)
        
df['risk'] = risk

In [4]:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,risk
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,vidir_modern,0.0
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,vidir_modern,0.0
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,vidir_modern,0.0
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,vidir_modern,0.0
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,vidir_modern,0.0


In [5]:
df['risk'].value_counts()

1.0    6705
3.0    1627
0.0    1356
2.0     327
Name: risk, dtype: int64

In [6]:
aug_rate = {1.0:0, 3.0:3, 0.0:4, 2.0:20}

In [7]:
data_gen = ImageDataGenerator(
    horizontal_flip=True, 
    brightness_range=[0.8, 1.2],
    vertical_flip=True, 
    zoom_range=0.3)

In [8]:
for i in tqdm(range(0, 10015)):
    original = df.iloc[i]
    
    img = image.load_img('./Data/HAM10000_images/' + original['image_id'] + '.jpg')
    img = image.img_to_array(img)
    
    for i in range(0, aug_rate[original['risk']]):
        new_img = data_gen.random_transform(img).astype(np.uint8)
        new_img = Image.fromarray(new_img)
    
        new = original.copy()

        new_id = 'aug' + str(i) + '_' + original['image_id']
        new['image_id'] = new_id
        df = df.append(new, ignore_index = True)

        new_img.save('./Data/HAM10000_images/' + new_id + '.jpg')

100%|██████████████████████████████████████████| 10015/10015 [13:20<00:00, 12.51it/s]


In [9]:
df['risk'].value_counts()

2.0    6867
0.0    6780
1.0    6705
3.0    6508
Name: risk, dtype: int64

In [10]:
df.tail()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,risk
26855,HAM_0000239,aug18_ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern,2.0
26856,HAM_0000239,aug19_ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern,2.0
26857,HAM_0003521,aug0_ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,3.0
26858,HAM_0003521,aug1_ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,3.0
26859,HAM_0003521,aug2_ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,3.0


In [12]:
df.to_csv('./data/aug_HAM10000_Metadata')

In [13]:
df_read = pd.read_csv('./data/aug_HAM10000_Metadata')

In [14]:
df_read.tail()

Unnamed: 0.1,Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,risk
26855,26855,HAM_0000239,aug18_ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern,2.0
26856,26856,HAM_0000239,aug19_ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern,2.0
26857,26857,HAM_0003521,aug0_ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,3.0
26858,26858,HAM_0003521,aug1_ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,3.0
26859,26859,HAM_0003521,aug2_ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,3.0


In [15]:
df.tail()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,dataset,risk
26855,HAM_0000239,aug18_ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern,2.0
26856,HAM_0000239,aug19_ISIC_0032854,akiec,histo,80.0,male,face,vidir_modern,2.0
26857,HAM_0003521,aug0_ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,3.0
26858,HAM_0003521,aug1_ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,3.0
26859,HAM_0003521,aug2_ISIC_0032258,mel,histo,70.0,female,back,vidir_modern,3.0
