In [None]:
import os
import pandas as pd
import imgaug as ia
import imgaug.augmenters as iaa
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input as vgg19_preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [None]:
# Base directory of the dataset, given relative to the working directory
base_dir: str = './covid-chestxray-dataset-master'

In [None]:
# Load the metadata table describing the COVID and non-COVID chest X-rays.
# NOTE(review): the file is read from the working directory, not from
# `base_dir` — confirm which location is intended.
metadata_csv = "metadata.csv"
df = pd.read_csv(metadata_csv)

In [None]:
# Keep only the rows whose `finding` is exactly "COVID-19".
# NOTE(review): this is an exact string match — confirm the metadata version in
# use labels COVID cases with precisely this value.
is_covid = df["finding"] == "COVID-19"
covid_df = df[is_covid]
covid_df

In [None]:
# Path of the `images` folder located under the base directory
image_dir = os.path.join(base_dir, "images")
image_dir

In [None]:
# Build the list of file paths for all the COVID-19 chest X-rays.
# Rows without a file name are skipped, and whitespace is stripped from each
# name *before* the directory prefix is added, so both leading and trailing
# blanks are removed.
# NOTE(review): paths are built from "./images/" rather than `image_dir` —
# confirm which location actually holds the images.
covid_filenames = covid_df["filename"].dropna().astype(str).str.strip()

# Keep only jpeg, jpg and png files; the extension is matched
# case-insensitively so names such as "scan.JPG" are not dropped.
image_exts = (".jpeg", ".png", ".jpg")
f_paths = [
    "./images/" + fname
    for fname in covid_filenames
    if fname.lower().endswith(image_exts)
]

# Count the number of files
len(f_paths)

In [None]:
def sometimes(aug):
    """Wrap `aug` in `iaa.Sometimes` with probability 0.5."""
    return iaa.Sometimes(0.5, aug)


# --- Flips -----------------------------------------------------------------
flip_horizontal = iaa.Fliplr(0.5)   # horizontally flip 50% of all images
flip_vertical = iaa.Flipud(0.2)     # vertically flip 20% of all images

# --- Geometric distortion (wrapped by `sometimes`, i.e. probability 0.5) ----
# Per-axis scaling is not part of this pipeline.
random_affine = sometimes(
    iaa.Affine(
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},  # shift by -20% to +20% per axis
        rotate=(-25, 25),   # rotate by -25 to +25 degrees
        shear=(-8, 8),      # shear by -8 to +8 degrees
        order=[0, 1],       # nearest-neighbour or bilinear interpolation
        cval=(0, 255),      # fill value used when the warp mode is constant
        mode=ia.ALL,        # any of the available warping modes
    )
)

# --- Pixel-level effects ------------------------------------------------------
# Exactly one of the two blurs is picked whenever this augmenter runs.
random_blur = iaa.OneOf([
    iaa.GaussianBlur((0, 3.0)),   # sigma between 0 and 3.0
    iaa.AverageBlur(k=(2, 7)),    # local-mean kernel size between 2 and 7
])
random_sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
random_noise = iaa.AdditiveGaussianNoise(
    loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5
)

# The count range is (0, 5) although only three children are listed; the range
# is kept exactly as configured.
pixel_effects = iaa.SomeOf(
    (0, 5),
    [random_blur, random_sharpen, random_noise],
    random_order=True,
)

# Full pipeline: the four stages above, applied in random order
seq = iaa.Sequential(
    [flip_horizontal, flip_vertical, random_affine, pixel_effects],
    random_order=True,
)

In [None]:
# Make a new directory to store augmented images. `exist_ok=True` makes the
# cell safe to re-run (no error if the folder is already there), and using
# `os` instead of shell commands keeps it portable across platforms.
os.makedirs("./covid_aug", exist_ok=True)

# Show the working directory contents, then what the output folder holds so far
print(sorted(os.listdir(".")))
os.listdir("./covid_aug")

In [None]:
def augment(img):
    """
    Run one image through the `seq` augmentation pipeline, then through
    VGG19 input preprocessing, and return the result.

    A deterministic copy of `seq` is taken on every call: to_deterministic()
    samples one value for each parameter that comes from a distribution and
    keeps reusing it, so the copy applies one fixed set of transformations.

    img: the image to augment — presumably a single image array as handed to
         an ImageDataGenerator `preprocessing_function`; TODO confirm.
    """
    frozen_seq = seq.to_deterministic()
    return vgg19_preprocess_input(frozen_seq.augment_image(img))

In [None]:
# Instantiate the ImageDataGenerator from tensorflow.keras, passing in our
# custom augmentation function.
# NOTE(review): `augment` also applies VGG19 preprocessing, so the arrays
# written to disk below are the preprocessed ones — confirm that this is what
# the saved JPEGs should contain.
train_generator = ImageDataGenerator(preprocessing_function=augment)

augs_per_image = 5     # number of augmented copies to write per source image
failed_files = []      # created once, so failures from every file are kept

for f in f_paths:                                   # loop through all the filenames/paths
    try:
        # Give each source image its own file prefix. flow() names saved files
        # from the prefix, the sample index (always 0 here) and a small random
        # number, so a single shared prefix lets files from different images
        # collide and silently overwrite each other.
        stem = os.path.splitext(os.path.basename(f))[0]

        img = load_img(f)                           # load image
        x = img_to_array(img)                       # convert to a numpy array
        x = x.reshape((1, ) + x.shape)              # add the batch axis: [batch, height, width, channels]

        flow = train_generator.flow(
            x,
            batch_size=1,
            save_to_dir="./covid_aug",
            save_prefix="covid_aug_" + stem,
            save_format="jpeg",
        )
        # flow() yields batches indefinitely and every batch drawn writes one
        # image, so stop as soon as `augs_per_image` batches have been drawn.
        for count, _batch in enumerate(flow, start=1):
            if count >= augs_per_image:
                break
    except Exception as exc:
        # Best effort: record the file and carry on with the rest. Catching
        # Exception (not a bare except) still lets Ctrl-C interrupt the loop.
        failed_files.append(f)
        print("Failed to augment {}: {!r}".format(f, exc))

# Files that could not be augmented (empty list when everything succeeded)
failed_files