# Image processing and augmentation

In [1]:
# import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import math
import cv2
from matplotlib.image import imread
from datetime import datetime
from skimage import exposure
import random

In [2]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img ,save_img

Using TensorFlow backend.


## Load Data

In [3]:
test_data = pd.read_csv("../Data/test.csv")
train_data = pd.read_csv("../Data/train.csv")

## Unbalanced Data

In [4]:
Healthy = train_data['healthy'].value_counts()
Rust = train_data['rust'].value_counts()
Scab = train_data['scab'].value_counts()
Multiple_diseases = train_data['multiple_diseases'].value_counts()

df = pd.DataFrame([Healthy ,Rust,Scab,Multiple_diseases] )
df.columns = ['0', '1'] 
df['1']

healthy              516
rust                 622
scab                 592
multiple_diseases     91
Name: 1, dtype: int64

## Over Sampling

In [5]:
# A function to generate images

def Generate_Images(class_) :

    start=datetime.now()

    generator = ImageDataGenerator(horizontal_flip=True ,rotation_range=90, brightness_range=[0.2,1.0])

    class_count = train_data[train_data[class_]== 1]['image_id'].count()
    max_values_count = df['1'].max()
    
    diff =  max_values_count - class_count
    
    while  class_count < max_values_count:

        selected_randomly = train_data[train_data[class_]== 1].sample(n = 1) 

        img_id = selected_randomly['image_id'].iloc[0]
        image = load_img('..\images\\' + img_id + '.jpg') # this is a PIL image

        image = img_to_array(image)
        image = image.reshape((1,) + image.shape)

        generator.fit(image)
        iterator = generator.flow(image,batch_size=1)
        Epochs = 2 
        if diff > 200 :
            Epochs = 4
        for Epoch in range(Epochs):

            image = iterator.next()[0].astype('uint8')
        
            img_id = 'Add_Train_' +str(len(train_data.index)) 
            img_name = '..\images\\Add_Train_' +str(len(train_data.index)) +".jpg"
            # save the image with a new filename
            save_img(img_name, image)
            
            #healthy	multiple_diseases	rust	scab
            
            if class_ == 'healthy' :
                train_data.loc[len(train_data.index)] = [img_id,1,0 ,0,0] 
            elif class_ == 'multiple_diseases' :
                train_data.loc[len(train_data.index)] = [img_id,0,1 ,0,0] 
            elif class_ == 'scab' :
                train_data.loc[len(train_data.index)] = [img_id,0,0 ,0,1] 

        class_count = train_data[train_data[class_]== 1]['image_id'].count()
    print('Number of images added : ', diff)
    print ('Time : ', datetime.now()-start)

In [6]:
## scab 
print('Generate scab Images ')
Generate_Images('scab')

## helthy 
print('Generate helthy Images ')
Generate_Images('healthy')

## multiple diseases
print('Generate multiple diseases Images ')
Generate_Images('multiple_diseases')

Generate scab Images 
Number of images added :  30
Time :  0:00:31.896447
Generate helthy Images 
Number of images added :  106
Time :  0:01:43.632266
Generate multiple diseases Images 
Number of images added :  531
Time :  0:08:16.811861


In [7]:
Healthy = train_data['healthy'].value_counts()
Rust = train_data['rust'].value_counts()
Scab = train_data['scab'].value_counts()
Multiple_diseases = train_data['multiple_diseases'].value_counts()

df = pd.DataFrame([Healthy ,Rust,Scab,Multiple_diseases] )
df.columns = ['0', '1'] 
df['1']

healthy              622
rust                 622
scab                 622
multiple_diseases    623
Name: 1, dtype: int64

In [8]:
train_data.to_csv('..\\Data\\balance_train.csv', index=False)

# Image Augmentation

In [9]:
add_train_data = pd.read_csv("..\Data\\balance_train.csv")

In [10]:
def random_image():
    
    selected_randomly = train_data.sample(n = 1) 
    img_id = selected_randomly['image_id'].iloc[0]

    image = load_img('..\images\\' + img_id + '.jpg') # this is a PIL image
    image = img_to_array(image)
    
    return image

In [11]:
def add_to_df(img_id,class_):
    if class_ == 'healthy' :
        add_train_data.loc[len(add_train_data.index)] = [img_id,1,0 ,0,0] 
    elif class_ == 'multiple_diseases' :
        add_train_data.loc[len(add_train_data.index)] = [img_id,0,1 ,0,0] 
    elif class_ == 'rust' :
        add_train_data.loc[len(add_train_data.index)] = [img_id,0,0 ,1,0] 
    elif class_ == 'scab' :
        add_train_data.loc[len(add_train_data.index)] = [img_id,0,0 ,0,1] 

### Blur Train Images

In [12]:
%%time
def bluring(class_ ,loops):
    
    for i in range(loops):

        image = random_image()
        image = cv2.blur(image, (30, 30))
        img_id = 'blured_' +str(len(add_train_data.index)) 
        img_name = '..\images\\' + img_id +".jpg"
        # save the image with a new filename
        save_img(img_name, image)
        add_to_df(img_id, class_)

Wall time: 0 ns


### Brightness 

In [13]:
%%time
def brightness(class_ ,loops) :
    for i in range(loops):

        image = random_image()
        # adjust just the input max value for bright   
        image = exposure.rescale_intensity(image, in_range=(0,200), out_range=(0,255))
        
        img_id = 'bright_' +str(len(add_train_data.index)) 
        img_name = '..\images\\' + img_id +".jpg"
        # save the image with a new filename
        save_img(img_name, image)
        add_to_df(img_id ,class_)

Wall time: 0 ns


### Add Noise Images

In [14]:
%%time

#Add random noise to an image
def add_noise(class_no ,loops ):
    VARIABILITY = 50
    
    for i in range(loops):

        image = random_image()
   
        deviation = VARIABILITY*random.uniform(0.2,0.5)
        noise = np.random.normal(0, deviation, image.shape)
        image += noise
        np.clip(image, 0., 255.)

        img_id = 'noise_' +str(len(add_train_data.index)) 
        img_name = '..\images\\' + img_id +".jpg"
        
        # save the image with a new filename
        save_img(img_name, image)
        add_to_df(img_id, class_)

Wall time: 0 ns


### Iterate through images

In [15]:
loops =10 
classes = ['healthy','multiple_diseases','rust','scab']
for class_ in classes:
    print(class_)
    bluring(class_ ,loops)
    brightness(class_ ,loops)
    add_noise(class_ ,loops )

healthy
multiple_diseases
rust
scab


In [16]:
train_data.to_csv('..\\Data\\train_extra_imgs.csv', index=False)

### Crop Test Images using Canny Edge detection

In [17]:
# Canny Edge detection
for i in test_data['image_id']:
    img = cv2.imread( '..\images\\' + i + '.jpg')
    canny = cv2.Canny(img, 50, 200)
    ## find the non-zero min-max coords of canny
    pts = np.argwhere(canny>0)
    y1,x1 = pts.min(axis=0)
    y2,x2 = pts.max(axis=0)
    ## crop the region
    cropped = img[y1:y2, x1:x2]
    cv2.imwrite( '..\images\cropped_' + i + ".jpg", cropped)