Plant Pathology 2020 Kfold ResNet50

In [None]:
import numpy as np
import pandas as pd

import cv2
from tqdm import tqdm
tqdm.pandas()
import matplotlib.cm as cm
import matplotlib.pyplot as plt

import random
import plotly.figure_factory as ff

import numpy as np
import os
import cv2

from random import seed
from random import randint

from PIL import Image
from sklearn.utils import shuffle

In [None]:
# Run configuration shared by the preprocessing and training cells.

# Size passed to cv2.resize for every image and used for the network input
# shape. Other sizes left as options in the original cell:
# (56, 56), (112, 112), (224, 224).
SIZE = (448, 448)

# Tag embedded in the file names of the saved submissions and models.
MODEL_NAME = '448x448_rgb_ResNet50'

# Selects the branch of noisy() applied to each image; 'not' returns the
# image unchanged.
noise_typ = 'not'

# Edge length of the tile grid used by the "clahe" branch of noisy().
gridsize = 5

In [None]:
# Locations of the input files, relative to the working directory.
IMAGE_PATH = "Data/images/"
TRAIN_PATH = "Data/train.csv"
TEST_PATH = "Data/test.csv"
SUBMISSION_PATH = "Data/sample_submission.csv"

# Read the three CSV tables into DataFrames.
train_data, test_data, submission = (
    pd.read_csv(csv_path)
    for csv_path in (TRAIN_PATH, TEST_PATH, SUBMISSION_PATH)
)

In [None]:
# Preview the first rows of the test table.
test_data.head()

In [None]:
# Preview the first rows of the training table.
train_data.head()

In [None]:
# Preview the first rows of the sample submission table.
submission.head()

In [None]:
# Per-class image counts; later cells append to this list and plot it.
distrubution_plot = list()
# Report how many rows the training table has.
print("Number of images: ", len(train_data))

In [None]:
# Ids of the training images whose 'rust' flag is set, plus their count for
# the class histogram.
# NOTE: the variable keeps its original (misspelled) name because other
# cells may refer to it; only the printed message is corrected to "rust".
rush_train_data = train_data.loc[train_data['rust'] == 1, 'image_id']
print('Number of images plants with rust: ', len(rush_train_data))
distrubution_plot.append(len(rush_train_data))

In [None]:
# Ids of the training images whose 'scab' flag is set; their count is
# recorded for the class histogram.
scab_train_data = train_data.loc[train_data['scab'] == 1, 'image_id']
distrubution_plot.append(len(scab_train_data))
print('Number of images plants with scab: ', len(scab_train_data))

In [None]:
# Ids of the training images whose 'multiple_diseases' flag is set; their
# count is recorded for the class histogram.
multiple_diseases_train_data = train_data.loc[
    train_data['multiple_diseases'] == 1, 'image_id'
]
distrubution_plot.append(len(multiple_diseases_train_data))
print('Number of images plants with multiple diseases: ', len(multiple_diseases_train_data))

In [None]:
# Ids of the training images whose 'healthy' flag is set; their count is
# recorded for the class histogram.
healthy_train_data = train_data.loc[train_data['healthy'] == 1, 'image_id']
distrubution_plot.append(len(healthy_train_data))
print('Number of images healthy plants: ', len(healthy_train_data))

In [None]:
# Bar chart of the number of training images per class.
# The label order must match the order in which the counts were appended to
# distrubution_plot (rust, scab, multiple_diseases, healthy).
# The first label previously read 'rush'; the column it describes is 'rust'.
names = ['rust', 'scab', 'multiple_diseases', 'healthy']

plt.figure(figsize=(6, 6))
plt.bar(names, distrubution_plot)
plt.suptitle('distribution plot')
plt.show()

In [None]:
#Add noise to images
def noisy(noise_typ,image):
    """Return `image` with the requested noise or enhancement applied.

    Parameters
    ----------
    noise_typ : str
        One of:
        "not"     - return the input object unchanged;
        "gauss"   - add zero-mean Gaussian noise with variance 0.1;
        "s&p"     - set a small fraction of entries to the salt or pepper value;
        "poisson" - resample every entry from a Poisson distribution;
        "speckle" - add multiplicative Gaussian noise (image + image * noise);
        "clahe"   - apply CLAHE to the L channel in LAB space (uses cv2 and
                    the module-level `gridsize`).
    image : numpy.ndarray
        Image array. The "gauss" and "speckle" branches require three
        dimensions (row, col, channel); the "clahe" branch converts with
        COLOR_BGR2LAB and therefore treats the input as BGR.

    Returns
    -------
    numpy.ndarray
        The processed image. "gauss", "poisson" and "speckle" produce float
        arrays; "s&p" keeps the input dtype.

    Raises
    ------
    ValueError
        If `noise_typ` is not one of the names listed above.

    NOTE(review): the "gauss" variance of 0.1 looks tuned for images scaled
    to [0, 1] rather than 0-255 -- confirm before enabling it on uint8 data.
    """
    if noise_typ == "not":
        return image

    if noise_typ == "gauss":
        row, col, ch = image.shape
        mean = 0
        var = 0.1
        sigma = var ** 0.5
        gauss = np.random.normal(mean, sigma, (row, col, ch))
        return image + gauss

    if noise_typ == "s&p":
        s_vs_p = 0.5
        amount = 0.004
        out = np.copy(image)
        # "Salt" is the brightest representable value: the dtype maximum for
        # integer images, 1 for float images.
        if np.issubdtype(out.dtype, np.integer):
            salt_value = np.iinfo(out.dtype).max
        else:
            salt_value = 1

        # Salt mode. The upper bound of randint is exclusive, so `dim` (not
        # `dim - 1`) is needed for the last index to be reachable. The
        # coordinates are packed in a tuple so NumPy reads them as one index
        # array per axis.
        num_salt = int(np.ceil(amount * image.size * s_vs_p))
        coords = tuple(np.random.randint(0, dim, num_salt) for dim in image.shape)
        out[coords] = salt_value

        # Pepper mode.
        num_pepper = int(np.ceil(amount * image.size * (1. - s_vs_p)))
        coords = tuple(np.random.randint(0, dim, num_pepper) for dim in image.shape)
        out[coords] = 0
        return out

    if noise_typ == "poisson":
        # Scale by the number of distinct values rounded up to a power of two.
        vals = len(np.unique(image))
        vals = 2 ** np.ceil(np.log2(vals))
        return np.random.poisson(image * vals) / float(vals)

    if noise_typ == "speckle":
        row, col, ch = image.shape
        gauss = np.random.randn(row, col, ch)
        # This branch previously computed the result but never returned it.
        return image + image * gauss

    if noise_typ == "clahe":
        lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
        # cv2.split may hand back a tuple; copy into a list so that the L
        # plane can be replaced by item assignment.
        lab_planes = list(cv2.split(lab))
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(gridsize, gridsize))
        lab_planes[0] = clahe.apply(lab_planes[0])
        lab = cv2.merge(lab_planes)
        return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

    # Fail loudly instead of silently returning None for a misspelled name.
    raise ValueError("Unknown noise_typ: {!r}".format(noise_typ))

In [None]:
# Load every training image as an RGB array resized to SIZE.
train = []
train_shape = []
for image_id in tqdm(train_data["image_id"]):
    file_path = image_id + ".jpg"
    image = cv2.imread(IMAGE_PATH + file_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None;
        # stop here with the offending path instead of failing inside resize.
        raise FileNotFoundError("Could not read image: " + IMAGE_PATH + file_path)
    image = cv2.resize(image, SIZE)
    # noisy() runs before the colour conversion because its "clahe" branch
    # converts from BGR.
    image = noisy(noise_typ, image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = np.asarray(image)
    train_shape.append(list(image.shape))
    # The former PIL round trip (Image.fromarray -> np.asarray) was dropped:
    # it returned the same pixel data and only added a conversion step.
    train.append(image)

In [None]:
# Load every test image as an RGB array resized to SIZE, then scale to [0, 1].
test_images = []
for image_id in tqdm(test_data["image_id"]):
    file_path = image_id + ".jpg"
    image = cv2.imread(IMAGE_PATH + file_path)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None;
        # stop here with the offending path instead of failing later.
        raise FileNotFoundError("Could not read image: " + IMAGE_PATH + file_path)
    # Same order as the training loop: resize, noisy(), then BGR -> RGB.
    # noisy() must see BGR data because its "clahe" branch converts from BGR;
    # previously the test images were converted to RGB first.
    image = cv2.resize(image, SIZE)
    image = noisy(noise_typ, image)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    test_images.append(np.asarray(image))
# Convert straight to float32 to avoid an intermediate full-size copy.
test_images = np.array(test_images, dtype="float32") / 255.0
test_images.shape

In [None]:
# Display one training image picked at a random index in 1..100 (both ends
# inclusive), resized to (205, 136) for viewing.
fig = plt.imshow(cv2.resize(train[random.randint(1,100)], (205, 136)))

In [None]:
# Display one training image picked at a random index in 1..100 (both ends
# inclusive), resized to (205, 136) for viewing.
fig = plt.imshow(cv2.resize(train[random.randint(1,100)], (205, 136)))

In [None]:
# Display one training image picked at a random index in 1..100 (both ends
# inclusive), resized to (205, 136) for viewing.
fig = plt.imshow(cv2.resize(train[random.randint(1,100)], (205, 136)))

In [None]:
# Display one training image picked at a random index in 1..100 (both ends
# inclusive), resized to (205, 136) for viewing.
fig = plt.imshow(cv2.resize(train[random.randint(1,100)], (205, 136)))

In [None]:
# Display one training image picked at a random index in 1..100 (both ends
# inclusive), resized to (205, 136) for viewing.
fig = plt.imshow(cv2.resize(train[random.randint(1,100)], (205, 136)))

In [None]:
# Display one training image picked at a random index in 1..100 (both ends
# inclusive), resized to (205, 136) for viewing.
fig = plt.imshow(cv2.resize(train[random.randint(1,100)], (205, 136)))

In [None]:
# Same random preview as the cells above, drawn on a larger 20x20 figure.
plt.figure(figsize = (20,20))
plt.imshow(cv2.resize(train[random.randint(1,100)], (205, 136)))

In [None]:
# Stack the list of images into one array, cast to float32 and divide by
# 255.0; the last line displays the resulting shape.
train = np.array(train).astype("float32") / 255.0
train.shape

In [None]:
# Turn the training table into a plain NumPy array (one row per image) and
# display its shape. df_train_data is just another name for train_data.
df_train_data = train_data
prepared_train_data = np.array(df_train_data)
prepared_train_data.shape

In [None]:
# Keep column 0 of the table as a two-dimensional (rows, 1) slice, taken
# before shuffling. Presumably this is the image_id column -- confirm against
# the CSV column order.
key_all = prepared_train_data[:, [0]]

In [None]:
# Shuffle images and table rows together so they stay aligned; the fixed
# random_state makes the permutation reproducible.
train, prepared_train_data = shuffle(train, prepared_train_data, random_state=42)

In [None]:
# Split the shuffled table into keys and labels.
# key_train keeps column 0 as a (rows, 1) slice (presumably image_id --
# confirm against the CSV column order).
key_train = prepared_train_data[:, [0]]
# Drop column 0 so only the remaining (label) columns are left.
prepared_train_data = np.delete(prepared_train_data, 0, 1)
# Round trip through a DataFrame and back to a NumPy array.
df_prepared_train_data = pd.DataFrame(prepared_train_data)
prepared_train_data = df_prepared_train_data.to_numpy()

In [None]:
import sys
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.preprocessing import image
from keras.optimizers import adam
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input, decode_predictions
from keras.layers import Flatten
from keras.layers import Dense
from keras.callbacks import LearningRateScheduler
import math
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.callbacks import History 
import tensorflow as tf


from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

import tensorflow as tf
import tensorflow.keras.layers as L

from keras.layers import Flatten
from keras.layers import Dense
from keras.callbacks import LearningRateScheduler
from keras.optimizers import adam

import keras
import keras.utils
from keras import utils as np_utils
from keras.optimizers import SGD

In [None]:
# plot diagnostic learning curves
def summarize_diagnostics(history):
    """Plot the loss and accuracy learning curves recorded in `history`.

    Draws two stacked subplots on the current matplotlib figure: loss on top
    and accuracy below, each with the training curve in blue and the
    validation curve in orange.

    Parameters
    ----------
    history : object
        Object whose `history` attribute is a mapping with the keys 'loss',
        'val_loss', 'accuracy' and 'val_accuracy', each holding one value
        per epoch.

    Raises
    ------
    KeyError
        If one of the four keys is missing from `history.history`.
    """
    # (subplot position, title, training key, validation key)
    panels = (
        (211, 'Cross Entropy Loss', 'loss', 'val_loss'),
        (212, 'Classification Accuracy', 'accuracy', 'val_accuracy'),
    )
    for position, title, train_key, val_key in panels:
        plt.subplot(position)
        plt.title(title)
        plt.plot(history.history[train_key], color='blue', label='train')
        plt.plot(history.history[val_key], color='orange', label='test')
        # The curves carry labels, so draw the legend that displays them.
        plt.legend()
    # Keep the lower subplot's title from overlapping the upper axes.
    plt.tight_layout()

In [None]:
# learning rate schedule
def step_decay(epoch):
    """Return the learning rate for `epoch` under a step-decay schedule.

    The base rate of 1e-8 is multiplied by 0.5 once for every five epochs
    completed, counting from `epoch + 1`.

    Parameters
    ----------
    epoch : int
        Zero-based epoch index.

    Returns
    -------
    float
        The learning rate to use for that epoch.
    """
    base_rate = 0.00000001
    decay_factor = 0.5
    step_length = 5.0
    completed_steps = math.floor((1 + epoch) / step_length)
    return base_rate * math.pow(decay_factor, completed_steps)

In [None]:
#define model
def define_model():
    """Build and compile a ResNet50-based classifier with four outputs.

    The network is an ImageNet-initialised ResNet50 without its top layers,
    followed by Flatten, a 500-unit ReLU Dense layer and a 4-unit softmax
    Dense layer. It is compiled with the `adam` optimizer at a learning rate
    of 1e-8, categorical cross-entropy loss and the accuracy metric.

    Returns
    -------
    The compiled Keras model. Its input shape is (SIZE[0], SIZE[1], 3).
    """
    # Pretrained convolutional base without the original classifier.
    backbone = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(SIZE[0], SIZE[1], 3),
    )

    # Every backbone layer is left trainable, so the whole network is tuned.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = True

    # New classification head attached to the output of the last layer.
    features = Flatten()(backbone.layers[-1].output)
    hidden = Dense(500, activation='relu', kernel_initializer='he_uniform')(features)
    probabilities = Dense(4, activation='softmax')(hidden)

    classifier = Model(inputs=backbone.inputs, outputs=probabilities)
    classifier.compile(
        optimizer=adam(lr=0.00000001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return classifier

In [None]:
!pip install --upgrade google-cloud-storage
import os
# Export the path of the credentials JSON file through the environment.
# Presumably the storage client created in upload_blob/download_blob reads
# this variable to authenticate -- confirm against the library docs.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"]="Project-XXXXXXXXXX.json"
from google.cloud import storage


def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Send a local file to a storage bucket and print a confirmation.

    bucket_name: name of the bucket to upload into.
    source_file_name: local path of the file to upload.
    destination_blob_name: object name the file is stored under.
    """
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_filename(source_file_name)

    confirmation = "File {} uploaded to {}.".format(
        source_file_name, destination_blob_name
    )
    print(confirmation)
def download_blob(bucket_name, source_blob_name, destination_file_name):
    """Fetch an object from a storage bucket into a local file and print a confirmation.

    bucket_name: name of the bucket to read from.
    source_blob_name: object name to download.
    destination_file_name: local path the content is written to.
    """
    source_blob = storage.Client().bucket(bucket_name).blob(source_blob_name)
    source_blob.download_to_filename(destination_file_name)

    confirmation = "Blob {} downloaded to {}.".format(
        source_blob_name, destination_file_name
    )
    print(confirmation)

In [None]:
# Collapse the label columns into one integer per row: the index of the
# largest value along axis 1. Assumes the columns are one-hot class flags --
# confirm against the CSV.
prepared_train_data = np.argmax(prepared_train_data, axis=1)

In [None]:
# Stratified 5-fold cross-validation. For every fold: train a fresh model in
# two phases, report train/validation accuracy, write a submission CSV with
# the test-set predictions, save the model and plot its learning curves.
kfold = 5
skf = StratifiedKFold(n_splits=kfold,shuffle=False)
# One-hot labels of the whole training set; they do not change per fold.
y_all = keras.utils.to_categorical(prepared_train_data, 4)
# StratifiedKFold ignores the `groups` argument, so only the images and the
# class indices are passed to split().
for m, (train_index, test_index) in enumerate(skf.split(train, prepared_train_data)):
    print('Fold %d/%d' % (m + 1, kfold))
    temp_i = str(m + 1)

    # Resolve the output paths first and make sure their folders exist, so a
    # missing directory cannot abort the fold after training has finished.
    name_of_sub = 'sub/sub/' + MODEL_NAME + '_sub_' + temp_i + '.csv'
    name_of_model = 'models/pretrained/' + MODEL_NAME + '_sub_' + temp_i + '.h5'
    for output_path in (name_of_sub, name_of_model):
        os.makedirs(os.path.dirname(output_path), exist_ok=True)

    x_train, x_test = train[train_index], train[test_index]
    y_train, y_test = prepared_train_data[train_index], prepared_train_data[test_index]
    key_train_val, key_test_val = key_train[train_index], key_train[test_index]

    y_train = keras.utils.to_categorical(y_train, 4)
    y_test = keras.utils.to_categorical(y_test, 4)

    # define model
    model = define_model()
    # summary() prints the table itself; wrapping it in print() only added a
    # trailing "None".
    model.summary()

    # create data generator: random rotation is the only augmentation enabled
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=180,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range=0,  # Randomly zoom image
        width_shift_range=0,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False,  # randomly flip images
    )
    # datagen.fit(x_train) was removed: it is only required when feature-wise
    # centering/normalisation or ZCA whitening is enabled, and all of those
    # are switched off above.

    # learning-rate schedule, early stopping and history recording
    history = History()
    lrate = LearningRateScheduler(step_decay)
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20)

    callbacks_list = [lrate, es, history]

    # historylist collects the per-epoch values of both training phases, one
    # inner list per history key (in the key order of the first phase).
    # Presumably this is needed because the History callback starts over on
    # every fit call -- confirm for the installed Keras version.
    historylist = [[] for _ in range(7)]
    listofkeys = []
    # Two consecutive training phases: 5 epochs, then up to 70 more.
    for phase_epochs in (5, 70):
        model.fit_generator(
            datagen.flow(x_train, y_train, batch_size=32),
            validation_data=(x_test, y_test),
            epochs=phase_epochs,
            verbose=1,
            callbacks=callbacks_list,
        )
        if not listofkeys:
            listofkeys = list(history.history.keys())
        # zip() stops at the shorter sequence, so an unexpected extra key can
        # no longer raise an IndexError on the seven pre-allocated slots.
        for collected, key in zip(historylist, listofkeys):
            collected.extend(history.history[key])

    _, acc = model.evaluate(x_train, y_train, verbose=1)
    print( 'Train > %.3f' % (acc * 100.0))
    _, acc = model.evaluate(x_test,y_test, verbose=1)
    print( 'Test > %.3f' % (acc * 100.0))

    #submission: overwrite the columns from 'healthy' onwards with the predictions
    submission_predict = model.predict(test_images, verbose=1)
    submission = pd.read_csv(SUBMISSION_PATH)
    submission.loc[:, 'healthy':] = submission_predict
    submission.to_csv(name_of_sub, index=False)
    print(name_of_sub)

    #save model
    print(temp_i," ",name_of_model)
    model.save(name_of_model)
    print("Saved model to disk")
    summarize_diagnostics(history)
    # Release the backend state before the next fold builds a new model.
    tf.keras.backend.clear_session()
