* In this kernel we are going to use ImageDataGenerator to load images in batches of 16, and adding augmentation.
* *CROPPING IMAGE* : We will also use image cropping to crop the extra black part in the images in the training data.
* We will use the resnet50 model and then train it exclusively for our train set.

In [80]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
print(os.listdir("./input/aptos2019-blindness-detection/"))
from keras.applications import ResNet50, VGG19
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import keras
import csv
import gc
import cv2

['test_images', 'train_new', 'train_images']


In [81]:
from keras.layers import Input, Dense, GlobalMaxPool2D, GlobalAvgPool2D
from keras.layers import Concatenate, Multiply, Dropout, Subtract, Lambda, Flatten

from keras.applications import vgg16
from keras.applications import resnet50
from keras.applications import inception_v3

In [82]:
train_csv = "./input/train.csv"
test_csv = "./input/test.csv"
train_dir = "./input/aptos2019-blindness-detection/train_images/"
test_dir = "./input/aptos2019-blindness-detection/test_images/"
# size = 256,256 # input image size

In [83]:
df = pd.read_csv(train_csv)

* The function defined below takes the dataframe of training data as an argument and return two dataframes, one for Training and one for Validation. 
* Need of this function : we are using image data generator with flow_from_dataframe and 10% validation split so if we use same ImageDataGenerator for training and validation data generation it will also augment the validation data, thus we will define seperate dataframes for training and validation data generation.

In [85]:
image_ids = df["id_code"].values.tolist()
labels = df["diagnosis"].values.tolist()

for i in range(len(image_ids)):
    imgname = image_ids[i]
    newname = str(imgname) + ".png"
    image_ids[i] = newname

for i in range(len(labels)):
    new_name = labels[i]
    newname = str(new_name)
    labels[i] = newname    
# x_train, x_val, y_train, y_val = train_test_split(image_ids, labels, test_size = 0.25)


In [86]:
df = pd.DataFrame({
    'filename': image_ids,
    'label': labels
})

In [87]:
# x_train, x_val, y_train, y_val = train_test_split(image_ids, labels, test_size = 0.25)
train_df, validation_df = train_test_split(df, test_size=0.3, random_state = 42)

In [88]:
print(train_df)
train_df.shape

              filename label
462   207a580de0ea.png     2
3204  df3adfd6ba36.png     2
328   1891698febce.png     0
1397  61c667663f2f.png     0
2069  913b1890ed1e.png     2
...                ...   ...
1130  4f6abc40c72d.png     0
1294  5a091e8cd95c.png     1
860   3dfc50108072.png     2
3507  f47a2a4a0411.png     1
3174  dcc6c0ad5cad.png     0

[2563 rows x 2 columns]


(2563, 2)

In [89]:
batch_size = 64
train_num = len(train_df)
validation_num = len(validation_df)

In [90]:
def two_image_generator(generator, 
                        df, 
                        directory, 
                        batch_size,
                        x_col = 'filename', 
                        y_col = None, 
                        model = None, 
                        shuffle = False,
                        img_size1 = (224, 224), 
                        img_size2 = (299,299)):
    
    gen1 = generator.flow_from_dataframe(
        df,
        directory,
        x_col = x_col,
        y_col = y_col,
        target_size = img_size1,
#         class_mode = model,
        batch_size = batch_size,
        shuffle = shuffle,
        seed = 1)
    
    gen2 = generator.flow_from_dataframe(
        df,
        directory,
        x_col = x_col,
        y_col = y_col,
        target_size = img_size2,
#         class_mode = model,
        batch_size = batch_size,
        shuffle = shuffle,
        seed = 1)
    
    while True:
        X1i = gen1.next()
        X2i = gen2.next()
     
        if y_col:
            yield [X1i[0], X2i[0]], X1i[1]  #X1i[1] is the label
        else:
            yield [X1i, X2i]

In [91]:
#add data_augmentation
train_aug_datagen = ImageDataGenerator(
    rotation_range = 20,
    shear_range = 0.1,
    zoom_range = 0.2,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    horizontal_flip = True
)
train_generator = two_image_generator(train_aug_datagen, 
                                      train_df, 
                                      './input/train/',
                                      batch_size = batch_size, 
                                      y_col = 'label',
                                      model = 'None', 
                                      shuffle = True)

In [92]:
validation_datagen = ImageDataGenerator()

validation_generator = two_image_generator(validation_datagen, 
                                           validation_df,
                                           './input/train/', 
                                           batch_size = batch_size,
                                           y_col = 'label',
                                           model = 'None', 
                                           shuffle = True)

In [93]:
# df_train = pd.DataFrame({"id_code":x_train, "diagnosis":y_train})

# df_val = pd.DataFrame({"id_code":x_val, "diagnosis":y_val})

# df_train["diagnosis"] = df_train["diagnosis"].astype('str')

# df_val["diagnosis"] = df_val["diagnosis"].astype('str')

# print("Length of Training Data :",len(df_train))
# print("Length of Validation Data :",len(df_val))

In [94]:
df_train = df_train.reset_index(drop=True)
df_val = df_val.reset_index(drop=True)

In [95]:
gc.collect()

747

#### CROPPING FUNCTION :

In [96]:
def create_base_model(MODEL, img_size, lambda_fun = None):

    inp = Input(shape = (img_size[0], img_size[1], 3))
    x = inp
    if lambda_fun:
        x = Lambda(lambda_fun)(x)
    
    base_model = MODEL(input_tensor = x, weights = 'imagenet', include_top = False, pooling = 'avg')
        
    model = Model(inp, base_model.output)
    return model

In [97]:
model1 = create_base_model(vgg16.VGG16, (224, 224), vgg16.preprocess_input)
model2 = create_base_model(resnet50.ResNet50, (224, 224), resnet50.preprocess_input)
model3 = create_base_model(inception_v3.InceptionV3, (299, 299), inception_v3.preprocess_input)

model1.trainable = False
model2.trainable = False
model3.trainable = False

inpA = Input(shape = (224, 224, 3))
inpB = Input(shape = (299, 299, 3))

out1 = model1(inpA)
out2 = model2(inpA)
out3 = model3(inpB)

x = Concatenate()([out1, out2, out3]) 
x = Dropout(0.2)(x)
x = Dense(1, activation='sigmoid')(x)

model = Model([inpA, inpB], x)

opt = keras.optimizers.Adam(lr=2e-4)


In [98]:
filepath="pretained-models_original-weights-improvement-{epoch:02d}-{val_accuracy:.4f}.hdf5"

mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)

In [99]:
es = EarlyStopping(monitor='val_accuracy', mode='min', verbose=1,  patience=3 )

In [100]:
model.compile(loss = 'mse', optimizer=opt, metrics = ['accuracy'])

This kernel helped me choose the model parameters, and callbacks - [APTOS Blindness Detection - EDA and Keras ResNet50](https://www.kaggle.com/dimitreoliveira/aptos-blindness-detection-eda-and-keras-resnet50?scriptVersionId=16639594)

You need a generator that yields something of the form **([x1, x2], y)** So you need to write your own generator, for which you can reuse the original ImageDataGenerator for one or more input.

In [101]:
history = model.fit_generator(
    train_generator,
    epochs = 5,
    steps_per_epoch = train_num // batch_size,
    validation_data = validation_generator,
    validation_steps = validation_num // batch_size,
    verbose = 1,
    callbacks = [mc, es])


  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.
Epoch 1/5


ValueError: Error when checking target: expected dense_5 to have shape (1,) but got array with shape (0,)

TEST:

Processing test dataframe.

In [None]:
test_df_orig = pd.read_csv(test_csv)

def process_test_df(test_df):
    test_ids = test_df["id_code"].values.tolist()
    for i in range(len(test_ids)):
        imgname = test_ids[i]
        newname = str(imgname) + ".png"
        test_ids[i] = newname
    test_df["id_code"] = test_ids
    return test_df

test_df = process_test_df(test_df_orig)

Test Data Generator :

In [None]:
test_aug = ImageDataGenerator(rescale = 1./255)

test_generator = test_aug.flow_from_dataframe(dataframe = test_df, 
                                              directory = test_dir,
                                              x_col = "id_code",
                                              batch_size = 1,
                                              target_size = (256,256),
                                              shuffle = False,
                                              class_mode = None)

#### PREDICTION

In [None]:
predprobs = model.predict_generator(test_generator, steps=len(test_generator))

In [None]:
predictions = []
for i in predprobs:
    predictions.append(np.argmax(i)) 

In [None]:
test_df_orig["diagnosis"] = predictions

Submission :

In [None]:
test_df_orig.to_csv('submission.csv',index=False)