## Seedling - ResNet

In [1]:
import numpy as np 
import pandas as pd 
import os
from keras.utils.np_utils import to_categorical
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.applications import *
from keras.applications.resnet50 import preprocess_input
from sklearn.model_selection import StratifiedShuffleSplit

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def _load_from_cwd(relative_path):
    """Load a .npy file addressed relative to the current working directory."""
    return np.load(os.path.join(os.getcwd(), relative_path))


# Pre-computed image arrays and the matching training labels.
sharpTrainImage = _load_from_cwd('Image224/sharpTrainImage.npy')
sharpTestImage = _load_from_cwd('Image224/sharpTestImage.npy')
trainLabels = _load_from_cwd('Image224/trainLabels.npy')

# Report the sample counts; the train images and train labels should agree.
for caption, loaded in (
    ("Numbers of sharpTrainImage images", sharpTrainImage),
    ("Numbers of sharpTestImage images", sharpTestImage),
    ("Numbers of train labels", trainLabels),
):
    print(caption, len(loaded))

Numbers of sharpTrainImage images 4750
Numbers of sharpTestImage images 794
Numbers of train labels 4750


In [3]:
# Sanity check: display the shape of the first training image.
sharpTrainImage[0].shape

(224, 224, 3)

In [4]:
# One-hot encode the class labels (one column per class).
# The cell overwrites `trainLabels`, so it is guarded: running it a second
# time on an array that is already (n_samples, NUM_CLASSES) would otherwise
# encode the one-hot matrix again and break the label shape.
NUM_CLASSES = 12
already_one_hot = trainLabels.ndim == 2 and trainLabels.shape[1] == NUM_CLASSES
if not already_one_hot:
    trainLabels = to_categorical(trainLabels, NUM_CLASSES)

In [5]:
# Sanity check: display the label array shape after one-hot encoding.
trainLabels.shape

(4750, 12)

In [6]:
# Hold out 16% of the labelled images for validation. A stratified shuffle
# split is used so the class balance is the same on both sides of the split.
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.16, random_state=42)

# n_splits=1, so the splitter yields exactly one (train, validation) index pair.
train_index, test_index = next(sss.split(sharpTrainImage, trainLabels))
print("Using {} for training and {} for validation".format(len(train_index), len(test_index)))

x_train, y_train = sharpTrainImage[train_index], trainLabels[train_index]
x_valid, y_valid = sharpTrainImage[test_index], trainLabels[test_index]

Using 3990 for training and 760 for validation


In [7]:
# Random geometric augmentations: any rotation angle, shifts and zoom of up
# to 30%, and flips along both axes.
augmentation_options = dict(
    rotation_range=360.,
    width_shift_range=0.3,
    height_shift_range=0.3,
    zoom_range=0.3,
    horizontal_flip=True,
    vertical_flip=True,
)

# `preprocess_input` is the ResNet50 input normalisation imported above.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                             **augmentation_options)

In [23]:
# ---- Training hyper-parameters ----
epochs = 50
learning_rate = 0.0001
batch_size = 32
dim = 224  # input side length; also reused below as the hidden Dense width

# File that receives the best checkpoint (lowest val_loss seen so far).
weights = os.path.join('', 'weights.h5')

# All three callbacks watch the validation loss.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=0)
best_checkpoint = ModelCheckpoint(weights, monitor='val_loss',
                                  save_best_only=True, verbose=0)
lr_schedule = ReduceLROnPlateau(monitor='val_loss',
                                factor=0.1,
                                patience=2,
                                verbose=0,
                                mode='auto',
                                epsilon=0.0001,
                                cooldown=0,
                                min_lr=0)
callbacks = [early_stopping, best_checkpoint, lr_schedule]

# ---- Network: ImageNet-pretrained ResNet50 body + small classifier head ----
# include_top=False drops the original classifier; pooling='avg' averages the
# final feature maps so the body emits one feature vector per image.
base_model = ResNet50(weights='imagenet',
                      include_top=False,
                      pooling='avg',
                      input_shape=(dim, dim, 3))

hidden = Dense(dim, activation='relu')(base_model.output)
x = Dropout(0.5)(hidden)
predictions = Dense(12, activation='softmax')(x)  # one probability per class

model = Model(inputs=base_model.input, outputs=predictions)

In [24]:
# Multi-class setup: softmax output trained with categorical cross-entropy,
# optimised by Adam at the configured learning rate; accuracy is reported.
model.compile(
    optimizer=optimizers.Adam(lr=learning_rate),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [25]:
import datetime

# Step counts must be whole numbers, but `/` yields a float in Python 3
# (3990 / 32 = 124.6875). Round up so the final, partially filled batch is
# still consumed once per epoch.
train_steps = int(np.ceil(len(x_train) / batch_size))
valid_steps = int(np.ceil(len(x_valid) / batch_size))

local_start = datetime.datetime.now()
# ------ TRAINING ------
# NOTE(review): validation batches are drawn from the same randomly
# augmenting generator as the training batches, so val_loss (which drives
# early stopping, checkpointing and the LR schedule) varies from run to run
# on identical weights -- confirm this is intended.
history = model.fit_generator(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=train_steps,
    validation_data=datagen.flow(x_valid, y_valid, batch_size=batch_size),
    validation_steps=valid_steps,
    callbacks=callbacks,
    epochs=epochs,
    verbose=1)

print('running time: ', datetime.datetime.now()-local_start)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
running time:  0:41:50.202134


In [26]:
# Persist the trained model to 'model_resnet50.h5'.
# NOTE(review): `model` holds the weights from the last epoch that ran, while
# the ModelCheckpoint callback (save_best_only=True) wrote the lowest-val_loss
# weights to 'weights.h5' -- confirm which set should be saved and used below.
model.save('model_resnet50.h5')

In [27]:
# Species names keyed by integer class id (tuple position == class id).
_SPECIES_BY_ID = dict(enumerate((
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
)))


def label_n2s(x):
    """Translate a numeric class id into its species name.

    Args:
        x: class id, expected in the range 0..11.

    Returns:
        The species name for ``x``, or the string ``'None'`` when ``x`` is
        not a known class id.
    """
    return _SPECIES_BY_ID.get(x, 'None')

In [28]:
# Test-time augmentation (TTA): every test image is classified from several
# randomly augmented views and the class with the largest summed probability
# wins.
TTA_ROUNDS = 100  # augmented views per test image

predLabel = []
for i, image in enumerate(sharpTestImage):
    # Stack TTA_ROUNDS copies of the image into one batch; the generator
    # applies an independent random transform to each copy, so a single
    # predict() call replaces one call per augmented view.
    copies = np.repeat(image[np.newaxis], TTA_ROUNDS, axis=0)
    augmented = next(datagen.flow(copies, batch_size=TTA_ROUNDS, shuffle=False))

    # Sum the per-view probabilities over the batch axis -> shape (12,).
    pred = model.predict(augmented).sum(axis=0)
    predLabel.append(label_n2s(int(np.argmax(pred))))

    # Progress report every 100 images. The previous `i/100==0` used true
    # division and therefore only ever fired for i == 0.
    if i % 100 == 0:
        print("Process:", i)

Peocess: 0


In [30]:
# Sanity check: there should be one predicted label per test image.
len(predLabel)

794

In [31]:
# Start from the sample submission, then overwrite its 'file' and 'species'
# columns with the test file names and the predicted species.
submission_template = pd.read_csv('sample_submission.csv')

# NOTE(review): file names are read from 'Image/' while the image arrays came
# from 'Image224/' -- presumably both were saved in the same order; verify.
testName = np.load(os.path.join(os.getcwd(),'Image/testName.npy'))

df_test = submission_template.assign(file=testName, species=predLabel)
df_test.to_csv('submission_resnet50.csv', index=False)