## Seedling - ResNet - (Not default size)

In [1]:
import numpy as np 
import pandas as pd 
import os
from keras.utils.np_utils import to_categorical
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from keras.applications import *
from sklearn.model_selection import StratifiedShuffleSplit

Using TensorFlow backend.


In [3]:
sharpTrainImage = np.load(os.path.join(os.getcwd(),'Image/sharpTrainImage.npy'))
sharpTestImage = np.load(os.path.join(os.getcwd(),'Image/sharpTestImage.npy'))
trainLabels = np.load(os.path.join(os.getcwd(),'Image/trainLabels.npy'))

print("Numbers of sharpTrainImage images", len(sharpTrainImage))
print("Numbers of sharpTestImage images", len(sharpTestImage))
print("Numbers of train labels", len(trainLabels))

Numbers of sharpTrainImage images 4750
Numbers of sharpTestImage images 794
Numbers of train labels 4750


In [5]:
sharpTestImage[0].shape

(299, 299, 3)

In [6]:
# one - hot coding
trainLabels = to_categorical(trainLabels, 12)

In [7]:
trainLabels.shape

(4750, 12)

In [8]:
sharpTrainImage = np.load(os.path.join(os.getcwd(),'Image/sharpTrainImage.npy'))

In [9]:
#x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.01, random_state=42)

sss = StratifiedShuffleSplit(n_splits=1, test_size=0.16, random_state=42) # Want a balanced split for all the classes

for train_index, test_index in sss.split(sharpTrainImage, trainLabels):
    print("Using {} for training and {} for validation".format(len(train_index), len(test_index)))
    x_train, x_valid = sharpTrainImage[train_index], sharpTrainImage[test_index]
    y_train, y_valid = trainLabels[train_index], trainLabels[test_index]

Using 3990 for training and 760 for validation


In [10]:
datagen = ImageDataGenerator(
                            rotation_range=360.,
                            width_shift_range=0.3,
                            height_shift_range=0.3,
                            zoom_range=0.3,
                            horizontal_flip=True, 
                            vertical_flip=True)

In [11]:
epochs = 20
learning_rate = 0.0001
batch_size = 32
dim = 299

weights = os.path.join('', 'weights.h5')

callbacks = [ EarlyStopping(monitor='val_loss', patience=5, verbose=0), 
              ModelCheckpoint(weights, monitor='val_loss', save_best_only=True, verbose=0),
              ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=0, mode='auto', epsilon=0.0001, cooldown=0, min_lr=0)]

base_model = ResNet50(input_shape=(dim, dim, 3), include_top=False, weights='imagenet', pooling='avg') 
# Average pooling reduces output dimensions
x = base_model.output
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(12, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

In [12]:
model.compile(loss='categorical_crossentropy', optimizer=optimizers.Adam(lr=learning_rate), metrics=['accuracy'])

In [13]:
import datetime
local_start = datetime.datetime.now()
# ------ TRAINING ------
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    steps_per_epoch=len(x_train)/batch_size, 
                    validation_data=datagen.flow(x_valid, y_valid, batch_size=batch_size), 
                    validation_steps=len(x_valid)/batch_size,
                    callbacks=callbacks,
                    epochs=epochs,
                    verbose=1)

print('running time: ', datetime.datetime.now()-local_start) 

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
running time:  17:07:48.125721


In [14]:
pred_test = model.predict(sharpTestImage)

In [17]:
def label_n2s(x):
    return {
        0: 'Black-grass',
        1: 'Charlock',
        2: 'Cleavers',
        3: 'Common Chickweed',
        4: 'Common wheat', 
        5: 'Fat Hen',
        6: 'Loose Silky-bent',
        7: 'Maize',
        8: 'Scentless Mayweed',
        9: 'Shepherds Purse',
        10: 'Small-flowered Cranesbill',
        11: 'Sugar beet'
    }.get(x, 'None')

In [18]:
predLabel = []
for i in range(len(pred_test)):
    label = np.argmax(pred_test[i])
    predLabel.append(label_n2s(label))

In [28]:
from glob import glob
path='Dataset/test/*.png'
files = glob(path)

testName = []
j = 1
num = len(files)
for img in files:
    if (j >= num):
        print(str(j)+"/"+str(num), end="\r")
    testName.append(img.split('/')[-1])   #Depends on the path of dataset, MODIFY
    j = j + 1

794/794

In [30]:
df_test['file'] = testName
df_test['species'] = predLabel
df_test.to_csv('submission.csv', index=False)

In [112]:
predLabel = []
for i in range(len(sharpTestImage)):
    images = datagen.flow(np.expand_dims(sharpTestImage[i],axis=0))
    pred = np.zeros((1,12))
    for j,img in enumerate(images):
        pred += model.predict(img)
        if j > 50:
            break
    label = np.argmax(pred)
    predLabel.append(label_n2s(label))

KeyboardInterrupt: 

In [130]:
images = datagen.flow(np.expand_dims(sharpTestImage[794],axis=0))
pred = np.zeros((1,12))
for j,img in enumerate(images):
    pred += model.predict(img)
    if j > 50:
        break
label = np.argmax(pred)

IndexError: index 794 is out of bounds for axis 0 with size 794

In [127]:
label_n2s(label)

'Small-flowered Cranesbill'

In [128]:
predLabel.append(label_n2s(label))

In [129]:
len(predLabel)

794

In [131]:
df_test['species'] = predLabel
df_test.to_csv('submission.csv', index=False)

In [134]:
pred_valid = model.predict(x_valid)

In [138]:
predLabel_valid = []
for i in range(len(pred_valid)):
    label = np.argmax(pred_test[i])
    predLabel_valid.append(label)

In [156]:
validLable = []
for i in range(len(y_valid)):
    validLable.append(np.where(y_valid[i]==1)[0][0] )

In [158]:
from sklearn.metrics import accuracy_score
acc_valid = accuracy_score(validLable,predLabel_valid)

In [159]:
acc_valid

0.11052631578947368

In [142]:
from sklearn.metrics import confusion_matrix
cnf_matrix = confusion_matrix(y_valid, predLabel_valid)

ValueError: Can't handle mix of multilabel-indicator and multiclass

In [None]:
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')