In [1]:
import pandas as pd
import numpy as np
import os

import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from keras.applications import VGG19
import keras
from keras.models import Model
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten

Using TensorFlow backend.


## Data Loading

In [2]:
train_dataset = "plant-seedlings-classification/train"
test_dataset = "plant-seedlings-classification/test"

image_size = 128
classes = 12

WIDTH = 128
HEIGHT = 128
DEPTH = 3
INPUT_SHAPE = (WIDTH, HEIGHT, DEPTH)

In [3]:
#Build train generator

image_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=0
)

train_generator = image_datagenerator.flow_from_directory(train_dataset,
                                                    target_size=(image_size, image_size),
                                                    subset='training',
                                                    class_mode='categorical',
                                                    shuffle=True,
                                                    batch_size = 4750,
                                                    seed= 9)
X,Y = train_generator.next()

Found 4750 images belonging to 12 classes.


In [4]:
# Create test data set
class_mapping = { train_generator.class_indices[k]:k for k in train_generator.class_indices}

def create_test_dataset(test_images_dir, image_size, rescale = 1):
    filenames = []
    test_data = []
    
    for img_file in os.listdir(test_images_dir):
        filenames.append(img_file)

        img = tf.keras.preprocessing.image.load_img(test_images_dir + '/' + img_file)
        img_new = img.resize([image_size, image_size])
        img_array = tf.keras.preprocessing.image.img_to_array(img_new)
        if rescale is not 1:
            img_array = img_array * 1./255
        test_data.append(img_array)
    
    test_data = np.array(test_data,dtype=np.float32)
    
    return test_data, filenames 

test_data, filenames  = create_test_dataset(test_dataset,image_size, rescale = 1./255)

In [5]:
class_mapping

{0: 'Black-grass',
 1: 'Charlock',
 2: 'Cleavers',
 3: 'Common Chickweed',
 4: 'Common wheat',
 5: 'Fat Hen',
 6: 'Loose Silky-bent',
 7: 'Maize',
 8: 'Scentless Mayweed',
 9: 'Shepherds Purse',
 10: 'Small-flowered Cranesbill',
 11: 'Sugar beet'}

In [6]:
#Prepare submisiion file
def prepare_submission(submission_file, model):
    predicted_class = []
    for img in test_data:
        img_instance = np.expand_dims(img, axis=0)
        img_instance_class = model.predict(img_instance)
        img_instance_class = np.argmax(img_instance_class,axis=1)
        img_instance_class = class_mapping.get(img_instance_class[0], np.nan)
        predicted_class.append(img_instance_class)
        pass

    results = pd.DataFrame({"file":filenames,"species":predicted_class})
    results.to_csv(submission_file, index = False)

In [7]:
#Cross validation
num_folds = 5

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

kfold = KFold(n_splits=num_folds, random_state=1, shuffle=True)

# Merge inputs and targets
inputs = np.concatenate((x_train, x_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

In [8]:
#Train the model
def train_model(no_epochs,batch_size, model):
    
    # Define per-fold score containers
    acc_score_per_fold = []

    # K-fold Cross Validation model evaluation
    fold_no = 1
    for train, test in kfold.split(inputs, targets):

        # Fit data to model
        history = model.fit(inputs[train], targets[train],
                      batch_size=batch_size,
                      epochs=no_epochs,
                      verbose=2)

        # Generate generalization metrics
        loss, accuracy = model.evaluate(inputs[test], targets[test], verbose=0)
        acc_score_per_fold.append(accuracy)

        print(f'> Fold {fold_no} Accuracy score: {accuracy}')
        # Increase fold number
        fold_no = fold_no + 1

    # == Provide average scores ==
    print('Average accuracy score for all folds:')
    print(f'> Accuracy score: {np.mean(acc_score_per_fold)} (+- {np.std(acc_score_per_fold)})')
    print('------------------------------------------------------------------------')

In [9]:
vgg_model = VGG19(weights='imagenet', input_shape=INPUT_SHAPE, include_top=False)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.



In [10]:
# VGG19 Model summary
vgg_model.summary()

Model: "vgg19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, 128, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0     

In [11]:
def setTrainableLayersVGG(vgg_model):
    for layer in vgg_model.layers:
        set_trainable = False
        if layer.name in ['block5_conv1', 'block5_conv2','block5_conv3', 'block5_conv4','block5_pool']: # add layer name which are needed to retrain
            set_trainable = True
            
        if set_trainable:
            layer.trainable = True
        else:
            layer.trainable = False
    return vgg_model

In [12]:
def create_transfer_model(fineTune,number_of_hidden_layers, optimizer, activation ):
    #Clear any previous model from memory
    tf.keras.backend.clear_session()
    
    tranfer_model = VGG19(weights='imagenet', input_shape=INPUT_SHAPE, include_top=False)

    if fineTune == True: # Configured layers will be opened to modify
        tranfer_model = setTrainableLayersVGG(tranfer_model) 
    else: # All layers will be frozen
        for layer in tranfer_model.layers:
            layer.trainable = False 


    output = tranfer_model.layers[-1].output
    output = keras.layers.Flatten()(output)
    trans_model = Model(tranfer_model.input, output)

    model = Sequential()
    model.add(trans_model)

    for i in range(0,number_of_hidden_layers):
            model.add(Dense(512))
            model.add(Activation(activation))
            model.add(Dropout(0.3))

    model.add(Dense(12, activation='softmax'))
    
    # Model summary
    model.summary()
    
    #Specify Loass and Optimizer
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

## Create models with different configurations

In [20]:
# V4
model = create_transfer_model(fineTune=False,number_of_hidden_layers=0,optimizer='adam',activation='relu' )

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model_1 (Model)              (None, 8192)              20024384  
_________________________________________________________________
dense_1 (Dense)              (None, 12)                98316     
Total params: 20,122,700
Trainable params: 98,316
Non-trainable params: 20,024,384
_________________________________________________________________


In [16]:
# V5
model = create_transfer_model(fineTune=True,number_of_hidden_layers=0,optimizer='adam',activation='relu' )

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model_1 (Model)              (None, 8192)              20024384  
_________________________________________________________________
dense_1 (Dense)              (None, 12)                98316     
Total params: 20,122,700
Trainable params: 9,537,548
Non-trainable params: 10,585,152
_________________________________________________________________


In [17]:
# V6
model = create_transfer_model(fineTune=True,number_of_hidden_layers=1,optimizer='adam',activation='relu' )

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model_1 (Model)              (None, 8192)              20024384  
_________________________________________________________________
dense_1 (Dense)              (None, 512)               4194816   
_________________________________________________________________
activation_1 (Activation)    (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 12)                6156      
Total params: 24,225,356
Trainable params: 13,640,204
Non-trainable params: 10,585,152
_________________________________________________________________


## Train the model

In [21]:
train_model(10, 100, model) # V4


Epoch 1/10
 - 253s - loss: 2.1214 - accuracy: 0.2613
Epoch 2/10
 - 252s - loss: 1.5911 - accuracy: 0.4863
Epoch 3/10
 - 267s - loss: 1.3770 - accuracy: 0.5674
Epoch 4/10
 - 263s - loss: 1.2344 - accuracy: 0.6311
Epoch 5/10
 - 247s - loss: 1.1343 - accuracy: 0.6653
Epoch 6/10
 - 246s - loss: 1.0471 - accuracy: 0.6955
Epoch 7/10
 - 246s - loss: 0.9727 - accuracy: 0.7276
Epoch 8/10
 - 246s - loss: 0.9335 - accuracy: 0.7287
Epoch 9/10
 - 258s - loss: 0.8712 - accuracy: 0.7603
Epoch 10/10
 - 259s - loss: 0.8208 - accuracy: 0.7737
> Fold 1 Accuracy score: 0.5736842155456543
Epoch 1/10
 - 256s - loss: 0.9020 - accuracy: 0.7400
Epoch 2/10
 - 256s - loss: 0.8349 - accuracy: 0.7663
Epoch 3/10
 - 249s - loss: 0.7940 - accuracy: 0.7800
Epoch 4/10
 - 258s - loss: 0.7357 - accuracy: 0.8039
Epoch 5/10
 - 270s - loss: 0.7094 - accuracy: 0.8118
Epoch 6/10
 - 252s - loss: 0.6809 - accuracy: 0.8192
Epoch 7/10
 - 250s - loss: 0.6500 - accuracy: 0.8311
Epoch 8/10
 - 251s - loss: 0.6388 - accuracy: 0.8311


In [14]:
train_model(10, 100, model) # V5


Epoch 1/10
 - 298s - loss: 2.5094 - accuracy: 0.1250
Epoch 2/10
 - 302s - loss: 2.3932 - accuracy: 0.1405
Epoch 3/10
 - 303s - loss: 2.3491 - accuracy: 0.1718
Epoch 4/10
 - 304s - loss: 2.2668 - accuracy: 0.1926
Epoch 5/10
 - 304s - loss: 1.9568 - accuracy: 0.2942
Epoch 6/10
 - 304s - loss: 1.6319 - accuracy: 0.3895
Epoch 7/10
 - 304s - loss: 1.3973 - accuracy: 0.4853
Epoch 8/10
 - 337s - loss: 1.1559 - accuracy: 0.5858
Epoch 9/10
 - 333s - loss: 0.9559 - accuracy: 0.6563
Epoch 10/10
 - 313s - loss: 0.7194 - accuracy: 0.7474
> Fold 1 Accuracy score: 0.551578938961029
Epoch 1/10
 - 306s - loss: 0.7981 - accuracy: 0.7287
Epoch 2/10
 - 305s - loss: 0.5543 - accuracy: 0.8097
Epoch 3/10
 - 305s - loss: 0.4055 - accuracy: 0.8618
Epoch 4/10
 - 305s - loss: 0.3144 - accuracy: 0.8900
Epoch 5/10
 - 305s - loss: 0.2618 - accuracy: 0.9100
Epoch 6/10
 - 305s - loss: 0.1585 - accuracy: 0.9487
Epoch 7/10
 - 305s - loss: 0.1343 - accuracy: 0.9534
Epoch 8/10
 - 306s - loss: 0.1554 - accuracy: 0.9466
E

In [18]:
train_model(10, 100, model) # V6

Epoch 1/10
 - 309s - loss: 2.6976 - accuracy: 0.1155
Epoch 2/10
 - 305s - loss: 2.4973 - accuracy: 0.1205
Epoch 3/10
 - 305s - loss: 2.4793 - accuracy: 0.1324
Epoch 4/10
 - 306s - loss: 2.4242 - accuracy: 0.1429
Epoch 5/10
 - 307s - loss: 2.4150 - accuracy: 0.1429
Epoch 6/10
 - 307s - loss: 2.4137 - accuracy: 0.1426
Epoch 7/10
 - 306s - loss: 2.4121 - accuracy: 0.1421
Epoch 8/10
 - 306s - loss: 2.4139 - accuracy: 0.1429
Epoch 9/10
 - 306s - loss: 2.4138 - accuracy: 0.1426
Epoch 10/10
 - 306s - loss: 2.4133 - accuracy: 0.1416
> Fold 1 Accuracy score: 0.1168421059846878
Epoch 1/10
 - 306s - loss: 2.4149 - accuracy: 0.1342
Epoch 2/10
 - 306s - loss: 2.4163 - accuracy: 0.1342
Epoch 3/10
 - 306s - loss: 2.4156 - accuracy: 0.1363
Epoch 4/10
 - 306s - loss: 2.4145 - accuracy: 0.1295
Epoch 5/10
 - 306s - loss: 2.4151 - accuracy: 0.1361
Epoch 6/10
 - 307s - loss: 2.4155 - accuracy: 0.1321
Epoch 7/10
 - 307s - loss: 2.4151 - accuracy: 0.1305
Epoch 8/10
 - 307s - loss: 2.4145 - accuracy: 0.1324
E

## Prepare the submission

In [19]:
prepare_submission("submissions/tl_submission_v3.csv", model)

In [None]:
# V4 - Accuracy score: 0.7886315822601319 (+- 0.11797064998216382)
# V5 - Accuracy score: 0.8469473600387574 (+- 0.16445676867893905)
# V6 - Accuracy score: 0.13768420815467836 (+- 0.012830017812346071)