# Plant Seedlings Classification

Dataset: https://www.kaggle.com/c/plant-seedlings-classification/data

Goal: determine the species of a plant seedling from a single image (12 possible classes).


|Checkpoint File | Training Accuracy | Validation Accuracy  | Kaggle Score | 
| :--- | :--- | :--- | :--- |
| ./checkpoints/checkpoint_m1_1 | 0.9684 |  0.9556 | 0.95214 | 

## 1. Load Data

In [1]:
import tensorflow as tf
from tensorflow import keras
import pathlib
import numpy as np
import matplotlib.pyplot as plt
import os
import PIL.Image as Image
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [2]:
# Reset the global Keras state before any model is built in this kernel.
keras.backend.clear_session()

In [3]:
# Root folder of the plant-seedlings-classification data set.
data_dir = pathlib.Path(
    '/notebooks/storage/kaggle-solutions/Kaggle-Solutions/data/plant-seedlings-classification'
)
# Count the PNG files located exactly two levels below `train/`
# (pattern: train/<sub-folder>/<file>.png).
image_count = sum(1 for _ in data_dir.glob('train/*/*.png'))

In [4]:
# Number of images per mini-batch, shared by every data generator below.
BATCH_SIZE = 32
# Height/width (in pixels) every image is resized to when it is loaded.
IMG_HEIGHT = 224
IMG_WIDTH = 224
# Batches needed to cover `image_count` images once. np.ceil returns a float,
# so cast explicitly: a step count should be a plain int.
STEPS_PER_EPOCH = int(np.ceil(image_count / BATCH_SIZE))

In [22]:
# Fraction of the training folder held out for validation. Both generators
# below must use the same value so that their subsets stay complementary.
VALIDATION_SPLIT = 0.2

# Training pipeline: rescale pixels by 1/255 and apply random flips and
# rotations (up to 360 degrees) as augmentation.
image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=360,
        validation_split=VALIDATION_SPLIT)

# Validation pipeline: same rescaling and same split, but NO augmentation, so
# validation metrics are measured on unmodified images.
valid_image_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        validation_split=VALIDATION_SPLIT)

# Derived from `data_dir` so the data set location is configured in one place.
train_dir = str(data_dir / 'train')

# Explicit class list: fixes the label index of each species; the same list is
# used later to map predicted indices back to species names.
classes = ['Black-grass', 'Charlock', 'Cleavers', 'Common Chickweed',
       'Common wheat', 'Fat Hen', 'Loose Silky-bent', 'Maize',
       'Scentless Mayweed', 'Shepherds Purse',
       'Small-flowered Cranesbill', 'Sugar beet']

train_data_gen = image_generator.flow_from_directory(directory=train_dir,
                                                     batch_size=BATCH_SIZE,
                                                     shuffle=True,
                                                     target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                     classes=classes,
                                                     subset="training")

# No shuffling for validation: ordering does not affect the metrics and a
# fixed order keeps evaluation repeatable.
valid_data_gen = valid_image_generator.flow_from_directory(directory=train_dir,
                                                           batch_size=BATCH_SIZE,
                                                           shuffle=False,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           classes=classes,
                                                           subset="validation")

Found 4750 images belonging to 12 classes.
Found 0 images belonging to 12 classes.


## 2. Create Model

### 2.1 Convolutional Neural Net

In [6]:
# Plain CNN classifier: four convolutional stages followed by a small dense head.
model_v1 = keras.Sequential()

# Each stage is Conv(3x3, relu) -> MaxPool(2x2) -> BatchNorm -> Dropout(0.1),
# with the number of filters doubling from one stage to the next.
for stage_index, n_filters in enumerate((64, 128, 256, 512)):
    # Only the very first layer declares the input shape; it is taken from the
    # image-size constants so the model always matches the data generators.
    extra_kwargs = {'input_shape': (IMG_HEIGHT, IMG_WIDTH, 3)} if stage_index == 0 else {}
    model_v1.add(keras.layers.Conv2D(n_filters, (3, 3), activation='relu', **extra_kwargs))
    model_v1.add(keras.layers.MaxPooling2D((2, 2)))
    model_v1.add(keras.layers.BatchNormalization())
    model_v1.add(keras.layers.Dropout(0.1))

model_v1.add(keras.layers.Flatten())

# Two identical L2-regularised dense layers, each followed by light dropout.
for _ in range(2):
    model_v1.add(keras.layers.Dense(64, activation='relu',
                                    kernel_regularizer=keras.regularizers.l2(0.0001)))
    model_v1.add(keras.layers.Dropout(0.1))

# One softmax output unit per entry of `classes`.
model_v1.add(keras.layers.Dense(len(classes), activation='softmax'))

model_v1.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 222, 222, 64)      1792      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 111, 111, 64)      0         
_________________________________________________________________
batch_normalization (BatchNo (None, 111, 111, 64)      256       
_________________________________________________________________
dropout (Dropout)            (None, 111, 111, 64)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 109, 109, 128)     73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 54, 54, 128)       0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 54, 54, 128)       5

## 3. Train Model

In [7]:
# Both weight checkpoints of model v1 share the same parent folder.
_checkpoint_dir = './checkpoints'
checkpoint_path_m1_1 = _checkpoint_dir + '/checkpoint_m1_1'
checkpoint_path_m1_2 = _checkpoint_dir + '/checkpoint_m1_2'

In [8]:
# Resume from the previously saved weights instead of training from scratch.
model_v1.load_weights(filepath=checkpoint_path_m1_1)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f28849b1d68>

In [14]:
# Checkpoint callback that periodically writes the model weights to disk.
# The original referenced an undefined `checkpoint_path`; it now targets the
# same checkpoint file this notebook loads from and saves to.
# NOTE(review): an integer `save_freq` is not counted in epochs, so 5 triggers a
# save many times per epoch — confirm whether 'epoch' was intended.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path_m1_1,
    verbose=1,
    save_weights_only=True,
    save_freq=5)

In [28]:
# Divide the learning rate by 10 whenever the training loss has not improved
# by at least 1e-5 for 3 consecutive epochs.
lr_reduce = keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.1,
    patience=3,
    min_delta=1e-5,
    verbose=1,
)

In [None]:
# Stop training once the training accuracy has failed to improve by at least
# 1e-5 for 3 consecutive epochs.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='accuracy',
    min_delta=1e-5,
    patience=3,
)

In [8]:
# Inverse-time decay starting at 1e-3, with the decay horizon expressed as
# 1000 epochs' worth of steps.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=STEPS_PER_EPOCH * 1000,
    decay_rate=1,
    staircase=False,
)


def get_optimizer():
    """Return a new Adam optimizer whose learning rate follows `lr_schedule`."""
    return tf.keras.optimizers.Adam(learning_rate=lr_schedule)

In [24]:
# Two optimizer presets: Adam for the early training phase, and SGD with
# momentum at a much smaller learning rate for the final fine-tuning phase.
optimizer_early_training = tf.keras.optimizers.Adam(learning_rate=0.0001)
optimizer_final_training = tf.keras.optimizers.SGD(
    learning_rate=0.000001,
    momentum=0.9,
)

In [25]:
# Compile with the fine-tuning optimizer; the labels produced by the data
# generators are one-hot, hence categorical cross-entropy.
model_v1.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer_final_training,
    metrics=['accuracy'],
)

In [29]:
# Train for up to 30 epochs, lowering the learning rate on loss plateaus.
# steps_per_epoch=None lets Keras derive the number of steps from the generator.
model_v1_history = model_v1.fit(
    train_data_gen,
    validation_data=valid_data_gen,
    epochs=30,
    steps_per_epoch=None,
    callbacks=[lr_reduce],
)

Train for 149 steps
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 00011: ReduceLROnPlateau reducing learning rate to 9.999999974752428e-08.
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 00014: ReduceLROnPlateau reducing learning rate to 1.0000000116860975e-08.
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 00017: ReduceLROnPlateau reducing learning rate to 9.999999939225292e-10.
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 00023: ReduceLROnPlateau reducing learning rate to 9.999999717180686e-11.
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 00026: ReduceLROnPlateau reducing learning rate to 9.99999943962493e-12.
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 00029: ReduceLROnPlateau reducing learning rate to 9.999999092680235e-13.
Epoch 30/30


In [36]:
# Persist the trained weights. This writes to the same path the weights were
# loaded from, so the earlier checkpoint is replaced.
model_v1.save_weights(filepath=checkpoint_path_m1_1)

## 4. Evaluate on Test Data

### 4.1 Predicting with Convolutional Neural Net 2.1

In [30]:
# The generator is pointed at the data set root and restricted to the single
# folder `test`, so every file is read under one dummy class.
test_dir = '/notebooks/storage/kaggle-solutions/Kaggle-Solutions/data/plant-seedlings-classification/'
# Inference only needs the pixel rescaling, no augmentation.
image_generator = keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
test_data_gen = image_generator.flow_from_directory(
    directory=test_dir,
    classes=['test'],
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    shuffle=False,  # keep predictions aligned with test_data_gen.filenames
)

Found 794 images belonging to 1 classes.


In [31]:
# Class probabilities for every test image, one row per image.
# The arguments that merely restated predict()'s defaults are omitted.
pred = model_v1.predict(test_data_gen, verbose=0)

In [32]:
# Index of the most probable class for each test image.
y = pred.argmax(axis=1)

In [33]:
# Map each predicted class index back to its species name.
predicted_classes = [classes[class_index] for class_index in y]
# Keep only the bare file name. Using basename instead of slicing off a
# fixed-length 'test/' prefix works regardless of folder name or separator.
files = [os.path.basename(path) for path in test_data_gen.filenames]

In [34]:
# Two-column table (file name, predicted species), written as the CSV file
# to submit; comments='' keeps the header line free of a leading '#'.
subm = np.column_stack((files, predicted_classes))
np.savetxt(
    'submission.csv',
    subm,
    fmt='%s,%s',
    header='file,species',
    comments='',
)

## 5. Analysis

### 5.1 Analysis of Convolutional Neural Net 2.1

In [None]:
def _plot_loss_curve(epoch_index, loss_values, label):
    """Plot one loss series against the epoch index in its own figure.

    Args:
        epoch_index: x values, one per recorded epoch.
        loss_values: loss recorded at each epoch.
        label: text used for both the y-axis label and the figure title.
    """
    fig, ax = plt.subplots()
    ax.plot(epoch_index, loss_values)
    ax.set_xlabel("Number of epoch")
    ax.set_ylabel(label)
    ax.set_title(label)
    plt.show()


training_loss = model_v1_history.history['loss']
# 'val_loss' is absent when no validation metrics were recorded during fit(),
# so look it up defensively instead of raising a KeyError.
validation_loss = model_v1_history.history.get('val_loss')
epochs = range(0, len(training_loss))

# training loss
_plot_loss_curve(epochs, training_loss, "Training Loss")

# validation loss
if validation_loss:
    _plot_loss_curve(epochs, validation_loss, "Validation Loss")
else:
    print("No validation loss was recorded for this run; skipping the validation plot.")