In [0]:
from google.colab import drive

In [2]:
drive.mount('/content/drive/') # Mounting the Drive onto colab

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/


In [0]:
import os
os.chdir('/content/drive/My Drive/')

In [0]:
os.chdir('/content/drive/My Drive/train')

In [5]:
os.listdir() # Eyeballing the class labels.

['Maize',
 'Common Chickweed',
 'Small-flowered Cranesbill',
 'Fat Hen',
 'Sugar beet',
 'Shepherds Purse',
 'Cleavers',
 'Common wheat',
 'Loose Silky-bent',
 'Scentless Mayweed',
 'Black-grass',
 'Charlock']

In [7]:
# Extracting training data from the directories and storing the features and thier class labels.
# I have trained multiple models in  this notebook and it should be noted that I have changed the dimensions of the images for different models. 
# The model which I've built has inputs the shape of (128X128X3), while the ResNets50 and VGG19 (from the keras API) uses (224X224X3) as input shape.
# The data has class imbalance, as I got to know from the dataset information. 
# The total number of images from the training data is also less.
# Also, the project questions asks us to evaluate accuraices for the training and validation data. Here I have NOT evaluated on the testset.



x_train = []
y_train = []
import cv2
for i in os.listdir():
    print(i)
    if (os.path.isdir(i)):
            for j in os.listdir(i):
                try:
                    dummy = cv2.imread('/content/drive/My Drive/train/' + i + "/" + j)
                    dummy = cv2.resize(dummy,(224,224))
                    x_train.append(dummy)
                    y_train.append(i)
                except Exception as e:
                    print(e)


Maize
Common Chickweed
Small-flowered Cranesbill
Fat Hen
Sugar beet
Shepherds Purse
Cleavers
Common wheat
Loose Silky-bent
Scentless Mayweed
Black-grass
Charlock


In [8]:
x_train[0].shape

(224, 224, 3)

In [0]:
import pandas as pd
dum = pd.get_dummies(y_train) # converting the class labels to categorical variables

In [0]:
encoded_labels = dum
y_train = dum

In [0]:
import numpy as np
y_train = np.array(y_train)

In [0]:
x_train = np.array(x_train)

In [13]:
x_train[0].shape

(224, 224, 3)

In [14]:
from sklearn.model_selection import train_test_split
x_train2, x_val, y_train2, y_val = train_test_split(x_train, y_train, test_size=0.2, random_state=2) # Splitting data into training and validation sets.
print (len(x_train2))
print (len(x_val))

3808
952


In [15]:
x_train2[0].shape

(224, 224, 3)

In [0]:
x_train2 = x_train2.reshape(x_train2.shape[0],224,224,3) # Reshaping dataset so that it can be fed to the model.

In [0]:
x_val = x_val.reshape(x_val.shape[0],224,224,3)

In [0]:
x_train2 = x_train2/255. # Normalizing the features.

x_val = x_val/255.

In [20]:
print (x_train2.shape)
print (x_val.shape)

(3808, 224, 224, 3)
(952, 224, 224, 3)


In [21]:
print(y_train2.shape)
print(y_val.shape)

(3808, 12)
(952, 12)


In [22]:
# I've employed Data Augmentation to make the training more robust.

from keras.preprocessing.image import ImageDataGenerator
datagen=ImageDataGenerator(featurewise_center=True,featurewise_std_normalization=True,zca_whitening=False,zoom_range=0.2,
    rotation_range=50,  
    width_shift_range=0.1,  
    height_shift_range=0.1,  
    horizontal_flip=True, 
    vertical_flip=True)

datagen.fit(x_train2)
datagen.fit(x_val)

Using TensorFlow backend.


In [0]:
import tensorflow as tf


In [0]:
tf.keras.backend.clear_session()

In [0]:
# Initializing the seqential model -
model = tf.keras.models.Sequential()


# Adding three convolutional layers with increasing depth , ReLU activation function applied to all Conv layers.
model.add(tf.keras.layers.Conv2D(32,kernel_size=(3,3),activation='relu',input_shape=(128,128,3),name='conv_1'))
model.add(tf.keras.layers.Conv2D(64,kernel_size=(3,3),activation='relu',name='conv_2'))

# I've commented out the dropout layer, it was not added to the model.
#model.add(tf.keras.layers.Dropout(0.3,name='drop_1'))

# Adding a Pooling layer to reduce inputs to relevant dimensions-
model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2),name='max_1'))


# Adding more depth to the CNN model 
model.add(tf.keras.layers.Conv2D(128,kernel_size=(3,3),activation='relu',name='conv_5'))
model.add(tf.keras.layers.Conv2D(128,kernel_size=(3,3),activation='relu',name='conv_6'))


# Adding another pooling layer-
model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2),name='max_3'))


# Adding more convolutional layers of increasing depth -
model.add(tf.keras.layers.Conv2D(256,kernel_size=(3,3),activation='relu',name='conv_7'))
model.add(tf.keras.layers.Conv2D(256,kernel_size=(3,3),activation='relu',name='conv_8'))

# Adding the third pooling layer -
model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2),name='max_4'))

# Adding the final convolutional layer -
model.add(tf.keras.layers.Conv2D(512,kernel_size=(3,3),activation='relu',name='conv_9'))

# Adding the final pooling layer - 
model.add(tf.keras.layers.MaxPool2D(pool_size=(2,2),name='max_5'))


# Flattening the Convolved/Pooled layers to 1D-
model.add(tf.keras.layers.Flatten())

# Adding Fully Connected layers with relU activation functions -
model.add(tf.keras.layers.Dense(150,activation='relu',name='dense_0'))

# Adding a BatchNormlayer
model.add(tf.keras.layers.BatchNormalization())

# Dropout not added to the model as done for the convolutional layers above.
#model.add(tf.keras.layers.Dropout(0.4,name='drop_2'))

# Adding the next FC layers-
model.add(tf.keras.layers.Dense(100,activation='relu',name='dense_1'))
model.add(tf.keras.layers.Dense(50,activation='relu',name='dense_2'))
model.add(tf.keras.layers.Dense(20,activation='relu',name='dense_3'))


# Adding the output layer with number of classes as output and the activation function as softmax-
model.add(tf.keras.layers.Dense(12,activation='softmax',name='dense_4'))

In [0]:
# Compiling the model with adam optimizer optimizing the categorical crossentropy loss function.

model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [0]:
# Training and validating the model -
model.fit(x_train2, y_train2, batch_size=128,epochs=10,validation_data=(x_val,y_val))

Train on 3808 samples, validate on 952 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f84403389e8>

In [0]:
# OBSERVATIONS AND MODEL TUNING -


# It has to be noted that first, the dataset is very small especially for the multivariate classes. The class labels are also imbalanced.
# I got very poor accurices with running the data as it was, therefore I employed data augmentation and noticed Immediate improvement.
# I also fitted the whole data, not just in batches as the data was sparce, this also helped imprive accuracies. The accuracies were still low (around 25%).
# I built a model mimicking VGG16, but the model perfomed even more poorly ( due to vanishing gradient). We have to keep in mind that the VGG researchers added untrained layers 
# inbetween trained layers (mimicking the yet to be develped skip connections of the Resnet). As I did not do this, my model performed poorly.
# Thus I started to progressively stack more convolution layers based upon my increasing accuracy rate with each cycle. ( I had about 50% accuracy at this stage)
# What really aided my inrease in accuracy to about 85% as seen above, was to either add Dropout layers or BatchNorm, not both together.
# Out of both, though BatchNorm overfitted more, this acctucally helped to increase the validation accuracy.
# The main problem overall was that the model wasnt able to converge fast even with ADAM. The loss occillated quite a bit with every runtime.
# Due to time constraints, I am unable to babysit the model further, but in my opinion getting an accuracy above 95 % would be tough with the above architecture.
# Thus, one can appreciate the need for a large dataset with balanced classes and the leap forward that was achived with ResNets 

In [0]:
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

In [0]:
# The models below are beyond the purview of the project and I am running and tuning the models just to see how I can play with them. 
# At this stage I am still continuing to tune the models. 
# Interestingly the Resnet model perfomed more poorly when I used the pruned model, freezing the first five layers and adding the top layers when compared 
# to the whole model [include_top=True]. It seems that the weights learned during for the imagent challenge is quite valuable.

In [0]:
import keras

In [0]:
from keras import applications
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense
from keras import backend as k 
from keras.callbacks import ModelCheckpoint, EarlyStopping


In [62]:
model = applications.ResNet50(weights = None, include_top=False, input_shape = (224, 224, 3))



In [63]:
# Freezing the first 25 layers.
for layer in model.layers[:25]:
    layer.trainable = False

#Adding custom Layers 
x = model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(64, activation="relu")(x)
x = Dense(32, activation="relu")(x)
predictions = Dense(12, activation="softmax")(x)

# creating the final model 
model_final = Model(input = model.input, output = predictions)



# compile the model 
model_final.compile(loss = "categorical_crossentropy", optimizer =  keras.optimizers.SGD(lr=0.001, nesterov=True), metrics=["accuracy"])


  


In [64]:
# Save the model 
checkpoint = ModelCheckpoint("ResNet_best.h5", monitor='val_acc', verbose=1, save_best_only=True, mode='auto')
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=5, verbose=1, mode='auto')

epochs=20
# Train the model 
model_final.fit(x_train2, y_train2, epochs = epochs, validation_data=(x_val, y_val), callbacks = [checkpoint, early])

Train on 3808 samples, validate on 952 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.10924, saving model to ResNet_best.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.10924 to 0.12290, saving model to ResNet_best.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.12290 to 0.13025, saving model to ResNet_best.h5
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.13025
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.13025
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.13025
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.13025
Epoch 8/20

Epoch 00008: val_acc did not improve from 0.13025
Epoch 00008: early stopping


<keras.callbacks.History at 0x7f9ace67d240>

In [0]:
# As seen by the accuray above, freezing first 25 layers also perfomes poorly.

In [0]:
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

In [0]:
# Applying the VGG19  model ------ 

In [0]:
model = applications.VGG19(weights = "imagenet", include_top=False, input_shape = (224, 224, 3))

In [50]:
# Freeze the layers which you don't want to train. In this we are freezing the first 5 layers.
for layer in model.layers[:5]:
    layer.trainable = False

#Adding custom Layers 
x = model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(64, activation="relu")(x)
x = Dense(32, activation="relu")(x)
predictions = Dense(12, activation="softmax")(x)

# creating the final model 
model_final = Model(input = model.input, output = predictions)

#optim=(tf.keras.optimizers.SGD(learning_rate=0.01, nesterov=True))

# compile the model 
model_final.compile(loss = "categorical_crossentropy", optimizer = keras.optimizers.SGD(lr=0.001, nesterov=True), metrics=["accuracy"])


  


In [52]:
# Save the model 
checkpoint = ModelCheckpoint("vgg16_best.h5", monitor='val_acc', verbose=1, save_best_only=True, mode='auto')
early = EarlyStopping(monitor='val_acc', min_delta=0, patience=5, verbose=1, mode='auto')

epochs=20
# Train the model 
model_final.fit(x_train2, y_train2, epochs = epochs, validation_data=(x_val, y_val), callbacks = [checkpoint, early])

Train on 3808 samples, validate on 952 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.88445, saving model to vgg16_best.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.88445
Epoch 3/20

Epoch 00003: val_acc improved from 0.88445 to 0.88971, saving model to vgg16_best.h5
Epoch 4/20

Epoch 00004: val_acc improved from 0.88971 to 0.89076, saving model to vgg16_best.h5
Epoch 5/20

Epoch 00005: val_acc improved from 0.89076 to 0.89286, saving model to vgg16_best.h5
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.89286
Epoch 7/20

Epoch 00007: val_acc improved from 0.89286 to 0.90021, saving model to vgg16_best.h5
Epoch 8/20

Epoch 00008: val_acc did not improve from 0.90021
Epoch 9/20

Epoch 00009: val_acc improved from 0.90021 to 0.90126, saving model to vgg16_best.h5
Epoch 10/20

Epoch 00010: val_acc did not improve from 0.90126
Epoch 11/20

Epoch 00011: val_acc did not improve from 0.90126
Epoch 12/20

Epoch 00012: val_acc did not improve from 0.90126

<keras.callbacks.History at 0x7f9ad484d128>

In [0]:
# Accuraices much better than my model, (with trained weights and architecture from the ImageNet challenge), proving the usefullness of transfer learning.