In [0]:
import pandas as pd
import numpy as np
import matplotlib

import tensorflow as tf
from zipfile import ZipFile
import os

from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.backend import clear_session
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers import BatchNormalization
from keras import backend as K

import random
import sys
import cv2
from keras.utils import to_categorical

In [0]:

from glob import glob 
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import math


In [10]:
# Mount Google Drive at /content/gdrive so the project folder stored there can be
# accessed through ordinary file paths. `google.colab` is the Colab helper package,
# so this cell is expected to work only inside a Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Make the project folder on the mounted Drive the working directory; relative
# paths used afterwards resolve against it.
# NOTE(review): absolute, account-specific path -- must be edited to run elsewhere.
os.chdir('/content/gdrive/My Drive/Colab Notebooks/project2')

In [13]:
# Display the current working directory to confirm the chdir took effect.
os.getcwd()

'/content/gdrive/My Drive/Colab Notebooks/project2'

In [14]:
# List the contents of the project folder. With the working directory set to
# .../project2, "../project2" points back at that same folder.
print(os.listdir("../project2"))

['test', 'sample_submission.csv', 'ComputerVisionWithCNN_R7_Project1.ipynb', 'Untitled0.ipynb', 'train']


In [0]:

# Collect the image paths.
#   * training images live one directory deep: train/<class folder>/<file>.png
#   * test images sit directly in test/
# glob() returns matches in arbitrary (filesystem-dependent) order, so the lists
# are sorted: every later step that depends on sample order (the seeded
# train/validation split, any prediction-to-file pairing) then sees the same
# order on every run and every machine.
train_path = '../project2/train/*/*.png'
test_path = '../project2/test/*.png'
train_files = sorted(glob(train_path))
test_files = sorted(glob(test_path))

In [0]:
def _load_images(paths, size=(80, 80)):
    """Read each image file with OpenCV and resize it to ``size``.

    Args:
        paths: iterable of image file paths.
        size: target size handed to ``cv2.resize`` (80x80 by default).

    Returns:
        A NumPy array stacking the resized images in the order of ``paths``.

    Raises:
        IOError: if ``cv2.imread`` cannot load a file. It signals that by
            returning ``None`` instead of raising, which would otherwise only
            surface as an opaque error inside ``cv2.resize``.
    """
    images = []
    for path in paths:
        image = cv2.imread(path)
        if image is None:
            raise IOError("Could not read image: " + path)
        images.append(cv2.resize(image, size))
    return np.asarray(images)


def _parent_folder(path):
    """Return the name of the directory that directly contains ``path``."""
    return path.split('/')[-2]


# Training set: pixels plus the label taken from each file's parent folder.
train_images = _load_images(train_files)
train_label = pd.DataFrame([_parent_folder(path) for path in train_files])

# Test set. The label is now derived from each test file's OWN path; the previous
# code reused the loop variable left over from the training loop, so every test
# row silently received the folder name of the last training image.
# NOTE(review): if the test files sit directly in one folder, this column is just
# that folder's name for every row -- consider storing the file name instead if a
# per-image identifier is needed later.
test_images = _load_images(test_files)
test_label = pd.DataFrame([_parent_folder(path) for path in test_files])

In [0]:
# Divide both image arrays by 255 (the pixel-value scale factor used throughout).
# NOTE: the arrays are rebound to their scaled versions, so running this cell a
# second time without reloading the images would divide them again.
train_images, test_images = train_images / 255, test_images / 255

In [19]:

# Turn the species names into one-hot target vectors.
# `to_categorical` is taken from the public `keras.utils` namespace (the same
# import the notebook's first cell already uses) instead of going through the
# internal `np_utils` module, which newer Keras releases no longer expose.
from keras.utils import to_categorical
from sklearn import preprocessing

# Map each distinct species name (column 0 of train_label) to an integer id.
label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(train_label[0])
print("The different species of plants are: " + str(label_encoder.classes_))
encoded_train_labels = label_encoder.transform(train_label[0])

#Convert it to categorical encoders of 0s and 1s
# The width is pinned to the encoder's class count rather than inferred from the
# largest id present, so it always matches `label_encoder.classes_`.
num_clases = len(label_encoder.classes_)
encoded_train_labels = to_categorical(encoded_train_labels, num_classes=num_clases)
print("The total plant categories are: " + str(num_clases))

The different species of plants are: ['Black-grass' 'Charlock' 'Cleavers' 'Common Chickweed' 'Common wheat'
 'Fat Hen' 'Loose Silky-bent' 'Maize' 'Scentless Mayweed'
 'Shepherds Purse' 'Small-flowered Cranesbill' 'Sugar beet']
The total plant categories are: 12


In [0]:
# Split the labelled data into a training part and a validation part.
# 8% of the samples are held out; stratifying on the one-hot labels keeps the
# class proportions similar in both parts, and the fixed seed makes the split
# repeatable for a given input order.
from sklearn.model_selection import train_test_split

seed = 100
trainX, validationX, trainY, validationY = train_test_split(
    train_images,
    encoded_train_labels,
    test_size=0.08,
    random_state=seed,
    stratify=encoded_train_labels,
)

In [21]:
# Sanity check: show the shape of each array produced by the split, in the order
# train images, train labels, validation images, validation labels.
for split_array in (trainX, trainY, validationX, validationY):
    print(split_array.shape)

(1452, 80, 80, 3)
(1452, 12)
(127, 80, 80, 3)
(127, 12)


**Build the sequential model**

In [30]:
# Baseline CNN: input batch-norm -> 2 x (conv, max-pool, dropout) -> flatten
# -> 2 dense layers -> softmax over the plant classes.

# `Activation` is used below but is not part of the notebook's import cell;
# importing it here keeps the cell runnable on a fresh kernel.
from keras.layers import Activation

# Clear out tensorflow memory
clear_session()

# Define Model
# The first layer declares the input shape (80x80 images, 3 channels); later
# layers infer their input from the layer before them, so they do not repeat it.
model = Sequential()
model.add(BatchNormalization(input_shape = (80, 80, 3)))

# 1st Conv Layer
model.add(Conv2D(32, (3,3), activation='relu', padding="same"))

# Max Pooling layer
model.add(MaxPooling2D(pool_size=2))

# Dropout
model.add(Dropout(rate = 0.2))

# 2nd Conv Layer
model.add(Conv2D(filters=64, kernel_size=5, kernel_initializer = 'he_normal', padding="same"))
model.add(Activation("relu"))

# Max Pooling layer
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Dropout
model.add(Dropout(rate = 0.2))

# Flattening the data
model.add(Flatten())

# 1st dense layer
model.add(Dense(128, kernel_initializer = 'he_normal'))
model.add(Activation("relu"))

# Dropout
model.add(Dropout(rate = 0.3))

# 2nd dense layer
model.add(Dense(64, kernel_initializer = 'he_normal'))
model.add(Activation("relu"))

# Output layer
# `units` is the Keras 2 name of the legacy `output_dim` argument. The size comes
# from `num_clases` (computed when the labels were encoded) instead of a literal
# 12, matching how the second model in this notebook sizes its output layer.
model.add(Dense(units=num_clases, activation = 'softmax'))

model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_1 (Batch (None, 80, 80, 3)         12        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 80, 80, 32)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 40, 40, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 40, 40, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 40, 40, 64)        51264     
_________________________________________________________________
activation_1 (Activation)    (None, 40, 40, 64)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 20, 20, 64)       



In [31]:
# Configure training: Adam optimizer, categorical cross-entropy loss (targets are
# one-hot vectors), accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs in mini-batches of 60, scoring the validation split after
# each epoch. The returned History object is the cell's displayed value.
model.fit(
    trainX,
    trainY,
    validation_data=(validationX, validationY),
    epochs=10,
    batch_size=60,
)

Train on 1452 samples, validate on 127 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f8b5091cda0>

**Optimize the Model**

In [26]:
from keras import backend as K

# Drop any previously built graph before defining the deeper network.
clear_session()

model = Sequential()

# Three convolutional stages with 64, 128 and 256 filters. Each stage is:
#   conv -> batch-norm -> conv -> max-pool -> batch-norm -> dropout
# The convolutions use 3x3 kernels with the default padding.
for stage_index, n_filters in enumerate((64, 128, 256)):
    # Only the very first layer of the network declares the input shape.
    first_layer_kwargs = {'input_shape': (80, 80, 3)} if stage_index == 0 else {}
    model.add(Conv2D(filters=n_filters, kernel_size=(3,3), activation='relu',
                     **first_layer_kwargs))
    model.add(BatchNormalization(axis=3))
    model.add(Conv2D(filters=n_filters, kernel_size=(3,3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(BatchNormalization(axis=3))
    model.add(Dropout(0.12))

model.add(Flatten())

# Classifier head: four identical dense stages (dense -> batch-norm -> dropout).
for _ in range(4):
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.25))

# One softmax output per plant class.
model.add(Dense(num_clases, activation='softmax'))

model.summary()

# compile model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 78, 78, 64)        1792      
_________________________________________________________________
batch_normalization_1 (Batch (None, 78, 78, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 76, 76, 64)        36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 38, 38, 64)        0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 38, 38, 64)        256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 38, 38, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 36, 36, 128)      

In [27]:

# Train the deeper network: 15 epochs, mini-batches of 128, validation split
# scored after every epoch. `plant_model` holds the value returned by fit()
# (the training History), not the network itself -- the network stays in `model`.
plant_model = model.fit(
    trainX,
    trainY,
    validation_data=(validationX, validationY),
    epochs=15,
    batch_size=128,
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 1452 samples, validate on 127 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [28]:
# Score the trained model on the held-out validation split. With the single
# 'accuracy' metric configured at compile time the result prints as [loss, accuracy].
loss_and_metrics = model.evaluate(x=validationX, y=validationY)
print(loss_and_metrics)


[0.9576881274463623, 0.7637795289670388]


In [29]:
# Score the same model on the data it was trained on, for comparison with the
# validation result. Note that `loss_and_metrics` is overwritten by this cell.
loss_and_metrics = model.evaluate(x=trainX, y=trainY)
print(loss_and_metrics)

[0.21231539067158028, 0.9221763083757448]
