# Image classification using CNNs in Keras


In [None]:
# Importing the necessary packages

import pandas as pd
import numpy as np
import matplotlib

import tensorflow as tf
from zipfile import ZipFile
import os

from keras.models import Sequential
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers import Activation, Flatten, Dense, Dropout, BatchNormalization, LeakyReLU
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.backend import clear_session
from sklearn.metrics import accuracy_score

import random
import sys
import cv2
from keras.utils import to_categorical

In [7]:
# Mounting the google drive
# (Colab only) Makes the Drive contents available under /content/gdrive,
# which the test-image path used later in this notebook points into.

from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
#  Converting labels to classes and assigning numbers

def classes_to_int(label):
    """Translate a seedling class name into its integer id.

    Leading and trailing whitespace is removed before the lookup. The twelve
    known class names map to 0-11; any other name is reported on stdout and
    mapped to 12.
    """
    id_by_name = {
        "Black-grass": 0,
        "Charlock": 1,
        "Cleavers": 2,
        "Common Chickweed": 3,
        "Common wheat": 4,
        "Fat Hen": 5,
        "Loose Silky-bent": 6,
        "Maize": 7,
        "Scentless Mayweed": 8,
        "Shepherds Purse": 9,
        "Small-flowered Cranesbill": 10,
        "Sugar beet": 11,
    }
    cleaned = label.strip()
    class_id = id_by_name.get(cleaned)
    if class_id is None:
        print("Invalid Label", cleaned)
        return 12
    return class_id


#  Converting back to labels from numbers

def int_to_classes(i):
    """Translate an integer class id back into its seedling class name.

    Ids 0-11 return the matching class name. Any other value is reported on
    stdout and the string "Invalid Class" is returned.
    """
    names_in_id_order = (
        "Black-grass",
        "Charlock",
        "Cleavers",
        "Common Chickweed",
        "Common wheat",
        "Fat Hen",
        "Loose Silky-bent",
        "Maize",
        "Scentless Mayweed",
        "Shepherds Purse",
        "Small-flowered Cranesbill",
        "Sugar beet",
    )
    # Compare with == (rather than indexing) so that values such as 1.0 or
    # NumPy integers are matched the same way a chain of equality tests would.
    for class_id, class_name in enumerate(names_in_id_order):
        if i == class_id:
            return class_name
    print("Invalid class ", i)
    return "Invalid Class"

## 1. Read the images and generate the train and test dataset

In [None]:
# Extracted the data of Zip file through the commands:
#with ZipFile('test.zip', 'r') as z:
#  z.extractall()

In [None]:
# Opening train folder
# NOTE(review): the cells that follow call os.listdir() and os.getcwd() without
# a path, so they depend on this working directory; run this cell first.
os.chdir('/content/train')

In [12]:
# Listing the contents of the train folder
# (the current working directory; the output shows one sub-folder per seedling class)
os.listdir()

['Maize',
 'Shepherds Purse',
 'Fat Hen',
 'Common wheat',
 'Loose Silky-bent',
 'Cleavers',
 'Charlock',
 'Sugar beet',
 'Scentless Mayweed',
 'Black-grass',
 'Common Chickweed',
 'Small-flowered Cranesbill']

#### TRAIN DATA

In [None]:
# Loading all the images, pre-processing them, and storing them in a list of train data

def readTrainData(trainDir):
    """Load every training image found under ``trainDir``.

    ``trainDir`` is expected to contain one sub-directory per class, named
    after the class, holding that class's image files.

    Args:
        trainDir: Path of the directory that holds the per-class folders.

    Returns:
        A ``(data, labels)`` tuple of parallel lists. ``data`` holds the image
        arrays resized to 128x128, and ``labels`` holds the integer returned
        by ``classes_to_int`` for the folder each image came from.
    """
    data = []
    labels = []

    # List trainDir itself rather than the current working directory, so the
    # result does not depend on an earlier os.chdir(). Sorting makes the
    # sample order reproducible, because os.listdir() order is arbitrary.
    for directory in sorted(os.listdir(trainDir)):
        classDirPath = os.path.join(trainDir, directory)
        if not os.path.isdir(classDirPath):
            continue  # stray files next to the class folders are not classes

        for imageFileName in sorted(os.listdir(classDirPath)):
            imageFullPath = os.path.join(classDirPath, imageFileName)
            if not os.path.isfile(imageFullPath):
                continue  # nested folders cannot be loaded as images
            img = load_img(imageFullPath)
            arr = img_to_array(img)  # Converting image to array
            arr = cv2.resize(arr, (128, 128))  # Resizing the array
            data.append(arr)
            labels.append(classes_to_int(directory))
    return data, labels

In [None]:
# The current working directory is used as the root of the training images
path = os.getcwd()
# X: list of image arrays, Y: list of integer class labels in the same order
X, Y = readTrainData(path)

In [None]:
# Scaling the data
# Dividing by 255 rescales the pixel intensities (assuming 8-bit images) to [0, 1].
# float32 is used instead of the 64-bit "float": it halves the memory taken by
# the image array and matches the default float type Keras trains with.
X = np.array(X, dtype="float32") / 255.0
Y = np.array(Y)

In [None]:
# Converting the target column to 12 categorical classes
# (one-hot rows, so the labels match the 12-unit softmax output of the models below)
Y =  to_categorical(Y, num_classes=12)

#### TEST DATA

In [None]:
# Loading all the images, pre-processing them, and storing them in a list of test data

def readTestData(testDir):
    """Load every image file found directly inside ``testDir``.

    Args:
        testDir: Path of the directory that holds the test image files.

    Returns:
        A ``(data, filenames)`` tuple of parallel lists. ``data`` holds the
        image arrays resized to 128x128, and ``filenames`` holds the file name
        each array was read from.
    """
    data = []
    filenames = []

    # Sorting makes the order reproducible; os.listdir() order is arbitrary.
    for imageFileName in sorted(os.listdir(testDir)):
        imageFullPath = os.path.join(testDir, imageFileName)
        if not os.path.isfile(imageFullPath):
            # Sub-folders (e.g. notebook checkpoint folders) are not images.
            continue
        img = load_img(imageFullPath)
        arr = img_to_array(img)
        arr = cv2.resize(arr, (128, 128))
        data.append(arr)
        filenames.append(imageFileName)
    return data, filenames

# NOTE(review): absolute path into the mounted Google Drive; adjust when running elsewhere.
path2 = '/content/gdrive/My Drive/Colab Notebooks/plant-seedlings-classification/test/'
X_test, filenames = readTestData(path2)

# Scaling the data
# float32 instead of the 64-bit "float": half the memory, and it matches the
# default float type Keras works with.
X_test = np.array(X_test, dtype="float32") / 255.0

## 2. Divide the data set into Train and validation data sets

In [None]:
# Dividing the data set into train and validation datasets
# 70% of the samples go to training and 30% to validation; the fixed
# random_state keeps the split identical between runs for the same input order.

(X_train, X_val, Y_train, Y_val) = train_test_split(X, Y, test_size = 0.3, random_state = 47)

## 3. Initialize & build the model

In [19]:
# Clear out tensorflow memory
clear_session()

# Define Model
# Baseline CNN: input normalisation -> 2 x (conv, max-pool, dropout) -> 2 dense layers -> softmax
model = Sequential()
model.add(BatchNormalization(input_shape = (128,128,3)))

# 1st Conv Layer
# input_shape is only needed on the first layer of a Sequential model, so it is not repeated here
model.add(Conv2D(32, (3,3), activation='relu', padding="same"))

# Max Pooling layer
model.add(MaxPooling2D(pool_size=2))

# Dropout
model.add(Dropout(rate = 0.2))

# 2nd Conv Layer
model.add(Conv2D(filters=64, kernel_size=5, kernel_initializer = 'he_normal', padding="same"))
model.add(Activation("relu"))

# Max Pooling layer
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Dropout
model.add(Dropout(rate = 0.2))

# Flattening the data
model.add(Flatten())

# 1st dense layer
model.add(Dense(128, kernel_initializer = 'he_normal'))
model.add(Activation("relu"))

# Dropout
model.add(Dropout(rate = 0.3))

# 2nd dense layer
model.add(Dense(64, kernel_initializer = 'he_normal'))
model.add(Activation("relu"))

# Output layer: one unit per seedling class
# `units` is the Keras 2 name of the argument formerly called `output_dim`
model.add(Dense(units=12, activation = 'softmax'))

model.summary()














Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization_1 (Batch (None, 128, 128, 3)       12        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 64, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 64, 64)        51264     
_________________________________________________________________
activation_1 (Activation)    (None, 64, 64, 64)        0 



In [20]:
# Loss and Optimizer
# Categorical cross-entropy pairs with the one-hot targets and the softmax output layer.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Training the model
# 10 epochs in mini-batches of 60; the validation set is evaluated after each epoch.
model.fit(X_train, Y_train, batch_size=60, epochs=10, validation_data=(X_val, Y_val))



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 3325 samples, validate on 1425 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f66ac803710>

## 4. Optimize the model

In [25]:
# Clear out tensorflow memory
clear_session()

# Define Model
# Variant of the baseline: LeakyReLU activations, an extra 32-unit dense layer, and 30 epochs
model = Sequential()
model.add(BatchNormalization(input_shape = (128,128,3)))

# 1st Conv Layer
# input_shape is only needed on the first layer of a Sequential model, so it is not repeated here
model.add(Conv2D(32, (3,3)))
model.add(LeakyReLU(alpha=0.1))

# Max Pooling layer
model.add(MaxPooling2D(pool_size=2))

# Dropout
model.add(Dropout(rate = 0.2))

# 2nd Conv Layer
model.add(Conv2D(filters=64, kernel_size=5, padding="same"))
model.add(LeakyReLU(alpha=0.1))

# Max Pooling layer
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))

# Dropout
model.add(Dropout(rate = 0.2))

# Flattening the data
model.add(Flatten())

# 1st dense layer
model.add(Dense(128, kernel_initializer = 'he_normal'))
model.add(LeakyReLU(alpha=0.1))

# Dropout
model.add(Dropout(rate = 0.3))

# 2nd dense layer
model.add(Dense(64, kernel_initializer = 'he_normal'))
model.add(LeakyReLU(alpha=0.1))

# 3rd dense layer
model.add(Dense(32, kernel_initializer = 'he_normal'))
model.add(LeakyReLU(alpha=0.1))

# Output layer: one unit per seedling class
# `units` is the Keras 2 name of the argument formerly called `output_dim`
model.add(Dense(units=12, activation = 'softmax'))

# Loss and Optimizer
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Training the model
model.fit(X_train, Y_train, batch_size=60, epochs=30, validation_data=(X_val, Y_val))



Train on 3325 samples, validate on 1425 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f6525e3cf60>

## 5. Predict the accuracy for both train and validation data

In [None]:
# Softmax outputs of the most recently trained model:
# Y_predict1 for the validation images, Y_predict2 for the training images
Y_predict1 = model.predict(X_val)
Y_predict2 = model.predict(X_train)

In [32]:
# Finding the accuracy:
# Both the one-hot targets and the predicted probabilities are reduced to
# class indices with argmax before they are compared.

accuracy1 = accuracy_score(Y_val.argmax(axis=1), Y_predict1.argmax(axis=1))
print("The accuracy of validation data is", round(accuracy1*100, 2))

accuracy2 = accuracy_score(Y_train.argmax(axis=1), Y_predict2.argmax(axis=1))
print("The accuracy of train data is", round(accuracy2*100, 2))

The accuracy of validation data is 73.26
The accuracy of train data is 99.88


In [1]:
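# The accuracy on the validation data is 73.26 percent,
# whereas the accuracy on the train data is 99.88 percent.
# Hence we can say that the model classifies the given images reasonably well, but the
# large gap between train and validation accuracy indicates that it is overfitting.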