# Image categorisation

*  JantaHack Computer Vision by analyticsvidhya
*  Classify emergency and non-emergency vehicle from images

In [1]:
import pandas as pd
import numpy as np
import random
import math
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPool2D, BatchNormalization
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils.np_utils import to_categorical

import seaborn as sns
from matplotlib import pyplot as plt

Using TensorFlow backend.
  import pandas.util.testing as tm


In [0]:
# numpy random number geneartor seed
# for reproducibility
np.random.seed(123)

# set plot rc parameters
# jtplot.style(grid=False)
plt.rcParams['figure.facecolor'] = 'white'
plt.rcParams['axes.facecolor'] = '#232323'
#plt.rcParams['axes.edgecolor'] = '#FFFFFF'
plt.rcParams['figure.figsize'] = 10, 7
plt.rcParams['legend.loc'] = 'best'
plt.rcParams['legend.framealpha'] = 0.2
plt.rcParams['text.color'] = '#666666'
plt.rcParams['axes.labelcolor'] = '#666666'
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['axes.titlesize'] = 16
plt.rcParams['xtick.color'] = '#666666'
plt.rcParams['xtick.labelsize'] = 14
plt.rcParams['ytick.color'] = '#666666'
plt.rcParams['ytick.labelsize'] = 14

# plt.rcParams['font.size'] = 16

sns.color_palette('dark')
%matplotlib inline

## Load data

In [3]:
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
data_dir = '/content/drive/My Drive/JantaHack Computer Vision/data'
img_dir = '/content/drive/My Drive/JantaHack Computer Vision/data/images'

In [5]:
train_img = pd.read_csv(data_dir+'/train.csv')
test_img = pd.read_csv(data_dir+'/test_vc2kHdQ.csv')
train_img.shape, test_img.shape

((1646, 2), (706, 1))

In [0]:
train_img['emergency_or_not'] = train_img['emergency_or_not'].apply(lambda x: str(x))

In [7]:
train_img.head()

Unnamed: 0,image_names,emergency_or_not
0,1503.jpg,0
1,1420.jpg,0
2,1764.jpg,0
3,1356.jpg,0
4,1117.jpg,0


## Load Image data

### Load image to array

In [8]:
im1 = load_img(img_dir+'/23.jpg')
im1_array = img_to_array(im1)
im1_array.shape

(224, 224, 3)

In [0]:
# x = np.zeros(shape = [1646, 224, 224, 3])

In [0]:
# for i, img in enumerate(train_img['image_names'].values):
#     x[i] = img_to_array(load_img('data/images/'+img))

### Load image using generators

In [0]:
datagen = ImageDataGenerator(rescale=1./255.,validation_split=0.25)

In [12]:
# train generator
train_generator=datagen.flow_from_dataframe(dataframe=train_img,
                                            directory=img_dir,
                                            x_col="image_names",
                                            y_col="emergency_or_not",
                                            subset="training",
                                            batch_size=32,
                                            seed=42,
                                            shuffle=True,
                                            class_mode="binary",
                                            target_size=(224,224),
                                            color_mode='rgb')
# validation data generator
valid_generator=datagen.flow_from_dataframe(dataframe=train_img,
                                            directory=img_dir,
                                            x_col="image_names",
                                            y_col="emergency_or_not",
                                            subset="validation",
                                            batch_size=32,
                                            seed=42,
                                            shuffle=True,
                                            class_mode="binary",
                                            target_size=(224,224),
                                            color_mode='rgb')

Found 1235 validated image filenames belonging to 2 classes.
Found 411 validated image filenames belonging to 2 classes.


In [13]:
# test data generator
test_datagen = ImageDataGenerator(rescale=1./255.)
test_generator = test_datagen.flow_from_dataframe(dataframe=test_img,
                                                  directory=img_dir,
                                                  x_col="image_names",
                                                  y_col=None,
                                                  batch_size=32,
                                                  seed=42,
                                                  shuffle=False,
                                                  class_mode=None,
                                                  target_size=(224,224),
                                                  color_mode='rgb')

Found 706 validated image filenames.


## CNN model

In [14]:
# initiate sequential model
model = Sequential()
# add convolutional layer
# 16 sliding windows each of 3X3 size
# default step is 1X1
model.add(Conv2D(filters = 32,
                 kernel_size = (3, 3),
                 activation='relu',
                 input_shape = (224, 224,3),
                 padding='same'))
# add batch normalization to normalize output of the layer
model.add(BatchNormalization())
# add another convolutional layer
model.add(Conv2D(filters = 32,
                 kernel_size = (3, 3),
                 activation='relu',
                 padding='same'))
# batchnormalize
model.add(BatchNormalization())
# add maxpooling layer
# this layer picks max value for every 2X2 window
model.add(MaxPool2D(pool_size=(2,2)))
# add dropout layer
model.add(Dropout(0.3))
# repeat above sequence once more
model.add(Conv2D(filters = 64,
                 kernel_size = (3, 3),
                 activation='relu',
                 padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters = 64,
                 kernel_size = (3, 3),
                 activation='relu',
                 padding='same'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.3))
# another set of convolutional layers
model.add(Conv2D(filters = 128,
                 kernel_size = (3, 3),
                 activation='relu',
                 padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters = 128,
                 kernel_size = (3, 3),
                 activation='relu',
                 padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters = 128,
                 kernel_size = (3, 3),
                 activation='relu',
                 padding='same'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.3))
# another set of convolutional layers
model.add(Conv2D(filters = 256,
                 kernel_size = (3, 3),
                 activation='relu',
                 padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters = 256,
                 kernel_size = (3, 3),
                 activation='relu',
                 padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(filters = 256,
                 kernel_size = (3, 3),
                 activation='relu',
                 padding='same'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.3))
# flatten cnn layers
model.add(Flatten())
# add dense layer
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.3))
# finally add a softmax layer which will predict probability of each class
model.add(Dense(1, activation='sigmoid'))
# print model summary
model.summary()

# compile model
model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 224, 224, 32)      896       
_________________________________________________________________
batch_normalization_1 (Batch (None, 224, 224, 32)      128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 224, 224, 32)      9248      
_________________________________________________________________
batch_normalization_2 (Batch (None, 224, 224, 32)      128       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 112, 112, 32)      0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 112, 112, 32)      0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 112, 112, 64)     

## Train CNN model

In [0]:
STEP_SIZE_TRAIN = train_generator.n//train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n//valid_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

In [16]:
model.fit_generator(generator=train_generator,
                    validation_data=valid_generator,
                    epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.callbacks.History at 0x7f0a62083c88>

In [17]:
model.evaluate_generator(generator=valid_generator,
                         steps=STEP_SIZE_TEST)

[0.5338988304138184, 0.8483548164367676]

## Submissions

In [18]:
test_generator.reset()
pred=model.predict_generator(test_generator,
                             verbose=1)



In [19]:
pred.shape

(706, 1)

In [20]:
pred[pred >= 0.5] = 1
pred[pred < 0.5] = 0
pred[:10]

array([[0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.]], dtype=float32)

In [0]:
pred = pred.ravel()

In [0]:
# labels = (train_generator.class_indices)
# labels = dict((v,k) for k,v in labels.items())
# predictions = [labels[k] for k in predicted_class_indices]

In [0]:
out_dir = '/content/drive/My Drive/JantaHack Computer Vision/'
filenames=test_generator.filenames
results=pd.DataFrame({"image_names":filenames,
                      "emergency_or_not":pred})
results.to_csv(out_dir+"Submissions2.csv",index=False)