In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os 
import shutil
from scipy.io import loadmat


from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense, Conv2D, Activation, MaxPooling2D, Flatten, Dropout
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from keras.layers import Flatten, Dense, BatchNormalization, Activation, Dropout

Using TensorFlow backend.


In [17]:
#Read the label file and unpack its 'labels' entry into the nested list 'data'
#(one inner list per row of the stored array)
labels1 = loadmat('imagelabels.mat')
data = []
for line in labels1['labels']:
    #.flat[0] takes the first element of each entry in the row
    data.append([row.flat[0] for row in line])


#Creating array to store names of all images in dataset.
#Names are 'image_' + the image number zero-padded to five digits + '.jpg',
#i.e. image_00001.jpg up to image_08189.jpg, in numeric order.
NUM_IMAGES = 8189
arr = ['image_{:05d}.jpg'.format(i) for i in range(1, NUM_IMAGES + 1)]


#Pair every label with its image file name in one dataframe.
#The frame is built with labels and names as two rows, then transposed so that
#each image becomes one row with the columns 'Category' and 'Name'.
columns = ['Category', 'Name']
train = pd.DataFrame(data = [data[0], arr]).transpose()
train.columns = columns


#Splitting dataset in train (65%) and test (35%).
#The seed is fixed because the images are physically moved on disk according to this
#split; an unseeded split would no longer match the folders after a kernel restart.
#The split is stratified on the label so every class appears in both parts
#(stratification needs at least two images per class).
SPLIT_SEED = 42
train1, test1 = train_test_split(
    train,
    test_size=0.35,
    random_state=SPLIT_SEED,
    stratify=train['Category'].astype(int),
)
TRAIN_INSTANCES = len(train1)
TEST_INSTANCES = len(test1)


In [19]:
#RUN ONLY ONCE
#(a repeated run is harmless: existing folders are reused and images that are
# already in their class folder are skipped)

#Creating different directories for train and test and all the subdirectories-class 1 to 102

def _sort_into_class_dirs(split_df, split_dir, categories):
    """Move the images listed in split_df into split_dir/<Category>/<Name>.

    split_df   -- dataframe with the columns 'Category' and 'Name'
    split_dir  -- root folder of the split, e.g. 'jpg/train'
    categories -- all class labels; each one gets a sub-folder under split_dir

    Raises FileNotFoundError if an image is neither at its destination nor at
    one of the locations it is expected to come from.
    """
    #Create a folder for every class, even one with no image in this split, so that
    #train and test always contain the same set of class folders.
    for category in categories:
        os.makedirs(os.path.join(split_dir, str(category)), exist_ok=True)

    #Columns are read by name; positional integer indexing on a row Series is deprecated in pandas.
    for category, name in zip(split_df['Category'], split_df['Name']):
        dst = os.path.join(split_dir, str(category), name)
        if os.path.exists(dst):
            continue  #already sorted by a previous run
        #The image is normally still in 'jpg/'; it sits directly in split_dir only
        #when an earlier two-step run was interrupted half-way.
        for src in (os.path.join('jpg', name), os.path.join(split_dir, name)):
            if os.path.exists(src):
                shutil.move(src, dst)
                break
        else:
            raise FileNotFoundError('cannot locate image {}'.format(name))


#Class labels taken from both splits, so a class that only occurs in test still gets its folders
uni = pd.concat([train1['Category'], test1['Category']]).unique()

_sort_into_class_dirs(train1, 'jpg/train', uni)
_sort_into_class_dirs(test1, 'jpg/test', uni)


#After running this cell we get a tree structure of the data for the function 'flow_from_directory'
#jpg
# -train
#   -1 to 102 (classes)
# -test
#   -1 to 102(classes)

In [10]:
#Image preprocessing and data augmentation is easily done with ImageDataGenerator, which takes in parameters for preprocessing
IMAGE_SIZE = (200, 200)
BATCH_SIZE = 32

#Training images: rescale pixel values to [0, 1] and augment with random horizontal flips and zooms
datagen = ImageDataGenerator(horizontal_flip=True, rescale=1./255, zoom_range=0.2)
#Test images: rescale only, so validation metrics are measured on unaltered images
test_datagen = ImageDataGenerator(rescale=1./255)

#class_mode='sparse' yields one integer class index per image, which is the label format
#expected by the 'sparse_categorical_crossentropy' loss used for this 102-class problem
#('binary' is meant for two-class data).
train_it = datagen.flow_from_directory('jpg/train/', class_mode='sparse', batch_size=BATCH_SIZE, target_size=IMAGE_SIZE)
test_it = test_datagen.flow_from_directory('jpg/test/', class_mode='sparse', batch_size=BATCH_SIZE, target_size=IMAGE_SIZE)

Found 5322 images belonging to 102 classes.
Found 2867 images belonging to 102 classes.


In [11]:
#CNN made of two convolution blocks (Conv -> MaxPool -> BatchNorm -> Dropout)
#followed by the fully connected classifier with one output per class (102)

model = Sequential([
    #Block 1 - input_shape must match the 'target_size' given to flow_from_directory
    Conv2D(50, (4,4), input_shape=(200,200,3), padding='same', activation='relu', strides=(2,2)),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Dropout(0.3),

    #Block 2
    Conv2D(60, (3,3), activation='relu', strides=(2,2)),
    MaxPooling2D(pool_size=(2, 2)),
    BatchNormalization(),
    Dropout(0.3),

    #Classifier head
    Flatten(),
    Dense(256, activation='relu'),
    Dense(102),
    Activation('softmax'),
])

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 100, 100, 50)      2450      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 50, 50, 50)        0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 50, 50, 50)        200       
_________________________________________________________________
dropout_3 (Dropout)          (None, 50, 50, 50)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 24, 24, 60)        27060     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 12, 12, 60)        0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 12, 12, 60)       

In [12]:
#Each image belongs to exactly one of the 102 classes, so the single-label
#'sparse_categorical_crossentropy' loss is used; accuracy is tracked as the metric
adam = Adam(learning_rate=0.001)
model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [13]:
#Write the compiled model to disk (this cell comes before the training cell)
MODEL_PATH = "model.h5"
model.save(MODEL_PATH)

In [14]:
#Callback that lowers the learning rate when the validation loss stops improving:
#after 3 epochs without improvement the rate is multiplied by 'factor', never going below min_lr
lrr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=.01,
    patience=3,
    min_lr=1e-5,
)

#Callback that saves to 'weights.hdf5' at the end of every epoch (the same file is overwritten each time)
checkpointer = ModelCheckpoint(
    filepath='weights.hdf5',
    verbose=1,
)

In [15]:
#Number of full batches per epoch: total instances // batch size.
#Both numbers are read from the iterators instead of being typed in by hand, so the
#step counts stay correct if the split or the batch size changes.
STEP_SIZE_TRAIN = train_it.samples // train_it.batch_size
STEP_SIZE_TEST = test_it.samples // test_it.batch_size

166

In [16]:
#Author's note: epochs was accidentally left at 50, so the run was stopped early
#with a keyboard interrupt (after 20 epochs according to the author)
train_history = model.fit(
    train_it,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=50,
    validation_data=test_it,
    validation_steps=STEP_SIZE_TEST,
    callbacks=[lrr, checkpointer],
)

Epoch 1/50

Epoch 00001: saving model to weights_4Feb.hdf5
Epoch 2/50

Epoch 00002: saving model to weights_4Feb.hdf5
Epoch 3/50

Epoch 00003: saving model to weights_4Feb.hdf5
Epoch 4/50

Epoch 00004: saving model to weights_4Feb.hdf5
Epoch 5/50

Epoch 00005: saving model to weights_4Feb.hdf5
Epoch 6/50

Epoch 00006: saving model to weights_4Feb.hdf5
Epoch 7/50

Epoch 00007: saving model to weights_4Feb.hdf5
Epoch 8/50

Epoch 00008: saving model to weights_4Feb.hdf5
Epoch 9/50

Epoch 00009: saving model to weights_4Feb.hdf5
Epoch 10/50

Epoch 00010: saving model to weights_4Feb.hdf5
Epoch 11/50

Epoch 00011: saving model to weights_4Feb.hdf5
Epoch 12/50

Epoch 00012: saving model to weights_4Feb.hdf5
Epoch 13/50

Epoch 00013: saving model to weights_4Feb.hdf5
Epoch 14/50

Epoch 00014: saving model to weights_4Feb.hdf5
Epoch 15/50

Epoch 00015: saving model to weights_4Feb.hdf5
Epoch 16/50

Epoch 00016: saving model to weights_4Feb.hdf5
Epoch 17/50

Epoch 00017: saving model to weights

KeyboardInterrupt: 