In [None]:
#loading all required libraries

import numpy as np
import pandas as pd
import os
import PIL
from PIL import Image

from keras.models import Model
from keras.layers import Flatten, Dense, Dropout
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import BatchNormalization, GlobalAveragePooling2D
from keras.utils import to_categorical
from keras.optimizers import Adam
import matplotlib.pyplot as plt



Reading data files (CSV files which contain the image URLs)


# Load the CSV tables describing the train and test image sets
data = pd.read_csv('Images_Train.csv')
t_data = pd.read_csv('Images_Test.csv')

# Dimensions of each table
data.shape

t_data.shape

# First five records of the train data
data.head()

# First five records of the test data
t_data.head()
 
Preprocessing
# Handling NA values: drop every row whose image link is missing.
# reset_index() returns a new frame rather than modifying in place, so its
# result has to be assigned back; drop=True discards the old (now gappy)
# index instead of inserting it as an extra column.
train_data = data.dropna(subset=['Link_to_the_image'], how='any').reset_index(drop=True)
train_data.shape

test_data = t_data.dropna(subset=['Link_to_the_image'], how='any').reset_index(drop=True)
test_data.shape
 
# Rows per class, then the number of distinct classes, in the train data
print(train_data['Sub_category'].value_counts())
print(len(train_data['Sub_category'].unique()))

# Same two figures for the test data
print(test_data['Sub_category'].value_counts())
print(len(test_data['Sub_category'].unique()))
 
%matplotlib inline
#checking the distribution for the total number of classes in train data
plt.figure() 




train_data.Sub_category.value_counts().plot(kind='bar',
                                  figsize=(8,6),
                                  color=["Green"],
                                  alpha = 0.7,
                                  fontsize=10)

plt.title('Train data Tshirt graphic types')
plt.grid()

plt.xticks(rotation =90)

 

#checking the distribution for the total number of classes in test data

plt.figure()
test_data.Sub_category.value_counts().plot(kind='bar',
                                  figsize=(8,6),
                                  color=["Green"],
                                  alpha = 0.7,
                                  fontsize=10)
plt.title('Test data Tshirt graphic types')
plt.grid()

plt.xticks(rotation =90)
 
We have class imbalance and one unknown class in the test data. We have to take care of both.
# Handling class imbalance: every class that accounts for less than 2.5% of
# the training rows is renamed to a single 'Other' class.
# Series.value_counts() is used because the top-level pd.value_counts()
# helper is deprecated.
series = train_data['Sub_category'].value_counts()

# True for the classes whose share of the rows is below 2.5 percent
mask = (series / series.sum() * 100).lt(2.5)
train_data['Sub_category'] = np.where(
    train_data['Sub_category'].isin(series[mask].index),
    'Other',
    train_data['Sub_category'],
)
train_data['Sub_category'].value_counts()

# Labels that remain once the low-frequency ones have been renamed to 'Other'
train_labels = train_data['Sub_category'].unique().tolist()
train_labels
 
reading images for model building
# defining function for reading the images
size = 299  # side length, in pixels, that every image is resized to


def read_image(f):
    """Load one image file as a ``(size, size, 3)`` float64 array.

    Parameters
    ----------
    f : str or file object
        Anything ``PIL.Image.open`` accepts.

    Returns
    -------
    numpy.ndarray
        RGB pixel values of the image after resizing it to ``size`` x ``size``
        with nearest-neighbour resampling, stored as ``float64``.
    """
    # The context manager closes the underlying file once the pixels have
    # been copied out, instead of leaving one open handle per image.
    with Image.open(f) as source:
        # Palette, greyscale and RGBA files are normalised to three channels
        # so that all returned arrays share one shape and can be stacked.
        rgb = source.convert('RGB')
        resized = rgb.resize((size, size), PIL.Image.NEAREST)
        return np.asarray(resized, dtype='float64')

# Gather the names of all files found under the training image directory
img_files = [
    name
    for _root, _dirs, names in os.walk('/home/B49gpu1/2364/Train_images/')
    for name in names
]

# Read a single image and show the shape of the resulting array
im = read_image('Train_images/img_1.png')
print(im.shape)

# printing the first ten image file names
print(img_files[:10])

# checking total number of images
n_files = len(img_files)
print('Total number of images:', n_files)
 
# Converting the target to numeric codes

# Distinct target classes of the train data, in alphabetical order
train_labels = sorted(train_data.Sub_category.unique())
print(train_labels)

# Manual label encoding: class name -> position in the sorted list, wrapped
# in the {column: {old: new}} form that DataFrame.replace accepts
y_dict = {'Sub_category': {label: code for code, label in enumerate(train_labels)}}
y_dict

# Substitute the integer codes for the class names in the train data
train_data.replace(y_dict, inplace=True)
train_data.head()
 
Reading Images

# Directory that img_files was listed from. Every file is loaded from this
# same directory so that listing and loading cannot point at different places.
train_image_dir = '/home/B49gpu1/2364/Train_images/'

print('Reading train images ...')
x_train = [read_image(os.path.join(train_image_dir, file)) for file in img_files]

# NOTE(review): the targets are taken in CSV row order while the images
# follow the os.walk listing order; nothing here ties a row to its file --
# confirm that the two orders (and counts) really match.
y_train = train_data.Sub_category.tolist()

# Stack the per-image arrays into one array
x_train = np.array(x_train)
print(x_train.shape, 'x_train Shape')

# One-hot encode the integer class codes
y_train = to_categorical(y_train)
print(y_train.shape, 'y_train Shape')

# checking image count
print(len(x_train))
 
#Model Building
#Model

#We will use one of the pretrained models available in Keras, trained on the ImageNet dataset, and fine-tune it for our task. The early layers learn simple, basic features, so we do not need to retrain them; they can be applied directly to our task.

#Note that the input image format for this model is different than for the VGG16 and ResNet models (299x299 instead of 224x224),

#Arguments

#include_top: whether to include the fully-connected layer at the top of the network.

#weights: one of None (random initialization), 'imagenet' (pre-training on ImageNet),

### Get inception architecture from keras.applications
# GlobalAveragePooling2D is a keras.layers class and is already imported from
# there at the top of the file; the inception_v3 module does not publicly
# export it, so it is no longer imported from here.
from keras.applications.inception_v3 import InceptionV3, decode_predictions

# Convolutional base with ImageNet weights and without the classifier head
trained_model = InceptionV3(include_top=False, weights='imagenet')
# print(trained_model.summary())

# One output unit per one-hot column of y_train instead of a hard-coded count
num_classes = y_train.shape[1]

# New classification head on top of the pretrained base
x = trained_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(300, activation='relu')(x)
x = BatchNormalization()(x)
pred_inception = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=trained_model.input, outputs=pred_inception)
# print(model.summary())

# making the layers of inception non-trainable, so only the new head is fitted
for layer in trained_model.layers:
    layer.trainable = False

# NOTE(review): x_train holds the raw pixel values returned by read_image; the
# ImageNet weights are normally paired with
# keras.applications.inception_v3.preprocess_input -- confirm whether input
# scaling should be applied to both the train and the test arrays.

# compiling the model
adam = Adam(lr=0.003)
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=adam)

# fitting the model; the last 20% of the samples are held out for validation
Model_1 = model.fit(x_train, y_train, epochs=5, batch_size=100, validation_split=0.2)
 
    
    
# confusion matrix
#from sklearn.metrics import confusion_matrix

#prediction_train=model.predict(x_train)
#print(confusion_matrix(y_train,prediction_train))
#checking Accuracy curves for the model

import matplotlib.pyplot as plt
plt.switch_backend('agg')
%matplotlib inline
fig1 = plt.figure()
plt.plot(Model_1.history['acc'],'r',linewidth=3.0)
plt.plot(Model_1.history['val_acc'],'b',linewidth=3.0)
plt.legend(['Training Accuracy', 'Validation Accuracy'],fontsize=18)
plt.xlabel('Epochs ',fontsize=12)
plt.ylabel('Accuracy',fontsize=12)
plt.title('Accuracy Curves ',fontsize=16)
fig1.savefig('accuracy.png')
plt.show()

##Evaluate Test Data
# Manually handling the one extra class in the test data (changing 'Sports and Team Jersey' to 'Sports')

# The result is assigned back to the column: an inplace replace() on the
# selected column is not guaranteed to write through to test_data.
test_data['Sub_category'] = test_data['Sub_category'].replace(
    to_replace=['Sports and Team Jersey'],
    value='Sports',
)

# Every test label that is not one of the training labels is renamed to 'Other'
test_data['Sub_category'] = np.where(
    np.isin(test_data['Sub_category'], train_labels),
    test_data['Sub_category'],
    'Other',
)
test_data['Sub_category'].unique()

# checking counts of labels in test data after modification
test_data['Sub_category'].value_counts()

# Converting them into numeric codes with the mapping built from the train labels
test_data.replace(y_dict, inplace=True)
test_data.head()

# Verifying the target variable
test_data.Sub_category.unique()
 
# Reading Test Images

# A single directory is used for both listing and loading, so the two steps
# cannot point at different places.
test_image_dir = '/home/B49gpu1/2364/Test_images/'

img_files_test = []
for root, dirs, files in os.walk(test_image_dir):
    img_files_test.extend(files)

print('Reading test images ...')
x_test = [read_image(os.path.join(test_image_dir, file)) for file in img_files_test]

# NOTE(review): the targets are taken in CSV row order while the images
# follow the os.walk listing order; nothing here ties a row to its file --
# confirm that the two orders (and counts) really match.
y_test = test_data.Sub_category.tolist()

# Stack the per-image arrays into one array
x_test = np.array(x_test)
print(x_test.shape, 'x_test Shape')

# The one-hot width is pinned to that of the training targets, so a class
# that happens to be absent from the test set cannot make y_test narrower.
y_test = to_categorical(y_test, num_classes=y_train.shape[1])
print(y_test.shape, 'y_test Shape')

# checking image count
print(len(x_test))
 
# Evaluate the fitted model on the test arrays
model.evaluate(x=x_test, y=y_test)

# Save the model to disk
model.save('image_model_2')