# ML Challenge Prediction Notebook

In [1]:
%matplotlib inline
%run ./../../plugins/widgets.py

import cv2
import glob
import ipywidgets
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import sklearn.model_selection
import time
import tensorflow as tf
import tensorflow.keras as keras

from absl import logging
logging._warn_preinit_stderr = 0
logging.warning('Worrying Stuff')

Widget Loaded


W0811 18:23:01.039221  6748 <ipython-input-1-dedcc1a1dfa7>:20] Worrying Stuff


## Global parameters and variables

In [2]:
tf.enable_eager_execution()
plt.rcParams['figure.figsize'] = [16, 9]
plt.rcParams['font.size'] = 14
plt.rcParams['axes.grid'] = True
plt.rcParams['figure.facecolor'] = 'white'
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

In [3]:
IMG_SIZE = 224
IMG_SHAPE = [IMG_SIZE, IMG_SIZE, 3]

NUM_CLASSES = 102
BATCH_SIZE = 64

## Loading Filenames and Labels

In [4]:
df = pd.read_csv("./data/train.csv")
df['filename'] = './data/train/'+ df['image_id'].astype(str) + '.jpg'
df.head(5)

Unnamed: 0,image_id,category,filename
0,0,77,./data/train/0.jpg
1,1,81,./data/train/1.jpg
2,2,52,./data/train/2.jpg
3,3,72,./data/train/3.jpg
4,4,58,./data/train/4.jpg


In [5]:
filenames = []
labels = []
for index, row in df.iterrows():
    filenames.append(row['filename'])
    labels.append(row['category']-1)
    
print("Sample filenames : ", filenames[:4])
print("Sample labels : ", labels[:4])

Sample filenames :  ['./data/train/0.jpg', './data/train/1.jpg', './data/train/2.jpg', './data/train/3.jpg']
Sample labels :  [76, 80, 51, 71]


In [6]:
train_x, val_x, train_y, val_y = sklearn.model_selection.train_test_split(filenames, labels, test_size=0.25, random_state=64)
print("{} examples in training set".format(len(train_y)))
print("{} examples in validation set".format(len(val_y)))

13905 examples in training set
4635 examples in validation set


## Defining a DataGenerator

In [7]:
class DataGenerator(keras.utils.Sequence):
    
    # Generates data for Keras
    def __init__(self, filenames, labels, batch_size=BATCH_SIZE, n_classes=NUM_CLASSES, shuffle=True):   
        self.filenames = filenames
        self.labels = labels
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    # Generate one batch of data
    def __getitem__(self, batch_index):
        
        # Generate indexes of the batch
        batch_start = batch_index*self.batch_size
        batch_end = (batch_index+1)*self.batch_size
        
        batch_indices = self.indexes[batch_start:batch_end]
        
        batch_filenames = [self.filenames[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]

        batch_x = []
        batch_y = []
        
        # Generate data
        for i in range(self.batch_size):
            pil_image = keras.preprocessing.image.load_img(batch_filenames[i], target_size=(IMG_SIZE, IMG_SIZE))
            image_array = keras.preprocessing.image.img_to_array(pil_image)
            batch_x.append(image_array)
            batch_y.append(batch_labels[i])
        
        batch_x = np.array(batch_x)
        batch_y = keras.utils.to_categorical(batch_y, num_classes=self.n_classes)
        
        return batch_x, batch_y

    # Updates indexes after each epoch
    def on_epoch_end(self):
        self.indexes = np.arange(len(self.filenames))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)
          
    # Denotes the number of batches per epoch
    def __len__(self):
        return int(np.floor(len(self.filenames) / self.batch_size))

In [8]:
training_generator = DataGenerator(train_x, train_y)
validation_generator  = DataGenerator(val_x, val_y)

## Load and Tweak Pre-trained Model

In [9]:
# Loads the VGG19 Model without the last 2 FC layers when include_top=False
base_model = keras.applications.VGG19(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')

In [10]:
x = base_model.output
x = keras.layers.Flatten()(x)
predictions = keras.layers.Dense(NUM_CLASSES, activation='softmax')(x)
model = keras.Model(inputs=base_model.input, outputs=predictions)

In [11]:
for layer in base_model.layers:
    layer.trainable = False
    
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

## Train Model

In [None]:
model.fit_generator(generator=training_generator,
                    validation_data=validation_generator,
#                     use_multiprocessing=True, workers=6, 
                    verbose=2)

W0811 18:23:04.486792  6748 deprecation.py:323] From C:\Program Files\Python 3.6.3\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
