In [0]:
# Install PyDrive so files stored on Google Drive can be accessed from Google Colab
!pip install -U -q PyDrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# Step 1: authenticate the current Colab user.
auth.authenticate_user()
# Step 2: hand the application-default Google credentials to PyDrive.
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client later cells use (drive.CreateFile) to download the data files.
drive = GoogleDrive(gauth)

In [2]:
# Test if GPU is active and report the Keras version in use
import tensorflow as tf
import keras

# gpu_device_name() returns an empty string when TensorFlow cannot see a GPU.
# The value must be printed explicitly: only the last expression of a cell is
# echoed, so a bare call in the middle of the cell is silently discarded.
gpu_name = tf.test.gpu_device_name()
print('GPU device:', gpu_name if gpu_name else 'none found')

print(keras.__version__)

2.1.6


Using TensorFlow backend.


In [0]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import io
import zipfile
from skimage import color
from matplotlib import pyplot
import time
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import SGD


%matplotlib inline

In [4]:
# Import data and label files
# Each file is fetched from Google Drive by its file id through the PyDrive
# `drive` client, then saved in the current working directory.

# Train labels (space-separated, no header row; later cells read the image
# file name from column 0 and the class label from column 1)
training_labels_downloaded = drive.CreateFile({'id': '1rJCglyD-6uTow7RFsj5K7WxSl1YFyg8M'})
training_labels_downloaded.GetContentFile('train.txt')
train_labels = pd.read_csv('train.txt', sep=" ", header=None)

# Test labels (stored in the 'vali' files)
test_labels_downloaded = drive.CreateFile({'id': '15a5U1SsguFSdQim7ifoA9vpuhyL5AgTZ'})
test_labels_downloaded.GetContentFile('vali.txt')
test_labels = pd.read_csv('vali.txt', sep=" ", header=None)

# Train data
train_data_downloaded = drive.CreateFile({'id': '1-X94bMCmjMShVVN5VJXPZ936Zly0BOP_'})
train_data_downloaded.GetContentFile('train-set.zip')
# Use a context manager so the archive handle is closed after extraction
with zipfile.ZipFile('train-set.zip', 'r') as train_data:
    train_data.extractall()

# Test data
test_data_downloaded = drive.CreateFile({'id': '1lSMxDwZFD40ceFAlWCoqT1TlJ9gqgqOx'})
test_data_downloaded.GetContentFile('vali-set.zip')
with zipfile.ZipFile('vali-set.zip', 'r') as test_data:
    test_data.extractall()

rm: cannot remove 'train-set/__MACOSX': No such file or directory


In [5]:
# List every entry (including hidden ones) in the working directory,
# e.g. to check which downloaded and extracted files are present.
!ls -a

.	.config   .ipython  __MACOSX  train-set      vali-set
..	datalab   .keras    .nv       train-set.zip  vali-set.zip
.cache	.forever  .local    .rnd      train.txt      vali.txt


In [6]:
%%time
# Read every image named in the label files into a uint8 array.
# mode='L' asks imread for 8-bit grayscale pixels, i.e. a single channel,
# which is the input format used for the LeNet-style models.
import scipy.misc

# Train data: file names come from column 0 of the train label table
train_images = [
  scipy.misc.imread('train-set/train-set/' + file_name, flatten=False, mode='L').astype('uint8')
  for file_name in train_labels[0]
]

# Test data: file names come from column 0 of the test label table
test_images = [
  scipy.misc.imread('vali-set/vali-set/' + file_name, flatten=False, mode='L').astype('uint8')
  for file_name in test_labels[0]
]

CPU times: user 14.7 s, sys: 1.71 s, total: 16.4 s
Wall time: 16.4 s


In [0]:
# Define train and test sets
# Column 1 of each label table holds the class label.
y_train = train_labels[1][:]
y_test = test_labels[1][:]

# Stack the images into (samples, height, width, 1) arrays. Height and width
# are read separately from the first image of each set so that non-square
# images are handled too (previously the height was used for both axes).
# reshape still raises if the images in a set do not share one shape.
train_height, train_width = train_images[0].shape
test_height, test_width = test_images[0].shape
x_train = np.asarray(train_images).reshape(len(train_images), train_height, train_width, 1)
x_test = np.asarray(test_images).reshape(len(test_images), test_height, test_width, 1)

# Number of distinct classes seen in the training labels.
# NOTE(review): using this as the one-hot width assumes the labels are the
# contiguous integers 0..class_num-1 - confirm against the label files.
class_num = len(np.unique(y_train))

In [8]:
# One-hot encode the integer class labels.
# NOTE: y_train / y_test are overwritten here, so re-running this cell would
# encode the already-encoded labels a second time.
y_train, y_test = (
    keras.utils.to_categorical(y_train, class_num),
    keras.utils.to_categorical(y_test, class_num),
)

# Print shapes of data and labels
shape_report = (
    ('Train data: ', x_train),
    ('Train labels: ', y_train),
    ('Test data: ', x_test),
    ('Test labels: ', y_test),
)
for caption, array in shape_report:
    print(caption, array.shape)

Train data:  (37882, 128, 128, 1)
Train labels:  (37882, 62)
Test data:  (6262, 128, 128, 1)
Test labels:  (6262, 62)


In [9]:
# Build a modified LeNet architecture with additional Dropout, batch
# normalization and ReLU activation functions.
# Details of the full model are described in the report.
# For experiments, a layer can be disabled by commenting out its entry with
# '#', or a layer can be added by inserting another entry in the list below.
# Results from the experiments are also included in the report.

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import SGD


modellenet5 = Sequential([
    # Two 2x2 / stride-2 max-pooling layers shrink the 128*128 inputs to 32*32
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), input_shape=(128, 128, 1)),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    BatchNormalization(),

    # Start of the original LeNet-5 layout: first convolution stage
    Conv2D(6, kernel_size=(5, 5), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(rate=0.25),
    BatchNormalization(),

    # Second convolution stage
    Conv2D(16, kernel_size=(5, 5), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

    # Fully connected classifier head
    Flatten(),
    Dense(units=256, activation='relu'),
    Dropout(rate=0.25),
    BatchNormalization(),

    # One output unit per class
    Dense(units=62, activation='softmax'),
])

# Two configurable optimizers, 'adam' and 'sgd'; their hyperparameters can be
# changed here. They are defined for experiments but not passed to compile()
# below, which uses 'rmsprop' with its default hyperparameter values.
adam = keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=1e-6, amsgrad=False)
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)


# Compile the model
modellenet5.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)



# Check runtime (the timer covers both training and the final evaluation)
start = time.time()

# Train model; the test set doubles as the per-epoch validation data
modellenet5.fit(x_train, y_train, batch_size=128, epochs=20,
          validation_data=(x_test, y_test),
          verbose=1)

# evaluate() returns [loss, accuracy] for the metrics given to compile().
# The loss is a cross-entropy value, not a fraction, so it is reported as a
# plain number; only the accuracy is converted to a percentage.
score = modellenet5.evaluate(x_test, y_test, verbose=0)
print('Test loss: {:0.4f}'.format(score[0]))
print('Test accuracy: {:0.2f}%'.format(score[1]*100))

# Print runtime (no truncation to whole seconds before converting to minutes)
end = time.time()
print('Total runtime is {:0.2f} minutes'.format((end-start)/60))

Train on 37882 samples, validate on 6262 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20

Epoch 5/20
Epoch 6/20
Epoch 7/20

Epoch 8/20
Epoch 9/20
Epoch 10/20

Epoch 11/20
Epoch 12/20
Epoch 13/20

Epoch 14/20
Epoch 15/20
Epoch 16/20

Epoch 17/20
Epoch 18/20
Epoch 19/20

Epoch 20/20
Test loss: 45.21%
Test accuracy: 85.76%
Total runtime is 2.05 minutes


In [10]:
# Another way to resize the original images to 32*32 for the LeNet 5 model is to use a resize library


# Use scipy.misc.imresize() approach
from PIL import Image
import os, sys

def imread(path):
    """Read the image file at `path` and return its pixels as a float array.

    Wraps scipy.misc.imread (called with its default arguments) and casts the
    result to Python float (NumPy float64). The builtin `float` is used
    instead of the `np.float` alias, which newer NumPy releases removed.
    """
    return scipy.misc.imread(path).astype(float)
  

def _load_resized(folder, file_names, size=(32, 32)):
    """Load each image in `folder` named by `file_names`, resized to `size`.

    Parameters:
        folder: path prefix (with trailing slash) prepended to every file name.
        file_names: iterable of image file names.
        size: (height, width) passed to scipy.misc.imresize.

    Returns:
        A list with one uint8 array per file name, in input order.
    """
    # NOTE(review): imread() returns float arrays; per the SciPy docs imresize
    # rescales such input to the 0-255 range before resizing - confirm this
    # per-image contrast stretch is intended.
    resized_images = []
    for file_name in file_names:
        content_image = imread(folder + file_name)
        resized = scipy.misc.imresize(content_image, size)
        resized_images.append(resized.astype('uint8'))
    return resized_images


# File names come from column 0 of the label tables
train_images_resize = _load_resized('train-set/train-set/', train_labels[0])
test_images_resize = _load_resized('vali-set/vali-set/', test_labels[0])


# Define new training & testing set (resized)
# Add the trailing channel axis: (samples, 32, 32) -> (samples, 32, 32, 1).
# reshape raises if the loaded images carry more than one channel.
x_train_resize = np.asarray(train_images_resize).reshape(len(train_images_resize), 32, 32, 1)
x_test_resize = np.asarray(test_images_resize).reshape(len(test_images_resize), 32, 32, 1)

  if issubdtype(ts, int):
  elif issubdtype(type(size), float):


In [12]:
# Show the array shapes of the resized train and test inputs
for caption, resized_set in (('Train data: ', x_train_resize), ('Test data: ', x_test_resize)):
    print(caption, resized_set.shape)

Train data:  (37882, 32, 32, 1)
Test data:  (6262, 32, 32, 1)


In [13]:
# Build a modified LeNet architecture with additional Dropout, batch
# normalization and ReLU activation functions.
# Details of the full model are described in the report; this variant is
# named 'modellenet6' and takes the 32*32 resized images as input.
# For experiments, a layer can be disabled by commenting out its entry with
# '#', or a layer can be added by inserting another entry in the list below.
# Results from the experiments are also included in the report.

#### This cell shows the optimal model and hyper-parameter settings ####

modellenet6 = Sequential([
    # First convolution stage
    Conv2D(6, kernel_size=(5, 5), activation='relu', input_shape=(32, 32, 1)),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(rate=0.25),
    BatchNormalization(),

    # Second convolution stage
    Conv2D(16, kernel_size=(5, 5), activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(rate=0.25),
    BatchNormalization(),

    # Fully connected classifier head
    Flatten(),
    Dense(units=256, activation='relu'),
    Dropout(rate=0.25),
    BatchNormalization(),

    # One output unit per class
    Dense(units=62, activation='softmax'),
])


# Candidate optimizers; only 'adam' is passed to compile() below
adam = keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=1e-6, amsgrad=False)
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

# Compile the model
modellenet6.compile(
    loss='categorical_crossentropy',
    optimizer=adam,
    metrics=['accuracy'],
)

# Check runtime (the timer covers both training and the final evaluation)
start = time.time()

# Train model
# Using the new resized datasets; the test set doubles as the per-epoch
# validation data
modellenet6.fit(x_train_resize, y_train, batch_size=256, epochs=35,
          validation_data=(x_test_resize, y_test),
          verbose=1)

# evaluate() returns [loss, accuracy] for the metrics given to compile().
# The loss is a cross-entropy value, not a fraction, so it is reported as a
# plain number; only the accuracy is converted to a percentage.
score = modellenet6.evaluate(x_test_resize, y_test, verbose=0)
print('Test loss: {:0.4f}'.format(score[0]))
print('Test accuracy: {:0.2f}%'.format(score[1]*100))

# Print runtime (no truncation to whole seconds before converting to minutes)
end = time.time()
print('Total runtime is {:0.2f} minutes'.format((end-start)/60))

Train on 37882 samples, validate on 6262 samples
Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35

Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35

Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
 7936/37882 [=====>........................] - ETA: 2s - loss: 0.4474 - acc: 0.8427

Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
 7936/37882 [=====>........................] - ETA: 2s - loss: 0.4329 - acc: 0.8396

Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
 7936/37882 [=====>........................] - ETA: 2s - loss: 0.3884 - acc: 0.8551

Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
 7936/37882 [=====>........................] - ETA: 2s - loss: 0.3691 - acc: 0.8598

Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
 7936/37882 [=====>........................] - ETA: 2s - loss: 0.3489 - acc: 0.8666

Epoch 35/35
Test loss: 29.69%
Test accuracy: 88.92%
Total runtime is 1.77 minutes
