# **Libraries We'll Be Using**

In [0]:
import numpy as np
import h5py

from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.metrics import accuracy_score

from keras import regularizers
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Input
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Activation
from keras.utils import np_utils


from keras.optimizers import RMSprop
from keras.metrics import categorical_accuracy

# **Loading Preprocessed Dataset**

In [0]:
# open our uploaded file
svhn_data = h5py.File('SVHN_Preprocessed_Multi.h5', 'r')

# load the training, testing and validation set
X_train = svhn_data['X_train'][:]
y_train = svhn_data['y_train'][:]
X_test = svhn_data['X_test'][:]
y_test = svhn_data['y_test'][:]
X_val = svhn_data['X_val'][:]
y_val = svhn_data['y_val'][:]

# close the file
svhn_data.close()


# check that our datasets are correct
print('Training X Shape: ', X_train.shape)
print('Training Y Shape: ', y_train.shape)
print('Testing X Shape: ', X_test.shape)
print('Testing Y Shape: ', y_test.shape)
print('Validation X Shape: ', X_val.shape)
print('Validation Y Shape: ', y_val.shape)

Training X Shape:  (26720, 32, 32, 1)
Training Y Shape:  (26720, 5)
Testing X Shape:  (13068, 32, 32, 1)
Testing Y Shape:  (13068, 5)
Validation X Shape:  (6681, 32, 32, 1)
Validation Y Shape:  (6681, 5)


# **Prepare Train/Test/Validation Labels for Multi-Digit CNN Model**

In [0]:
def create_output_labels(labels):

  # 11 possible classes
  # classes for digits 0-9 and a class for no digit 
  classes = 11
  
  # output arrays
  dig1 = np.ndarray(shape=(len(labels), classes))
  dig2 = np.ndarray(shape=(len(labels), classes))
  dig3 = np.ndarray(shape=(len(labels), classes))
  dig4 = np.ndarray(shape=(len(labels), classes))
  dig5 = np.ndarray(shape=(len(labels), classes))

  # create one hot encoded labels
  for index,label in enumerate(labels):

      dig1[index,:] = np_utils.to_categorical(label[0], classes)
      dig2[index,:] = np_utils.to_categorical(label[1], classes)
      dig3[index,:] = np_utils.to_categorical(label[2], classes)
      dig4[index,:] = np_utils.to_categorical(label[3], classes)
      dig5[index,:] = np_utils.to_categorical(label[4], classes)

  return [dig1, dig2, dig3, dig4, dig5]
  

  
# create the train/test/val output arrays
train_labels = create_output_labels(y_train)
test_labels = create_output_labels(y_test)
val_labels = create_output_labels(y_val)


# check that the output shape is fine
print(np.shape(train_labels[0]))

(26720, 11)


# **Create a CNN Model**

**Multi-Output Model**

In [0]:
def cnn_model_multi():

  weight_decay = 1e-4
  classes = 11


  # LAYER 1
  model_input = Input(shape=X_train.shape[1:])

  M = Conv2D(32, (3,3), activation='elu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(model_input)
  M = BatchNormalization()(M)
  M = Conv2D(32, (3,3), activation='elu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(M)
  M = BatchNormalization()(M)
  M = MaxPooling2D(pool_size=(2,2))(M)
  M = Dropout(0.2)(M)


  # LAYER 2
  M = Conv2D(64, (3,3), activation='elu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(M)
  M = BatchNormalization()(M)
  M = Conv2D(64, (3,3), activation='elu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(M)
  M = BatchNormalization()(M)
  M = MaxPooling2D(pool_size=(2,2))(M)
  M = Dropout(0.3)(M)


  # LAYER 3
  M = Conv2D(128, (3,3), activation='elu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(M)
  M = BatchNormalization()(M)
  M = Conv2D(128, (3,3), activation='elu', padding='same', kernel_regularizer=regularizers.l2(weight_decay))(M)
  M = BatchNormalization()(M)
  M = MaxPooling2D(pool_size=(2,2))(M)
  M = Dropout(0.4)(M)


  # OUTPUT
  Mout = Flatten()(M)

  dig1 = Dense(classes, activation='softmax', name = 'dig1')(Mout)
  dig2 = Dense(classes, activation='softmax', name = 'dig2')(Mout)
  dig3 = Dense(classes, activation='softmax', name = 'dig3')(Mout)
  dig4 = Dense(classes, activation='softmax', name = 'dig4')(Mout)
  dig5 = Dense(classes, activation='softmax', name = 'dig5')(Mout)

  out = [dig1, dig2, dig3, dig4, dig5]
  
  model = Model(input=model_input, output=out)
  return model

# **Fit the Model and Display Accuracy**

**Compile, train, and save our CNN model**

In [0]:
# create and compile our model
model = cnn_model_multi()
model.compile(loss='categorical_crossentropy', 
              optimizer = RMSprop(lr=0.001, decay=1e-6), 
              metrics=["accuracy"])


# parameters for model fitting
batch_size = 128
epochs = 20

# train the model
model.fit(x=X_train, y=train_labels,
          validation_data=(X_val, val_labels),
          batch_size=batch_size,
          epochs=epochs,
          verbose=1)

# save the model
model.save("SVHN_model_multi.h5")



Train on 26720 samples, validate on 6681 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fe2b37998d0>

**Check Model Accuracy**

In [0]:
# display final accuracy on validation set
scores = model.evaluate(X_val, val_labels, verbose=0)
print("Validation Accuracy: %.2f%%" % (scores[1]*100))

# make our predictions on the testing set
test_predictions = model.predict(X_test)
accuracy = accuracy_score(test_labels, np.round(test_predictions))
print(accuracy)

Accuracy: 48.71%


**Display some of the predictions our model makes**

In [0]:
import random

# randomly select labels from the test dataset and see what the model predicts
for i in random.sample(range(0, len(test_labels[0])), 5):

  actual_labels = []
  predicted_labels = []

  for j in range(0,5):
      actual_labels.append(np.argmax(test_labels[j][i]))
      predicted_labels.append(np.argmax(predictions[j][i]))

  print("Actual labels:\t\t{}".format(actual_labels))
  print("Predicted labels:\t{}\n".format(predicted_labels))

Actual labels:		[1, 9, 1, 6, 10]
Predicted labels:	[1, 9, 6, 6, 10]

Actual labels:		[3, 4, 10, 10, 10]
Predicted labels:	[8, 3, 4, 10, 10]

Actual labels:		[1, 7, 10, 10, 10]
Predicted labels:	[1, 7, 10, 10, 10]

Actual labels:		[3, 4, 10, 10, 10]
Predicted labels:	[3, 4, 10, 10, 10]

Actual labels:		[4, 2, 10, 10, 10]
Predicted labels:	[4, 2, 10, 10, 10]

