# 1: Read the data from h5py file and understand the train/test splits

# Import necessary libraries

In [1]:
import tensorflow as tf
import keras
import h5py
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Open the HDF5 file read-only and show the names of its top-level entries.
h5f = h5py.File('SVHN_single_grey1.h5', mode='r')
h5f.keys()

<KeysViewHDF5 ['X_test', 'X_train', 'X_val', 'y_test', 'y_train', 'y_val']>

In [3]:
# Load the pre-split train, validation and test data.
# Indexing with `[:]` reads each HDF5 dataset fully into an in-memory NumPy
# array, so nothing below depends on the file staying open.
X_train = h5f['X_train'][:]
y_train = h5f['y_train'][:]

X_val = h5f['X_val'][:]
y_val = h5f['y_val'][:]

X_test = h5f['X_test'][:]
y_test = h5f['y_test'][:]

# All splits are now in memory; release the file handle instead of leaking it
# for the lifetime of the kernel.
h5f.close()


In [4]:
# Report the shape of every split, features before labels, one line per array.
print('\n'.join(
    f'size of {label} is {data.shape}'
    for label, data in (
        ('X_train', X_train),
        ('y_train', y_train),
        ('X_val', X_val),
        ('y_val', y_val),
        ('X_test', X_test),
        ('y_test', y_test),
    )
))

size of X_train is (42000, 32, 32)
size of y_train is (42000,)
size of X_val is (60000, 32, 32)
size of y_val is (60000,)
size of X_test is (18000, 32, 32)
size of y_test is (18000,)


The training set (X_train) has 42,000 records, the validation set (X_val) has 60,000 records, and the test set (X_test) has 18,000 records; each record is 32×32 in size.

# 2: Reshape and normalize the train and test features

# Reshape the data

In [5]:
# Flatten each 32x32 record into one row of 32*32 values; the first
# (sample) axis keeps its length.
X_train = np.reshape(X_train, (X_train.shape[0], 32 * 32))
X_val = np.reshape(X_val, (X_val.shape[0], 32 * 32))
X_test = np.reshape(X_test, (X_test.shape[0], 32 * 32))

# Confirm the new 2-D shapes.
print('\n'.join(
    f'shape of {label} is {data.shape}'
    for label, data in (
        ('X_train', X_train),
        ('X_val', X_val),
        ('X_test', X_test),
    )
))

shape of X_train is (42000, 1024)
shape of X_val is (60000, 1024)
shape of X_test is (18000, 1024)


# Normalize the data

In [6]:
# Cast every feature matrix to float32 and divide by 255 in a single
# expression per split (the result stays float32).
X_train = X_train.astype(np.float32) / 255
X_val = X_val.astype(np.float32) / 255
X_test = X_test.astype(np.float32) / 255

In [7]:
# Show the value range of the scaled training features.
print(
    f'min value is {X_train.min()}',
    f'max value is {X_train.max()}',
    sep='\n',
)

min value is 0.0
max value is 0.9998999834060669


After normalization, the minimum value of X_train is 0.0 and the maximum is approximately 0.9999.

# 3: One hot encode the labels for train and test data

In [8]:
# Convert class vectors to binary class matrix
# Convert y_train, y_val and  y_test
# number of classes : 10
# we are doing this to use categorical_crossentropy as loss
from tensorflow.keras.utils import to_categorical

In [9]:
# One-hot encode the integer labels of all three splits into 10 columns.
# The tuple of label arrays is built before any name is rebound, so each
# split is encoded from its original values.
y_train, y_val, y_test = (
    to_categorical(labels, num_classes=10)
    for labels in (y_train, y_val, y_test)
)

# Sanity check: the encoded shape and one encoded row.
print("Shape of y_train:", y_train.shape)
print("One value of y_train:", y_train[0])

Shape of y_train: (42000, 10)
One value of y_train: [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]


# 4: Define the model architecture using TensorFlow with a flatten layer followed by dense layers with ReLU and softmax activations

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization
from tensorflow.keras import regularizers, optimizers

Number of neurons in first layer : 256
Number of neurons in last layer : number of classes
Activation function in first layer : relu
Activation function in last layer : softmax

In [11]:
    # Feed-forward classifier declared as a single layer list:
    # flatten -> batch normalisation -> 256-unit ReLU layer -> 10-unit softmax output.
    model = Sequential([
        Flatten(),
        BatchNormalization(),
        Dense(256, activation='relu'),
        Dense(10, activation='softmax'),
    ])

# 5: Compile the model with categorical cross-entropy loss and the Adam optimizer. Use accuracy as the evaluation metric

Compile the model with:
- Loss: "categorical_crossentropy"
- Metrics: "accuracy"
- Optimizer: "adam"

In [12]:
# Create an Adam optimizer instance with its default hyperparameters.
# Binding the bare class (`optimizers.Adam`) is not enough: `model.compile`
# expects either an optimizer name or an optimizer *instance*.
adam = optimizers.Adam()

In [13]:
# Configure training: Adam optimizer (selected by name), categorical
# cross-entropy loss, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

In [14]:
# Train for 20 epochs in mini-batches of 700 samples, printing progress.
# NOTE(review): the test split is passed as validation data here, while the
# X_val / y_val arrays loaded earlier are not used by this call - confirm
# that this is intended.
model.fit(
    X_train,
    y_train,
    batch_size=700,
    epochs=20,
    verbose=1,
    validation_data=(X_test, y_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x19984be6c08>

# 6: Print the loss and accuracy for the test data

In [15]:
# Score the trained model on the test split; the cell displays the returned
# values (the loss followed by the compiled accuracy metric).
model.evaluate(x=X_test, y=y_test)



[0.553717851638794, 0.8495555520057678]