# Cnn trial

In [93]:
import numpy as np
import mnist
import keras

In [94]:
train_images = mnist.train_images()

In [95]:
train_labels = mnist.train_labels()

In [96]:
print(train_images.shape)

(60000, 28, 28)


In [97]:
print(train_labels, train_labels.shape)

[5 0 4 ... 5 6 8] (60000,)


In [98]:
test_images=mnist.test_images()
test_labels=mnist.test_labels()

In [99]:
print(test_images.shape, test_labels.shape)

(10000, 28, 28) (10000,)


In [100]:
print(train_images.min(), train_images.max())

0 255


In [101]:
#Normalize images from 0-255 to [-0.5,0.5]
# makes the network easier to train, when values are :
# smaller and centered > this usually leads to better results.

train_images = (train_images/255) - 0.5
test_images = (test_images/255) - 0.5

print(train_images.min(), train_images.max())

-0.5 0.5


In [102]:
# Reshape the images from (28,28) to (28,28,1) as Keras requires the thrid dimension
# 1 = gray image, 3 = RGB image n = higher dimension image

# np.expand_dims() adds an axis

train_images = np.expand_dims(train_images, axis=3)
test_images = np.expand_dims(test_images, axis=3)

print(train_images.shape, test_images.shape)


(60000, 28, 28, 1) (10000, 28, 28, 1)


In [103]:
# CNN part of the code
# we will use Sequential model, that presents linear stack of layers 
# output goes to input, no cyckling in the models (such as LSTM is impossible with this model)

from keras import Sequential

In [104]:
# Sequential model/constructor takes an array of Keras layers.
# We use 3 layers: convolutional, max pooling and softmax

from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout

# call model and pass an instance to it
#define parameters for model

# padding='same' means adding 0's around the image producing same sized filters
# as the image fed to the network
# adding second conv2d layer increased the accuracy by from 0.96 to 0.97
# adding dropout: prevents overfitting= learning test set well, not understanding
# test set. rate = fraction of the input units to drop 0.971
# adding dense(64..) i.e. fully connected layer after convolutional layer and
# before output layer ( a typical CNN structure). units = number of nodes 0.979

num_filters = 8
filter_size = 3
pool_size = 2

model = Sequential([
   Conv2D(filters=num_filters, kernel_size=filter_size, padding='same',input_shape=(28,28,1)),
    Conv2D(num_filters, filter_size, padding='same'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(rate = 0.2),
    Flatten(),
    Dense(units=64, activation='relu'),
    Dense(units=10, activation='softmax')    
])

In [105]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 28, 28, 8)         80        
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 28, 28, 8)         584       
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 14, 14, 8)         0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 14, 14, 8)         0         
_________________________________________________________________
flatten_10 (Flatten)         (None, 1568)              0         
_________________________________________________________________
dense_14 (Dense)             (None, 64)                100416    
_________________________________________________________________
dense_15 (Dense)             (None, 10)              

In [106]:
# compiling the model i.e, configurint the training process
# what optimizer, loss functions and metrics are used

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [107]:
# training labels for the images fed to the network
train_labels = mnist.train_labels()
print(train_labels, test_labels)

[5 0 4 ... 5 6 8] [7 2 1 ... 4 5 6]


In [108]:
# our labels are a list of integers, where each number represents the class for one image.
# this needs to be changed to categorical form by using one-hot vectors.
# this way we can feed the data to Keras, as it expects these labels to be categorical vectors.
# i.e. instead saying image_0's label is 5, way say that it is [0,0,0,0,0,1,0,0,0,0], in one-hot vector
# This representation we can feed to keras model without errors.

from keras.utils import to_categorical

train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)
print(train_labels, train_labels.shape, test_labels)

[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]] (60000, 10) [[0. 0. 0. ... 1. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [109]:
print(train_labels.shape, test_labels.shape)

(60000, 10) (10000, 10)


In [89]:
# training the model, by calling fit() -function and specifying parameters: training data, 
# number of epochs (i.e. iterations over the entire dataset), validation data

model.fit(
    train_images,
    train_labels,
    epochs = 3,
    validation_split = 0.2
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x15b31c5b400>

In [19]:
# package to save model weigths in .h5 format.
import h5py


In [20]:
#Save the model weights, ready-trained model, so that it can be used again anytime

model.save_weights('cnn.h5')

In [23]:
# reload the trained model 
weights = model.load_weights('cnn.h5')
print(weights)

None


In [30]:
# using the trained model to make predictions by passing an array of inputs to predict()
# that returns an array of outputs.

prediction = model.predict(test_images[:5])

# print the prediction for each 10 classes
print(prediction)

[[2.6867338e-06 1.4287612e-08 1.2044975e-05 1.5760382e-04 1.0440846e-06
  1.0789305e-07 4.6235227e-11 9.9982482e-01 4.9584223e-08 1.6443012e-06]
 [3.2097840e-04 9.4206611e-05 9.9547487e-01 2.8281490e-04 8.5492458e-09
  1.5680055e-04 3.4933856e-03 7.9347570e-12 1.7694257e-04 1.7613157e-08]
 [2.1520598e-05 9.9520314e-01 1.9347651e-03 1.6155311e-04 4.5565888e-04
  9.5155032e-05 1.6669280e-04 1.7921421e-03 1.3775448e-04 3.1694803e-05]
 [9.9768305e-01 2.1226189e-08 1.0001265e-03 1.2441712e-05 4.7176195e-06
  2.8484213e-04 8.3691417e-04 1.5011485e-04 1.2694619e-05 1.5007169e-05]
 [6.5808679e-04 6.5386685e-06 2.2377414e-03 8.7697217e-06 9.8572111e-01
  1.1097582e-05 8.0070563e-04 8.0966437e-03 1.4100330e-04 2.3184104e-03]]


In [35]:
# np.argmax() returns the maximum value from a list/ axis wanted
# i.e. as we have ten classes, each of them have a number of prediction
# when we use the argmax, it returns the column number that has the highest 
# number, i.e. the best guess of the network of what the output might be.
# axis=1 means it will take each list from list and return its highest value,
# not the whole list of lists highest value (only 1 number)

print(np.argmax(prediction, axis=1))

[7 2 1 0 4]


In [34]:
print(np.argmax(test_labels[:5], axis=1))

[7 2 1 0 4]
