In [3]:
import numpy as np
import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.utils import to_categorical

In [4]:
# Fetch the MNIST training and test splits (images plus their digit labels)
# through the `mnist` helper package.
train_images, train_labels = mnist.train_images(), mnist.train_labels()
test_images, test_labels = mnist.test_images(), mnist.test_labels()

In [5]:
# Preprocessing: divide the pixel values by 255 and subtract 0.5.
train_images = train_images / 255 - 0.5
test_images = test_images / 255 - 0.5

# Append a channel axis so each image matches the (28, 28, 1) input that the
# convolution layer below declares.
train_images = np.expand_dims(train_images, axis=3)
test_images = np.expand_dims(test_images, axis=3)

# Hyperparameters shared by the cells that follow.
num_filters, filter_size, pool_size = 8, 3, 2

# Baseline network: convolution -> max-pool -> flatten -> 10-way softmax.
model = Sequential()
model.add(Conv2D(filters=num_filters, kernel_size=filter_size, input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 3 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fbb70cf9670>

In [6]:
# TESTING #1
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn.h5').
model.save_weights('cnn.h5')

# Run the network on the first 5 test images.
predictions = model.predict(test_images[0:5])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1)) # [7, 2, 1, 0, 4]

# True labels of the same images, for comparison.
print(test_labels[0:5]) # [7, 2, 1, 0, 4]

[7 2 1 0 4]
[7 2 1 0 4]


# Recommended Experiments

## 1. Varying network depth

In [7]:
# Deeper variant: a second convolution is stacked on the first before pooling.
model = Sequential()
model.add(Conv2D(filters=num_filters, kernel_size=filter_size, input_shape=(28, 28, 1)))
model.add(Conv2D(filters=num_filters, kernel_size=filter_size))  # added layer
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 3 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fbb531b7400>

In [9]:
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn2.h5').
model.save_weights('cnn2.h5')

# Run the network on the first 20 test images.
predictions = model.predict(test_images[0:20])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1))

# True labels of the same images, for comparison.
print(test_labels[0:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 5 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## 2. Adding Dropout layers

In [10]:
# Dropout variant: a Dropout layer with rate 0.5 sits between pooling and
# flattening.
model = Sequential()
model.add(Conv2D(filters=num_filters, kernel_size=filter_size, input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(rate=0.5))  # dropout layer
model.add(Flatten())
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 3 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fbb52b75460>

In [11]:
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn4_1.h5').
model.save_weights('cnn4_1.h5')

# Run the network on the first 20 test images.
predictions = model.predict(test_images[0:20])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1))

# True labels of the same images, for comparison.
print(test_labels[0:20])

[7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## 3. Adding Fully-connected Layers

In [12]:
# Fully-connected variant: a 64-unit ReLU layer is inserted ahead of the
# softmax output.
model = Sequential()
model.add(Conv2D(filters=num_filters, kernel_size=filter_size, input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(units=64, activation='relu'))  # fully connected layer
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 3 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fbb4178c670>

In [13]:
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn4.h5').
model.save_weights('cnn4.h5')

# Run the network on the first 20 test images.
predictions = model.predict(test_images[0:20])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1))

# True labels of the same images, for comparison.
print(test_labels[0:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## 4. Messing with convolution parameters 

In [17]:
# Convolution-parameter variant: the convolution now uses stride 2, 'same'
# padding and a ReLU activation.
model = Sequential()
model.add(Conv2D(
    filters=num_filters,
    kernel_size=filter_size,
    strides=2,
    padding='same',
    activation='relu',
    input_shape=(28, 28, 1),
))  # modified and added parameters
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 3 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x17925fba9d0>

In [18]:
# Save the weights to disk.
# 'cnn2.h5' is already written by the network-depth experiment, so reusing
# that name here would overwrite those weights; this experiment gets its own
# file instead.
model.save_weights('cnn3.h5')

# Load the weights from disk later using:
# model.load_weights('cnn3.h5')

# Predict on the first 20 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions (highest-scoring digit per image).
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


# Personal experiments

In [14]:
# Combined variant: strided ReLU convolution, a second convolution,
# max-pooling, then a 64-unit ReLU layer ahead of the softmax output.
model = Sequential()
model.add(Conv2D(
    filters=num_filters,
    kernel_size=filter_size,
    strides=2,
    padding='same',
    activation='relu',
    input_shape=(28, 28, 1),
))  # modified and added parameters
model.add(Conv2D(filters=num_filters, kernel_size=filter_size))  # added layer
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Flatten())
model.add(Dense(units=64, activation='relu'))  # fully connected layer
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 3 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fbb32122ca0>

In [15]:
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn5.h5').
model.save_weights('cnn5.h5')

# Run the network on the first 20 test images.
predictions = model.predict(test_images[0:20])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1))

# True labels of the same images, for comparison.
print(test_labels[0:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 8 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## Experimento 2

In [35]:
# Reload the MNIST training and test splits from the `mnist` helper package,
# rebinding the four dataset variables.
train_images, train_labels = mnist.train_images(), mnist.train_labels()
test_images, test_labels = mnist.test_images(), mnist.test_labels()

In [38]:
# Preprocess the images for this experiment.
# The dataset is reloaded just before this cell, so the arrays must be scaled
# and given a channel axis again: the Conv2D layers declare
# input_shape=(28, 28, 1). The transform is computed from a fresh mnist load
# instead of updating train_images/test_images in place, so re-running this
# cell can never scale already-scaled data a second time.

# Normalize the images (divide by 255, subtract 0.5) and append the channel axis.
train_images = np.expand_dims((mnist.train_images() / 255) - 0.5, axis=3)
test_images = np.expand_dims((mnist.test_images() / 255) - 0.5, axis=3)

# Hyperparameters for this experiment.
num_filters = 8
filter_size = 3
pool_size = 2

In [37]:
# Experiment 2 network: strided ReLU convolution, a second convolution,
# max-pooling, dropout (rate 0.5), then a 64-unit ReLU layer ahead of the
# softmax output.
model = Sequential()
model.add(Conv2D(
    filters=num_filters,
    kernel_size=filter_size,
    strides=2,
    padding='same',
    activation='relu',
    input_shape=(28, 28, 1),
))  # modified and added parameters
model.add(Conv2D(filters=num_filters, kernel_size=filter_size))  # added layer
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(rate=0.5))  # dropout layer
model.add(Flatten())
model.add(Dense(units=64, activation='relu'))  # fully connected layer
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 3 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fbb0e3ef310>

In [39]:
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn6.h5').
model.save_weights('cnn6.h5')

# Run the network on the first 20 test images.
predictions = model.predict(test_images[0:20])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1))

# True labels of the same images, for comparison.
print(test_labels[0:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 5 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## Experimento 3
Adding one more filter (`num_filters = 9`).

In [40]:
# Image preprocessing (divide by 255, subtract 0.5, add a trailing channel
# axis) is deliberately NOT repeated in this cell: that transform rebinds
# train_images/test_images, so running it again on already-processed arrays
# would rescale them a second time.
# NOTE(review): this assumes the arrays are already preprocessed when this
# cell runs -- confirm with a fresh Restart & Run All.

# Hyperparameters for this experiment: 9 convolution filters (one more than
# the 8 used previously); kernel and pooling window sizes are unchanged.
num_filters = 9
filter_size = 3
pool_size = 2

In [41]:
# Experiment 3 network: strided ReLU convolution, a second convolution,
# max-pooling, dropout (rate 0.5), then a 64-unit ReLU layer ahead of the
# softmax output. The filter count comes from num_filters.
model = Sequential()
model.add(Conv2D(
    filters=num_filters,
    kernel_size=filter_size,
    strides=2,
    padding='same',
    activation='relu',
    input_shape=(28, 28, 1),
))  # modified and added parameters
model.add(Conv2D(filters=num_filters, kernel_size=filter_size))  # added layer
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(rate=0.5))  # dropout layer
model.add(Flatten())
model.add(Dense(units=64, activation='relu'))  # fully connected layer
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 3 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fbb1cf66850>

In [42]:
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn7.h5').
model.save_weights('cnn7.h5')

# Run the network on the first 20 test images.
predictions = model.predict(test_images[0:20])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1))

# True labels of the same images, for comparison.
print(test_labels[0:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 5 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## Experimento 4
Adding 3 more filters (`num_filters = 12`).

`filter_size` and `pool_size` were not modified: we consider the current sizes well suited, since their purpose is to detect image features.

In [43]:
# Image preprocessing (divide by 255, subtract 0.5, add a trailing channel
# axis) is deliberately NOT repeated in this cell: that transform rebinds
# train_images/test_images, so running it again on already-processed arrays
# would rescale them a second time.
# NOTE(review): this assumes the arrays are already preprocessed when this
# cell runs -- confirm with a fresh Restart & Run All.

# Hyperparameters for this experiment: 12 convolution filters; kernel and
# pooling window sizes are unchanged.
num_filters = 12
filter_size = 3
pool_size = 2

In [44]:
# Experiment 4 network: strided ReLU convolution, a second convolution,
# max-pooling, dropout (rate 0.5), then a 64-unit ReLU layer ahead of the
# softmax output. The filter count comes from num_filters.
model = Sequential()
model.add(Conv2D(
    filters=num_filters,
    kernel_size=filter_size,
    strides=2,
    padding='same',
    activation='relu',
    input_shape=(28, 28, 1),
))  # modified and added parameters
model.add(Conv2D(filters=num_filters, kernel_size=filter_size))  # added layer
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(rate=0.5))  # dropout layer
model.add(Flatten())
model.add(Dense(units=64, activation='relu'))  # fully connected layer
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 3 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=3,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fbb0e0836a0>

In [45]:
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn8.h5').
model.save_weights('cnn8.h5')

# Run the network on the first 20 test images.
predictions = model.predict(test_images[0:20])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1))

# True labels of the same images, for comparison.
print(test_labels[0:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## Experimento 5
`filter_size` and `pool_size` were not modified: we consider the current sizes well suited, since their purpose is to detect image features.

Incremented epochs to 10.

We also added another `Conv2D` layer.

We also changed the loss function to `'binary_crossentropy'`.

In [46]:
# Image preprocessing (divide by 255, subtract 0.5, add a trailing channel
# axis) is deliberately NOT repeated in this cell: that transform rebinds
# train_images/test_images, so running it again on already-processed arrays
# would rescale them a second time.
# NOTE(review): this assumes the arrays are already preprocessed when this
# cell runs -- confirm with a fresh Restart & Run All.

# Hyperparameters for this experiment: 12 convolution filters; kernel and
# pooling window sizes are unchanged.
num_filters = 12
filter_size = 3
pool_size = 2

In [47]:
# Build the model: a strided, same-padded ReLU convolution, two further
# convolutions (no activation argument is passed to them), max-pooling,
# dropout with rate 0.5, then a 64-unit ReLU layer feeding the 10-way
# softmax output.
model = Sequential([
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,
    padding='same',
    activation='relu',
  ), #Modified and added parameters
  Conv2D(num_filters, filter_size), #Layer added
  Conv2D(num_filters, filter_size), #Layer added
  MaxPooling2D(pool_size=pool_size),
  Dropout(0.5), #Dropout layer
  Flatten(),
  Dense(64, activation='relu'), #Fully connected layer
  Dense(10, activation='softmax'),
])

# Compile the model.
# This experiment deliberately swaps the loss to binary cross-entropy.
# The metric is named explicitly because Keras resolves the generic
# 'accuracy' string from the loss function and output shape; pinning
# 'categorical_accuracy' keeps the reported score an arg-max accuracy that is
# directly comparable with the other experiments.
model.compile(
  'adam',
  loss='binary_crossentropy',
  metrics=['categorical_accuracy'],
)

# Train the model for 10 epochs on one-hot encoded labels; the test split is
# passed as the validation data.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=10,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fbb1231ea30>

In [48]:
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn9.h5').
model.save_weights('cnn9.h5')

# Run the network on the first 20 test images.
predictions = model.predict(test_images[0:20])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1))

# True labels of the same images, for comparison.
print(test_labels[0:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 8 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [50]:
import tensorflow

In [51]:
# NOTE(review): this only constructs a MeanSquaredError loss object and lets
# the notebook echo its repr; the object is neither assigned to a name nor
# passed to any model.compile call in the visible cells.
tensorflow.keras.losses.MeanSquaredError(reduction="auto", name="mean_squared_error")

<tensorflow.python.keras.losses.MeanSquaredError at 0x7fbb13038760>

## Experimento 6

Same as Experiment 5, but with the loss function changed back to `'categorical_crossentropy'`.

In [52]:
# Image preprocessing (divide by 255, subtract 0.5, add a trailing channel
# axis) is deliberately NOT repeated in this cell: that transform rebinds
# train_images/test_images, so running it again on already-processed arrays
# would rescale them a second time.
# NOTE(review): this assumes the arrays are already preprocessed when this
# cell runs -- confirm with a fresh Restart & Run All.

# Hyperparameters for this experiment: 12 convolution filters; kernel and
# pooling window sizes are unchanged.
num_filters = 12
filter_size = 3
pool_size = 2

In [53]:
# Experiment 6 network: strided ReLU convolution, two further convolutions,
# max-pooling, dropout (rate 0.5), then a 64-unit ReLU layer ahead of the
# softmax output.
model = Sequential()
model.add(Conv2D(
    filters=num_filters,
    kernel_size=filter_size,
    strides=2,
    padding='same',
    activation='relu',
    input_shape=(28, 28, 1),
))  # modified and added parameters
model.add(Conv2D(filters=num_filters, kernel_size=filter_size))  # added layer
model.add(Conv2D(filters=num_filters, kernel_size=filter_size))  # added layer
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(rate=0.5))  # dropout layer
model.add(Flatten())
model.add(Dense(units=64, activation='relu'))  # fully connected layer
model.add(Dense(units=10, activation='softmax'))

# Adam optimizer with categorical cross-entropy; accuracy is reported per epoch.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Fit for 10 epochs on one-hot encoded labels; the test split is passed as the
# validation data.
model.fit(
    x=train_images,
    y=to_categorical(train_labels),
    epochs=10,
    validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fbb1302f820>

In [54]:
# Persist the trained weights; they can be restored later with
# model.load_weights('cnn10.h5').
model.save_weights('cnn10.h5')

# Run the network on the first 20 test images.
predictions = model.predict(test_images[0:20])

# Highest-scoring digit for each image.
print(predictions.argmax(axis=1))

# True labels of the same images, for comparison.
print(test_labels[0:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
