In [7]:
import numpy as np
import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.utils import to_categorical

In [5]:
train_images = mnist.train_images()
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()

In [6]:
# Normalize the images.
train_images = (train_images / 255) - 0.5
test_images = (test_images / 255) - 0.5

# Reshape the images.
train_images = np.expand_dims(train_images, axis=3)
test_images = np.expand_dims(test_images, axis=3)

num_filters = 8
filter_size = 3
pool_size = 2

# Build the model.
model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x17920c58e50>

In [9]:
# TESTING #1
# Save the model to disk.
model.save_weights('cnn.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:5])

# Print our model's predictions.
print(np.argmax(predictions, axis=1)) # [7, 2, 1, 0, 4]

# Check our predictions against the ground truths.
print(test_labels[:5]) # [7, 2, 1, 0, 4]

[7 2 1 0 4]
[7 2 1 0 4]


# Recommended Experiments

## 1. Varying network depth

In [10]:
# Build the model.
model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  Conv2D(num_filters, filter_size), #Layer added
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1792312b9d0>

In [12]:
# Save the model to disk.
model.save_weights('cnn2.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## 2. Adding Dropout layers

In [8]:
# Build the model.
model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  MaxPooling2D(pool_size=pool_size),
  Dropout(0.5), #Dropout layer
  Flatten(),
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1cc014c2640>

In [9]:
# Save the model to disk.
model.save_weights('cnn4_1.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## 3. Adding Fully-connected Layers

In [15]:
# Build the model.
model = Sequential([
  Conv2D(num_filters, filter_size, input_shape=(28, 28, 1)),
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(64, activation='relu'), #Fully connected layer
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x179256168b0>

In [16]:
# Save the model to disk.
model.save_weights('cnn4.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## 4. Messing with convolution parameters 

In [17]:
# Build the model.
model = Sequential([
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,    padding='same',    activation='relu',  ), #Modified and added parameters
  MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x17925fba9d0>

In [18]:
# Save the model to disk.
model.save_weights('cnn2.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


# Personal experiments

In [19]:
# Build the model.
model = Sequential([
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,    padding='same',    activation='relu',  ), #Modified and added parameters
  Conv2D(num_filters, filter_size), #Layer added
    MaxPooling2D(pool_size=pool_size),
  Flatten(),
  Dense(64, activation='relu'), #Fully connected layer
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x17923115d30>

In [21]:
# Save the model to disk.
model.save_weights('cnn5.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 8 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## Experimento 2

In [5]:
train_images = mnist.train_images()
train_labels = mnist.train_labels()
test_images = mnist.test_images()
test_labels = mnist.test_labels()

In [6]:
# Normalize the images.
##train_images = (train_images / 255) - 0.5
##test_images = (test_images / 255) - 0.5

# Reshape the images.
##train_images = np.expand_dims(train_images, axis=3)
##test_images = np.expand_dims(test_images, axis=3)

num_filters = 8
filter_size = 3
pool_size = 2

In [10]:
# Build the model.
model = Sequential([
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,    padding='same',    activation='relu',  ), #Modified and added parameters
  Conv2D(num_filters, filter_size), #Layer added
  MaxPooling2D(pool_size=pool_size),
  Dropout(0.5), #Dropout layer
  Flatten(),
  Dense(64, activation='relu'), #Fully connected layer
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1cc0390ec70>

In [11]:
# Save the model to disk.
model.save_weights('cnn6.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 5 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## Experimento 3
Adding one more filter (num_filters)

In [12]:
# Normalize the images.
#train_images = (train_images / 255) - 0.5
#test_images = (test_images / 255) - 0.5

# Reshape the images.
##train_images = np.expand_dims(train_images, axis=3)
##test_images = np.expand_dims(test_images, axis=3)

num_filters = 9
filter_size = 3
pool_size = 2

In [13]:
# Build the model.
model = Sequential([
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,    padding='same',    activation='relu',  ), #Modified and added parameters
  Conv2D(num_filters, filter_size), #Layer added
  MaxPooling2D(pool_size=pool_size),
  Dropout(0.5), #Dropout layer
  Flatten(),
  Dense(64, activation='relu'), #Fully connected layer
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1cc1e693880>

In [14]:
# Save the model to disk.
model.save_weights('cnn7.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## Experimento 4
Adding 3 more filter (num_filters=12)

filter_size and pool size not modified because we think those sizes are perfect since their objective is to detect image features.

In [15]:
# Normalize the images.
#train_images = (train_images / 255) - 0.5
#test_images = (test_images / 255) - 0.5

# Reshape the images.
##train_images = np.expand_dims(train_images, axis=3)
##test_images = np.expand_dims(test_images, axis=3)

num_filters = 12
filter_size = 3
pool_size = 2

In [17]:
# Build the model.
model = Sequential([
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,    padding='same',    activation='relu',  ), #Modified and added parameters
  Conv2D(num_filters, filter_size), #Layer added
  MaxPooling2D(pool_size=pool_size),
  Dropout(0.5), #Dropout layer
  Flatten(),
  Dense(64, activation='relu'), #Fully connected layer
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=3,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x1cc00210940>

In [18]:
# Save the model to disk.
model.save_weights('cnn8.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 5 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


## Experimento 5
filter_size and pool size not modified because we think those sizes are perfect since their objective is to detect image features.

Incremented epochs to 10.

In [23]:
# Normalize the images.
#train_images = (train_images / 255) - 0.5
#test_images = (test_images / 255) - 0.5

# Reshape the images.
##train_images = np.expand_dims(train_images, axis=3)
##test_images = np.expand_dims(test_images, axis=3)

num_filters = 12
filter_size = 3
pool_size = 2

In [24]:
# Build the model.
model = Sequential([
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,    padding='same',    activation='relu',  ), #Modified and added parameters
  Conv2D(num_filters, filter_size), #Layer added
  Conv2D(num_filters, filter_size), #Layer added
  MaxPooling2D(pool_size=pool_size),
  Dropout(0.5), #Dropout layer
  Flatten(),
  Dense(64, activation='relu'), #Fully connected layer
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='binary_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=10,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1cc19ad38b0>

In [26]:
# Save the model to disk.
model.save_weights('cnn9.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 5 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


In [None]:
tensorflow.keras.losses.MeanSquaredError(reduction="auto", name="mean_squared_error")

## Experimento 6

In [28]:
# Normalize the images.
#train_images = (train_images / 255) - 0.5
#test_images = (test_images / 255) - 0.5

# Reshape the images.
##train_images = np.expand_dims(train_images, axis=3)
##test_images = np.expand_dims(test_images, axis=3)

num_filters = 12
filter_size = 3
pool_size = 2

In [29]:
# Build the model.
model = Sequential([
  Conv2D(
    num_filters,
    filter_size,
    input_shape=(28, 28, 1),
    strides=2,    padding='same',    activation='relu',  ), #Modified and added parameters
  Conv2D(num_filters, filter_size), #Layer added
  Conv2D(num_filters, filter_size), #Layer added
  MaxPooling2D(pool_size=pool_size),
  Dropout(0.5), #Dropout layer
  Flatten(),
  Dense(64, activation='relu'), #Fully connected layer
  Dense(10, activation='softmax'),
])

# Compile the model.
model.compile(
  'adam',
  loss='categorical_crossentropy',
  metrics=['accuracy'],
)

# Train the model.
model.fit(
  train_images,
  to_categorical(train_labels),
  epochs=10,
  validation_data=(test_images, to_categorical(test_labels)),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1cc041be940>

In [31]:
# Save the model to disk.
model.save_weights('cnn10.h5')

# Load the model from disk later using:
# model.load_weights('cnn.h5')

# Predict on the first 5 test images.
predictions = model.predict(test_images[:20])

# Print our model's predictions.
print(np.argmax(predictions, axis=1))

# Check our predictions against the ground truths.
print(test_labels[:20])

[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]


# Implementing a Convolutional Neural Network

Basandonos en el código de https://victorzhou.com/blog/keras-cnn-tutorial/ los primeros resultados obtenidos para el "val_accuracy" fueron entre 95% y 97%

 -- Resultados obtenidos ejecutando el notebook en dos computadoras.

Con los "Recommended Experiments" del tutorial, fuimos variando distintos valores, parámetros, capas, por separado en distintos experimentos:

- Varying network depth: Se agrego una capa de convolución 2D obteniendo un "val_accuracy" maximo de 0.9727 y 0.9764 en cada computadora respectivamente.
  
- Adding Dropout layers: Obtuvimos un "val_accuracy" maximo de 0.9534 y 0.9573 en cada computadora respectivamente.

- Adding Fully-connected Layers: Con la función de activación "Relu", obtuvimos un "val_accuracy" maximo de 0.9782 y 0.9774 en cada computadora respectivamente.

- Messing with convolution parameters: Añadimos varios parametros a la capa de convolución 2D, obteniendo un "val_accuracy" maximo de 0.9587 en ambas computadoras.

Haciendo experimentos propios de la misma manera variando valores, parámetros, capas, etc, y juntando varios de estos parametros en un mismo modelo, tenemos los siguientes resultados:

- Experimento 1: Sin dropout obtuvimos un "val_accuracy" maximo de : 0.9800 y 0.9830
- Experimento 2: Con dropout y manteniendo los demas parametros, obtuvimos un "val_accuracy" maximo de : 0.9818 y 0.9821
- Experimento 3: Añadiendo un filtro (num_filters = 9) y manteniendo los anteriores parametros obtuvimos un "val_accuracy" maximo de : 0.9850 y 0.9822
- Experimento 4: Añadiendo 3 filtros más (num_filters = 12), filter_size y pool size no se modifican porque creemos que esos tamaños son perfectos ya que su objetivo es detectar características de la imagen. Obtuvimos un "val_accuracy" maximo de: 0.9859 y 0.9851

  -- Observacion: hasta este punto los mejores valores de "val_accuracy" fueron obtenidos en el 3er epoch

- Experimento 5: Cambiando el loss_function a 'binary_crossentropy' y el número de epochs a 10 y manteniendo los anteriores parametros, obtuvimos: 0.9907 (Epoch 9) y 0.9908 (Epoch 10)
- Experimento 6: Cambiando el loss_function a 'categorical_crossentropy' y manteniendo los anteriores parametros, obtuvimos 0.9923 (Epoch 7) y 0.9917 (Epoch 10)
