In [171]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras import regularizers

from keras import backend as K 

import matplotlib.pyplot as plt

# Reset Keras' global state so that re-running the notebook does not pile up
# layers and models left over from earlier executions in the same kernel.
K.clear_session()

In [172]:
# `K` (the Keras backend) is already imported by the first cell, so it is not
# re-imported here. The call below resets Keras' global state again.
K.clear_session()

### Setting Up GPU as a training device

In [173]:
# Ask TensorFlow for every device it can see, without filtering by device type.
physical_devices = tf.config.list_physical_devices(device_type=None)

print(physical_devices)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [174]:
# Look the GPU up by device type instead of by its position in the unfiltered
# device list: a positional lookup raises IndexError on CPU-only machines.
gpu_devices = tf.config.list_physical_devices('GPU')

if gpu_devices:
  # Expose only the first GPU to TensorFlow.
  tf.config.set_visible_devices(gpu_devices[0], 'GPU')
else:
  print('No GPU detected; TensorFlow will run on the CPU.')

### Loading Data

In [175]:
# Number of samples placed in each split of the dataset.
# N_TRAIN_EXAMPLES + N_VALIDATION_EXAMPLES is used as the training-split size.
N_TRAIN_EXAMPLES = 60_000
N_VALIDATION_EXAMPLES = 0
N_TEST_EXAMPLES = 10_000

# Number of target classes.
CLASSES = 10

In [176]:

from sklearn.model_selection import train_test_split

# Keras ships MNIST already divided into a train and a test part.
(X_train, Y_train), (X_test, Y_test) = keras.datasets.mnist.load_data()

# Pool both parts so the split sizes can be chosen freely via the constants.
X = np.concatenate((X_train, X_test))
y = np.concatenate((Y_train, Y_test))

# Re-split with a fixed seed so the partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
  X,
  y,
  train_size=N_TRAIN_EXAMPLES + N_VALIDATION_EXAMPLES,
  test_size=N_TEST_EXAMPLES,
  random_state=1,
)

# Fail early if the requested split sizes were not honoured.
assert len(X_train) == len(y_train) == N_TRAIN_EXAMPLES + N_VALIDATION_EXAMPLES
assert len(X_test) == len(y_test) == N_TEST_EXAMPLES

### Setting up the model

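For reference, the AlexNet layer stack written in Keras (the model defined below is a much smaller network for 28×28 single-channel images):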

```py
  keras.Input(shape=(227, 227, 3)),
  layers.Conv2D(96, kernel_size=11, strides=4, padding='same', activation="relu"),
  layers.MaxPooling2D(pool_size=3, strides=2),
  layers.Conv2D(256, kernel_size=5, padding='same', activation="relu"),
  layers.MaxPooling2D(pool_size=3, strides=2),
  layers.Conv2D(384, kernel_size=3, padding='same', activation="relu"),
  layers.Conv2D(384, kernel_size=3, padding='same', activation="relu"),
  layers.Conv2D(256, kernel_size=3, padding='same', activation="relu"),
  layers.MaxPooling2D(pool_size=3, strides=2),
  layers.Dropout(0.5),
  layers.Dense(4096, activation="relu"),
  layers.Dropout(0.5),
  layers.Dense(4096, activation="relu"),
  layers.Dense(CLASSES, activation="softmax")
```

In [177]:
from keras import optimizers

# L2 weight penalties for the convolutional and the dense kernels.
conv_regularizer = regularizers.l2(0.01)
dense_regularizer = regularizers.l2(0.01)
normalization_momentum = 0.9


def _batch_norm():
  """Return a new BatchNormalization layer that normalizes the last axis.

  The model input is declared channels-last, shape (28, 28, 1), so the
  feature/channel axis is -1. Normalizing axis 1 would instead compute
  statistics per image row.
  """
  return layers.BatchNormalization(axis=-1, momentum=normalization_momentum)


model = keras.Sequential(
  [
    keras.Input(shape=(28, 28, 1)),
    _batch_norm(),
    layers.Conv2D(28, kernel_size=2, strides=1, padding='same', activation="relu", kernel_regularizer=conv_regularizer),
    _batch_norm(),
    layers.MaxPooling2D(pool_size=2, strides=1),
    _batch_norm(),
    layers.Conv2D(48, kernel_size=3, strides=2, padding='same', activation="relu", kernel_regularizer=conv_regularizer),
    _batch_norm(),
    layers.MaxPooling2D(pool_size=2, strides=1),
    _batch_norm(),
    layers.Flatten(),
    layers.Dense(10, activation="relu", kernel_regularizer=dense_regularizer),
    _batch_norm(),
    # One output unit per class: a softmax over a single unit always yields
    # 1.0 and therefore cannot discriminate between classes.
    layers.Dense(CLASSES, activation="softmax"),
  ]
)

model.summary()

optimizer = optimizers.Adam()

# The labels are never one-hot encoded in this notebook, so the sparse variant
# of the cross-entropy loss is required. The "accuracy" metric keeps reporting
# under the 'accuracy' / 'val_accuracy' history keys.
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

### Training model

In [179]:
# File that receives the best model found during training; the evaluation
# cell loads the model back from this same path.
CHECKPOINT_PATH = 'models/checkpoints/mnist_best.keras'

In [180]:
# Save the full model (not only its weights) to CHECKPOINT_PATH, and only
# when the validation accuracy improves on the best value seen so far.
checkpoint_options = dict(
  filepath=CHECKPOINT_PATH,
  monitor='val_accuracy',
  mode='max',
  save_best_only=True,
  save_weights_only=False,
)
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(**checkpoint_options)

In [181]:
%%time
batch_size = 200
epochs = 10
validation_split = 0.2
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.2, callbacks=[model_checkpoint_callback])

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10

KeyboardInterrupt: 

In [None]:
# Reload the model stored at CHECKPOINT_PATH and score it on the test split.
best_checkpoint = keras.models.load_model(CHECKPOINT_PATH)

# evaluate() is unpacked into the loss followed by the compiled metric.
test_metrics = best_checkpoint.evaluate(X_test, y_test)
loss, score = test_metrics

print(loss, score)

In [None]:
# Training vs. validation loss per epoch, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(6, 4))
for history_key, line_color in (('loss', 'black'), ('val_loss', 'blue')):
  ax.plot(history.history[history_key], line_color, linewidth=2.0)
ax.legend(['Training Loss', 'Validation Loss'], fontsize=12)
ax.set_xlabel('Epochs', fontsize=10)
ax.set_ylabel('Loss', fontsize=10)
ax.set_title('Loss Curves', fontsize=12)

In [None]:
# Training vs. validation accuracy per epoch, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(6, 4))
for history_key, line_color in (('accuracy', 'black'), ('val_accuracy', 'blue')):
  ax.plot(history.history[history_key], line_color, linewidth=2.0)
ax.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=12)
ax.set_xlabel('Epochs', fontsize=10)
ax.set_ylabel('Accuracy', fontsize=10)
ax.set_title('Accuracy Curves', fontsize=12)

### Saving model to file

In [None]:
# model.save("models/cifar10_test_model_L_v1.0.2.keras")

In [None]:
import json
import os

# Store the history next to the checkpoint it belongs to. The previous
# hard-coded file name referred to a CIFAR-10 model although this notebook
# trains on MNIST, which risks overwriting another experiment's history.
history_path = os.path.splitext(CHECKPOINT_PATH)[0] + '_history.json'

# open() does not create missing directories, so make sure the target exists.
history_dir = os.path.dirname(history_path)
if history_dir:
  os.makedirs(history_dir, exist_ok=True)

with open(history_path, 'w') as f:
  # default=float converts NumPy scalar values, which json cannot serialize.
  json.dump(history.history, f, default=float)