# TensorFlow crash course
### **PART 7**

## Custom training loops

### Load MNIST

In [1]:
import numpy as np 
import tensorflow as tf
from tensorflow.keras.datasets import mnist 
from tensorflow.keras.utils import to_categorical 

# Seed NumPy's global generator (used by random batch sampling) and
# TensorFlow's global generator so that repeated runs are more repeatable.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Only a small subset of MNIST is kept so the demo trains quickly.
N_TRAIN, N_TEST = 10000, 1000

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train, y_train = X_train[:N_TRAIN], y_train[:N_TRAIN]
X_test, y_test = X_test[:N_TEST], y_test[:N_TEST]

# Flatten every image into a 784-long vector and divide by 255.0.
X_train = X_train.reshape(-1, 28 * 28) / 255.0
X_test = X_test.reshape(-1, 28 * 28) / 255.0

# One-hot encode the labels over 10 classes.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(10000, 784) (1000, 784) (10000, 10) (1000, 10)


In [2]:
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Nadam
from tensorflow.keras.models import Sequential
from tensorflow.keras.losses import CategoricalCrossentropy

#  Implement a simple classification model

# Fully connected stack: 784 inputs -> 128 -> 64 -> 32 (ReLU) -> 10 (softmax).
model = Sequential(
    [
        Dense(128, input_shape=(784,), activation="relu"),
        Dense(64, activation="relu"),
        Dense(32, activation="relu"),
        Dense(10, activation="softmax"),
    ]
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                330       
Total params: 111,146
Trainable params: 111,146
Non-trainable params: 0
_________________________________________________________________


In [3]:
def random_batch(X, y, batch_size=32, rng=None):
    """Draw a random mini-batch of aligned samples from ``X`` and ``y``.

    Row indices are sampled uniformly *with replacement*, so the same sample
    may appear more than once in a batch.

    Args:
        X: Array of inputs; must support ``len`` and integer-array indexing.
        y: Array of targets aligned with ``X`` along the first axis.
        batch_size: Number of samples to draw.
        rng: Optional ``numpy.random.Generator``. When omitted, NumPy's global
            random state is used (so ``np.random.seed`` controls the draw).

    Returns:
        The tuple ``(X[idx], y[idx])`` for the sampled indices ``idx``.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or if ``X`` is empty.
    """
    n_samples = len(X)
    if n_samples != len(y):
        # Mismatched lengths would silently pair inputs with wrong targets
        # or fail with a sporadic IndexError depending on the drawn indices.
        raise ValueError(
            f"X and y must have the same length, got {n_samples} and {len(y)}"
        )
    if n_samples == 0:
        raise ValueError("cannot draw a batch from an empty dataset")

    if rng is None:
        idx = np.random.randint(n_samples, size=batch_size)
    else:
        idx = rng.integers(n_samples, size=batch_size)
    return X[idx], y[idx]

In [4]:
# Training schedule: how many samples per step and how many passes to run.
batch_size = 32
n_epochs = 10
# Steps per epoch = number of full batches that fit in the training set.
n_steps = len(X_train) // batch_size

# Objective and optimiser used by the custom training loop.
loss_fn = CategoricalCrossentropy()
optimizer = Nadam(learning_rate=0.01)

In [5]:
# Custom training loop: each epoch runs n_steps gradient updates on randomly
# sampled batches and then reports the mean batch loss.
for epoch in range(1, n_epochs + 1):
    print(f"Epoch {epoch}/{n_epochs}")
    losses = []  # per-batch losses of the current epoch
    for _ in range(n_steps):
        # Pass batch_size explicitly so the sampled batch stays consistent
        # with the value n_steps was derived from.
        X_batch, y_batch = random_batch(X_train, y_train, batch_size=batch_size)
        # Only the forward pass and the loss need to be recorded on the tape.
        with tf.GradientTape() as tape:
            y_pred = model(X_batch, training=True)  # forward pass
            loss = loss_fn(y_batch, y_pred)  # loss for this batch
        losses.append(loss)
        # Note: model.trainable_variables and model.trainable_weights can be
        # used interchangeably here. Layers such as BatchNormalization also
        # hold non-trainable variables, which neither attribute returns.
        gradients = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
    print(f"loss : {tf.reduce_mean(losses)}")

Epoch 1/10
loss : 0.4633212387561798
Epoch 2/10
loss : 0.21464140713214874
Epoch 3/10
loss : 0.1655184030532837
Epoch 4/10
loss : 0.1328078806400299
Epoch 5/10
loss : 0.1285111904144287
Epoch 6/10
loss : 0.11869336664676666
Epoch 7/10
loss : 0.1372356116771698
Epoch 8/10
loss : 0.09185465425252914
Epoch 9/10
loss : 0.07806721329689026
Epoch 10/10
loss : 0.09695207327604294
