In [92]:
%load_ext autoreload
%autoreload 2

In [116]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

In [146]:
train_data, test_data = tf.keras.datasets.mnist.load_data(path="mnist.npz")
x_train = train_data[0].reshape(-1,784).astype(float) / 255
y_train = train_data[1]
x_test = test_data[0].reshape(-1,784).astype(float) / 255
y_test = test_data[1]

In [147]:
np.random.seed(42)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, shuffle=True)
# y_train, y_val = train_test_split(y_train, test_size=0.1, shuffle=True)


In [148]:
print(len(x_train), len(x_val), len(x_test))

54000 6000 10000


In [149]:
# source: https://keras.io/guides/training_with_built_in_methods/

inputs = keras.Input(shape=(784,), name="digits")
x = layers.Dense(32, activation="relu", name="dense_1")(inputs)
x = layers.Dense(32, activation="relu", name="dense_2")(x)
outputs = layers.Dense(10, activation=None, name="predictions")(x)
model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")
loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
#def loss(true, pred):
#    val = 0.
#    for i in range(len(true)):
#        val -= np.log(pred[i, true[i]])
#    return val / len(true)
model.summary()

Model: "mnist_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 digits (InputLayer)         [(None, 784)]             0         
                                                                 
 dense_1 (Dense)             (None, 32)                25120     
                                                                 
 dense_2 (Dense)             (None, 32)                1056      
                                                                 
 predictions (Dense)         (None, 10)                330       
                                                                 
Total params: 26506 (103.54 KB)
Trainable params: 26506 (103.54 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [150]:
# untrained model
logits = model(x_train)
np.argmax(logits, 1)[0:100]

array([0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 6, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0])

In [151]:
# what we would like to get
y_train[0:100]

array([1, 6, 6, 7, 8, 4, 2, 7, 7, 6, 7, 3, 6, 9, 6, 1, 2, 1, 1, 0, 6, 3,
       0, 5, 3, 9, 2, 4, 1, 3, 3, 8, 7, 2, 0, 8, 5, 2, 5, 5, 6, 1, 1, 2,
       2, 7, 6, 6, 4, 2, 1, 1, 7, 2, 0, 4, 9, 4, 6, 5, 9, 2, 3, 0, 0, 0,
       6, 4, 1, 6, 2, 9, 4, 3, 3, 9, 7, 8, 9, 5, 3, 4, 4, 6, 1, 2, 9, 4,
       5, 6, 3, 1, 9, 4, 0, 9, 0, 3, 3, 4], dtype=uint8)

In [152]:
## Computing a stochastic gradient

# Source: https://keras.io/guides/writing_a_training_loop_from_scratch/

# Example of gradient computation using automatic differentiation

x_train_batch = x_train[:8]
y_train_batch = y_train[:8]
with tf.GradientTape() as tape:
    # Run the forward pass of the layer.
    # The operations that the layer applies
    # to its inputs are going to be recorded
    # on the GradientTape.
    logits = model(x_train_batch)  # Logits for this minibatch

    # Compute the loss value for this minibatch.
    loss_value = loss(y_train_batch, logits)

# Use the gradient tape to automatically retrieve
# the gradients of the trainable variables with respect to the loss.
grad = tape.gradient(loss_value, model.trainable_weights)

## Updating weights

def update_weights(model, update):
    new_weights = model.trainable_weights.copy()
    for i in range(len(new_weights)):
        new_weights[i] = new_weights[i] + update[i]
    model.set_weights(new_weights)


## Evaluate model
samples_for_evaluation = np.random.randint(0, 10000, 1000)
logits = model(x_train[samples_for_evaluation])
loss(y_train[samples_for_evaluation], logits).numpy()
    

2.3407905

In [153]:
from tqdm import tqdm

In [154]:
alpha = 1e-3

In [155]:
def loop(x_train,y_train, epochs=1000, alpha=1e-3, batch_size=128):
    train_losses = []
    val_losses = []
    for epoch in range(epochs):
        train_loss = 0
        for i in tqdm(range(0, x_train.shape[0], batch_size)):
            x_train_batch = x_train[i:i+batch_size]
            y_train_batch = y_train[i:i+batch_size]
            with tf.GradientTape() as tape:
            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are going to be recorded
            # on the GradientTape.
                logits = model(x_train_batch)  # Logits for this minibatch

                # Compute the loss value for this minibatch.
                loss_value = loss(y_train_batch, logits)

            # Use the gradient tape to automatically retrieve
            # the gradients of the trainable variables with respect to the loss.
            grad = tape.gradient(loss_value, model.trainable_weights)
            update_weights(model,  [-alpha * g for g in grad])
            train_loss += loss_value.numpy() * len(x_train_batch)
        

        logits = model(x_val)
        val_loss = loss(y_val, logits).numpy()
        train_losses.append(train_losses)
        val_losses.append(val_loss)
        print(f"Epoch {epoch}: train_loss = {train_loss / len(x_train)}, val_loss = {val_loss}")

    return train_losses, val_losses

In [156]:
loop(x_train, y_train, alpha=1e-1)

  0%|          | 0/422 [00:00<?, ?it/s]

100%|██████████| 422/422 [00:03<00:00, 134.61it/s]


Epoch 0: train_loss = 0.5576892976672561, val_loss = 0.2911528944969177


100%|██████████| 422/422 [00:03<00:00, 138.56it/s]


Epoch 1: train_loss = 0.2636889814933141, val_loss = 0.22873498499393463


100%|██████████| 422/422 [00:02<00:00, 141.99it/s]


Epoch 2: train_loss = 0.21227641629289698, val_loss = 0.19601862132549286


100%|██████████| 422/422 [00:02<00:00, 143.03it/s]


Epoch 3: train_loss = 0.17963848579812933, val_loss = 0.17466990649700165


100%|██████████| 422/422 [00:02<00:00, 142.52it/s]


Epoch 4: train_loss = 0.15668327428455706, val_loss = 0.15749308466911316


100%|██████████| 422/422 [00:03<00:00, 140.16it/s]


Epoch 5: train_loss = 0.13972837180119974, val_loss = 0.14732830226421356


100%|██████████| 422/422 [00:03<00:00, 139.33it/s]


Epoch 6: train_loss = 0.1267466254455072, val_loss = 0.13854371011257172


100%|██████████| 422/422 [00:02<00:00, 141.66it/s]


Epoch 7: train_loss = 0.11621285991757005, val_loss = 0.13304109871387482


100%|██████████| 422/422 [00:02<00:00, 142.88it/s]


Epoch 8: train_loss = 0.10754200982826727, val_loss = 0.12732470035552979


100%|██████████| 422/422 [00:02<00:00, 142.03it/s]


Epoch 9: train_loss = 0.10022160996551867, val_loss = 0.12310492247343063


100%|██████████| 422/422 [00:02<00:00, 141.07it/s]


Epoch 10: train_loss = 0.09399044301112493, val_loss = 0.11969570070505142


100%|██████████| 422/422 [00:02<00:00, 145.04it/s]


Epoch 11: train_loss = 0.08833422609722173, val_loss = 0.11645076423883438


100%|██████████| 422/422 [00:02<00:00, 143.38it/s]


Epoch 12: train_loss = 0.08321138916633747, val_loss = 0.11494147032499313


100%|██████████| 422/422 [00:02<00:00, 142.01it/s]


Epoch 13: train_loss = 0.07873139342776052, val_loss = 0.11511734127998352


100%|██████████| 422/422 [00:02<00:00, 142.48it/s]


Epoch 14: train_loss = 0.0747775954692452, val_loss = 0.11433117836713791


100%|██████████| 422/422 [00:02<00:00, 143.12it/s]


Epoch 15: train_loss = 0.07120463445010008, val_loss = 0.11299682408571243


100%|██████████| 422/422 [00:02<00:00, 142.42it/s]


Epoch 16: train_loss = 0.06790592925967993, val_loss = 0.11173362284898758


100%|██████████| 422/422 [00:03<00:00, 140.16it/s]


Epoch 17: train_loss = 0.06494210557694788, val_loss = 0.11091043800115585


100%|██████████| 422/422 [00:02<00:00, 142.96it/s]


Epoch 18: train_loss = 0.06221987408289203, val_loss = 0.11193273961544037


100%|██████████| 422/422 [00:02<00:00, 143.42it/s]


Epoch 19: train_loss = 0.05964339427705164, val_loss = 0.11105043441057205


100%|██████████| 422/422 [00:02<00:00, 143.46it/s]


Epoch 20: train_loss = 0.05720480127191102, val_loss = 0.11111991107463837


100%|██████████| 422/422 [00:02<00:00, 141.99it/s]


Epoch 21: train_loss = 0.05485468047470958, val_loss = 0.11056917160749435


100%|██████████| 422/422 [00:02<00:00, 141.95it/s]


Epoch 22: train_loss = 0.05272752586117497, val_loss = 0.11065635085105896


100%|██████████| 422/422 [00:02<00:00, 144.84it/s]


Epoch 23: train_loss = 0.050585573249944935, val_loss = 0.11037347465753555


100%|██████████| 422/422 [00:02<00:00, 142.06it/s]


Epoch 24: train_loss = 0.04863454449618304, val_loss = 0.11141843348741531


100%|██████████| 422/422 [00:03<00:00, 133.74it/s]


Epoch 25: train_loss = 0.046694919094995214, val_loss = 0.11220327764749527


100%|██████████| 422/422 [00:03<00:00, 135.38it/s]


Epoch 26: train_loss = 0.044761873135964074, val_loss = 0.11148402839899063


100%|██████████| 422/422 [00:03<00:00, 133.63it/s]


Epoch 27: train_loss = 0.04297049460035783, val_loss = 0.11335457116365433


100%|██████████| 422/422 [00:03<00:00, 137.85it/s]


Epoch 28: train_loss = 0.04110511695731569, val_loss = 0.11221466958522797


100%|██████████| 422/422 [00:03<00:00, 130.96it/s]


Epoch 29: train_loss = 0.03944330466511073, val_loss = 0.11418955773115158


100%|██████████| 422/422 [00:03<00:00, 136.76it/s]


Epoch 30: train_loss = 0.03780153607328733, val_loss = 0.11465596407651901


100%|██████████| 422/422 [00:03<00:00, 135.92it/s]


Epoch 31: train_loss = 0.03607945402408088, val_loss = 0.11606520414352417


100%|██████████| 422/422 [00:03<00:00, 137.35it/s]


Epoch 32: train_loss = 0.034495869820868524, val_loss = 0.11670166254043579


100%|██████████| 422/422 [00:03<00:00, 134.03it/s]


Epoch 33: train_loss = 0.032999468775259125, val_loss = 0.11691178381443024


100%|██████████| 422/422 [00:03<00:00, 136.90it/s]


Epoch 34: train_loss = 0.03161614460608474, val_loss = 0.11840306967496872


100%|██████████| 422/422 [00:04<00:00, 86.61it/s] 


Epoch 35: train_loss = 0.03019600029951996, val_loss = 0.12015260756015778


100%|██████████| 422/422 [00:02<00:00, 142.96it/s]


Epoch 36: train_loss = 0.028908736889009124, val_loss = 0.12033936381340027


100%|██████████| 422/422 [00:04<00:00, 85.53it/s] 


Epoch 37: train_loss = 0.02752893277147302, val_loss = 0.12164362519979477


100%|██████████| 422/422 [00:02<00:00, 141.88it/s]


Epoch 38: train_loss = 0.02645166484846009, val_loss = 0.1241089478135109


100%|██████████| 422/422 [00:04<00:00, 86.75it/s] 


Epoch 39: train_loss = 0.025296294763684272, val_loss = 0.12424009293317795


100%|██████████| 422/422 [00:03<00:00, 140.11it/s]


Epoch 40: train_loss = 0.024207438354138976, val_loss = 0.1258022040128708


100%|██████████| 422/422 [00:04<00:00, 86.29it/s] 


Epoch 41: train_loss = 0.023233045382080254, val_loss = 0.12662602961063385


100%|██████████| 422/422 [00:04<00:00, 86.61it/s] 


Epoch 42: train_loss = 0.022201394406457742, val_loss = 0.12899869680404663


100%|██████████| 422/422 [00:02<00:00, 143.58it/s]


Epoch 43: train_loss = 0.021241268930887736, val_loss = 0.1300019472837448


100%|██████████| 422/422 [00:03<00:00, 107.46it/s]


Epoch 44: train_loss = 0.020151988192013016, val_loss = 0.13114316761493683


100%|██████████| 422/422 [00:04<00:00, 86.23it/s] 


Epoch 45: train_loss = 0.019292659854999294, val_loss = 0.133144810795784


100%|██████████| 422/422 [00:04<00:00, 86.82it/s] 


Epoch 46: train_loss = 0.018423815212040034, val_loss = 0.13448739051818848


100%|██████████| 422/422 [00:03<00:00, 108.72it/s]


Epoch 47: train_loss = 0.017749448702291205, val_loss = 0.13652199506759644


100%|██████████| 422/422 [00:04<00:00, 87.04it/s] 


Epoch 48: train_loss = 0.016885214831818034, val_loss = 0.1379549354314804


100%|██████████| 422/422 [00:04<00:00, 86.78it/s] 


Epoch 49: train_loss = 0.016132430233061313, val_loss = 0.13948014378547668


100%|██████████| 422/422 [00:04<00:00, 87.01it/s] 


Epoch 50: train_loss = 0.015322676639865945, val_loss = 0.14031562209129333


100%|██████████| 422/422 [00:04<00:00, 87.08it/s] 


Epoch 51: train_loss = 0.014689484405848715, val_loss = 0.14178821444511414


100%|██████████| 422/422 [00:02<00:00, 141.03it/s]


Epoch 52: train_loss = 0.0139727190564628, val_loss = 0.14364659786224365


100%|██████████| 422/422 [00:04<00:00, 87.22it/s] 


Epoch 53: train_loss = 0.013253970627155569, val_loss = 0.1446199119091034


100%|██████████| 422/422 [00:05<00:00, 71.78it/s] 


Epoch 54: train_loss = 0.012561330766037658, val_loss = 0.1468716263771057


100%|██████████| 422/422 [00:04<00:00, 85.90it/s] 


Epoch 55: train_loss = 0.012061702706747585, val_loss = 0.1473756730556488


100%|██████████| 422/422 [00:04<00:00, 84.82it/s] 


Epoch 56: train_loss = 0.01152870019801237, val_loss = 0.14778506755828857


100%|██████████| 422/422 [00:04<00:00, 85.14it/s] 


Epoch 57: train_loss = 0.010845509670122907, val_loss = 0.14906133711338043


100%|██████████| 422/422 [00:05<00:00, 81.49it/s] 


Epoch 58: train_loss = 0.01036678871553805, val_loss = 0.1511555314064026


100%|██████████| 422/422 [00:04<00:00, 84.91it/s] 


Epoch 59: train_loss = 0.009944918279018666, val_loss = 0.15097714960575104


100%|██████████| 422/422 [00:06<00:00, 63.22it/s] 


Epoch 60: train_loss = 0.009493786349754643, val_loss = 0.1523677110671997


100%|██████████| 422/422 [00:04<00:00, 90.98it/s] 


Epoch 61: train_loss = 0.009021853705937112, val_loss = 0.1533328890800476


100%|██████████| 422/422 [00:05<00:00, 81.67it/s] 


Epoch 62: train_loss = 0.008538059627568281, val_loss = 0.1547957956790924


100%|██████████| 422/422 [00:05<00:00, 83.82it/s] 


Epoch 63: train_loss = 0.00815411880598576, val_loss = 0.15629081428050995


100%|██████████| 422/422 [00:04<00:00, 84.85it/s] 


Epoch 64: train_loss = 0.007712439779882078, val_loss = 0.15786884725093842


100%|██████████| 422/422 [00:05<00:00, 81.33it/s] 


Epoch 65: train_loss = 0.007354120419257217, val_loss = 0.1585610955953598


  6%|▋         | 27/422 [00:01<00:19, 20.57it/s] 


KeyboardInterrupt: 

In [None]:
## Evaluate model
samples_for_evaluation = np.random.randint(0, 10000, 1000)
logits = model(x_train[samples_for_evaluation])
loss(y_train[samples_for_evaluation], logits).numpy()
    

2.053952