In [1]:
import logging
import tensorflow as tf
import keras

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
log = logging.getLogger()

%config Completer.use_jedi = False # make autocompletion works in jupyter

tf.__version__

'2.4.1'

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path

import argparse
args = argparse.Namespace()
args.raw_train = pd.read_csv('../data/train.csv.zip')
args.raw_test = pd.read_csv('../data/test.csv.zip')
args.predictions_folder = Path('../predictions')
args.n_splits = 5
args.n_jobs = 3
args.seed = 101
args.val_fraction=0.1
args.epochs = 50

args.train = args.raw_train.iloc[:, 1:].copy()
args.labels = args.raw_train['label'].copy()
args.test = args.raw_test.copy()


args.predictions_folder.mkdir(parents=True, exist_ok=True) 

In [3]:
X = args.train.to_numpy().reshape(args.train.shape[0], 28, 28, 1)
y = pd.get_dummies(args.labels, prefix='label').to_numpy()
y_sparse = args.labels.to_numpy()
x = args.test.to_numpy().reshape(args.test.shape[0], 28, 28, 1)

In [4]:
t = tf.constant([[1, 2], [3, 4], [5, 6]])
tf.square(t)
keras.backend.transpose(t)
t + tf.cast(tf.constant(1.), tf.int32)
v = tf.Variable([[1, 2], [3, 4], [5, 6]])
v[0,0].assign(100)
tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
tf.constant(['019A'])
# tf.strings.unicode_decode(tf.constant(['019A']), input_encoding='UTF-8')

<tf.Tensor: shape=(1,), dtype=string, numpy=array([b'019A'], dtype=object)>

### Custom training loops

In [5]:
import math

def build_simple_model():
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=(28, 28)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(300, activation='elu', kernel_initializer='he_normal'))
    model.add(keras.layers.Dense(10, activation='softmax'))
    return model

def custom_training_loop_my():
    def shuffle(X, y):
        assert len(X) == len(y)
        idx = np.random.permutation(len(X))
        return X[idx], y[idx]
    
    n_epochs = 5
    batch_size = 32
    optimizer=keras.optimizers.Nadam(learning_rate=0.001)
    loss_fn = keras.losses.sparse_categorical_crossentropy
    metrics=[keras.metrics.SparseCategoricalAccuracy()]
    model = build_simple_model()
    
    # compute grad, autodiff
    # perform grad step
    # loss, metric
    for epoch in range(1, n_epochs + 1):
        log.info(f'Epoch {epoch}...')
        n_steps = len(X) // 32 + 1
        X_shuffled, y_shuffled = shuffle(X, y_sparse)
        for step in range(1, n_steps + 1):
            ceiling = step * batch_size if step * batch_size < len(X_shuffled) else len(X_shuffled)
            floor = (step - 1) * batch_size
            X_batch, y_batch = X_shuffled[floor: ceiling], y_shuffled[floor: ceiling]
            with tf.GradientTape() as tape:
                y_pred = model(X_batch, training=True)
                losses = loss_fn(y_batch, y_pred, from_logits=False)
            trainable_weights = [weight for l in model.layers for weight in l.trainable_variables]
            grads = tape.gradient(losses, trainable_weights)
            optimizer.apply_gradients(zip(grads, trainable_weights))
            for metric in metrics:
                metric.update_state(y_true=y_batch, y_pred=y_pred)
        log.info(f'Accuracy: {metrics[0].result():.{5}f}')
            
custom_training_loop_my()

2021-04-07 13:42:16,022 : INFO : Epoch 1...


KeyboardInterrupt: 

In [32]:
from tqdm import tqdm

def custom_training_loop_revised_by_the_book():
    def shuffle(X, y):
        assert len(X) == len(y)
        idx = np.random.permutation(len(X))
        return X[idx], y[idx]
    def print_val(model, X_val, y_sparse_val, loss_cl, metrics):
        y_pred = model.predict(X_val)
        loss = loss_cl(y_sparse_val, y_pred)
        for m in metrics:
            m(y_sparse_val, y_pred)
        metrics_str = '; '.join([f'{m.name}: {m.result():.{5}f}' for m in metrics])
        log.info(f'Validation loss: {loss:.{5}f}. Validation metrics: {metrics_str}')
    
    n_epochs = 5
    batch_size = 32
    optimizer=keras.optimizers.Nadam(learning_rate=0.001)
    loss_fn = keras.losses.sparse_categorical_crossentropy
    loss_cl = keras.losses.SparseCategoricalCrossentropy()
    metrics=[keras.metrics.SparseCategoricalAccuracy()]
    model = build_simple_model()
    
    split_border_idx = int(len(X) * (1 - args.val_fraction))
    X_train = X[:split_border_idx]
    y_sparse_train = y_sparse[:split_border_idx]
    X_val = X[split_border_idx:]
    y_sparse_val = y_sparse[split_border_idx:]
    
    # compute grad, autodiff
    # perform grad step
    # loss, metric
    for epoch in range(1, n_epochs + 1):
        log.info(f'Epoch {epoch}...')
        X_shuffled, y_shuffled = shuffle(X_train, y_sparse_train)
        n_steps = len(X_shuffled) // batch_size + 1
        pbar = tqdm(range(1, n_steps + 1))
        for step in pbar:
            ceiling = step * batch_size if step * batch_size < len(X_shuffled) else len(X_shuffled)
            floor = (step - 1) * batch_size
            # shuffle samples for better backpropagation
            X_batch, y_batch = X_shuffled[floor: ceiling], y_shuffled[floor: ceiling]
            # compute grad
            with tf.GradientTape() as tape:
                y_pred = model(X_batch, training=True)
                main_losses = loss_fn(y_batch, y_pred, from_logits=False)
                main_mean_loss = tf.reduce_mean(main_losses)
                regularization_loss = model.losses
                loss = tf.add_n([main_mean_loss] + regularization_loss)
            # autodiff
            grads = tape.gradient(loss, model.trainable_variables)
            # make a step
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            # check metrics
            for m in metrics:
                m(y_batch, y_pred)
            if step % 10 == 0: 
                metrics_str = '; '.join([f'{m.name}: {m.result():.{5}f}' for m in metrics])
                pbar.set_postfix({'Loss': f'{loss:.{5}f}', 'Metrics': f'{metrics_str}'})
        print_val(model, X_val, y_sparse_val, loss_cl, metrics)
        
custom_training_loop_revised_by_the_book()

2021-04-08 16:36:16,409 : INFO : Epoch 1...
100%|██████████| 1182/1182 [00:12<00:00, 91.76it/s, Loss=0.41453, Metrics=sparse_categorical_accuracy: 0.89023]
2021-04-08 16:36:29,913 : INFO : Validation loss: 0.83410. Validation metrics: sparse_categorical_accuracy: 0.89279
2021-04-08 16:36:29,914 : INFO : Epoch 2...
100%|██████████| 1182/1182 [00:12<00:00, 95.17it/s, Loss=0.10251, Metrics=sparse_categorical_accuracy: 0.91384]
2021-04-08 16:36:42,934 : INFO : Validation loss: 0.58041. Validation metrics: sparse_categorical_accuracy: 0.91439
2021-04-08 16:36:42,935 : INFO : Epoch 3...
100%|██████████| 1182/1182 [00:12<00:00, 93.11it/s, Loss=0.40746, Metrics=sparse_categorical_accuracy: 0.92462]
2021-04-08 16:36:56,237 : INFO : Validation loss: 0.40640. Validation metrics: sparse_categorical_accuracy: 0.92518
2021-04-08 16:36:56,237 : INFO : Epoch 4...
100%|██████████| 1182/1182 [00:12<00:00, 94.15it/s, Loss=0.01300, Metrics=sparse_categorical_accuracy: 0.93111]
2021-04-08 16:37:09,432 : IN

### Custom layer

In [35]:
import keras

class LayerNormalizationLayer(keras.layers.Layer):
    
    def build(self, batch_input_shape):
        log.info(f'batch_input_shape: {batch_input_shape}')
        self.alpha = self.add_weight(name='alpha', shape=batch_input_shape[-1:], dtype=tf.float32, initializer='ones')
        self.beta = self.add_weight(name='beta', shape=batch_input_shape[-1:], dtype=tf.float32, initializer='zeros')
        super().build(batch_input_shape)
    
    def call(self, X):
        mean, variance = tf.nn.moments(X, axes=-1, keepdims=True)
        # it's preferable to compute tf.sqrt(variance + self.eps) rather than tf.sqrt(variance) + self.eps. 
        # Indeed, the derivative of sqrt(z) is undefined when z=0, so training will bomb whenever the variance
        # vector has at least one component equal to 0. Adding ε within the square root guarantees it'll not happen 
        eps = 1e-3
        return self.alpha * (X - mean) / (tf.sqrt(variance + eps)) + self.beta

lnl = LayerNormalizationLayer()
ln = tf.keras.layers.LayerNormalization()

test = tf.constant([[0.1, 0.2], [0.3, 0.4]])
log.info(lnl(test))
log.info(ln(test))

2021-04-11 10:25:16,397 : INFO : batch_input_shape: (2, 2)
2021-04-11 10:25:16,404 : INFO : tf.Tensor(
[[-0.84515435  0.84515417]
 [-0.8451545   0.845154  ]], shape=(2, 2), dtype=float32)
2021-04-11 10:25:16,420 : INFO : tf.Tensor(
[[-0.84515435  0.8451542 ]
 [-0.84515446  0.845154  ]], shape=(2, 2), dtype=float32)
