In [57]:
import logging
import tensorflow as tf
import keras

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
log = logging.getLogger()

%config Completer.use_jedi = False # make autocompletion works in jupyter

tf.__version__

'2.4.1'

In [58]:
import numpy as np
import pandas as pd
from pathlib import Path

import argparse
args = argparse.Namespace()
args.raw_train = pd.read_csv('../data/train.csv.zip')
args.raw_test = pd.read_csv('../data/test.csv.zip')
args.predictions_folder = Path('../predictions')
args.n_splits = 5
args.n_jobs = 3
args.seed = 101
args.val_fraction=0.1
args.epochs = 50

args.train = args.raw_train.iloc[:, 1:].copy()
args.labels = args.raw_train['label'].copy()
args.test = args.raw_test.copy()


args.predictions_folder.mkdir(parents=True, exist_ok=True) 

In [59]:
X = args.train.to_numpy().reshape(args.train.shape[0], 28, 28, 1)
y = pd.get_dummies(args.labels, prefix='label').to_numpy()
y_sparse = args.labels.to_numpy()
x = args.test.to_numpy().reshape(args.test.shape[0], 28, 28, 1)

In [60]:
t = tf.constant([[1, 2], [3, 4], [5, 6]])
tf.square(t)
keras.backend.transpose(t)
t + tf.cast(tf.constant(1.), tf.int32)
v = tf.Variable([[1, 2], [3, 4], [5, 6]])
v[0,0].assign(100)
tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
tf.constant(['019A'])
# tf.strings.unicode_decode(tf.constant(['019A']), input_encoding='UTF-8')

<tf.Tensor: shape=(1,), dtype=string, numpy=array([b'019A'], dtype=object)>

### Custom training loops

In [155]:
import math

def build_simple_model():
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=(28, 28)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(300, activation='elu', kernel_initializer='he_normal'))
    model.add(keras.layers.Dense(10, activation='softmax')) # !
    return model

def custom_training_loop_my():
    def shuffle(X, y):
        assert len(X) == len(y)
        idx = np.random.permutation(len(X))
        return X[idx], y[idx]
    
    n_epochs = 5
    batch_size = 32
    optimizer=keras.optimizers.Nadam(learning_rate=0.001)
    loss_fn = keras.losses.sparse_categorical_crossentropy # try class
    metrics=[keras.metrics.SparseCategoricalAccuracy()] # try func
    model = build_simple_model()
    
    # compute grad, autodiff
    # perform grad step
    # loss, metric
    for epoch in range(1, n_epochs + 1):
        log.info(f'Epoch {epoch}...')
        n_steps = len(X) // 32 + 1
        X_shuffled, y_shuffled = shuffle(X, y_sparse)
        for step in range(1, n_steps + 1):
            ceiling = step * batch_size if step * batch_size < len(X_shuffled) else len(X_shuffled)
            floor = (step - 1) * batch_size
            X_batch, y_batch = X_shuffled[floor: ceiling], y_shuffled[floor: ceiling]
            with tf.GradientTape() as tape:
                y_pred = model(X_batch, training=True)
                losses = loss_fn(y_batch, y_pred, from_logits=False)
            trainable_weights = [weight for l in model.layers for weight in l.trainable_variables]
            grads = tape.gradient(losses, trainable_weights)
            optimizer.apply_gradients(zip(grads, trainable_weights))
            for metric in metrics:
                metric.update_state(y_true=y_batch, y_pred=y_pred)
        log.info(f'Accuracy: {metrics[0].result():.{5}f}')
            
custom_training_loop_my()

2021-04-05 18:14:03,839 : INFO : Epoch 1...


KeyboardInterrupt: 

In [159]:
def custom_training_loop_revised_by_the_book():
    def shuffle(X, y):
        assert len(X) == len(y)
        idx = np.random.permutation(len(X))
        return X[idx], y[idx]
    
    n_epochs = 5
    batch_size = 32
    optimizer=keras.optimizers.Nadam(learning_rate=0.001)
    loss_fn = keras.losses.sparse_categorical_crossentropy
    metrics=[keras.metrics.SparseCategoricalAccuracy()]
    model = build_simple_model()
    
    # compute grad, autodiff
    # perform grad step
    # loss, metric
    for epoch in range(1, n_epochs + 1):
        log.info(f'Epoch {epoch}...')
        n_steps = len(X) // 32 + 1
        X_shuffled, y_shuffled = shuffle(X, y_sparse)
        for step in range(1, n_steps + 1):
            ceiling = step * batch_size if step * batch_size < len(X_shuffled) else len(X_shuffled)
            floor = (step - 1) * batch_size
            X_batch, y_batch = X_shuffled[floor: ceiling], y_shuffled[floor: ceiling]
            with tf.GradientTape() as tape:
                y_pred = model(X_batch, training=True)
                main_losses = loss_fn(y_batch, y_pred, from_logits=False)
                main_mean_loss = tf.reduce_mean(main_losses)
                regularization_loss = model.losses
                loss = tf.add_n([main_mean_loss] + regularization_loss)
            print(model.trainable_variables)
            print('///')
            print(model.trainable_weights)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, trainable_weights))
            for metric in metrics:
                metric.update_state(y_true=y_batch, y_pred=y_pred)
            break
        log.info(f'Accuracy: {metrics[0].result():.{5}f}')
            
custom_training_loop_revised_by_the_book()

2021-04-05 18:31:57,803 : INFO : Epoch 1...


[<tf.Variable 'dense_132/kernel:0' shape=(784, 300) dtype=float32, numpy=
array([[ 0.07370681,  0.07299586, -0.07241157, ...,  0.02127581,
         0.10312156,  0.10082609],
       [-0.029597  , -0.04897328,  0.03768064, ...,  0.02325827,
        -0.02766708,  0.08355017],
       [ 0.08120636, -0.02418085,  0.04974856, ..., -0.07323386,
        -0.00850875, -0.02393488],
       ...,
       [-0.0574012 , -0.03420052, -0.02062207, ...,  0.00929724,
        -0.04794338,  0.08134733],
       [-0.0001684 , -0.00425973,  0.03449818, ..., -0.04467567,
         0.08932037,  0.01097009],
       [-0.08497547, -0.02800078, -0.01991194, ...,  0.07249659,
         0.03787834,  0.01467388]], dtype=float32)>, <tf.Variable 'dense_132/bias:0' shape=(300,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
    

NameError: name 'trainable_weights' is not defined

In [126]:
np.zeros((2, 3)) + 1
np.array([[3, 4], [1, 2]])

array([[3, 4],
       [1, 2]])

In [138]:
tf.constant([[3, 4], [1, 2]]).numpy()
tf.Variable([[3, 4], [1, 2]]).numpy()
tf.convert_to_tensor(np.array([[3, 4], [1, 2]])).numpy()

<bound method _EagerTensorBase.numpy of <tf.Tensor: shape=(2, 2), dtype=int64, numpy=
array([[3, 4],
       [1, 2]])>>

In [153]:
[1, 2] + [3]

[1, 2, 3]