In [57]:
import logging
import tensorflow as tf
import keras

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
log = logging.getLogger()

%config Completer.use_jedi = False # make autocompletion works in jupyter

tf.__version__

'2.4.1'

In [58]:
import numpy as np
import pandas as pd
from pathlib import Path

import argparse
args = argparse.Namespace()
args.raw_train = pd.read_csv('../data/train.csv.zip')
args.raw_test = pd.read_csv('../data/test.csv.zip')
args.predictions_folder = Path('../predictions')
args.n_splits = 5
args.n_jobs = 3
args.seed = 101
args.val_fraction=0.1
args.epochs = 50

args.train = args.raw_train.iloc[:, 1:].copy()
args.labels = args.raw_train['label'].copy()
args.test = args.raw_test.copy()


args.predictions_folder.mkdir(parents=True, exist_ok=True) 

In [59]:
X = args.train.to_numpy().reshape(args.train.shape[0], 28, 28, 1)
y = pd.get_dummies(args.labels, prefix='label').to_numpy()
y_sparse = args.labels.to_numpy()
x = args.test.to_numpy().reshape(args.test.shape[0], 28, 28, 1)

In [60]:
t = tf.constant([[1, 2], [3, 4], [5, 6]])
tf.square(t)
keras.backend.transpose(t)
t + tf.cast(tf.constant(1.), tf.int32)
v = tf.Variable([[1, 2], [3, 4], [5, 6]])
v[0,0].assign(100)
tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
tf.constant(['019A'])
# tf.strings.unicode_decode(tf.constant(['019A']), input_encoding='UTF-8')

<tf.Tensor: shape=(1,), dtype=string, numpy=array([b'019A'], dtype=object)>

### Custom training loops

In [8]:
len(X) // 32
32 * 1312

41984

In [65]:
# def shuffle(X, y):
#     x = np.array([0, 100, 200, 300, 400])
#     a = np.random.permutation(5)
#     print(a)
#     print(x[a])

# for i in range(5):
#     shuffle(None, None)
    
y_sparse[0:32]

array([1, 0, 1, 4, 0, 0, 7, 3, 5, 3, 8, 9, 1, 3, 3, 1, 2, 0, 7, 5, 8, 6,
       2, 0, 2, 3, 6, 9, 9, 7, 8, 9])

In [79]:
def build_simple_model():
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=(28, 28)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(300, activation='elu', kernel_initializer='he_normal'))
    model.add(keras.layers.Dense(10, activation='softmax')) # !
    return model
#     model.compile(loss='sparse_categorical_crossentropy', # !
#              optimizer=keras.optimizers.Nadam(learning_rate=lr),
#              metrics=[keras.metrics.sparse_categorical_accuracy]) # !

def custom_training_loop():
    def shuffle(X, y):
        assert len(X) == len(y)
        idx = np.random.permutation(len(X))
        return X[idx], y[idx]
    
    n_epochs = 5
    batch_size = 32
    optimizer=keras.optimizers.Nadam(learning_rate=0.001)
    loss_fn = keras.losses.sparse_categorical_crossentropy # try class
    metrics=[keras.metrics.SparseCategoricalAccuracy] # try func
    model = build_simple_model()
    
    for epoch in range(1, n_epochs + 1):
        log.info(f'Epoch {epoch}...')
        n_steps = len(X) // 32 + 1
        X_shuffled, y_shuffled = shuffle(X, y_sparse)
        for step in range(1, n_steps + 1):
            ceiling = step * batch_size if step * batch_size < len(X_shuffled) else len(X_shuffled)
            floor = (step - 1) * batch_size
            X_batch, y_batch = X_shuffled[floor: ceiling], y_shuffled[floor: ceiling]
            with tf.GradientTape() as tape:
                y_pred = model(X_batch)
                losses = loss_fn(y_batch, y_pred, from_logits=False)
            trainable_weights = [l.trainable_variables for l in model.layers]
            trainable_weights = [weight for l in model.layers for weight in l.trainable_variables]
            grad = tape.gradient(losses, trainable_weights)
            print(grad)
            break
        break
            # compute grad
            # autodiff
            # loss, metric
            
custom_training_loop()

2021-03-30 14:43:44,336 : INFO : Epoch 1...


[<tf.Tensor: shape=(784, 300), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>, <tf.Tensor: shape=(300,), dtype=float32, numpy=
array([ 5.38291186e-02,  2.62173653e-01,  7.76513577e-01,  7.06993461e-01,
        9.62014019e-01, -2.25006163e-01, -1.22472294e-01,  8.26855540e-01,
        3.37943584e-01, -4.39662755e-01, -2.72523016e-01, -5.78270078e-01,
        2.62157232e-01,  8.43107104e-01,  5.78654528e-01,  1.08958530e+00,
        8.88405293e-02, -6.60389364e-01, -7.84225123e-18, -2.33742595e+00,
       -7.52439499e-02,  2.93931246e-01, -3.99086289e-02,  1.57258713e+00,
        1.47954285e-01,  2.80594349e-01,  2.64597952e-01, -2.39175290e-01,
        5.56396767e-02, -3.27828318e-01,  1.16283882e+00,  1.43104589e+00,
        1.18247712e+00, -6.00095332e-01, 