In [57]:
import logging
import tensorflow as tf
import keras

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
log = logging.getLogger()

%config Completer.use_jedi = False # make autocompletion works in jupyter

tf.__version__

'2.4.1'

In [58]:
import numpy as np
import pandas as pd
from pathlib import Path

import argparse
args = argparse.Namespace()
args.raw_train = pd.read_csv('../data/train.csv.zip')
args.raw_test = pd.read_csv('../data/test.csv.zip')
args.predictions_folder = Path('../predictions')
args.n_splits = 5
args.n_jobs = 3
args.seed = 101
args.val_fraction=0.1
args.epochs = 50

args.train = args.raw_train.iloc[:, 1:].copy()
args.labels = args.raw_train['label'].copy()
args.test = args.raw_test.copy()


args.predictions_folder.mkdir(parents=True, exist_ok=True) 

In [59]:
X = args.train.to_numpy().reshape(args.train.shape[0], 28, 28, 1)
y = pd.get_dummies(args.labels, prefix='label').to_numpy()
y_sparse = args.labels.to_numpy()
x = args.test.to_numpy().reshape(args.test.shape[0], 28, 28, 1)

In [60]:
t = tf.constant([[1, 2], [3, 4], [5, 6]])
tf.square(t)
keras.backend.transpose(t)
t + tf.cast(tf.constant(1.), tf.int32)
v = tf.Variable([[1, 2], [3, 4], [5, 6]])
v[0,0].assign(100)
tf.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
tf.constant(['019A'])
# tf.strings.unicode_decode(tf.constant(['019A']), input_encoding='UTF-8')

<tf.Tensor: shape=(1,), dtype=string, numpy=array([b'019A'], dtype=object)>

### Custom training loops

In [110]:
import math

def build_simple_model():
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape=(28, 28)))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(300, activation='elu', kernel_initializer='he_normal', kernel_regularizer='l2'))
    model.add(keras.layers.Dense(10, activation='softmax')) # !
    return model

def custom_training_loop_my():
    def shuffle(X, y):
        assert len(X) == len(y)
        idx = np.random.permutation(len(X))
        return X[idx], y[idx]
    
    n_epochs = 5
    batch_size = 32
    optimizer=keras.optimizers.Nadam(learning_rate=0.001)
    loss_fn = keras.losses.sparse_categorical_crossentropy # try class
    metrics=[keras.metrics.SparseCategoricalAccuracy()] # try func
    model = build_simple_model()
    
    # compute grad, autodiff
    # perform grad step
    # loss, metric
    for epoch in range(1, n_epochs + 1):
        log.info(f'Epoch {epoch}...')
        n_steps = len(X) // 32 + 1
        X_shuffled, y_shuffled = shuffle(X, y_sparse)
        for step in range(1, n_steps + 1):
            ceiling = step * batch_size if step * batch_size < len(X_shuffled) else len(X_shuffled)
            floor = (step - 1) * batch_size
            X_batch, y_batch = X_shuffled[floor: ceiling], y_shuffled[floor: ceiling]
            with tf.GradientTape() as tape:
                y_pred = model(X_batch, training=True)
                losses = loss_fn(y_batch, y_pred, from_logits=False)
            trainable_weights = [weight for l in model.layers for weight in l.trainable_variables]
            grads = tape.gradient(losses, trainable_weights)
            optimizer.apply_gradients(zip(grads, trainable_weights))
            for metric in metrics:
                metric.update_state(y_true=y_batch, y_pred=y_pred)
        log.info(f'Accuracy: {metrics[0].result():.{5}f}')
            
custom_training_loop_my()

2021-04-01 13:46:47,433 : INFO : Epoch 1...


[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([129.29144 , 328.46817 , 173.41437 , 137.4465  ,  10.339434,
       315.39282 , 108.31431 , 355.13855 , 375.69464 , 110.18623 ,
        36.206787,  61.988373,   0.      , 120.70735 , 156.45027 ,
       217.11743 , 118.55936 , 150.97818 , 326.8812  , 164.87314 ,
       135.52002 , 163.82938 ,   0.      , 157.17827 , 161.3319  ,
       283.49048 , 436.93912 , 269.63855 ,   0.      , 352.92346 ,
       101.93386 , 152.21713 ], dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([8.61508026e+01, 1.15398102e+02, 1.88778259e+02, 0.00000000e+00,
       1.56425476e+02, 1.20234856e+02, 8.07622681e+01, 1.69324829e+02,
       0.00000000e+00, 1.13346970e+02, 9.29816971e+01, 2.64142639e+02,
       1.05110947e+02, 4.86408081e+01, 2.07983505e+02, 1.31692791e+01,
       0.00000000e+00, 1.22397039e-02, 1.35864212e+02, 1.10463913e+02,
       1.06473351e+02, 3.17999817e+02, 1.41273026e+02, 5.97505646e+01,
       4.78873901e+01, 1.87

[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([1.0490363e-05, 5.1119553e+01, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 4.5492432e+01, 9.8259287e+00,
       0.0000000e+00, 3.3599989e+00, 0.0000000e+00, 0.0000000e+00,
       6.4227654e+01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       5.0031539e-04, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
      dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 5.7496283e-02, 2.3269707e+01, 0.0000000e+00,
       0.0000000e+00, 9.3630812e-04, 0.0000000e+00, 0.0000000e+00,
       6.2865738e+01, 8.4466354e+01, 0.0000000e+00, 1.0186305e+02,
       1.2462568e+02, 4.7685524e+01, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 4.5567261e+01, 0.0000000e+00, 0.0000000e+00,
       0.

[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.9424942e+01,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 3.3378547e-06,
       7.0004921e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 1.2893463e+01, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.7419466e+01,
       0.0000000e+00, 0.0000000e+00, 1.7160660e+01, 5.3668964e+01],
      dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 2.0143814e+01, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 2.7354944e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 4.4514880e+00,
       4.6328690e+01, 0.0000000e+00, 3.4410366e-01, 0.0000000e+00,
       1.

[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([  0.       ,   0.       ,   0.       ,   0.       ,   0.       ,
         0.       ,   0.       ,   0.       ,   0.       ,  49.741516 ,
         2.8231008,   0.       ,   0.       ,  58.98198  ,   0.       ,
         0.       ,   0.       ,   0.       ,   0.       ,   0.       ,
         0.       , 164.94214  ,   0.       ,   0.       ,   0.       ,
         0.       ,   0.       ,   0.       ,   0.       ,   0.       ,
         0.       ,  24.073181 ], dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.3351351e-05,
       7.7468907e-03, 6.7837646e+01, 9.9759827e+01, 0.0000000e+00,
       0.0000000e+00, 1.3678590e+01, 0.0000000e+00, 0.0000000e+00,
       1.6124275e+01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 8.9944702e+01, 0.0000000e+00, 0.0000000e+00,
       1.1920922e-06, 1.3963793e-01, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+0

[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 2.2837892e-04,
       0.0000000e+00, 0.0000000e+00, 7.1873077e+01, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       2.0115189e+01, 3.8507828e+01, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 2.0770630e+01, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
      dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 8.4245473e-04,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 3.1580414e+01, 0.0000000e+00, 9.8859924e+01,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       3.6151093e+01, 3.1983978e+01, 8.8656807e+00, 0.0000000e+00,
       0.

[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.96610856, 54.616104  ,
        0.        ,  0.        , 26.906769  ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        , 67.84679   ], dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([ 0.      ,  0.      ,  0.      ,  0.      ,  0.      , 18.653526,
        0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.      ,
        0.      ,  0.      ,  0.      ,  0.      ,  0.      ,  0.      ,
       53.724586,  0.      ,  0.      , 18.948341,  0.      ,  0.      ,
        0.      ,  0.      ,  0.      ,  0.      , 54.560318,  0.      ,
        0.      ,  0.      ], dtype=float32)>]
[<tf.Tens

[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 2.8127396e+01,
       2.1562635e-04, 0.0000000e+00, 0.0000000e+00, 2.7416443e+02,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 3.9282700e+01, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.7227676e+01,
       2.1470424e+02, 0.0000000e+00, 1.0400998e+02, 0.0000000e+00,
       1.1938483e+01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
      dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 2.5875076e+01, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 1.8032402e+01, 0.0000000e+00, 0.0000000e+00,
       1.8834889e-05, 0.0000000e+00, 0.0000000e+00, 4.0039616e+00,
       0.

[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.3425652e+00,
       6.2672287e+01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       6.4293030e+01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 1.1920928e-07, 5.7057022e+01,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.7808075e+01,
       1.4824849e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
      dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 4.1287083e+01, 2.9613289e+01, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       1.

[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([29.311508 ,  0.       ,  0.       ,  0.       ,  0.       ,
       57.397163 ,  0.       ,  0.       ,  0.       ,  0.       ,
        0.       ,  0.       ,  0.       ,  0.       ,  0.       ,
        0.       ,  0.       ,  0.       ,  0.       ,  0.       ,
        0.       ,  0.       ,  2.7122982,  0.       ,  0.       ,
        0.       ,  0.       , 43.552147 ,  0.       ,  0.       ,
        0.       ,  0.       ], dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([6.9902718e+01, 0.0000000e+00, 9.5367386e-07, 1.3344711e+00,
       4.7307987e-02, 0.0000000e+00, 0.0000000e+00, 1.4318869e+02,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 7.4788734e+01,
       0.0000000e+00, 4.3624362e-01, 0.0000000e+00, 1.0302736e+02,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 1.2206914e+01, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00,

[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       9.0542542e+01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
       0.0000000e+00, 8.6139282e-04, 1.5199463e+01, 0.0000000e+00,
       0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
      dtype=float32)>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([  0.      ,   0.      ,   0.      ,   0.      ,   0.      ,
         0.      ,  97.32764 ,   0.      ,  54.243423, 169.53308 ,
         0.      ,   0.      , 174.58032 ,   0.      ,   0.      ,
         0.      ,   0.      ,   0.      ,   0.      ,   0.      ,
         0.      ,   0.      ,   0.      ,   0.      ,   0.      ,
         

KeyboardInterrupt: 

In [148]:
def custom_training_loop_revised():
    def shuffle(X, y):
        assert len(X) == len(y)
        idx = np.random.permutation(len(X))
        return X[idx], y[idx]
    
    n_epochs = 5
    batch_size = 32
    optimizer=keras.optimizers.Nadam(learning_rate=0.001)
    loss_fn = keras.losses.sparse_categorical_crossentropy
    metrics=[keras.metrics.SparseCategoricalAccuracy()]
    model = build_simple_model()
    
    # compute grad, autodiff
    # perform grad step
    # loss, metric
    for epoch in range(1, n_epochs + 1):
        log.info(f'Epoch {epoch}...')
        n_steps = len(X) // 32 + 1
        X_shuffled, y_shuffled = shuffle(X, y_sparse)
        for step in range(1, n_steps + 1):
            ceiling = step * batch_size if step * batch_size < len(X_shuffled) else len(X_shuffled)
            floor = (step - 1) * batch_size
            X_batch, y_batch = X_shuffled[floor: ceiling], y_shuffled[floor: ceiling]
            with tf.GradientTape() as tape:
                y_pred = model(X_batch, training=True)
                main_losses = loss_fn(y_batch, y_pred, from_logits=False)
                main_loss = tf.reduce_mean(main_losses)
                print(main_losses)
                print(main_loss)
                reg_loss = model.losses
                print(reg_loss)
                print([main_losses] + reg_loss)
                losses = tf.add_n([main_losses] + reg_loss)
                print(losses)
                mean_loss = tf.reduce_mean(losses)
                print(mean_loss)
  
            trainable_weights = [weight for l in model.layers for weight in l.trainable_variables]
            grads = tape.gradient(losses, trainable_weights)
            optimizer.apply_gradients(zip(grads, trainable_weights))
            for metric in metrics:
                metric.update_state(y_true=y_batch, y_pred=y_pred)
            break
        log.info(f'Accuracy: {metrics[0].result():.{5}f}')
            
custom_training_loop_revised()

2021-04-04 14:23:30,607 : INFO : Epoch 1...


tf.Tensor(
[ 83.99216   251.02034     0.7511577 131.22961   154.35754   144.9977
 200.40485   185.71475   174.29517    82.57141    99.23437     0.
 181.01837    76.49397   176.80643     0.        217.69266    79.26572
 112.755264  243.05792    68.84169   158.49606    75.91693    65.446304
 202.12796    87.63913   301.473     110.06311   254.55013     0.
 169.67981   224.19025  ], shape=(32,), dtype=float32)
tf.Tensor(134.81512, shape=(), dtype=float32)
[<tf.Tensor: shape=(), dtype=float32, numpy=6.013867>]
[<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([ 83.99216  , 251.02034  ,   0.7511577, 131.22961  , 154.35754  ,
       144.9977   , 200.40485  , 185.71475  , 174.29517  ,  82.57141  ,
        99.23437  ,   0.       , 181.01837  ,  76.49397  , 176.80643  ,
         0.       , 217.69266  ,  79.26572  , 112.755264 , 243.05792  ,
        68.84169  , 158.49606  ,  75.91693  ,  65.446304 , 202.12796  ,
        87.63913  , 301.473    , 110.06311  , 254.55013  ,   0.       ,
       1

InvalidArgumentError: Inputs to operation AddN of type AddN must have the same size and shape.  Input 0: [32] != input 1: [] [Op:AddN]

In [126]:
np.zeros((2, 3)) + 1
np.array([[3, 4], [1, 2]])

array([[3, 4],
       [1, 2]])

In [138]:
tf.constant([[3, 4], [1, 2]]).numpy()
tf.Variable([[3, 4], [1, 2]]).numpy()
tf.convert_to_tensor(np.array([[3, 4], [1, 2]])).numpy()

<bound method _EagerTensorBase.numpy of <tf.Tensor: shape=(2, 2), dtype=int64, numpy=
array([[3, 4],
       [1, 2]])>>