Complete guide to writing low-level training and evaluation loops.

In [4]:
# Libraries Needed
import os
import re
import sys
import time
from datetime import datetime
import pandas as pd
import numpy as np
from typing import Any, List, Tuple, Union
import sklearn
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, precision_score, confusion_matrix
import tensorflow
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ModelCheckpoint, CSVLogger
from tensorflow.keras import regularizers
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten,BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
import pickle
from tqdm import tqdm

In [5]:
# create dataset start
def create_dataset(df):
    
    X_dataset = []
    img_list = df['Path'].tolist()
    for i in tqdm(range(df.shape[0])):
        
        img = image.load_img(img_list[i])
        img = image.img_to_array(img)
        img = img / 255.
        X_dataset.append(img)

    X = np.array(X_dataset)
  # drop unnecessary columns not used for training
    y = np.array(df.drop(['Index', 'Filename','Path','Emotion'], axis=1))

    return X, y
# create dataset end
def label_binarizer(y_intensity,threshold_val):
    return np.where(y_intensity<threshold_val, 0, 1)
def subset_accuracy(y_true,y_pred):
    y_true = tensorflow.py_function(label_binarizer,(y_true,0.5), tensorflow.double)
    y_pred = tensorflow.py_function(label_binarizer,(y_pred,0.5), tensorflow.double)
    return tensorflow.py_function(accuracy_score,(y_true,y_pred),tensorflow.double)

def load_model_data(model_path, opt_path):
    model = load_model(model_path,custom_objects={"subset_accuracy":subset_accuracy})
    return model
# define Assume Negative loss
def loss_an(y_pred, y_true):
    bce_loss =tensorflow.keras.losses.BinaryCrossentropy()
    return bce_loss(y_pred,y_true)

# create model start
def create_model(width=224, height=224):
  # load pretrained model 'VGG16'
    base_model = keras.applications.VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=(width, height, 3))
    base_model.trainable = False
    model = Sequential()
    model.add(base_model)
    model.add(Flatten())
    
    model.add(Dense(12, activation='sigmoid', name='final_au', kernel_initializer='glorot_normal'))
    
  # sigmoid classification for 12 au labels
    model.compile(optimizer='adam', loss=loss_an, metrics=[subset_accuracy])
    return model
# create model end


In [6]:
# load disfa+ dataset
disfa_df = pd.read_csv('AU_image_data.csv')
disfa_df.head()

Unnamed: 0,Index,Filename,Path,Emotion,AU1,AU2,AU4,AU5,AU6,AU9,AU12,AU15,AU17,AU20,AU25,AU26
0,0,SN001Y_AngerDescribed_TrailNo_1011.jpg,C:\Users\user\Documents\MSc Computer\EmotionDe...,anger,0,0,1,0,0,0,0,0,0,0,0,0
1,1,SN001Y_AngerDescribed_TrailNo_1012.jpg,C:\Users\user\Documents\MSc Computer\EmotionDe...,anger,0,0,2,0,0,0,0,0,0,0,0,0
2,2,SN001Y_AngerDescribed_TrailNo_1013.jpg,C:\Users\user\Documents\MSc Computer\EmotionDe...,anger,0,0,3,0,0,0,0,0,0,0,0,0
3,3,SN001Y_AngerDescribed_TrailNo_1014.jpg,C:\Users\user\Documents\MSc Computer\EmotionDe...,anger,0,0,4,0,0,0,0,0,0,0,0,0
4,4,SN001Y_AngerDescribed_TrailNo_1015.jpg,C:\Users\user\Documents\MSc Computer\EmotionDe...,anger,0,0,4,0,0,0,0,0,0,0,0,0


In [7]:
sample_df = disfa_df.sample(n=950)
X,y = create_dataset(sample_df)

100%|███████████████████████████████████████████████████████████████████████████████| 950/950 [00:04<00:00, 204.71it/s]


In [8]:
# create model 
# create AU model
model = create_model()

Train model using mini-batch gradient with a custom training loop.

Need optimizer, loss function, and a dataset


In [9]:
# instantiate an optimizer
optimizer = tensorflow.keras.optimizers.Adam(learning_rate=1e-3)
# instantiate a loss function
# Prepare the metrics
train_acc_metric = tensorflow.keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = tensorflow.keras.metrics.SparseCategoricalAccuracy()
# prepare training dataset
batch_size = 32
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.3, random_state=1)

In [10]:
# prepare the training dataset
train_dataset = tensorflow.data.Dataset.from_tensor_slices((X_train,y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

# prepare the validation dataset
val_dataset = tensorflow.data.Dataset.from_tensor_slices((X_val,y_val))
val_dataset = val_dataset.batch(batch_size)

Here's our training loop:

* We open a for loop that iterates over epochs
* For each epoch, we open a for loop that iterates over the dataset, in batches
* For each batch, we open a GradientTape() scope
* Inside this scope, we call the model (forward pass) and compute the loss
* Outside the scope, we retrieve the gradients of the weights of the model with regard to the loss
* Finally, we use the optimizer to update the weights of the model based on the gradients

In [12]:
# define number of epochs
epochs = 2
for epoch in range(epochs):
    print(f'\n Start of epoch {epoch}')
    
    # iterate over the batches of the dataset
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Open a GradientTape to record the operations run
        # during the forward pass, which enables auto-differentiation
        
        with tensorflow.GradientTape() as tape:
            # Run the forward pass of the layer.
            # The operations that the layer applies
            # to its inputs are foing to be recorded 
            # on the GradientTape
            logits = model(x_batch_train, training=True) # logits of the minibatch
            print(y_batch_train.shape)
            print(logits.shape)
            # compute the loss value for this minibatch
            loss_value = loss_an(y_batch_train, logits)
            print(loss_value)
        # Use the gradient tape to automatically retrieve
        # the gradients of the trainable variables with respect to the loss.
        grads = tape.gradient(loss_value, model.trainable_weights)
        print(grads)
        # Run one step of gradient descent by updating
        # the value of the variables to minimize the loss.
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        
        # Log every 200 batches
        if step % 200 == 0:
            print(
            f'Training loss (for one batch) at step {step}: {float(loss_value)}')
            print(f'Seen so far: {(step+1)*batch_size}')
        
    


 Start of epoch 0
(32, 12)
(32, 12)
tf.Tensor(1.1142037, shape=(), dtype=float32)
[<tf.Tensor: shape=(25088, 12), dtype=float32, numpy=
array([[ 1.3638728e-02,  1.2928383e-02,  2.4042102e-03, ...,
        -3.0311642e-03, -1.5525231e-02, -2.5319573e-03],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
       [ 7.0203596e-04,  7.2361692e-04,  7.0102245e-04, ...,
         5.5721775e-05,  2.4313033e-05, -2.2162716e-03],
       ...,
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
       [ 3.0784370e-02,  2.5495784e-02,  2.6190763e-02, ...,
        -4.2033946e-04, -3.8626097e-02, -2.0902824e-02],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00]], dtype=float32)>, <tf.Tensor: shape=(12,), dtype=float32, numpy=
array([ 0.03730034,  0.02445343,  0.03555912, -0.00672579, -0.02787081,
      

(32, 12)
(32, 12)
tf.Tensor(0.2558704, shape=(), dtype=float32)


KeyboardInterrupt: 

In [31]:
lt = np.array([0,1,1,0,0,1,1,0,1,0])
lt_new = (lt==0)

In [33]:
lt_new

array([ True, False, False,  True,  True, False, False,  True, False,
        True])

In [15]:
np_array2 = np.zeros((12, 12), int)
np_array = np


In [16]:
np_array2

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [20]:
np_array2[1][11]

0

In [21]:
indices = np.random.randint(0,11,(12,2))

In [22]:
indices

array([[ 4, 10],
       [ 6,  6],
       [ 9,  2],
       [ 7,  3],
       [ 9,  0],
       [ 4,  2],
       [ 6,  1],
       [ 2,  6],
       [ 4, 10],
       [ 1,  0],
       [ 1,  6],
       [ 0,  9]])

In [23]:
new_in = tuple(zip(*indices))
np_array2[new_in] = 3
np_array2

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0],
       [3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0],
       [0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])