# CUSTOM TRAINING LOOPS

## How model.fit works?

In [1]:
# Import Depedencies
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
import import_ipynb
import Utils

2025-01-25 19:04:23.293521: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-01-25 19:04:23.293548: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-01-25 19:04:23.293551: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2025-01-25 19:04:23.293564: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-01-25 19:04:23.293574: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [2]:
# Dataset Batching
BATCH_SIZE=32
BUFFER_SIZE = 8
trainDataset = Utils.trainDataset.shuffle(buffer_size=BUFFER_SIZE, reshuffle_each_iteration=True).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
valDataset = Utils.valDataset.shuffle(buffer_size=BUFFER_SIZE, reshuffle_each_iteration=True).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
testDataset = Utils.testDataset.shuffle(buffer_size=BUFFER_SIZE, reshuffle_each_iteration=True).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [3]:
# Custom Binary Cross-Entropy Loss
def customBCE(yTrue, yPred):
    bce = tf.keras.losses.BinaryCrossentropy()
    return bce(yTrue, yPred)

In [4]:
OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.01)
METRIC = tf.keras.metrics.BinaryAccuracy()
METRIC_VAL = tf.keras.metrics.BinaryAccuracy()
EPOCHS = 3

## TRAINING LOOP

In [5]:
@tf.function # Converts to graph mode -> faster
def trainingBlock(xBatch, yBatch):
    with tf.GradientTape() as recorder: # Record Gradients
        yPred = Utils.model(xBatch, training=True)
        loss = customBCE(yBatch, yPred)
    partialDerivatives = recorder.gradient(loss, Utils.model.trainable_weights)
    OPTIMIZER.apply_gradients(zip(partialDerivatives, Utils.model.trainable_weights))
    METRIC.update_state(yBatch, yPred)

    return loss


In [6]:
@tf.function
def valBlock(xBatchVal, yBatchVal):
    yPredVal = Utils.model(xBatchVal, training=False)
    lossVal = customBCE(yBatchVal, yPredVal)
    METRIC_VAL.update_state(yBatchVal, yPredVal)

    return lossVal

In [7]:
for epoch in range(EPOCHS):
    print("Training For Epoch : ", epoch+1)
    for step, (xBatch, yBatch) in enumerate(trainDataset):
        loss = trainingBlock(xBatch, yBatch)
    print("Training Loss : ", loss)
    print("The Accuracy is : ", METRIC.result())
    METRIC.reset_state()
    for (xBatchVal, yBatchVal) in valDataset:
        lossVal = valBlock(xBatchVal, yBatchVal)
    print("Validation Loss : ", lossVal)
    print("The Validation Accuracy is : ", METRIC_VAL.result())
    METRIC_VAL.reset_state()

Training For Epoch :  1


2025-01-25 19:04:24.102911: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
2025-01-25 19:04:59.246951: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Training Loss :  tf.Tensor(0.39373285, shape=(), dtype=float32)
The Accuracy is :  tf.Tensor(0.77828175, shape=(), dtype=float32)


2025-01-25 19:05:01.821642: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Validation Loss :  tf.Tensor(0.9380354, shape=(), dtype=float32)
The Validation Accuracy is :  tf.Tensor(0.5520871, shape=(), dtype=float32)
Training For Epoch :  2


2025-01-25 19:05:34.506039: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Training Loss :  tf.Tensor(0.53819585, shape=(), dtype=float32)
The Accuracy is :  tf.Tensor(0.8025946, shape=(), dtype=float32)


2025-01-25 19:05:36.881472: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Validation Loss :  tf.Tensor(0.40645862, shape=(), dtype=float32)
The Validation Accuracy is :  tf.Tensor(0.76842105, shape=(), dtype=float32)
Training For Epoch :  3


2025-01-25 19:06:09.585555: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Training Loss :  tf.Tensor(0.45861244, shape=(), dtype=float32)
The Accuracy is :  tf.Tensor(0.80123377, shape=(), dtype=float32)
Validation Loss :  tf.Tensor(0.3955036, shape=(), dtype=float32)
The Validation Accuracy is :  tf.Tensor(0.78221416, shape=(), dtype=float32)


2025-01-25 19:06:11.905489: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


# TRAIN METHOD

In [13]:
def trainModel(model, lossFunction, metric, valMetric, optimizer, trainDataset, valDataset, epochs=EPOCHS):
    for epoch in range(epochs):
        print("Training For Epoch : ", epoch+1)
        print("\n")
        for step, (xBatch, yBatch) in enumerate(trainDataset):
            loss = trainingBlock(xBatch, yBatch)
        print("Training Loss : ", loss)
        print("The Accuracy is : ", METRIC.result())
        print("\n")
        METRIC.reset_state()
        for (xBatchVal, yBatchVal) in valDataset:
            lossVal = valBlock(xBatchVal, yBatchVal)
        print("Validation Loss : ", lossVal)
        print("The Validation Accuracy is : ", METRIC_VAL.result())
        print("\n")
        METRIC_VAL.reset_state()
    print("TRAINING COMPLETE !!")

In [14]:
trainModel(Utils.model, customBCE, METRIC, METRIC_VAL, OPTIMIZER, trainDataset, valDataset, 3)

Training For Epoch :  1




2025-01-25 19:09:49.839840: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Training Loss :  tf.Tensor(0.312426, shape=(), dtype=float32)
The Accuracy is :  tf.Tensor(0.8856889, shape=(), dtype=float32)




2025-01-25 19:09:51.890787: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Validation Loss :  tf.Tensor(0.26283422, shape=(), dtype=float32)
The Validation Accuracy is :  tf.Tensor(0.89945555, shape=(), dtype=float32)


Training For Epoch :  2




2025-01-25 19:10:24.590389: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Training Loss :  tf.Tensor(0.3028249, shape=(), dtype=float32)
The Accuracy is :  tf.Tensor(0.8966252, shape=(), dtype=float32)




2025-01-25 19:10:26.997082: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Validation Loss :  tf.Tensor(0.15525366, shape=(), dtype=float32)
The Validation Accuracy is :  tf.Tensor(0.92413795, shape=(), dtype=float32)


Training For Epoch :  3




2025-01-25 19:10:59.737202: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Training Loss :  tf.Tensor(0.32450148, shape=(), dtype=float32)
The Accuracy is :  tf.Tensor(0.90175086, shape=(), dtype=float32)


Validation Loss :  tf.Tensor(0.18081427, shape=(), dtype=float32)
The Validation Accuracy is :  tf.Tensor(0.9259528, shape=(), dtype=float32)


TRAINING COMPLETE !!


2025-01-25 19:11:02.507251: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
