# MODEL VERSIONING WITH WandB

In [1]:
# Import Dependencies
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import datetime
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Input, Normalization, Conv2D, MaxPooling2D, Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.metrics import BinaryAccuracy, FalsePositives, FalseNegatives, TrueNegatives, TruePositives, Precision, Recall, F1Score, AUC
from tensorflow.keras.regularizers import L2
import wandb
from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint, WandbEvalCallback, WandbCallback

## Model Definition

In [2]:
# Hyperparameters for the LeNet experiment, gathered in a single mapping that
# the rest of the notebook reads through `wandb.config`.
wandb.config = dict(
    LEARNING_RATE=0.001,       # optimizer step size
    N_EPOCHS=20,
    BATCH_SIZE=128,
    DROPOUT_RATE=0.0,          # rate handed to the Dropout layers
    IM_SIZE=224,               # side length of the square model input
    REGULARIZATION_RATE=0.0,
    N_FILTERS=6,               # filters per Conv2D layer
    KERNEL_SIZE=3,
    N_STRIDES=1,
    POOL_SIZE=2,
    N_DENSE_1=128,             # units in the first hidden Dense layer
    N_DENSE_2=32,              # units in the second hidden Dense layer
)

In [3]:
# Using Sequential API
# Builds a LeNet-style binary classifier (two conv blocks, two hidden dense
# layers, one sigmoid output unit) from the hyperparameters in wandb.config.
CONFIGURATION = wandb.config
IMAGE_SIZE = CONFIGURATION['IM_SIZE']
lenetModel = tf.keras.Sequential([
    Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3)),

    # Conv block 1.
    # REGULARIZATION_RATE is now actually applied (it used to be configured but
    # ignored); at the configured 0.0 the added L2 penalty is zero.
    Conv2D(filters=CONFIGURATION["N_FILTERS"], kernel_size=CONFIGURATION["KERNEL_SIZE"], strides=CONFIGURATION["N_STRIDES"], padding='valid', activation='relu',
           kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"])),
    BatchNormalization(),
    # NOTE(review): pooling reuses N_STRIDES, so with the current config
    # (POOL_SIZE=2, N_STRIDES=1) each pooling layer shrinks the feature map by
    # a single pixel per side and Flatten feeds a very wide vector into the
    # first Dense layer. Confirm that a stride equal to POOL_SIZE was not intended.
    MaxPooling2D(pool_size=CONFIGURATION["POOL_SIZE"], strides=CONFIGURATION["N_STRIDES"]),
    Dropout(rate = CONFIGURATION["DROPOUT_RATE"]),

    # Conv block 2 (no dropout after this one).
    Conv2D(filters=CONFIGURATION["N_FILTERS"], kernel_size=CONFIGURATION["KERNEL_SIZE"], strides=CONFIGURATION["N_STRIDES"], padding='valid', activation='relu',
           kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"])),
    BatchNormalization(),
    MaxPooling2D(pool_size=CONFIGURATION["POOL_SIZE"], strides=CONFIGURATION["N_STRIDES"]),

    Flatten(),

    # Classifier head.
    Dense(CONFIGURATION["N_DENSE_1"], activation='relu',
          kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"])),
    BatchNormalization(),
    Dropout(rate = CONFIGURATION["DROPOUT_RATE"]),

    Dense(CONFIGURATION["N_DENSE_2"], activation='relu',
          kernel_regularizer=L2(CONFIGURATION["REGULARIZATION_RATE"])),
    BatchNormalization(),
    # Single sigmoid unit: the model outputs one probability per image.
    Dense(1, activation='sigmoid'),
])
lenetModel.summary()

2025-01-27 18:31:18.271407: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-01-27 18:31:18.271452: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-01-27 18:31:18.271456: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2025-01-27 18:31:18.271679: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-01-27 18:31:18.271700: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


## Logging Untrained Model

In [6]:
def logModel():
    """Save the untrained LeNet model and log it to W&B as a versioned artifact.

    Opens a W&B run named "Log Model", writes the notebook-level ``lenetModel``
    to ``Models/LeNet.keras`` and logs that file as the ``UntrainedModel``
    artifact (type ``Model``), attaching ``CONFIGURATION`` as artifact
    metadata. The run is closed when the ``with`` block exits.
    """
    with wandb.init(name="Log Model", project="Malaria-Detection-Dataset-Model-Versioning", entity="amanjn2003-santa-clara-university") as run:

        untrainedModel = wandb.Artifact(name="UntrainedModel", 
                                        type="Model", 
                                        description="Untrained Version of LeNet Model",
                                        metadata = CONFIGURATION)
        
        filename = "Models/LeNet.keras"
        # The target folder is not guaranteed to exist (e.g. fresh checkout);
        # create it so the save below cannot fail on a missing directory.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        lenetModel.save(filename)

        untrainedModel.add_file(filename)

        # Also sync the file with the run itself, in addition to the artifact.
        wandb.save(filename)

        run.log_artifact(untrainedModel)

In [7]:
# Run the upload: saves the untrained model and logs it as the "UntrainedModel" artifact.
logModel()

## Trained Model Versioning

In [9]:
def _loadDatasetArtifact(run, artifactName, artifactType, fileName, batchSize, shuffle):
    """Download a W&B dataset artifact and wrap it in a batched tf.data pipeline.

    Args:
        run: Active W&B run; the artifact is registered as an input of this run.
        artifactName: Fully qualified artifact name including its version alias.
        artifactType: Artifact type passed to ``run.use_artifact``.
        fileName: Name of the ``.npz`` file inside the artifact; it must expose
            ``image`` and ``label`` arrays.
        batchSize: Number of samples per batch.
        shuffle: Whether to reshuffle the samples on every iteration.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` float32 batches.
    """
    artifact = run.use_artifact(artifactName, type=artifactType)
    # Use the directory reported by W&B instead of a hard-coded
    # "artifacts/<name>:v0" path, which depends on the working directory and
    # on the default download root.
    artifactDir = artifact.download()

    # NOTE(review): allow_pickle=True can execute arbitrary code if the file is
    # tampered with; drop it if the arrays are plain numeric data.
    with np.load(os.path.join(artifactDir, fileName), allow_pickle=True) as npzArr:
        images, labels = npzArr['image'], npzArr['label']

    dataset = tf.data.Dataset.from_tensor_slices((
        tf.convert_to_tensor(images, dtype=tf.float32),
        tf.convert_to_tensor(labels, dtype=tf.float32),
    ))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=1024, reshuffle_each_iteration=True)
    return dataset.batch(batch_size=batchSize).prefetch(tf.data.AUTOTUNE)


def trainedModelLog():
    """Train the untrained LeNet artifact and log the result to W&B.

    Inside a W&B run named "Trained Model Log" this function:
      1. downloads the augmented training set and the validation set artifacts,
      2. downloads and loads ``UntrainedModel:v0``,
      3. compiles it with Adam and binary cross-entropy and trains it for
         10 epochs with batch size 32, streaming metrics and checkpoints to W&B,
      4. saves the result to ``Models/LeNetTrained.keras`` and logs it as the
         ``TrainedModel`` artifact (type ``Model``) with ``CONFIGURATION`` as
         metadata.
    """
    artifactPrefix = "amanjn2003-santa-clara-university/Malaria-Detection-Dataset-Model-Versioning/"
    # NOTE(review): these differ from CONFIGURATION["BATCH_SIZE"] (128) and
    # CONFIGURATION["N_EPOCHS"] (20), which are what the artifact metadata
    # reports. Values are kept as they were; confirm which ones are intended.
    batchSize = 32
    nEpochs = 10

    with wandb.init(name="Trained Model Log", project="Malaria-Detection-Dataset-Model-Versioning", entity="amanjn2003-santa-clara-university") as run:

        trainedModel = wandb.Artifact(name="TrainedModel",
                                      type="Model",
                                      description="Trained Version of LeNet Model",
                                      metadata = CONFIGURATION)

        # Loading the Train Dataset
        trainDataset = _loadDatasetArtifact(
            run,
            artifactPrefix + "AugmentedDataset:v0",
            "augmentedData",
            "AugmentedTrainDataset.npz",
            batchSize,
            shuffle=True,
        )

        # Loading the Validation Dataset
        # Not shuffled: validation metrics are aggregated over the whole set,
        # so sample order does not matter.
        valDataset = _loadDatasetArtifact(
            run,
            artifactPrefix + "ValDataset:v0",
            "DatasetSplits",
            "ValDataset.npz",
            batchSize,
            shuffle=False,
        )

        # Loading Previous Untrained Version of Model From WandB
        modelArtifact = run.use_artifact(artifactPrefix + "UntrainedModel:v0", type='Model')
        modelDir = modelArtifact.download()
        model = tf.keras.models.load_model(os.path.join(modelDir, "LeNet.keras"))

        # Compiling & Training The Model
        Metrics = [BinaryAccuracy(), FalsePositives(), FalseNegatives(), TrueNegatives(), TruePositives(), Precision(), Recall(), AUC()]

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=CONFIGURATION["LEARNING_RATE"]),
            loss=tf.keras.losses.BinaryCrossentropy(),
            metrics=Metrics
        )

        # Both the checkpoint callback and the final save write into "Models/";
        # make sure it exists before training starts.
        os.makedirs("Models", exist_ok=True)

        model.fit(
            trainDataset,
            validation_data=valDataset,
            epochs=nEpochs,
            verbose=1,
            callbacks = [WandbMetricsLogger(), WandbModelCheckpoint("Models/LeNetModel.keras")]
        )

        # Logging the Model
        filename = "Models/LeNetTrained.keras"
        model.save(filename)

        trainedModel.add_file(filename)

        # Also sync the file with the run itself, in addition to the artifact.
        wandb.save(filename)

        run.log_artifact(trainedModel)

In [10]:
# Run training: downloads the dataset and model artifacts, trains, and logs the "TrainedModel" artifact.
trainedModelLog()

[34m[1mwandb[0m: Downloading large artifact AugmentedDataset:v0, 459.38MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:8.8
[34m[1mwandb[0m: Downloading large artifact ValDataset:v0, 57.42MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:1.9
[34m[1mwandb[0m: Downloading large artifact UntrainedModel:v0, 139.30MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.2


Epoch 1/10


2025-01-27 19:00:04.100325: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 190ms/step - auc: 0.6256 - binary_accuracy: 0.5901 - false_negatives: 72.9231 - false_positives: 104.4231 - loss: 0.7978 - precision: 0.5669 - recall: 0.6550 - true_negatives: 118.7308 - true_positives: 134.6923 - val_auc: 0.6360 - val_binary_accuracy: 0.4100 - val_false_negatives: 59.0000 - val_false_positives: 0.0000e+00 - val_loss: 1.1483 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val_true_negatives: 41.0000 - val_true_positives: 0.0000e+00
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 149ms/step - auc: 0.6859 - binary_accuracy: 0.6342 - false_negatives: 74.8462 - false_positives: 84.1923 - loss: 0.6360 - precision: 0.6087 - recall: 0.6419 - true_negatives: 141.4615 - true_positives: 130.2692 - val_auc: 0.6265 - val_binary_accuracy: 0.4100 - val_false_negatives: 59.0000 - val_false_positives: 0.0000e+00 - val_loss: 0.9959 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00 - val

0,1
epoch/auc,▁▂▃▃▄▅▆▇██
epoch/binary_accuracy,▁▂▃▃▄▅▅▆██
epoch/epoch,▁▂▃▃▄▅▆▆▇█
epoch/false_negatives,██▇▇▆▄▄▃▂▁
epoch/false_positives,█▆▆▆▅▄▄▂▁▁
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▆▆▆▆▅▄▃▂▁
epoch/precision,▁▂▃▃▄▄▅▆██
epoch/recall,▁▁▂▂▃▅▅▆▇█
epoch/true_negatives,▁▃▃▃▄▅▅▇██

0,1
epoch/auc,0.98585
epoch/binary_accuracy,0.935
epoch/epoch,9.0
epoch/false_negatives,26.0
epoch/false_positives,26.0
epoch/learning_rate,0.001
epoch/loss,0.22451
epoch/precision,0.93229
epoch/recall,0.93229
epoch/true_negatives,390.0
