1. Imports and environment setup


In [8]:
import os
# These variables are set before the tensorflow imports below so they are
# already present in the environment when TensorFlow is loaded.
# NOTE(review): "2" is expected to hide TensorFlow's INFO and WARNING C++ log
# lines — confirm against the TensorFlow docs.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
# NOTE(review): presumably makes TensorFlow grow GPU memory on demand instead
# of reserving it all up front — confirm.
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"

import wandb
import numpy as np

from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow import keras as k


2. wandb login cell


In [9]:
# Authenticate with Weights & Biases before any run is started in later cells.
wandb.login()




True

3. CIFAR-10 trainer and logging callbacks (main lab cell)

In [10]:
# ---------------------- Custom Callbacks ----------------------

class WandbLoggingCallback(k.callbacks.Callback):
    """Log loss and metrics to wandb at the end of each epoch.

    The metrics Keras reports for the epoch are forwarded to ``wandb.log``
    together with an ``"epoch"`` entry holding the zero-based epoch index.
    """
    def on_epoch_end(self, epoch, logs=None):
        """Send this epoch's metrics to wandb.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            logs: Metric name -> value mapping supplied by Keras; may be None.
        """
        # Work on a copy: the dict handed in by Keras is shared with the other
        # callbacks and the progress bar, so writing "epoch" into it leaks a
        # bogus "epoch" metric into the console output and training history.
        payload = dict(logs or {})
        payload["epoch"] = epoch
        wandb.log(payload)


class LogLRCallback(k.callbacks.Callback):
    """Report the optimizer's learning rate to wandb after every epoch."""
    def on_epoch_end(self, epoch, logs=None):
        """Read the current learning rate and log it next to the epoch index."""
        optimizer = self.model.optimizer
        try:
            raw_rate = optimizer.learning_rate
            current_lr = float(k.backend.get_value(raw_rate))
        except Exception:
            # Fall back to the `lr` attribute when `learning_rate` cannot be
            # read or converted.
            current_lr = float(k.backend.get_value(optimizer.lr))
        payload = {"learning_rate": current_lr, "epoch": epoch}
        wandb.log(payload)


class LogSamplesCallback(k.callbacks.Callback):
    """Log a few sample images and predictions each epoch.

    Args:
        x: Image array; only the first ``num_samples`` entries are kept.
        y: One-hot labels aligned with ``x``; reduced to class indices via
            ``argmax`` over axis 1.
        labels: Sequence mapping a class index to its display name.
        num_samples: Maximum number of images to log per epoch.
    """
    def __init__(self, x, y, labels, num_samples=16):
        super().__init__()
        self.x = x[:num_samples]
        self.y = np.argmax(y[:num_samples], axis=1)
        self.labels = labels
        # Slicing silently returns fewer rows when `x` is shorter than
        # `num_samples`, so record how many samples are actually held rather
        # than how many were requested.
        self.num_samples = len(self.x)

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the stored samples and log them as captioned images."""
        if self.num_samples == 0:
            # Nothing to predict on or display.
            return

        preds = self.model.predict(self.x, verbose=0)
        pred_labels = np.argmax(preds, axis=1)

        # Iterate over the samples that exist instead of a fixed count so a
        # short input cannot raise IndexError.
        images = [
            wandb.Image(
                image,
                caption=f"True: {self.labels[true_idx]}, Pred: {self.labels[pred_idx]}",
            )
            for image, true_idx, pred_idx in zip(self.x, self.y, pred_labels)
        ]

        wandb.log({"sample_predictions": images, "epoch": epoch})


class ConfusionMatrixCallback(k.callbacks.Callback):
    """Log a confusion matrix over the supplied evaluation data after each epoch."""
    def __init__(self, x, y_true, labels):
        """Keep the inputs and class names; turn one-hot targets into class indices."""
        super().__init__()
        self.x, self.labels = x, labels
        self.y_true = np.argmax(y_true, axis=1)

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the stored inputs and send the resulting chart to wandb."""
        probabilities = self.model.predict(self.x, verbose=0)
        chart = wandb.plot.confusion_matrix(
            preds=np.argmax(probabilities, axis=1),
            y_true=self.y_true,
            class_names=self.labels,
        )
        wandb.log({"confusion_matrix": chart, "epoch": epoch})


# ---------------------- CIFAR-10 Trainer ----------------------

class CIFAR10Trainer:
    """Train a small CNN on a CIFAR-10 subset and log the run to wandb.

    Constructing the trainer starts a wandb run, loads the data and builds
    and compiles the model. Call ``train()`` to fit and save the model.

    Args:
        project_name: wandb project the run is created in.
        run_name: Display name of the wandb run.
        epochs: Number of training epochs.
        batch_size: Mini-batch size passed to ``fit``.
        sample: Upper bound on the number of images taken from the start of
            each of the train and test splits.
        model_path: File the trained model is saved to by ``train()``.
    """
    def __init__(
        self,
        project_name="Lab2-logging-cifar10",
        run_name="cifar10_cnn",
        epochs=5,
        batch_size=128,
        sample=20000,
        model_path="cifar10_cnn.keras",
    ):
        # Hyperparameters (defaults match the original hard-coded values).
        # `sample` is a cap applied by slicing, so a split with fewer images
        # is used in full.
        # NOTE(review): the standard CIFAR-10 test split has 10,000 images, so
        # the default of 20000 presumably only limits the training split — confirm.
        self.config = dict(
            epochs=epochs,
            batch_size=batch_size,
            sample=sample,
        )
        self.model_path = model_path

        # Start wandb run
        self.run = wandb.init(
            project=project_name,
            name=run_name,
            config=self.config
        )
        # From here on read hyperparameters through wandb.config rather than
        # the local dict.
        self.config = wandb.config

        # CIFAR-10 labels, indexed by class id
        self.labels = [
            "airplane", "automobile", "bird", "cat", "deer",
            "dog", "frog", "horse", "ship", "truck"
        ]

        self._prepare_data()
        self._build_model()

    def _prepare_data(self):
        """Load CIFAR-10, keep the first ``sample`` items per split, scale and one-hot encode."""
        (xtr, ytr), (xte, yte) = cifar10.load_data()

        n = self.config.sample

        # Scale pixel values by 1/255 and flatten the label arrays to 1-D.
        xtr = xtr[:n].astype("float32") / 255.0
        xte = xte[:n].astype("float32") / 255.0
        ytr = ytr[:n].reshape(-1)
        yte = yte[:n].reshape(-1)

        self.X_train = xtr          # shape (n, 32, 32, 3)
        self.X_test  = xte

        self.y_train = to_categorical(ytr, num_classes=10)
        self.y_test  = to_categorical(yte, num_classes=10)

        self.num_classes = self.y_test.shape[1]

    def _build_model(self):
        """Build and compile a simple CNN for CIFAR-10 (two conv blocks + dense head)."""
        inputs = k.Input(shape=(32, 32, 3))

        x = k.layers.Conv2D(32, (3, 3), activation="relu", padding="same")(inputs)
        x = k.layers.Conv2D(32, (3, 3), activation="relu", padding="same")(x)
        x = k.layers.MaxPooling2D((2, 2))(x)

        x = k.layers.Conv2D(64, (3, 3), activation="relu", padding="same")(x)
        x = k.layers.Conv2D(64, (3, 3), activation="relu", padding="same")(x)
        x = k.layers.MaxPooling2D((2, 2))(x)

        x = k.layers.Flatten()(x)
        x = k.layers.Dense(128, activation="relu")(x)
        x = k.layers.Dropout(0.5)(x)

        outputs = k.layers.Dense(self.num_classes, activation="softmax")(x)

        self.model = k.Model(inputs=inputs, outputs=outputs)
        self.model.compile(
            optimizer="adam",
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )

    def _get_callbacks(self):
        """Create callbacks for wandb logging and visualizations."""
        return [
            WandbLoggingCallback(),
            LogLRCallback(),
            LogSamplesCallback(self.X_test, self.y_test, self.labels, num_samples=16),
            ConfusionMatrixCallback(self.X_test, self.y_test, self.labels),
        ]

    def train(self):
        """Fit the model, save it to ``self.model_path`` and return the Keras history.

        The test subset is passed to ``fit`` as validation data.
        """
        history = self.model.fit(
            self.X_train,
            self.y_train,
            epochs=self.config.epochs,
            batch_size=self.config.batch_size,
            validation_data=(self.X_test, self.y_test),
            callbacks=self._get_callbacks(),
            verbose=1,
        )

        # Save model locally only (no wandb.save to avoid Windows symlink issue)
        self.model.save(self.model_path)
        wandb.log({"saved_model_path": self.model_path})

        return history


# ---------------------- Run the trainer ----------------------

# The trainer's constructor opens the wandb run, so construction and training
# both sit inside the try: the run is closed even if either step raises,
# instead of being left open in the kernel.
try:
    trainer = CIFAR10Trainer()
    trainer.train()
finally:
    wandb.finish()


0,1
accuracy,▁▄▆▇█
epoch,▁▁▁▁▃▃▃▃▅▅▅▅▆▆▆▆████
learning_rate,▁▁▁▁▁
loss,█▅▃▂▁
val_accuracy,▁▃▆▇█
val_loss,█▆▃▂▁

0,1
accuracy,0.5966
epoch,4.0
learning_rate,0.001
loss,1.13156
val_accuracy,0.6325
val_loss,1.05831


Epoch 1/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 57ms/step - accuracy: 0.2745 - loss: 1.9519 - val_accuracy: 0.4271 - val_loss: 1.6269 - epoch: 0.0000e+00
Epoch 2/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 61ms/step - accuracy: 0.4132 - loss: 1.6153 - val_accuracy: 0.5085 - val_loss: 1.3742 - epoch: 1.0000
Epoch 3/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 62ms/step - accuracy: 0.4726 - loss: 1.4620 - val_accuracy: 0.5304 - val_loss: 1.2969 - epoch: 2.0000
Epoch 4/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 62ms/step - accuracy: 0.5245 - loss: 1.3228 - val_accuracy: 0.5358 - val_loss: 1.2885 - epoch: 3.0000
Epoch 5/5
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 61ms/step - accuracy: 0.5583 - loss: 1.2193 - val_accuracy: 0.5906 - val_loss: 1.1349 - epoch: 4.0000


[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
accuracy,▁▄▆▇█
epoch,▁▁▁▁▃▃▃▃▅▅▅▅▆▆▆▆████
learning_rate,▁▁▁▁▁
loss,█▅▃▂▁
val_accuracy,▁▄▅▆█
val_loss,█▄▃▃▁

0,1
accuracy,0.55825
epoch,4
learning_rate,0.001
loss,1.21931
saved_model_path,cifar10_cnn.keras
val_accuracy,0.5906
val_loss,1.13493
