In [None]:
# week10_evaluation.ipynb

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from tensorflow.keras.models import load_model

# Load the Oxford-IIIT Pets dataset (pinned to version 4.0.0) via TFDS.
print("Loading Oxford-IIIT Pets dataset...")
IMG_SIZE = 128  # side length, in pixels, that preprocess() resizes to
dataset, info = tfds.load(name="oxford_iiit_pet:4.0.0", with_info=True)

# Only the "test" split is used by the evaluation below.
test = dataset["test"]

# Preprocess images & masks
def preprocess(example):
    """Resize one dataset example to the square size used for evaluation.

    Args:
        example: A dataset element exposing "image" and
            "segmentation_mask" entries.

    Returns:
        A tuple ``(img, mask)``: the image resized to
        ``(IMG_SIZE, IMG_SIZE)`` and divided by 255, and the mask resized
        to the same spatial size with nearest-neighbour sampling.
    """
    target_size = (IMG_SIZE, IMG_SIZE)
    img = tf.image.resize(example["image"], target_size) / 255.0
    # Masks hold discrete class ids. The default bilinear resize would blend
    # neighbouring ids into fractional values that are not valid labels, so
    # sample with nearest-neighbour instead (this also keeps the mask's
    # original dtype rather than converting it to float32).
    mask = tf.image.resize(
        example["segmentation_mask"], target_size, method="nearest"
    )
    # NOTE(review): the TFDS catalog describes this mask's labels as 1..3.
    # If that holds, a `> 0` binarisation of the ground truth is true for
    # every pixel -- confirm which label convention the trained model uses
    # and shift the mask here if needed.
    return img, mask

# Apply the resize/scale step per example, then group examples into batches of 8.
test = test.map(preprocess)
test = test.batch(8)

print("Dataset ready for evaluation.")

# Load the trained U-Net from disk. compile=False skips restoring the
# training configuration; only inference is run in this notebook cell.
print("Loading trained U-Net model...")
model = load_model(
    "./models/unet_model.h5",
    compile=False,
)
print("Model loaded successfully.")

# Define Evaluation Metrics
def iou_metric(y_true, y_pred):
    """Intersection-over-union of the binarised ground truth and prediction.

    The ground truth counts as foreground wherever ``y_true > 0``. The
    prediction counts as foreground wherever the argmax over the last axis
    of ``y_pred`` is non-zero. Size-1 dimensions are squeezed away on both
    sides before they are compared.

    Returns:
        A scalar float32 tensor ``(intersection + 1e-7) / (union + 1e-7)``;
        the epsilon keeps the ratio defined when both masks are empty.
    """
    truth_fg = tf.squeeze(y_true) > 0
    pred_fg = tf.squeeze(tf.argmax(y_pred, axis=-1)) > 0

    overlap = tf.reduce_sum(
        tf.cast(tf.logical_and(truth_fg, pred_fg), tf.float32)
    )
    combined = tf.reduce_sum(
        tf.cast(tf.logical_or(truth_fg, pred_fg), tf.float32)
    )

    eps = 1e-7
    return (overlap + eps) / (combined + eps)

def dice_coefficient(y_true, y_pred):
    """Dice coefficient of the binarised ground truth and prediction.

    Binarisation matches ``y_true > 0`` for the ground truth and a non-zero
    argmax over the last axis of ``y_pred`` for the prediction, after
    squeezing size-1 dimensions from both.

    Returns:
        A scalar float32 tensor
        ``(2 * intersection + 1e-7) / (|truth| + |pred| + 1e-7)``.
    """
    truth_fg = tf.squeeze(y_true) > 0
    pred_fg = tf.squeeze(tf.argmax(y_pred, axis=-1)) > 0

    overlap = tf.reduce_sum(
        tf.cast(tf.logical_and(truth_fg, pred_fg), tf.float32)
    )
    truth_area = tf.reduce_sum(tf.cast(truth_fg, tf.float32))
    pred_area = tf.reduce_sum(tf.cast(pred_fg, tf.float32))

    numerator = 2. * overlap + 1e-7
    denominator = truth_area + pred_area + 1e-7
    return numerator / denominator

# Run Evaluation: score each sample of the first 20 batches, then report the
# mean IoU and Dice over all scored samples.
iou_scores, dice_scores = [], []

for images, masks in test.take(20):  # evaluate on 20 batches for speed
    # Every dataset element is already a single batch, so run it directly
    # with predict_on_batch; calling predict() here would redo its per-call
    # setup and print one progress bar for each batch.
    preds = model.predict_on_batch(images)
    for mask, pred in zip(masks, preds):
        iou_scores.append(iou_metric(mask, pred).numpy())
        dice_scores.append(dice_coefficient(mask, pred).numpy())

print("Segmentation Evaluation Results:")
if iou_scores:
    print(f"Average IoU: {np.mean(iou_scores):.4f}")
    print(f"Average Dice: {np.mean(dice_scores):.4f}")
else:
    # np.mean of an empty list yields NaN plus a RuntimeWarning; say so plainly.
    print("No samples were evaluated.")

Loading Oxford-IIIT Pets dataset...
Dataset ready for evaluation.
Loading trained U-Net model...
Model loaded successfully.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[