# MODEL EVALUATION

In [1]:
import fiftyone as fo
import os
import tensorflow as tf
import numpy as np
import transformers
from transformers import create_optimizer, TFSegformerForSemanticSegmentation
import evaluate
import cv2
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
trainDataset = tf.data.Dataset.load('SavedDataset/trainDataset')
valDataset = tf.data.Dataset.load('SavedDataset/valDataset')

2025-02-09 19:06:17.668920: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-02-09 19:06:17.668940: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-02-09 19:06:17.668947: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
I0000 00:00:1739156777.668960 12070899 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1739156777.668978 12070899 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
dataset = fo.load_dataset('Validation-Clothing-Dataset')
dataset.head()

[<Sample: {
     'id': '67a85a3d38d454199c663e1d',
     'media_type': 'image',
     'filepath': '/Users/aman/Documents/Work/Machine Learning/Computer-Vision-TensorFlow/Virtual-Cloth-TryOn-Image-Segmentation/ValDataset/png_images/IMAGES/img_0003.png',
     'tags': [],
     'metadata': None,
     'created_at': datetime.datetime(2025, 2, 9, 7, 33, 17, 676000),
     'last_modified_at': datetime.datetime(2025, 2, 9, 18, 15, 27, 773000),
     'ground_truth': <Segmentation: {
         'id': '67a85a3d38d454199c663e1c',
         'tags': [],
         'mask': None,
         'mask_path': '/Users/aman/Documents/Work/Machine Learning/Computer-Vision-TensorFlow/Virtual-Cloth-TryOn-Image-Segmentation/ValDataset/png_masks/MASKS/img_0003.png',
     }>,
     'pred': <Segmentation: {
         'id': '67a8f0bf2aeb60c9e46c788d',
         'tags': [],
         'mask': array([[0, 0, 0, ..., 0, 0, 0],
                [0, 0, 0, ..., 0, 0, 0],
                [0, 0, 0, ..., 0, 0, 0],
                ...,
         

In [5]:
MEAN = [123.675, 116.28, 103.53]
STD = [58.395, 57.12, 57.375]
Hinit, Winit = 825, 550

In [6]:
dataFrame = pd.read_csv('Dataset/labels.csv')
ID2LABEL = {}
for i, j in dataFrame.iterrows():
    if i==0:
        ID2LABEL[i] = 'nan'
    else:
        ID2LABEL[i] = j['label_list']
LABEL2ID = {v: k for k, v in ID2LABEL.items()}
NUM_CLASSES = len(ID2LABEL)

In [6]:
modelID = "nvidia/segformer-b5-finetuned-cityscapes-1024-1024"
model = TFSegformerForSemanticSegmentation.from_pretrained(
    modelID,
    num_labels=NUM_CLASSES,
    id2label=ID2LABEL,
    label2id=LABEL2ID,
    ignore_mismatched_sizes=True)

model.load_weights('Models/Segformer-B5.h5')

All model checkpoint layers were used when initializing TFSegformerForSemanticSegmentation.

Some weights of TFSegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/segformer-b5-finetuned-cityscapes-1024-1024 and are newly initialized because the shapes did not match:
- decode_head/classifier/kernel:0: found shape (1, 1, 768, 19) in the checkpoint and (1, 1, 768, 59) in the model instantiated
- decode_head/classifier/bias:0: found shape (19,) in the checkpoint and (59,) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
metric = evaluate.load("mean_iou")

In [8]:
for i, sample in enumerate(dataset):
    image = tf.io.decode_jpeg(tf.io.read_file(sample.filepath))
    image = tf.image.resize(image, [512, 512])
    image = tf.cast(image, tf.float32)
    image = (image-MEAN)/STD
    image = tf.transpose(image, [2, 0, 1])
    image = tf.expand_dims(image, axis=0)

    with tf.device("/CPU:0"):
        output = model(image).logits # -> [B, 59, 128, 128]
    output = tf.argmax(output, axis=1)

    resizedOutput = tf.image.resize(tf.expand_dims(output, axis=-1), [Hinit, Winit], method='bilinear', antialias=True)
    resizedOutput = tf.squeeze(resizedOutput, axis=-1)
    resizedOutput = tf.cast(resizedOutput, tf.uint8)[0]

    sample['pred'] = fo.Segmentation(mask=resizedOutput.numpy())

    mask = cv2.imread(sample['ground_truth']['mask_path'], cv2.IMREAD_GRAYSCALE)
    
    metrics = metric.compute(predictions=[resizedOutput.numpy()], 
                             references=[mask], 
                             num_labels=59,
                             ignore_index=0,
                             reduce_labels=False,
                             nan_to_num=0)

    sample['mean_iou'] = metrics['mean_iou']
    sample['mean_accuracy'] = metrics['mean_accuracy']
    sample['overall_accuracy'] = metrics['overall_accuracy']

    for k, c in enumerate(metrics['per_category_iou']):
        if c>0.0001:
            key = ID2LABEL[k] + '_iou'
            sample[key] = c

    sample.save()

I0000 00:00:1739124924.727176 11560378 service.cc:148] XLA service 0x39a77a070 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1739124924.727574 11560378 service.cc:156]   StreamExecutor device (0): Host, Default Version
I0000 00:00:1739124924.769287 11560378 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.




  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label


In [7]:
dataset.default_mask_targets = ID2LABEL
session = fo.launch_app(dataset)