# Run inference with the fine-tuned OneFormer on the validation dataset and evaluate performance

In [None]:
from PIL import Image
from transformers import AutoProcessor
from transformers import AutoModelForUniversalSegmentation
import evaluate
import torch
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.patches as mpatches
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import os
import numpy as np
import pandas as pd
from glob import glob

In [None]:
# images_folder = r"C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\GSV_annotations_converted_merged\all\images"
# labels_folder = r"C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\GSV_annotations_converted_merged\all\labels"
# model_folder = r"C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\oneformer\from_all"
# out_folder=r'C:\Users\lliu\Desktop\FrontierSI\projects\GA_floor_height\GA-floor-height\output\Wagga\GSV_prediction\OneFormer\from_all'

In [None]:
# Input/output locations for this validation run:
#   images_folder - validation images (*.png)
#   labels_folder - ground-truth label maps (*.png)
#   model_folder  - directory the processor and model are loaded from
#   out_folder    - destination for the predicted segmentation maps
images_folder = r"D:\Wagga\RICS\annotations_converted_merged\validation\images"
labels_folder = r"D:\Wagga\RICS\annotations_converted_merged\validation\labels"
model_folder = r"D:\Wagga\RICS\OneFormer\from_train_set"
out_folder = r"D:\Wagga\RICS\OneFormer\from_train_set\prediction_validation_set"

### Read in GSV image

In [None]:
# Collect the validation images and their ground-truth label maps.
# Both lists are sorted so that images and labels can be paired by position
# (this assumes matching file names sort identically in both folders).
image_files = sorted(glob(f"{images_folder}/*.png"))
label_files = sorted(glob(f"{labels_folder}/*.png"))

# Fail early with a clear message: two empty lists would pass the count check
# below, and the following cells would then die with a bare IndexError on
# image_files[0] when a folder path is wrong or empty.
if not image_files:
    raise FileNotFoundError(f"No .png images found in '{images_folder}'")
assert len(image_files) == len(label_files), "Number of images and labels do not match!"

In [None]:
# Use the first validation image to walk through the pipeline on one sample.
image_file = image_files[0]
image = Image.open(image_file)

### Load pre-trained model and initialise processor

In [None]:
# Class id -> class name mapping handed to the model configuration.
# Earlier labelling scheme, kept for reference:
#   {1:'front door',2:'foundation',3:'garage door',4:'pavement'}
id2label = {
    0: "_background_",
    1: "foundation",
    2: "front door",
    3: "garage door",
    4: "stairs",
}
# Normalise keys to int, then build the reverse (name -> id) lookup.
id2label = {int(class_id): name for class_id, name in id2label.items()}
label2id = {name: class_id for class_id, name in id2label.items()}

In [None]:
# Load the processor stored in the model folder and encode the sample image
# for the "semantic" task, returning PyTorch tensors.
processor = AutoProcessor.from_pretrained(model_folder)
encoded_inputs = processor(
    images=image,
    task_inputs=["semantic"],
    return_tensors="pt",
    size=(512, 512),
)
# Decode the tokenised task input back to text; shown as the cell output.
processor.tokenizer.batch_decode(encoded_inputs.task_inputs)

In [None]:
# Inference is pinned to the CPU; automatic CUDA selection is left disabled.
# NOTE(review): the inference cells pass `encoded_inputs` to the model without
# moving them to `device`, so enabling CUDA here would also require moving the
# inputs (and bringing predictions back to the CPU before NumPy conversion).
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device = 'cpu'

# Load the model from `model_folder` with this notebook's label mapping and
# place it on the selected device.
model = AutoModelForUniversalSegmentation.from_pretrained(
    model_folder,
    is_training=False,
    ignore_mismatched_sizes=True,
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id,
).to(device)

### Inference

In [None]:
# Forward pass on the encoded sample; gradients are not needed for inference.
with torch.no_grad():
    outputs = model(**encoded_inputs)

### Post process

In [None]:
# Resize the prediction back to the original image resolution.
# PIL reports size as (width, height); target_sizes is given as (height, width).
predicted_segmentation_map = processor.post_process_semantic_segmentation(
    outputs,
    target_sizes=[image.size[::-1]],
)[0]
predicted_segmentation_map.shape

### Evaluate predictions

In [None]:
# Ground-truth label map paired with the first image, as a NumPy array.
label_gt = label_files[0]
label_gt_map = np.array(Image.open(label_gt))


In [None]:
# Score the single prediction against its ground truth.
# The metric takes lists of arrays for predictions and references, hence the
# one-element lists.
# NOTE(review): this calls the private `_compute` rather than the public
# `compute` - presumably deliberate; confirm before changing.
metric = evaluate.load("mean_iou")
metrics = metric._compute(
    references=[label_gt_map],
    predictions=[predicted_segmentation_map],
    num_labels=len(id2label),
    ignore_index=255,
    reduce_labels=False,  # we've already reduced the labels ourselves
)
metrics.keys()

In [None]:
# Print the overall metrics: the first three entries of the result dict.
for key in list(metrics)[:3]:
    print(key, metrics[key])

# Pretty-print per-category metrics as a pandas DataFrame, one row per class.
# The loop variable is `class_id` rather than `id` so that the builtin id()
# is not shadowed at notebook scope for the rest of the session.
metric_table = {}
for class_id, label in id2label.items():
    metric_table[label] = [
        metrics["per_category_iou"][class_id],
        metrics["per_category_accuracy"][class_id],
    ]

print("---------------------")
print("per-category metrics:")
pd.DataFrame.from_dict(metric_table, orient="index", columns=["IoU", "accuracy"])

### Save predictions

In [None]:
# Convert the predicted map to an 8-bit image.
# It is bound to `image_predicted` (as in the batch loop) so the input `image`
# used by the earlier cells is not overwritten when cells are re-run.
image_predicted = Image.fromarray(np.array(predicted_segmentation_map).astype(np.uint8))

# Save as PNG (lossless, so the stored values are exactly the predicted ids),
# named after the ground-truth label file. The extension is swapped with
# splitext so that "jpg" occurring elsewhere in the file name is left alone.
os.makedirs(out_folder, exist_ok=True)  # a fresh output folder may not exist yet
label_stem = os.path.splitext(os.path.basename(label_gt))[0]
out_prediction = os.path.join(out_folder, label_stem + ".png")
image_predicted.save(out_prediction)

## Put it all together and run on all validation images

In [None]:
# Run the full pipeline over every validation pair, saving each prediction
# and collecting flattened predictions / ground truths for evaluation.
predictions = []
gt_labels = []

# Create the output folder up front so saving does not fail on a fresh run.
os.makedirs(out_folder, exist_ok=True)

for image_file, label_gt in zip(image_files, label_files):
    # The lists are paired by sorted position; make sure the names agree.
    image_stem = os.path.splitext(os.path.basename(image_file))[0]
    label_stem = os.path.splitext(os.path.basename(label_gt))[0]
    assert image_stem == label_stem, \
        f"Image file '{image_file}' and label file '{label_gt}' do not match!"

    # Encode the image; the context manager releases the file handle promptly.
    with Image.open(image_file) as image:
        encoded_inputs = processor(images=image, task_inputs=["semantic"], return_tensors="pt", size=(512, 512))
        # PIL size is (width, height); post-processing takes (height, width).
        target_size = (image.size[1], image.size[0])

    # forward pass
    with torch.no_grad():
        outputs = model(**encoded_inputs)
    predicted_segmentation_map = processor.post_process_semantic_segmentation(outputs, target_sizes=[target_size])[0]

    # Save the prediction as a PNG named after the label file. The extension
    # is swapped via splitext so "jpg" elsewhere in the name is left alone.
    image_predicted = Image.fromarray(np.array(predicted_segmentation_map).astype(np.uint8))
    out_prediction = os.path.join(out_folder, label_stem + ".png")
    image_predicted.save(out_prediction)

    # Load the ground-truth map as a NumPy array.
    with Image.open(label_gt) as label_image:
        label_gt_map = np.array(label_image)

    predictions.append(predicted_segmentation_map.flatten())
    gt_labels.append(label_gt_map.flatten())


## Evaluate - IoU by category

In [None]:
# Dataset-level evaluation over all collected samples.
# The metric takes a list of arrays for both predictions and references.
metric = evaluate.load("mean_iou")
metrics = metric._compute(
    predictions=predictions,
    references=gt_labels,
    num_labels=len(id2label),
    ignore_index=255,
    reduce_labels=False,  # we've already reduced the labels ourselves
)

# Pretty-print per-category metrics as a pandas DataFrame, one row per class.
# The loop variable is `class_id` rather than `id` so that the builtin id()
# is not shadowed at notebook scope for the rest of the session.
metric_table = {}
for class_id, label in id2label.items():
    metric_table[label] = [
        metrics["per_category_iou"][class_id],
        metrics["per_category_accuracy"][class_id],
    ]

print("Mean IoU:",metrics["mean_iou"])
print("Mean accuracy:",metrics["mean_accuracy"])
print("---------------------")
print("per-category metrics:")
pd.DataFrame.from_dict(metric_table, orient="index", columns=["IoU", "accuracy"])

## Evaluate - percent of samples with IoU above a threshold

In [None]:
# Score every validation sample on its own: one IoU per category per sample.
ious_per_sample = []
for sample_gt, sample_pred in zip(gt_labels, predictions):
    results = metric._compute(
        references=[sample_gt],
        predictions=[sample_pred],
        num_labels=len(id2label),
        ignore_index=255,
    )
    ious_per_sample.append(results['per_category_iou'])

# IoU cut-off for counting a sample as a hit (0.2 = 20%); adjust as needed.
iou_threshold = 0.2
# Rows are samples, columns are categories (in id2label order).
ious_per_category = np.array(ious_per_sample)

# A category that appears in neither the ground truth nor the prediction of a
# sample has an undefined (NaN) IoU. Such samples say nothing about how well
# the category is segmented, so they are left out of the denominator instead
# of being counted as misses.
is_defined = ~np.isnan(ious_per_category)
n_defined = is_defined.sum(axis=0)
# NaN never compares >= threshold, so undefined entries are not counted here.
n_above = (ious_per_category >= iou_threshold).sum(axis=0)

# Percentage of scored samples at or above the threshold, per category;
# NaN where a category was never scored in any sample.
percent_iou_above_threshold_per_category = np.where(
    n_defined > 0,
    n_above / np.maximum(n_defined, 1) * 100,
    np.nan,
)
percent_iou_above_threshold_per_category