In [None]:
# Delete any existing TransPatch directory under /kaggle/working (recursive, forced).
! rm -rf /kaggle/working/TransPatch

In [None]:
# Switch the notebook's working directory to /kaggle/working/.
%cd /kaggle/working/

In [None]:
# Clone only the `ablations` branch of the project repository.
# NOTE(review): no repository URL is given and `--repo_name` looks like a leftover
# placeholder rather than a git option — TODO: replace it with the repository URL.
!git clone --branch ablations --single-branch --repo_name

In [None]:
cd /kaggle/working/TransPatch

## Loading Dependencies

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import sys
import gc
sys.path.append('/kaggle/working/TransPatch/')
from utils.utils import setup_logging, get_config_from_yaml, process_config, print_config
from dataset.cityscapes import Cityscapes
from metrics.performance import SegmentationMetric
from utils.helper import val_plot
from patch.create import Patch
from pretrained_models.ICNet.icnet import ICNet
from pretrained_models.BisNetV2.model import BiSeNetV2
from pretrained_models.PIDNet.model import PIDNet, get_pred_model


import pickle
from copy import deepcopy
from tqdm import tqdm
# Read the experiment configuration from the YAML file shipped with the cloned repository.
config_path = '/kaggle/working/TransPatch/configs/config.yaml'
config = get_config_from_yaml(config_path)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(device)

## Preparing Dataloader

## Cityscapes

In [None]:
# Cityscapes validation set, built with multi-scale and flip disabled.
# Every other setting is read from the YAML config.
cityscapes_val_options = dict(
    root=config.dataset.root,
    list_path=config.dataset.val,
    num_classes=config.dataset.num_classes,
    multi_scale=False,
    flip=False,
    ignore_label=config.train.ignore_label,
    base_size=config.train.base_size,
    crop_size=(config.train.height, config.train.width),
)
cityscape_val = Cityscapes(**cityscapes_val_options)

# One image per batch, in dataset order (no shuffling).
val_loader_options = dict(
    batch_size=1,
    shuffle=False,
    num_workers=config.train.num_workers,
    pin_memory=config.train.pin_memory,
    drop_last=config.train.drop_last,
)
val_dataloader = torch.utils.data.DataLoader(cityscape_val, **val_loader_options)

## BDD100k

In [None]:
# from dataset.bdd100k import BDD100K
# from torch.utils.data import Subset

# bdd_data = BDD100K(
#           root = "/kaggle/input/solesensei_bdd100k/bdd100k_seg/bdd100k/seg/",
#           list_path = config.other_dataset.val,
#           num_classes = 19,
#           use_color_labels=False,
#           multi_scale = False,
#           flip = False,
#           ignore_label = config.train.ignore_label,
#           base_size = config.train.base_size,
#           crop_size = (config.train.height,config.train.width),
#         )

# small_dataset = Subset(bdd_data, range(32))

# val_dataloader_bdd = torch.utils.data.DataLoader(dataset=small_dataset,
#                                             batch_size=4,
#                                             shuffle=False,
#                                             num_workers=config.train.num_workers,
#                                             pin_memory=config.train.pin_memory,
#                                             drop_last=config.train.drop_last)

## Loading Models

In [None]:
def _load_pidnet(name, weights_path):
    """Build the PIDNet variant `name` and load the checkpoint at `weights_path` into it.

    The checkpoint may either be the parameter mapping itself or wrap it under a
    'state_dict' key. The first 6 characters of every key (the "model." prefix) are
    stripped, and keys that do not exist in the freshly built network are dropped
    before a strict load.

    Returns the network on `device`, switched to eval mode.
    Raises whatever `torch.load` / `load_state_dict` raise on a missing file or bad keys.
    """
    checkpoint = torch.load(weights_path, map_location=device)
    net = get_pred_model(name=name, num_classes=19).to(device)
    if 'state_dict' in checkpoint:
        checkpoint = checkpoint['state_dict']
    expected_keys = net.state_dict().keys()
    # k[6:] to start after "model." in key names
    weights = {k[6:]: v for k, v in checkpoint.items() if k[6:] in expected_keys}
    net.load_state_dict(weights)
    net.eval()
    return net


def _unwrap_state_dict(checkpoint):
    """Return the parameter mapping inside `checkpoint`, unwrapping a common container key if present."""
    if isinstance(checkpoint, dict):
        for wrapper_key in ('state_dict', 'model_state_dict', 'model'):
            if isinstance(checkpoint.get(wrapper_key), dict):
                return checkpoint[wrapper_key]
    return checkpoint


## PIDNet-s
pidnet_s = _load_pidnet('pidnet_s', '/kaggle/input/models/PIDNet_S_Cityscapes_test.pt')
print('PIDNet-s Model loaded')

## PIDNet-m
pidnet_m = _load_pidnet('pidnet_m', '/kaggle/input/models/PIDNet_M_Cityscapes_test.pt')
print('PIDNet-m Model loaded')

## PIDNet-l
# Best effort: when the weights cannot be loaded, `pidnet_l` is set to None and the
# notebook carries on without it.
try:
    pidnet_l = _load_pidnet('pidnet_l', '/kaggle/input/pidnet-l-weights/PIDNet_L_Cityscapes_test.pt')
    print('PIDNet-L loaded')
except Exception as e:
    pidnet_l = None
    print(f'WARNING: PIDNet-L not loaded: {e}')

# ---- metrics for new models ----
metric_pidnet_l   = SegmentationMetric(config)

## ICNet
icnet_checkpoint = torch.load('/kaggle/input/icnet-wts/icnet_resnet50os8_cityscapes.pth',map_location=device)
icnet = ICNet(nclass = 19).to(device)
# FIX: the checkpoint used to be read and then discarded, so ICNet was evaluated with
# whatever weights its constructor produced. Load it non-strictly (like the BiSeNet
# checkpoints below) and report parameters the checkpoint did not provide.
# NOTE(review): the checkpoint layout is assumed, not verified — confirm the key names match.
icnet_load = icnet.load_state_dict(_unwrap_state_dict(icnet_checkpoint), strict=False)
if icnet_load.missing_keys:
    print(f'WARNING: ICNet checkpoint did not provide {len(icnet_load.missing_keys)} parameters')
icnet.eval()
print('ICNet loaded')


## BiSeNetV1
# NOTE(review): the V1 checkpoint is loaded into the BiSeNetV2 architecture with
# strict=False, so parameters whose names do not match are silently left at their
# initial values — confirm this is intended.
bisenetv1_weights = torch.load('/kaggle/input/models/model_final_v1_city_new.pth',map_location=device)
bisenetv1 = BiSeNetV2(19,aux_mode = 'eval').to(device)
bisenetv1.load_state_dict(bisenetv1_weights, strict=False)
bisenetv1.eval()
print('BisNetV1 loaded')


## BiSeNetV2
bisenetv2_weights = torch.load('/kaggle/input/models/model_final_v2_city.pth',map_location=device)
bisenetv2 = BiSeNetV2(19,aux_mode = 'eval').to(device)
bisenetv2.load_state_dict(bisenetv2_weights, strict=False)
bisenetv2.eval()
print('BisNetV2 loaded')


## segformer from huggingface
import os, io, contextlib
from transformers.utils import logging as hf_logging
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation

# (A) kill progress bars & tokenizers chatter
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# (B) silence HF logging completely
hf_logging.set_verbosity_error()           # or set_verbosity(hf_logging.CRITICAL)
hf_logging.disable_default_handler()       # prevent adding console handlers
hf_logging.enable_propagation()            # keep logs from re-adding handlers

# (C) suppress any stdout/stderr printed during load (belt & suspenders)
with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
    processor = SegformerImageProcessor.from_pretrained(
        "nvidia/segformer-b5-finetuned-cityscapes-1024-1024"
    )
    segformer = SegformerForSemanticSegmentation.from_pretrained(
        "nvidia/segformer-b5-finetuned-cityscapes-1024-1024"
    )

segformer.to(device).eval()
print("segformer loaded")

## Testing mIoU on clean image

In [None]:
# Baseline evaluation: feed the unmodified validation images to every model, accumulate
# one SegmentationMetric per model and report element [1] of each metric's get().

# Presumably the ImageNet channel mean/std; not used inside this cell.
mean_standard = np.array([0.485, 0.456, 0.406],dtype = np.float32)
std_standard = np.array([0.229, 0.224, 0.225],dtype = np.float32)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
metric_pidnet_l = SegmentationMetric(config)
metric_pidnet_m = SegmentationMetric(config)
metric_pidnet_s = SegmentationMetric(config)
metric_icnet = SegmentationMetric(config)
metric_bisenetv1 = SegmentationMetric(config)
metric_bisenetv2 = SegmentationMetric(config)
metric_segformer = SegmentationMetric(config)

# SegFormer-b0 is loaded once here; it used to be re-instantiated from the hub for
# every single batch, which dominated the run time of the loop.
segformer_b0 = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-cityscapes-1024-1024"
).to(device)
segformer_b0.eval()

# PIDNet logits are resized to the label resolution before scoring.
# PIDNet-L is optional: the loading cell sets it to None when its weights are unavailable.
pidnet_models = [(pidnet_m, metric_pidnet_m), (pidnet_s, metric_pidnet_s)]
if pidnet_l is not None:
    pidnet_models.insert(0, (pidnet_l, metric_pidnet_l))

# Models whose selected output is passed to the metric without resizing.
direct_models = [
    (icnet, metric_icnet, config.test.output_index_icnet),
    (bisenetv1, metric_bisenetv1, config.test.output_index_bisenet),
    (bisenetv2, metric_bisenetv2, config.test.output_index_bisenet),
]

# Inference only: no_grad stops autograd from recording a graph for each forward pass.
with torch.no_grad():
    for batches in tqdm(val_dataloader):
        image_standard, label, _, _, idx = batches
        # Labels are left untouched at inference time.
        image_standard, label = image_standard.to(device), label.to(device)
        label_size = label.shape[-2:]

        for net, metric in pidnet_models:
            logits = net(image_standard)[config.test.output_index_pidnet]
            output = F.interpolate(logits, label_size, mode='bilinear', align_corners=True)
            metric.update(output, label)

        for net, metric, output_index in direct_models:
            metric.update(net(image_standard)[output_index], label)

        # SegFormer logits are also resized to the label resolution.
        output = F.interpolate(
            segformer_b0(image_standard).logits, label_size,
            mode='bilinear', align_corners=True
        )
        metric_segformer.update(output, label)

# Release cached GPU memory once, after the whole pass.
gc.collect()
torch.cuda.empty_cache()

# Order: PIDNet-s, PIDNet-m, PIDNet-l, BiSeNetV1, BiSeNetV2, ICNet, SegFormer.
data = [
    metric_pidnet_s.get()[1],
    metric_pidnet_m.get()[1],
    metric_pidnet_l.get()[1] if pidnet_l is not None else None,  # None: PIDNet-L not loaded
    metric_bisenetv1.get()[1],
    metric_bisenetv2.get()[1],
    metric_icnet.get()[1],
    metric_segformer.get()[1]
]
print(data)

## Loading coordinates and patch

In [None]:
# NOTE(security): pickle.load can execute arbitrary code while deserialising; only point
# these paths at files you trust.

# Per-image placement boxes, indexed by position in the validation loader.
# NOTE(review): the variable is called `sidewalk_coords` but the file holds *pole* coordinates.
with open("/kaggle/input/pole-val-coords/pole_val_coords.p", "rb") as coords_file:
    sidewalk_coords = pickle.load(coords_file)

# First element of the pickled patch file.
# NOTE(review): stored under the name/key `pidnet_s` although the file is
# `segformer_patch_base.p` — confirm which model this patch was trained against.
with open("/kaggle/input/patches-for-ablations-batch-1/Patches/segformer_patch_base.p", "rb") as patch_file:
    pidnet_s_p = pickle.load(patch_file)[0]

patches = {
    'pidnet_s':pidnet_s_p
}

## Testing on patch location

In [None]:
# Patched evaluation: paste the adversarial patch into each validation image at the box
# stored in `sidewalk_coords`, then score every model against the original labels.
# Images that have no box are skipped and do not contribute to the metrics.

# Presumably the ImageNet channel mean/std; not used inside this cell.
mean_standard = np.array([0.485, 0.456, 0.406],dtype = np.float32)
std_standard = np.array([0.229, 0.224, 0.225],dtype = np.float32)
# Centre-placement box for a 200x200 patch in a 2048x1024 frame; not used inside this cell.
x = (2048 - 200)//2
y = (1024 - 200)//2
x_end = x + 200
y_end = y + 200
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
metric_pidnet_l = SegmentationMetric(config)
metric_pidnet_m = SegmentationMetric(config)
metric_pidnet_s = SegmentationMetric(config)
metric_icnet = SegmentationMetric(config)
metric_bisenetv1 = SegmentationMetric(config)
metric_bisenetv2 = SegmentationMetric(config)
metric_segformer = SegmentationMetric(config)
all_metrics = [metric_pidnet_l, metric_pidnet_m, metric_pidnet_s, metric_icnet,
               metric_bisenetv1, metric_bisenetv2, metric_segformer]

# SegFormer-b0 is loaded once here; it used to be re-instantiated from the hub for
# every single batch.
segformer_b0 = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-cityscapes-1024-1024"
).to(device)
segformer_b0.eval()

# PIDNet logits are resized to the label resolution before scoring.
# PIDNet-L is optional: the loading cell sets it to None when its weights are unavailable.
pidnet_models = [(pidnet_m, metric_pidnet_m), (pidnet_s, metric_pidnet_s)]
if pidnet_l is not None:
    pidnet_models.insert(0, (pidnet_l, metric_pidnet_l))

# Models whose selected output is passed to the metric without resizing.
direct_models = [
    (icnet, metric_icnet, config.test.output_index_icnet),
    (bisenetv1, metric_bisenetv1, config.test.output_index_bisenet),
    (bisenetv2, metric_bisenetv2, config.test.output_index_bisenet),
]

data = {}
for pat in ['pidnet_s']:
    patch = patches[pat]
    # Start every patch from empty accumulators.
    for metric in all_metrics:
        metric.reset()
    print(f'Computing for: {pat}')

    # Inference only: no_grad stops autograd from recording a graph for each forward pass.
    with torch.no_grad():
        for step, batches in enumerate(tqdm(val_dataloader)):
            coords = sidewalk_coords[step]
            if len(coords) == 0:
                continue  # no placement box for this image
            # Plain ints are enough for slicing; the shared coordinate list is left unmodified.
            x1, y1, x2, y2 = (int(v) for v in coords)

            image_standard, label, _, _, idx = batches
            image_standard, label = image_standard.to(device), label.to(device)
            # Only the image is patched; the label is scored as-is.
            image_standard[:, :, y1:y2, x1:x2] = patch
            label_size = label.shape[-2:]

            for net, metric in pidnet_models:
                logits = net(image_standard)[config.test.output_index_pidnet]
                output = F.interpolate(logits, label_size, mode='bilinear', align_corners=True)
                metric.update(output, label)

            for net, metric, output_index in direct_models:
                metric.update(net(image_standard)[output_index], label)

            # SegFormer logits are also resized to the label resolution.
            output = F.interpolate(
                segformer_b0(image_standard).logits, label_size,
                mode='bilinear', align_corners=True
            )
            metric_segformer.update(output, label)

    # Release cached GPU memory once per patch.
    gc.collect()
    torch.cuda.empty_cache()

    # Order: PIDNet-s, PIDNet-m, PIDNet-l, BiSeNetV1, BiSeNetV2, ICNet, SegFormer.
    data[pat] = [
        metric_pidnet_s.get()[1],
        metric_pidnet_m.get()[1],
        metric_pidnet_l.get()[1] if pidnet_l is not None else None,  # None: PIDNet-L not loaded
        metric_bisenetv1.get()[1],
        metric_bisenetv2.get()[1],
        metric_icnet.get()[1],
        metric_segformer.get()[1]
    ]
    print(data[pat])

## Testing by placing in center

In [None]:
# Patched evaluation with the patch pasted at the centre of every validation image.
# ICNet is not evaluated in this cell, so it is absent from the reported list.

# Presumably the ImageNet channel mean/std; not used inside this cell.
mean_standard = np.array([0.485, 0.456, 0.406],dtype = np.float32)
std_standard = np.array([0.229, 0.224, 0.225],dtype = np.float32)
# Top-left / bottom-right corners that centre a 200x200 box in a 2048x1024 frame.
# NOTE(review): assumes the patch is 200x200 and the images are 2048 wide by 1024 high — confirm.
x = (2048 - 200)//2
y = (1024 - 200)//2
x_end = x + 200
y_end = y + 200
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
metric_pidnet_l = SegmentationMetric(config)
metric_pidnet_m = SegmentationMetric(config)
metric_pidnet_s = SegmentationMetric(config)
metric_icnet = SegmentationMetric(config)
metric_bisenetv1 = SegmentationMetric(config)
metric_bisenetv2 = SegmentationMetric(config)
metric_segformer = SegmentationMetric(config)
all_metrics = [metric_pidnet_l, metric_pidnet_m, metric_pidnet_s, metric_icnet,
               metric_bisenetv1, metric_bisenetv2, metric_segformer]

# SegFormer-b0 is loaded once here; it used to be re-instantiated from the hub for
# every single batch.
segformer_b0 = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-cityscapes-1024-1024"
).to(device)
segformer_b0.eval()

# PIDNet logits are resized to the label resolution before scoring.
# PIDNet-L is optional: the loading cell sets it to None when its weights are unavailable.
pidnet_models = [(pidnet_m, metric_pidnet_m), (pidnet_s, metric_pidnet_s)]
if pidnet_l is not None:
    pidnet_models.insert(0, (pidnet_l, metric_pidnet_l))

# Models whose selected output is passed to the metric without resizing.
direct_models = [
    (bisenetv1, metric_bisenetv1, config.test.output_index_bisenet),
    (bisenetv2, metric_bisenetv2, config.test.output_index_bisenet),
]

data = {}
for pat in ['pidnet_s']:
    patch = patches[pat]
    # Start every patch from empty accumulators.
    for metric in all_metrics:
        metric.reset()
    print(f'Computing for: {pat}')

    # Inference only: no_grad stops autograd from recording a graph for each forward pass.
    with torch.no_grad():
        for batches in tqdm(val_dataloader):
            image_standard, label, _, _, idx = batches
            image_standard, label = image_standard.to(device), label.to(device)
            # Only the image is patched; the label is scored as-is.
            image_standard[:, :, y:y_end, x:x_end] = patch
            label_size = label.shape[-2:]

            for net, metric in pidnet_models:
                logits = net(image_standard)[config.test.output_index_pidnet]
                output = F.interpolate(logits, label_size, mode='bilinear', align_corners=True)
                metric.update(output, label)

            for net, metric, output_index in direct_models:
                metric.update(net(image_standard)[output_index], label)

            # SegFormer logits are also resized to the label resolution.
            output = F.interpolate(
                segformer_b0(image_standard).logits, label_size,
                mode='bilinear', align_corners=True
            )
            metric_segformer.update(output, label)

    # Release cached GPU memory once per patch.
    gc.collect()
    torch.cuda.empty_cache()

    # Order: PIDNet-s, PIDNet-m, PIDNet-l, BiSeNetV1, BiSeNetV2, SegFormer.
    data[pat] = [
        metric_pidnet_s.get()[1],
        metric_pidnet_m.get()[1],
        metric_pidnet_l.get()[1] if pidnet_l is not None else None,  # None: PIDNet-L not loaded
        metric_bisenetv1.get()[1],
        metric_bisenetv2.get()[1],
        metric_segformer.get()[1]
    ]
    print(data[pat])

## Testing patch on coordinate placement

In [None]:
# Patched evaluation: paste the adversarial patch into each validation image at the box
# stored in `sidewalk_coords`, then score every model against the original labels.
# Images that have no box are skipped and do not contribute to the metrics.

# Presumably the ImageNet channel mean/std; not used inside this cell.
mean_standard = np.array([0.485, 0.456, 0.406],dtype = np.float32)
std_standard = np.array([0.229, 0.224, 0.225],dtype = np.float32)
# Centre-placement box for a 200x200 patch in a 2048x1024 frame; not used inside this cell.
x = (2048 - 200)//2
y = (1024 - 200)//2
x_end = x + 200
y_end = y + 200
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
metric_pidnet_l = SegmentationMetric(config)
metric_pidnet_m = SegmentationMetric(config)
metric_pidnet_s = SegmentationMetric(config)
metric_icnet = SegmentationMetric(config)
metric_bisenetv1 = SegmentationMetric(config)
metric_bisenetv2 = SegmentationMetric(config)
metric_segformer = SegmentationMetric(config)
all_metrics = [metric_pidnet_l, metric_pidnet_m, metric_pidnet_s, metric_icnet,
               metric_bisenetv1, metric_bisenetv2, metric_segformer]

# SegFormer-b0 is loaded once here; it used to be re-instantiated from the hub for
# every single batch.
segformer_b0 = SegformerForSemanticSegmentation.from_pretrained(
    "nvidia/segformer-b0-finetuned-cityscapes-1024-1024"
).to(device)
segformer_b0.eval()

# PIDNet logits are resized to the label resolution before scoring.
# PIDNet-L is optional: the loading cell sets it to None when its weights are unavailable.
pidnet_models = [(pidnet_m, metric_pidnet_m), (pidnet_s, metric_pidnet_s)]
if pidnet_l is not None:
    pidnet_models.insert(0, (pidnet_l, metric_pidnet_l))

# Models whose selected output is passed to the metric without resizing.
direct_models = [
    (icnet, metric_icnet, config.test.output_index_icnet),
    (bisenetv1, metric_bisenetv1, config.test.output_index_bisenet),
    (bisenetv2, metric_bisenetv2, config.test.output_index_bisenet),
]

data = {}
for pat in ['pidnet_s']:
    patch = patches[pat]
    # Start every patch from empty accumulators.
    for metric in all_metrics:
        metric.reset()
    print(f'Computing for: {pat}')

    # Inference only: no_grad stops autograd from recording a graph for each forward pass.
    with torch.no_grad():
        # FIX: `tqdm` is the progress-bar class itself (`from tqdm import tqdm`), so the
        # former `tqdm.tqdm(...)` call raised AttributeError before the first batch.
        for step, batches in enumerate(tqdm(val_dataloader)):
            coords = sidewalk_coords[step]
            if len(coords) == 0:
                continue  # no placement box for this image
            # Plain ints are enough for slicing; the shared coordinate list is left unmodified.
            x1, y1, x2, y2 = (int(v) for v in coords)

            image_standard, label, _, _, idx = batches
            image_standard, label = image_standard.to(device), label.to(device)
            # Only the image is patched; the label is scored as-is.
            image_standard[:, :, y1:y2, x1:x2] = patch
            label_size = label.shape[-2:]

            for net, metric in pidnet_models:
                logits = net(image_standard)[config.test.output_index_pidnet]
                output = F.interpolate(logits, label_size, mode='bilinear', align_corners=True)
                metric.update(output, label)

            for net, metric, output_index in direct_models:
                metric.update(net(image_standard)[output_index], label)

            # SegFormer logits are also resized to the label resolution.
            output = F.interpolate(
                segformer_b0(image_standard).logits, label_size,
                mode='bilinear', align_corners=True
            )
            metric_segformer.update(output, label)

    # Release cached GPU memory once per patch.
    gc.collect()
    torch.cuda.empty_cache()

    # Order: PIDNet-s, PIDNet-m, PIDNet-l, BiSeNetV1, BiSeNetV2, SegFormer.
    # NOTE(review): ICNet is evaluated above but its score is not reported here (it is
    # still available via metric_icnet.get()) — confirm whether it should be listed.
    data[pat] = [
        metric_pidnet_s.get()[1],
        metric_pidnet_m.get()[1],
        metric_pidnet_l.get()[1] if pidnet_l is not None else None,  # None: PIDNet-L not loaded
        metric_bisenetv1.get()[1],
        metric_bisenetv2.get()[1],
        metric_segformer.get()[1]
    ]
    print(data[pat])