# CLIP + SAM
- Environment Setting
- CLIP + SAM Evaluation on People Pose
- Hyperparameter Tuning (SAM automatic mask generator settings)

## Environment Setting

### Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive; the SAM checkpoint and the dataset used
# by later cells are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Install Packages

In [None]:
!pip install torch opencv-python Pillow
!pip install git+https://github.com/openai/CLIP.git
!pip install git+https://github.com/facebookresearch/segment-anything.git

Collecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-1sp50ov5
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-1sp50ov5
  Resolved https://github.com/openai/CLIP.git to commit a1d071733d7111c9c014f024669f959182114e33
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ftfy (from clip==1.0)
  Downloading ftfy-6.1.3-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.4/53.4 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
Collecting wcwidth<0.3.0,>=0.2.12 (from ftfy->clip==1.0)
  Downloading wcwidth-0.2.12-py2.py3-none-any.whl (34 kB)
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l[?25hdone
  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369500 sha256=21d5ec4a3b13a9de2e9cbd8855ca3257161448059dce479fd498bff4073ca161
  Stored in directory: /tmp/pip-ephem-w

Collecting git+https://github.com/facebookresearch/segment-anything.git
  Cloning https://github.com/facebookresearch/segment-anything.git to /tmp/pip-req-build-escjqplo
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/segment-anything.git /tmp/pip-req-build-escjqplo
  Resolved https://github.com/facebookresearch/segment-anything.git to commit 6fdee8f2727f4506cfbbe553e23b895e27956588
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: segment-anything
  Building wheel for segment-anything (setup.py) ... [?25l[?25hdone
  Created wheel for segment-anything: filename=segment_anything-1.0-py3-none-any.whl size=36588 sha256=34082fd11bd97918f2e2768ed43565ca8bd26ec5d73df9a36ae9d6f9833b09cf
  Stored in directory: /tmp/pip-ephem-wheel-cache-2sa_2x42/wheels/10/cf/59/9ccb2f0a1bcc81d4fbd0e501680b5d088d690c6cfbc02dc99d
Successfully built segment-anything
Installing collected packages: segment-anything
Successfully 

### Import Packages

In [None]:
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import cv2
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

from torch.jit import Error

# numpy metrics
import os
from tqdm import tqdm

import cv2
from segment_anything import build_sam, SamAutomaticMaskGenerator
from PIL import Image, ImageDraw
import clip

import sys
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor

### Image Processing, Data Processing

In [None]:
# Convert a mask's bounding box from XYWH to XYXY format
def convert_box_xywh_to_xyxy(box):
  """Turn an (x, y, width, height) box into [x1, y1, x2, y2] corner form.

  Boxes whose width or height is zero are still converted and returned, but
  their width, height and converted corners are printed so they can be spotted.
  """
  left, top, width, height = box[0], box[1], box[2], box[3]
  corners = [left, top, left + width, top + height]
  if width == 0 or height == 0:
    # Degenerate (empty) box: report it, the caller decides what to do with it.
    print(width, height, corners)
  return corners

# Keep only the segmented part of the given image; everything else is black
def segment_image(image, segmentation_mask):
    """Return an RGB copy of `image` in which only the masked pixels survive.

    `image` is a PIL image and `segmentation_mask` an array used to index its
    pixel array (True = keep). Pixels outside the mask come out as (0, 0, 0).
    The result has the same size as `image`.
    """
    # 8-bit alpha channel for the paste: 255 inside the mask, 0 elsewhere.
    alpha = np.zeros_like(segmentation_mask, dtype=np.uint8)
    alpha[segmentation_mask] = 255
    alpha_image = Image.fromarray(alpha, mode='L')

    # Copy the masked pixels onto an all-zero array of the same shape/dtype.
    pixels = np.array(image)
    kept_pixels = np.zeros_like(pixels)
    kept_pixels[segmentation_mask] = pixels[segmentation_mask]

    # Paste the kept pixels through the alpha channel onto a black RGB canvas.
    canvas = Image.new("RGB", image.size, (0, 0, 0))
    canvas.paste(Image.fromarray(kept_pixels), mask=alpha_image)
    return canvas

def gt_to_anns_of_label_mask(mask_gt):
  """Split a ground-truth label image into one annotation per non-zero label.

  `mask_gt` is an array whose last axis is collapsed with `np.all`, i.e. a
  pixel belongs to a label only when every entry along that axis equals it.
  Label 0 (background) is skipped. Each annotation is a dict with the keys
  'area' (number of pixels), 'segmentation' (boolean mask) and 'label'.
  Annotations are returned in ascending label order.
  """
  foreground_labels = [lbl for lbl in np.unique(mask_gt) if lbl != 0]
  label_masks = [np.all(mask_gt == lbl, axis=-1) for lbl in foreground_labels]
  return [
      {'area': np.sum(label_mask), 'segmentation': label_mask, 'label': lbl}
      for lbl, label_mask in zip(foreground_labels, label_masks)
  ]

### Retrieve Similarity between Image and Text

In [None]:
@torch.no_grad()
def retriev(image_features: torch.Tensor, search_text: str) -> torch.Tensor:
    """Score every image embedding against one text prompt with CLIP.

    Args:
        image_features: 2-D tensor of image embeddings, one row per candidate
            crop (the caller obtains it from `model.encode_image`). The tensor
            is left untouched.
        search_text: prompt that is tokenized and embedded with the CLIP text
            encoder.

    Returns:
        1-D tensor with one entry per row of `image_features`: the softmax,
        taken across the candidates, of 100 * cosine similarity between each
        image embedding and the text embedding.
    """
    tokenized_text = clip.tokenize([search_text]).to(device)
    text_features = model.encode_text(tokenized_text)
    # Normalise out of place. The previous in-place `/=` silently rewrote the
    # caller's tensor, which is reused for every label of the same image.
    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)
    probs = 100. * image_features @ text_features.T
    # Softmax over dim 0: the candidates compete with each other for the prompt.
    return probs[:, 0].softmax(dim=0)

### CUDA

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Load CLIP

In [None]:
# Load the CLIP "ViT-B/32" model onto `device`. `model` is used below for
# encode_image / encode_text; `preprocess` is applied to every PIL crop before
# it is stacked and encoded.
model, preprocess = clip.load("ViT-B/32", device=device)

100%|███████████████████████████████████████| 338M/338M [00:03<00:00, 92.8MiB/s]


### Load SAM (Default Params)

In [None]:
# SAM checkpoint file on the mounted Drive (the Drive must be mounted first).
sam_checkpoint = "/content/drive/MyDrive/CSCI567/sam_vit_h_4b8939.pth"
# Key into sam_model_registry selecting the model builder; presumably it has to
# match the architecture of the checkpoint above — verify when swapping files.
model_type = "vit_h"

# Build the SAM model from the checkpoint, then move it to the selected device.
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam = sam.to(device)

In [None]:
# Baseline generator: only the model is passed, so every other
# SamAutomaticMaskGenerator option keeps its library default.
mask_generator_default = SamAutomaticMaskGenerator(sam)

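### Load SAM (Best Params)

*No generator is defined under this heading yet; the alternative configurations that were tried are built in the hyperparameter sections further down.*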

### Class Labels

In [None]:
# Class names indexed by label id: position i is the name of label id i
# (id 0 is the background).
LABELS = [
    "Background",
    "Hat",
    "Hair",
    "Glove",
    "Sunglasses",
    "UpperClothes",
    "Dress",
    "Coat",
    "Socks",
    "Pants",
    "Jumpsuits",
    "Scarf",
    "Skirt",
    "Face",
    "Left-arm",
    "Right-arm",
    "Left-leg",
    "Right-leg",
    "Left-shoe",
    "Right-shoe",
]

## CLIP + SAM Evaluation on People Pose

In [None]:
def compute_pix_acc(predicted, target):
    """Return the fraction of positions at which two 2-D arrays are equal.

    Both arrays must have the same, two-dimensional shape (checked with
    assertions).
    """
    shapes_match = predicted.shape == target.shape
    assert shapes_match
    assert len(predicted.shape) == 2
    agreement = predicted == target
    return agreement.mean()

def compute_IOU(predicted, target):
    """Return intersection-over-union of two 2-D masks of identical shape.

    Entries are interpreted by truthiness (non-zero = inside the mask). The
    union must be non-empty; this is enforced with an assertion.
    """
    assert predicted.shape == target.shape
    assert len(predicted.shape) == 2
    overlap = np.logical_and(target, predicted).sum()
    combined = np.logical_or(target, predicted).sum()
    assert combined > 0
    return overlap / combined
def compute_metric(name, masks, label):
    """Compute per-class IoU and pixel accuracy for one image.

    name:  data id, stored under the "name" key of both result dicts
    masks: { label_id: numpy.ndarray(shape=(H, W)) } predicted mask per label
    label: np.ndarray(shape=(H, W)) ground-truth label ids (indices into LABELS)

    Returns (iou_metric, pix_acc_metric): two dicts keyed by "name" plus every
    class name in LABELS. A class that does not occur in `label` gets NaN; a
    class that occurs but has no entry in `masks` is scored against an
    all-zero prediction.
    """
    iou_metric = {"name": name}
    pix_acc_metric = {"name": name}
    no_prediction = np.zeros_like(label)
    for class_id, class_name in enumerate(LABELS):
        target = (label == class_id)
        if target.sum() != 0:
            predicted = masks.get(class_id, no_prediction)
            iou_metric[class_name] = compute_IOU(predicted, target)
            pix_acc_metric[class_name] = compute_pix_acc(predicted, target)
            continue
        # Class absent from the ground truth: record NaN so that pandas
        # leaves it out of .count() and .mean().
        iou_metric[class_name] = np.nan
        pix_acc_metric[class_name] = np.nan

    return iou_metric, pix_acc_metric

In [None]:
def evaluate(sam_generator, data_cnt=100,
             root="/content/drive/MyDrive/CSCI567/segment-anything/datasets/people_poses/",
             prompt="The object of "):
  """Evaluate SAM masks + CLIP label retrieval on the validation split.

  For every image: SAM proposes masks, each usable mask is cut out and
  embedded with CLIP, and for every label present in the ground truth the
  mask whose crop best matches `prompt + <label name>` becomes the prediction
  for that label. Per-class IoU and pixel accuracy are then computed.

  Args:
    sam_generator: object with a `generate(rgb_image)` method returning a list
      of dicts that contain at least 'bbox' (XYWH) and 'segmentation'.
    data_cnt: number of ids from `val_id.txt` to process (None = all).
    root: dataset directory holding `val_id.txt`, `val_images/` and
      `val_segmentations/`.
    prompt: text prefix put in front of each class name.

  Returns:
    (miou_table, pix_acc_table): two lists with one dict per evaluated image,
    as produced by `compute_metric`. If an unexpected error interrupts the
    loop, the error is reported and the rows gathered so far are returned
    (the lists may then be shorter than requested, or empty).
  """
  with open(os.path.join(root, "val_id.txt"), 'r') as lf:
    # Ignore blank lines so they do not turn into bogus file names.
    data_list = [line.strip() for line in lf if line.strip()]

  miou_table = []
  pix_acc_table = []
  try:
    for data_name in tqdm(data_list[:data_cnt]):
      img_path = os.path.join(root, 'val_images', data_name + '.jpg')
      seg_path = os.path.join(root, 'val_segmentations', data_name + '.png')

      # Read image and ground-truth mask; skip ids whose files are unreadable.
      image = cv2.imread(img_path)
      if image is None:
        print("\nimage is None", data_name)
        continue
      image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
      mask_gt = cv2.imread(seg_path)
      if mask_gt is None:
        print("\nmask_gt is None", data_name)
        continue

      # Generate masks for all objects with SAM.
      masks = sam_generator.generate(image)

      # Cut the masks out of the very array SAM saw. Re-opening the file with
      # PIL can give a differently oriented image (cv2.imread applies EXIF
      # rotation, PIL does not) and leaves a file handle open.
      kept_masks, crops = _segment_crops(Image.fromarray(image), masks)

      predict_masks = {}
      if crops:
        # Inference only: do not build an autograd graph for the image batch.
        with torch.no_grad():
          image_features = model.encode_image(
              torch.stack([preprocess(crop).to(device) for crop in crops]))

        for ann in gt_to_anns_of_label_mask(mask_gt):
          label_id = int(ann['label'])
          if label_id >= len(LABELS):
            # Unknown id: it has no class name and compute_metric never
            # scores it, so there is nothing to predict.
            continue
          scores = retriev(image_features, prompt + LABELS[label_id])
          best = int(scores.argmax().item())
          # `best` indexes the crop batch, so look it up in the list that is
          # aligned with that batch (not in the unfiltered SAM output).
          predict_masks[label_id] = kept_masks[best]["segmentation"]
      else:
        # No usable proposal: every present class is scored as "no prediction".
        print("\nno usable SAM mask", data_name)

      miou, pix_acc = compute_metric(data_name, predict_masks, mask_gt[:, :, 0])
      miou_table.append(miou)
      pix_acc_table.append(pix_acc)
  except Exception as e:
    # Keep the (possibly long-running) partial results instead of losing them:
    # report the failure and fall through to the normal return.
    print(f"\nevaluation aborted after {len(miou_table)} image(s): {type(e).__name__}: {e}")

  return miou_table, pix_acc_table


def _segment_crops(image_pil, masks):
  """Cut every non-degenerate mask out of `image_pil`.

  Returns two parallel lists `(kept_masks, crops)`: `crops[i]` is the
  masked-out image cropped to the bounding box of `kept_masks[i]`. Masks whose
  bounding box has zero width or height are left out of both lists.
  """
  kept_masks, crops = [], []
  for mask in masks:
    crop_box = convert_box_xywh_to_xyxy(mask['bbox'])
    if crop_box[0] == crop_box[2] or crop_box[1] == crop_box[3]:
      continue
    kept_masks.append(mask)
    crops.append(segment_image(image_pil, mask["segmentation"]).crop(crop_box))
  return kept_masks, crops

def export_csv(miou_table, pix_acc_table, miou_csv_name="random_miou.csv", pix_acc_csv_name="random_pix_acc.csv", export = True,
               out_dir='/content/drive/MyDrive/CSCI567/segment-anything/notebooks/'):
  """Aggregate per-image metric rows into per-class means, optionally saving them.

  Args:
    miou_table: list of per-image IoU dicts (each has a 'name' key plus one
      key per class), as returned by `evaluate`.
    pix_acc_table: list of per-image pixel-accuracy dicts of the same layout.
    miou_csv_name: file name for the IoU table.
    pix_acc_csv_name: file name for the pixel-accuracy table.
    export: when True, both per-image tables are written as CSV into `out_dir`.
    out_dir: directory the CSV files are written to.

  Returns:
    (miou_mean, pix_acc_mean): two pandas Series indexed by class name holding
    the column means; NaN entries are skipped by pandas, and a class that is
    NaN for every image stays NaN.

  Raises:
    ValueError: if either table is empty (there is nothing to aggregate).
  """
  if len(miou_table) == 0 or len(pix_acc_table) == 0:
    raise ValueError("export_csv needs at least one evaluated image; got an empty metric table")

  # Column order follows the key order of the first row ('name' first).
  miou_df = pd.DataFrame(miou_table, columns=list(miou_table[0].keys())).set_index('name')
  pix_acc_df = pd.DataFrame(pix_acc_table, columns=list(pix_acc_table[0].keys())).set_index('name')

  if export:
    miou_df.to_csv(os.path.join(out_dir, miou_csv_name))
    pix_acc_df.to_csv(os.path.join(out_dir, pix_acc_csv_name))

  return miou_df.mean(), pix_acc_df.mean()

In [None]:
# Reference only (intentionally commented out): explicit spelling of the
# settings the hyperparameter runs below are varied against. The generator that
# is actually used, `mask_generator_default`, is built earlier with
# `SamAutomaticMaskGenerator(sam)`; these values are presumably the library
# defaults — confirm against the installed segment-anything version.
# mask_generator_default = SamAutomaticMaskGenerator(
#     sam,
#     points_per_side = 32,
#     points_per_batch = 64,
#     pred_iou_thresh = 0.88,
#     stability_score_thresh = 0.95,
#     stability_score_offset = 1.0,
#     box_nms_thresh = 0.7,
#     crop_n_layers = 0,
#     crop_nms_thresh = 0.7,
#     crop_overlap_ratio = 512 / 1500,
#     crop_n_points_downscale_factor = 1,
#     point_grids = None,
#     min_mask_region_area = 0,
#     output_mode = "binary_mask"
#   )

### Evaluate on Default Sam Generator

In [None]:
# Baseline run: evaluate's default data_cnt=100 limits this to the first 100
# ids listed in val_id.txt.
miou_table, pix_acc_table = evaluate(mask_generator_default)

  3%|▎         | 3/100 [00:23<12:30,  7.74s/it]

2 0 [20, 98, 22, 98]


 33%|███▎      | 33/100 [04:08<08:13,  7.37s/it]

0 9 [60, 178, 60, 187]


 56%|█████▌    | 56/100 [06:54<05:04,  6.93s/it]

6 0 [29, 116, 35, 116]
6 0 [29, 116, 35, 116]
6 0 [29, 116, 35, 116]


 77%|███████▋  | 77/100 [09:27<02:48,  7.31s/it]

0 7 [68, 176, 68, 183]
0 7 [68, 176, 68, 183]


100%|██████████| 100/100 [12:12<00:00,  7.32s/it]


In [None]:
# Per-class means of the baseline run. `export` is left at its default (True),
# so both per-image tables are also written as CSV files.
miou_mean_default_100, pixacc_mean_default_100 = export_csv(miou_table, pix_acc_table, miou_csv_name="random_miou_default_100.csv", pix_acc_csv_name="random_pix_acc_default_100.csv")
print('miou per class\n', miou_mean_default_100)


miou per class
 Background      0.000000
Hat             0.317580
Hair            0.264699
Glove           0.091455
Sunglasses      0.001587
UpperClothes    0.203190
Dress           0.000000
Coat            0.344618
Socks           0.076216
Pants           0.371332
Jumpsuits       0.009348
Scarf                NaN
Skirt           0.300369
Face            0.159994
Left-arm        0.172775
Right-arm       0.109500
Left-leg        0.222619
Right-leg       0.110006
Left-shoe       0.137264
Right-shoe      0.104864
dtype: float64


### Evaluate on Other SAM Configurations

#### 1: points_per_side = 16 (baseline: 32)

In [None]:
# Configuration 1: sparser prompt grid. Only points_per_side differs from the
# reference settings listed in the commented-out baseline cell.
mask_generator_1 = SamAutomaticMaskGenerator(
    sam,
    points_per_side = 16,  # changed (reference: 32)
    points_per_batch = 64,
    pred_iou_thresh = 0.88,
    stability_score_thresh = 0.95,
    stability_score_offset = 1.0,
    box_nms_thresh = 0.7,
    crop_n_layers = 0,
    crop_nms_thresh = 0.7,
    crop_overlap_ratio = 512 / 1500,
    crop_n_points_downscale_factor = 1,
    point_grids = None,
    min_mask_region_area = 0,
    output_mode = "binary_mask"
  )

In [None]:
# Same 100-image evaluation as the baseline, using configuration 1.
miou_table_1, pix_acc_table_1 = evaluate(mask_generator_1)

 40%|████      | 40/100 [02:11<03:12,  3.20s/it]

3 0 [52, 144, 55, 144]


 56%|█████▌    | 56/100 [03:02<02:17,  3.11s/it]

0 9 [55, 75, 55, 84]
0 10 [55, 74, 55, 84]


100%|██████████| 100/100 [05:22<00:00,  3.23s/it]


In [None]:
# Per-class means for configuration 1; export=False, so no CSV is written.
miou_mean_1_100, pixacc_mean_1_100 = export_csv(miou_table_1, pix_acc_table_1, miou_csv_name="random_miou_1_100.csv", pix_acc_csv_name="random_pix_acc_1_100.csv", export = False)
# print('miou per class\n', miou_mean_1_100)
# Positive values mean configuration 1 scored higher than the baseline run.
print("miou per class difference\n", miou_mean_1_100-miou_mean_default_100)


miou per class difference
 Background      0.000000
Hat            -0.031417
Hair           -0.016496
Glove          -0.070638
Sunglasses      0.062702
UpperClothes    0.020409
Dress           0.000000
Coat            0.008805
Socks           0.018756
Pants          -0.034392
Jumpsuits       0.000974
Scarf                NaN
Skirt           0.045313
Face            0.010937
Left-arm       -0.011097
Right-arm       0.026943
Left-leg       -0.012151
Right-leg       0.015227
Left-shoe       0.008581
Right-shoe      0.000299
dtype: float64


#### 2: points_per_side = 64 (baseline: 32)

In [None]:
# Configuration 2: denser prompt grid. Only points_per_side differs from the
# reference settings listed in the commented-out baseline cell.
mask_generator_2 = SamAutomaticMaskGenerator(
    sam,
    points_per_side = 64,  # changed (reference: 32)
    points_per_batch = 64,
    pred_iou_thresh = 0.88,
    stability_score_thresh = 0.95,
    stability_score_offset = 1.0,
    box_nms_thresh = 0.7,
    crop_n_layers = 0,
    crop_nms_thresh = 0.7,
    crop_overlap_ratio = 512 / 1500,
    crop_n_points_downscale_factor = 1,
    point_grids = None,
    min_mask_region_area = 0,
    output_mode = "binary_mask"
  )

In [None]:
# Same 100-image evaluation as the baseline, using configuration 2.
miou_table_2, pix_acc_table_2 = evaluate(mask_generator_2)

  0%|          | 0/100 [00:00<?, ?it/s]

6 0 [44, 248, 50, 248]


  3%|▎         | 3/100 [00:59<32:00, 19.80s/it]

2 0 [21, 79, 23, 79]
1 0 [21, 79, 22, 79]
2 0 [21, 79, 23, 79]


  8%|▊         | 8/100 [02:40<31:43, 20.69s/it]

0 2 [50, 165, 50, 167]
0 2 [50, 165, 50, 167]
0 2 [50, 165, 50, 167]
0 2 [50, 165, 50, 167]


  9%|▉         | 9/100 [02:58<30:09, 19.89s/it]

7 0 [81, 0, 88, 0]


 33%|███▎      | 33/100 [10:57<21:59, 19.69s/it]

0 9 [60, 178, 60, 187]
0 9 [60, 178, 60, 187]
0 9 [60, 178, 60, 187]
0 3 [141, 164, 141, 167]
0 3 [141, 164, 141, 167]


 40%|████      | 40/100 [13:11<18:54, 18.90s/it]

0.0 1.0 [39.0, 136.0, 39.0, 137.0]


 53%|█████▎    | 53/100 [17:18<14:51, 18.96s/it]

0 0 [9, 0, 9, 0]
0 0 [9, 0, 9, 0]


 55%|█████▌    | 55/100 [17:52<13:32, 18.04s/it]

5.0 0.0 [103.0, 8.0, 108.0, 8.0]
5.0 0.0 [103.0, 8.0, 108.0, 8.0]
5.0 0.0 [103.0, 8.0, 108.0, 8.0]


 56%|█████▌    | 56/100 [18:10<13:09, 17.94s/it]

0 9 [55, 75, 55, 84]
0 9 [55, 75, 55, 84]
0 9 [55, 75, 55, 84]
0 9 [55, 75, 55, 84]
0 10 [55, 74, 55, 84]
0 10 [55, 74, 55, 84]
0 9 [55, 75, 55, 84]
0 9 [55, 75, 55, 84]
0 10 [55, 74, 55, 84]


 58%|█████▊    | 58/100 [18:49<13:09, 18.79s/it]

2 0 [75, 180, 77, 180]
1 0 [15, 203, 16, 203]
0 2 [99, 117, 99, 119]


 59%|█████▉    | 59/100 [19:09<13:05, 19.15s/it]

0 12 [33, 51, 33, 63]
0 12 [33, 51, 33, 63]
0 3 [29, 59, 29, 62]
0 3 [29, 59, 29, 62]
0 3 [29, 59, 29, 62]


 70%|███████   | 70/100 [22:39<09:29, 19.00s/it]

2 0 [125, 61, 127, 61]
2 0 [125, 61, 127, 61]
3 0 [133, 61, 136, 61]
2 0 [138, 37, 140, 37]
3 0 [133, 61, 136, 61]
0 5 [94, 10, 94, 15]
0 5 [94, 10, 94, 15]
2 0 [138, 37, 140, 37]


 75%|███████▌  | 75/100 [24:15<07:58, 19.12s/it]

0 4 [0, 15, 0, 19]


 77%|███████▋  | 77/100 [24:54<07:23, 19.28s/it]

0 8 [123, 215, 123, 223]
0 8 [123, 215, 123, 223]
0 7 [68, 176, 68, 183]
0 7 [68, 176, 68, 183]
0 7 [73, 136, 73, 143]
0 7 [73, 136, 73, 143]
0 7 [73, 136, 73, 143]


 96%|█████████▌| 96/100 [30:54<01:17, 19.31s/it]

2 0 [100, 305, 102, 305]
2 0 [100, 305, 102, 305]


100%|██████████| 100/100 [32:08<00:00, 19.28s/it]


In [None]:
# Per-class means for configuration 2; export=False, so no CSV is written.
miou_mean_2_100, pixacc_mean_2_100 = export_csv(miou_table_2, pix_acc_table_2, miou_csv_name="random_miou_2_100.csv", pix_acc_csv_name="random_pix_acc_2_100.csv", export = False)
# print('miou per class\n', miou_mean_2_100)
# Positive values mean configuration 2 scored higher than the baseline run.
print("miou per class difference\n", miou_mean_2_100-miou_mean_default_100)


miou per class difference
 Background      0.000000
Hat             0.011985
Hair           -0.005930
Glove          -0.059620
Sunglasses      0.023174
UpperClothes   -0.002102
Dress           0.000000
Coat           -0.029023
Socks           0.000088
Pants          -0.043421
Jumpsuits       0.000091
Scarf                NaN
Skirt           0.046035
Face           -0.031783
Left-arm        0.000494
Right-arm       0.010561
Left-leg        0.000318
Right-leg       0.026868
Left-shoe       0.019276
Right-shoe     -0.033610
dtype: float64


#### 3: pred_iou_thresh = 0.95, box_nms_thresh = crop_nms_thresh = 0.9

In [None]:
# Configuration 3: three thresholds differ from the reference settings listed
# in the commented-out baseline cell (marked below).
mask_generator_3 = SamAutomaticMaskGenerator(
    sam,
    points_per_side = 32,
    points_per_batch = 64,
    pred_iou_thresh = 0.95,  # changed (reference: 0.88)
    stability_score_thresh = 0.95,
    stability_score_offset = 1.0,
    box_nms_thresh = 0.9,  # changed (reference: 0.7)
    crop_n_layers = 0,
    crop_nms_thresh = 0.9,  # changed (reference: 0.7)
    crop_overlap_ratio = 512 / 1500,
    crop_n_points_downscale_factor = 1,
    point_grids = None,
    min_mask_region_area = 0,
    output_mode = "binary_mask"
  )

In [None]:
# Same 100-image evaluation as the baseline, using configuration 3.
miou_table_3, pix_acc_table_3 = evaluate(mask_generator_3)

100%|██████████| 100/100 [15:17<00:00,  9.17s/it]


In [None]:
# Per-class means for configuration 3; export=False, so no CSV is written.
miou_mean_3_100, pixacc_mean_3_100 = export_csv(miou_table_3, pix_acc_table_3, miou_csv_name="random_miou_3_100.csv", pix_acc_csv_name="random_pix_acc_3_100.csv", export = False)
# print('miou per class\n', miou_mean_3_100)
# Positive values mean configuration 3 scored higher than the baseline run.
print("miou per class difference\n", miou_mean_3_100-miou_mean_default_100)


miou per class difference
 Background      0.000000
Hat             0.020196
Hair           -0.020855
Glove          -0.027154
Sunglasses      0.000000
UpperClothes    0.026187
Dress           0.000000
Coat           -0.016488
Socks          -0.052758
Pants           0.011221
Jumpsuits       0.000000
Scarf                NaN
Skirt           0.000380
Face           -0.011301
Left-arm       -0.080770
Right-arm      -0.004838
Left-leg       -0.078534
Right-leg      -0.017251
Left-shoe      -0.040621
Right-shoe     -0.024334
dtype: float64


#### 4: Hyperparameters in the predictor notebook: https://colab.research.google.com/drive/12yvkr9VomnceYOdQ5dbdoaPIqXRiOPmb

In [None]:
# Configuration 4: settings taken from the predictor notebook linked in the
# heading. Values that differ from the reference settings listed in the
# commented-out baseline cell are marked below.
mask_generator_4 = SamAutomaticMaskGenerator(
    sam,
    points_per_side = 32,
    points_per_batch = 64,
    pred_iou_thresh = 0.86,  # changed (reference: 0.88)
    stability_score_thresh = 0.92,  # changed (reference: 0.95)
    stability_score_offset = 1.0,
    box_nms_thresh = 0.7,
    crop_n_layers = 1,  # changed (reference: 0)
    crop_nms_thresh = 0.7,
    crop_overlap_ratio = 512 / 1500,
    crop_n_points_downscale_factor = 2,  # changed (reference: 1)
    point_grids = None,
    min_mask_region_area = 100,  # changed (reference: 0)
    output_mode = "binary_mask"
  )

In [None]:
# Same 100-image evaluation as the baseline, using configuration 4.
miou_table_4, pix_acc_table_4 = evaluate(mask_generator_4)

  0%|          | 0/100 [00:00<?, ?it/s]

6 0 [44, 248, 50, 248]


  1%|          | 1/100 [02:50<4:41:56, 170.88s/it]

0 6 [25, 81, 25, 87]


  3%|▎         | 3/100 [03:37<1:30:58, 56.27s/it]

2 0 [20, 98, 22, 98]


  4%|▍         | 4/100 [03:58<1:08:20, 42.71s/it]

0 2 [128, 54, 128, 56]


  8%|▊         | 8/100 [05:32<41:32, 27.09s/it]

0 2 [50, 165, 50, 167]
0 4 [50, 154, 50, 158]
0 2 [50, 165, 50, 167]


 23%|██▎       | 23/100 [11:15<29:12, 22.77s/it]

0 2 [19, 93, 19, 95]


 30%|███       | 30/100 [13:53<26:55, 23.08s/it]

3 0 [99, 399, 102, 399]


 31%|███       | 31/100 [14:15<26:10, 22.76s/it]

1 0 [59, 81, 60, 81]
0 1 [27, 152, 27, 153]


 33%|███▎      | 33/100 [15:00<25:12, 22.57s/it]

0 9 [60, 178, 60, 187]


 34%|███▍      | 34/100 [15:23<24:50, 22.59s/it]

0 7 [26, 344, 26, 351]


 35%|███▌      | 35/100 [15:46<24:35, 22.70s/it]

4 0 [82, 29, 86, 29]
1 0 [118, 105, 119, 105]


 36%|███▌      | 36/100 [16:07<23:53, 22.40s/it]

0 29 [215, 66, 215, 95]
0 29 [215, 66, 215, 95]


 40%|████      | 40/100 [17:41<22:53, 22.90s/it]

0 3 [23, 130, 23, 133]


 45%|████▌     | 45/100 [19:35<21:05, 23.01s/it]

2 0 [24, 173, 26, 173]


 48%|████▊     | 48/100 [20:43<19:41, 22.72s/it]

0 4 [26, 272, 26, 276]
0 4 [26, 272, 26, 276]
0 1 [76, 310, 76, 311]


 50%|█████     | 50/100 [21:27<18:41, 22.42s/it]

4 0 [29, 26, 33, 26]


 53%|█████▎    | 53/100 [22:35<17:33, 22.41s/it]

0 1 [35, 0, 35, 1]
0 1 [35, 9, 35, 10]
0 0 [3, 13, 3, 13]


 55%|█████▌    | 55/100 [23:18<16:35, 22.12s/it]

1 0 [163, 221, 164, 221]
1 0 [163, 221, 164, 221]


 56%|█████▌    | 56/100 [23:41<16:20, 22.27s/it]

4 0 [16, 134, 20, 134]
0 1 [65, 139, 65, 140]
6 0 [29, 116, 35, 116]
6 0 [29, 116, 35, 116]
6 0 [29, 116, 35, 116]
0 9 [55, 75, 55, 84]


 59%|█████▉    | 59/100 [24:48<15:17, 22.38s/it]

0 12 [33, 51, 33, 63]


 61%|██████    | 61/100 [25:34<14:41, 22.60s/it]

0 24 [22, 0, 22, 24]
0 21 [22, 0, 22, 21]
0 24 [22, 0, 22, 24]
0 24 [22, 0, 22, 24]
0 24 [22, 0, 22, 24]
0 24 [22, 0, 22, 24]
0 25 [22, 0, 22, 25]
0 26 [22, 0, 22, 26]
0 26 [22, 0, 22, 26]


 67%|██████▋   | 67/100 [27:49<12:25, 22.59s/it]

0 19 [22, 184, 22, 203]


 73%|███████▎  | 73/100 [30:04<10:05, 22.41s/it]

0 32 [63, 0, 63, 32]
0 32 [63, 0, 63, 32]


 75%|███████▌  | 75/100 [30:49<09:18, 22.34s/it]

0 4 [93, 170, 93, 174]


 77%|███████▋  | 77/100 [31:34<08:33, 22.31s/it]

0 7 [73, 136, 73, 143]
0 8 [32, 1, 32, 9]
0 7 [68, 176, 68, 183]
0 7 [68, 176, 68, 183]


 79%|███████▉  | 79/100 [32:18<07:46, 22.20s/it]

1 0 [37, 63, 38, 63]


 82%|████████▏ | 82/100 [33:24<06:38, 22.16s/it]

0 4 [32, 69, 32, 73]


 96%|█████████▌| 96/100 [38:38<01:30, 22.61s/it]

0 3 [33, 4, 33, 7]


100%|██████████| 100/100 [40:08<00:00, 24.09s/it]


In [None]:
# Per-class means for configuration 4. Unlike runs 1-3, `export` is left at its
# default (True), so both per-image tables are written as CSV files.
miou_mean_4_100, pixacc_mean_4_100 = export_csv(miou_table_4, pix_acc_table_4, miou_csv_name="random_miou_4_100.csv", pix_acc_csv_name="random_pix_acc_4_100.csv")
# Positive values mean configuration 4 scored higher than the baseline run.
print("miou per class difference\n", miou_mean_4_100-miou_mean_default_100)



miou per class difference
 Background      0.000000
Hat            -0.028639
Hair           -0.039319
Glove          -0.031491
Sunglasses      0.036205
UpperClothes   -0.042745
Dress           0.000000
Coat           -0.001568
Socks           0.009255
Pants          -0.041042
Jumpsuits      -0.007388
Scarf                NaN
Skirt          -0.178598
Face           -0.002003
Left-arm       -0.024950
Right-arm       0.014727
Left-leg        0.046555
Right-leg       0.024230
Left-shoe      -0.028566
Right-shoe      0.014470
dtype: float64
