# Install packages

In [None]:
!pip install -r requirements.txt

# Import packages

In [None]:
import torch
import torchvision
import numpy as np
import cv2
import os
import glob
from yolox import models
from yolox.utils import *
from yolox.data import data_augment
from torchvision import transforms as T
from tqdm import tqdm
from d_close import DCLOSE
from tool import *

In [None]:
# Pick the compute device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# D-CLOSE for YOLOX model

In [None]:
# Validation-time preprocessing used for every YOLOX input below.
transform = data_augment.ValTransform(legacy=False)
# Pretrained YOLOX-L detector, switched to inference mode
# (eval() returns the module itself, so it can be chained).
model = models.yolox_l(pretrained=True).eval()

In [None]:
# Explain the YOLOX detections of one image with D-CLOSE, save the raw
# explanation maps as <image name>.npy and write the visualisation to
# ./results/yolox/<image name>.

# read image
img_path = './images/000000504635.jpg'
output = './results/yolox'
org_img = cv2.imread(img_path)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if org_img is None:
    raise FileNotFoundError(f"Cannot read image: {img_path}")
org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB)
h, w, c = org_img.shape
# preprocess image
# NOTE(review): `ratio` is not used in this cell; presumably it mirrors the
# resize factor applied by ValTransform - confirm.
ratio = min(640 / h, 640 / w)
img, _ = transform(org_img, None, (640, 640))
img = torch.from_numpy(img).unsqueeze(0).float()
# uint8 copy of the network input with the channel axis moved last; it is
# only passed to visual() below.
img_np = img.squeeze().numpy().transpose(1, 2, 0).astype(np.uint8)
# file name without directory and extension, e.g. "000000504635"
name_img = img_path.split('/')[-1].split('.')[0]

dclose = DCLOSE(arch="yolox", model=model, img_size=(640, 640), n_samples=4000)
# forward image
with torch.no_grad():
    out = model(img.to(device))
    box, index = postprocess(out, num_classes=80, conf_thre=0.25, nms_thre=0.45, class_agnostic=True)
    box = box[0]
    # postprocess yields None for an image without detections; fail with a
    # clear message instead of an AttributeError further down.
    if box is None:
        raise RuntimeError(f"No objects detected in {img_path}; nothing to explain")
    rs = dclose(img, box)
# The evaluation cells load this file again via np.load(f'{name_img}.npy').
np.save(f'{name_img}.npy', rs)
os.makedirs(output, exist_ok=True)
output = os.path.join(output, name_img)
# visual results
visual(img_np, rs, box.cpu(), arch="yolox", save_file=output)

# D-CLOSE for Faster-RCNN model

In [None]:
# Explain the Faster-RCNN detections of one image with D-CLOSE and write the
# visualisation to ./results/faster_rcnn/<image name>.

# load model
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval().to(device)

# read image
img_path = './images/000000504635.jpg'
output = './results/faster_rcnn'
transform = T.Compose([T.ToTensor()])
img = cv2.imread(img_path)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if img is None:
    raise FileNotFoundError(f"Cannot read image: {img_path}")
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
org_h, org_w, _ = img.shape
# preprocess image
img = transform(img)
# uint8 copy with the channel axis moved last, only passed to visual() below
img_np = img.numpy().transpose(1, 2, 0)
img_np = (255 * img_np).astype(np.uint8)
# file name without directory and extension, e.g. "000000504635"
name_img = img_path.split('/')[-1].split('.')[0]

dclose = DCLOSE(arch="faster-rcnn", model=model, img_size=(img.shape[1:]), n_samples=4000)
# forward image
# Inference only: skip autograd bookkeeping, as the YOLOX cell does.
with torch.no_grad():
    prediction = model([img.to(device)])
# Keep the detections selected by get_prediction with threshold 0.8.
box = get_prediction(prediction, 0.8)
rs = dclose(img, box)
# np.save(f'{name_img}.npy', rs)
os.makedirs(output, exist_ok=True)
output = os.path.join(output, name_img)
# visual results
# NOTE(review): arch is spelled "faster-rcnn" for DCLOSE but "faster_rcnn"
# here - confirm both helpers expect these exact strings.
visual(img_np, rs, box, arch="faster_rcnn", save_file=output)

# Ground-truth of MS-COCO dataset

In [None]:
"""
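Extract the class label and bounding box of every annotated object for each image in the MS-COCO validation dataset
Input: annotations_path (path to the annotation file)
Result: info_data: {"file_name": array of shape (num_objects, 5) with rows [x1, y1, x2, y2, class_index]}
Boxes are clipped to the image and then scaled by min(640 / height, 640 / width).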
"""
from pycocotools.coco import COCO

# Build `info_data`: image file name -> float array of shape (num_objs, 5)
# whose rows are [x1, y1, x2, y2, class_index]. Boxes are clipped to the image
# and then scaled by min(640 / height, 640 / width).
annotations_path = './COCO/annotations/instances_val2017.json'
coco = COCO(annotations_path)
ids = coco.getImgIds()
info_data = dict()
class_ids = sorted(coco.getCatIds())
# category id -> position in the sorted id list; built once so the lookup is
# not a linear search for every object.
class_index = {cat_id: position for position, cat_id in enumerate(class_ids)}
for id_ in ids:
    im_ann = coco.loadImgs(id_)[0]
    width = im_ann["width"]
    height = im_ann["height"]
    img_name = im_ann["file_name"]
    anno_ids = coco.getAnnIds(imgIds=[int(id_)], iscrowd=False)
    annotations = coco.loadAnns(anno_ids)
    rows = []
    for obj in annotations:
        # obj["bbox"] is read as [x, y, w, h]; convert it to corner format
        # and clip it to the image bounds.
        bx, by, bw, bh = obj["bbox"][:4]
        x1 = max(0, bx)
        y1 = max(0, by)
        x2 = min(width, x1 + max(0, bw))
        y2 = min(height, y1 + max(0, bh))
        # drop annotations with zero area or an inverted box after clipping
        if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
            obj["clean_bbox"] = [x1, y1, x2, y2]
            rows.append([x1, y1, x2, y2, class_index[obj["category_id"]]])
    # The explicit float dtype keeps the in-place scaling valid even when all
    # coordinates are integers; reshape keeps (0, 5) for images without objects.
    res = np.array(rows, dtype=np.float64).reshape(-1, 5)
    r = min(640 / height, 640 / width)
    res[:, :4] *= r
    info_data[img_name] = res

# Metric for YOLOX model

## Localization evaluation

In [None]:
from evaluation import metric, causal_metric, correspond_box
from yolox.utils import postprocess

# Localization evaluation (PG / EBPG) of the saved explanation maps for one
# image. Requires `info_data` from the ground-truth cell and the
# <image name>.npy file written by the D-CLOSE YOLOX cell.

# load yolox model
model = models.yolox_l(pretrained=True)
model.eval()
transform = data_augment.ValTransform(legacy=False)

# create array to save results (one slot per class, num_classes=80 below)
ebpg = np.zeros(80)
pg = np.zeros(80)
count = np.zeros(80)

# read image
img_path = './images/000000504635.jpg'
org_img = cv2.imread(img_path)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if org_img is None:
    raise FileNotFoundError(f"Cannot read image: {img_path}")
org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB)
h, w, c = org_img.shape
ratio = min(640 / h, 640 / w)

# preprocess image
img, _ = transform(org_img, None, (640, 640))
img = torch.from_numpy(img).unsqueeze(0).float()
img_np = img.squeeze().numpy().transpose(1, 2, 0).astype(np.uint8)
file_name = img_path.split('/')[-1]
name_img = file_name.split('.')[0]

# forward image
with torch.no_grad():
    # Use the notebook-wide `device` instead of a hard-coded 'cuda:0' so the
    # cell also runs on CPU-only machines.
    out = model(img.to(device))
    box, index = postprocess(out, num_classes=80, conf_thre=0.25, nms_thre=0.45, class_agnostic=True)
    box = box[0]
    if box is None or len(info_data[file_name]) == 0:
        print(f"Skipping {file_name}: no detections or no ground-truth boxes")
    else:
        # match each detection with a ground-truth box
        gt_box, idx_correspond = correspond_box(box.cpu().numpy(), info_data[file_name])
        if len(gt_box) == 0:
            print(f"Skipping {file_name}: no detection matches a ground-truth box")
        else:
            explanation_map = np.load(f'{name_img}.npy')
            ebpg_img, pg_img, count_img = metric(gt_box, explanation_map[idx_correspond,:,:])
            ebpg += ebpg_img
            pg += pg_img
            count += count_img

# Average only over classes that were actually evaluated; guard the empty
# case so np.mean is never taken over an empty selection.
evaluated = count != 0
if evaluated.any():
    print("PG:", np.mean(pg[evaluated]/count[evaluated]))
    print("EBPG:", np.mean(ebpg[evaluated]/count[evaluated]))
else:
    print("PG / EBPG: no objects were evaluated")

## Faithfulness Evaluation

In [None]:
from evaluation import causal_metric
from yolox.utils import postprocess

# Faithfulness evaluation (deletion / insertion) of the saved explanation maps
# for one image. Uses `model` and `transform` as left by an earlier cell and
# the <image name>.npy file written by the D-CLOSE YOLOX cell.

# create array to save results (one slot per class, num_classes=80 below)
del_auc = np.zeros(80)
ins_auc = np.zeros(80)
count = np.zeros(80)

# read image
img_path = './images/000000504635.jpg'
org_img = cv2.imread(img_path)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if org_img is None:
    raise FileNotFoundError(f"Cannot read image: {img_path}")
org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB)
h, w, c = org_img.shape
ratio = min(640 / h, 640 / w)

# preprocess image
img, _ = transform(org_img, None, (640, 640))
img = torch.from_numpy(img).unsqueeze(0).float()
img_np = img.squeeze().numpy().transpose(1, 2, 0).astype(np.uint8)
file_name = img_path.split('/')[-1]
name_img = file_name.split('.')[0]

# forward image
with torch.no_grad():
    # Use the notebook-wide `device` instead of a hard-coded 'cuda:0' so the
    # cell also runs on CPU-only machines.
    out = model(img.to(device))
    box, index = postprocess(out, num_classes=80, conf_thre=0.25, nms_thre=0.45, class_agnostic=True)
    box = box[0]
    if box is None:
        print(f"Skipping {file_name}: no detections")
    else:
        explanation_map = np.load(f'{name_img}.npy')
        del_img, count_img = causal_metric(model, img_np, box, explanation_map, 'del', step = 2000)
        # NOTE(review): count_img from this call replaces the one returned by
        # the 'del' call; presumably both are identical - confirm.
        ins_img, count_img = causal_metric(model, img_np, box, explanation_map, 'ins', step = 2000)
        del_auc += del_img
        ins_auc += ins_img
        count += count_img

# Average only over classes that were actually evaluated; guard the empty
# case so np.mean is never taken over an empty selection.
evaluated = count != 0
if evaluated.any():
    print("Deletion:", np.mean(del_auc[evaluated]/count[evaluated]))
    print("Insertion:", np.mean(ins_auc[evaluated]/count[evaluated]))
else:
    print("Deletion / Insertion: no objects were evaluated")

# Evaluation with k-means

In [None]:
# download MS-COCO validation dataset from link https://cocodataset.org/#download and put folder "val2017" in folder "COCO"
# ├── D-CLOSE/
# │   ├── COCO /
# │   │   ├── val2017 /
# │   │   │
# │   │   ├── annotations /
# │   │   │
# │   │── ....
from sklearn.cluster import KMeans

# Run the detector on every validation image and collect the area of each
# predicted box as a one-feature sample for k-means.
# Uses `model` and `transform` as left by an earlier cell.
X = []
for img_path in tqdm(sorted(glob.glob('./COCO/val2017/*.jpg'))):
    org_img = cv2.imread(img_path)
    # cv2.imread returns None for unreadable files; skip them instead of
    # aborting the whole pass over the dataset.
    if org_img is None:
        continue
    org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB)
    img, _ = transform(org_img, None, (640, 640))
    img = torch.from_numpy(img).unsqueeze(0).float()

    with torch.no_grad():
        # Use the notebook-wide `device` instead of a hard-coded 'cuda:0'.
        out = model(img.to(device))
        box, index = postprocess(out, num_classes=80, conf_thre=0.25, nms_thre=0.45, class_agnostic=True)
        box = box[0]
        if box is None:
            continue
        # area per box from columns 0-3 (presumably x1, y1, x2, y2 - confirm)
        s = (box[:, 3] - box[:, 1]) * (box[:, 2] - box[:, 0])
        # one host transfer for all boxes, then one [area] row per box
        X.extend([area] for area in s.cpu().tolist())

In [None]:
# Cluster the collected box areas into three groups with a fixed seed.
kmeans = KMeans(n_clusters=3, random_state=0)
kmeans.fit(X)
# Cluster label assigned to each sample in X (shown as the cell output).
kmeans.labels_

In [None]:
# Split the area samples by cluster label.
# idx_k keeps the tuple returned by np.where; group_k holds the matching rows of X.
X_arr = np.array(X)
idx_0, idx_1, idx_2 = (np.where(kmeans.labels_ == label) for label in range(3))
group_0, group_1, group_2 = (X_arr[idx[0]] for idx in (idx_0, idx_1, idx_2))

In [None]:
"""
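Label 0 (Group 0): Medium object group
Label 1 (Group 1): Small object group
Label 2 (Group 2): Large object group
Note: k-means label numbers carry no inherent order, so check this mapping against the maxima printed below.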
"""
# Largest area in each cluster, then the number of samples per cluster.
print(*(grp.max() for grp in (group_0, group_1, group_2)))
for caption, members in (
    ("Number of objects in Group 0:", idx_0),
    ("Number of objects in Group 1:", idx_1),
    ("Number of objects in Group 2:", idx_2),
):
    print(caption, len(members[0]))

## Example 

In [None]:
from evaluation import metric, causal_metric, correspond_box
from yolox.utils import postprocess

# Localization evaluation (PG / EBPG) for one image, reported separately for
# each of the three k-means size groups. Requires `info_data`, the fitted
# `kmeans` and the <image name>.npy file written by the D-CLOSE YOLOX cell.

# load yolox model
model = models.yolox_l(pretrained=True)
model.eval()
transform = data_augment.ValTransform(legacy=False)

# create array to save results: one row per size group, one column per class
ebpg = np.zeros((3, 80))
pg = np.zeros((3, 80))
count = np.zeros((3, 80))

# read image
img_path = './images/000000504635.jpg'
org_img = cv2.imread(img_path)
# cv2.imread returns None (it does not raise) when the file cannot be read.
if org_img is None:
    raise FileNotFoundError(f"Cannot read image: {img_path}")
org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2RGB)
h, w, c = org_img.shape
ratio = min(640 / h, 640 / w)

# preprocess image
img, _ = transform(org_img, None, (640, 640))
img = torch.from_numpy(img).unsqueeze(0).float()
img_np = img.squeeze().numpy().transpose(1, 2, 0).astype(np.uint8)
file_name = img_path.split('/')[-1]
name_img = file_name.split('.')[0]

# forward image
with torch.no_grad():
    # Use the notebook-wide `device` instead of a hard-coded 'cuda:0' so the
    # cell also runs on CPU-only machines.
    out = model(img.to(device))
    box, index = postprocess(out, num_classes=80, conf_thre=0.25, nms_thre=0.45, class_agnostic=True)
    box = box[0]
    if box is None or len(info_data[file_name]) == 0:
        print(f"Skipping {file_name}: no detections or no ground-truth boxes")
    else:
        # match each detection with a ground-truth box
        gt_box, idx_correspond = correspond_box(box.cpu().numpy(), info_data[file_name])
        if len(gt_box) == 0:
            print(f"Skipping {file_name}: no detection matches a ground-truth box")
        else:
            explanation_map = np.load(f'{name_img}.npy')
            for (j, i) in enumerate(idx_correspond):
                ebpg_img, pg_img, count_img = metric(gt_box[j][None,:], explanation_map[i][None,:])
                # area of the detected box decides its size group
                s = (box[i,3] - box[i,1])*(box[i,2] - box[i,0])
                # predict() returns a length-1 array; take the scalar label
                group_idx = int(kmeans.predict([[s.cpu().item()]])[0])
                ebpg[group_idx] += ebpg_img
                pg[group_idx] += pg_img
                count[group_idx] += count_img

for g in range(3):
    # Average only over classes evaluated in this group; a group without any
    # object is reported explicitly instead of printing nan.
    evaluated = count[g] != 0
    if evaluated.any():
        print(f"PG (Group {g}):", np.mean(pg[g][evaluated]/count[g][evaluated]))
        print(f"EBPG (Group {g}):", np.mean(ebpg[g][evaluated]/count[g][evaluated]))
    else:
        print(f"PG / EBPG (Group {g}): no objects were evaluated")