In [1]:
# Experiment identifier: names both the log file and the saved detection-results pickle below.
exp_name = 'dpl_016'

In [2]:
import os
# Restrict CUDA to device 0; this is set before torch is imported in the next cell.
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [3]:
import torch
import torch.nn as nn
import torchvision
from torch.autograd import Variable
from torch.nn import functional as F

import numpy as np
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches

In [4]:
import pickle
import csv
import os
import time
import datetime
import lmdb

from skimage.feature import peak_local_max
import scipy
from scipy import ndimage as ndi
from multiprocessing import Pool

In [5]:
def save_obj(obj, name):
    """Pickle ``obj`` to the file ``<name>.pkl``.

    Args:
        obj: Any picklable Python object.
        name: Destination path *without* the ``.pkl`` extension (it is appended here).
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        # Highest protocol: most compact/fastest format the running Python supports.
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Load and return the object stored in ``<name>.pkl``.

    Args:
        name: Source path *without* the ``.pkl`` extension (it is appended here).

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
    source_path = name + '.pkl'
    with open(source_path, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

# Open the experiment log in append mode and write a timestamped banner that
# marks the start of this run. The handle stays open for the whole session.
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
logfile = open(f'../../log/{exp_name}.txt', mode='a')
logfile.write(f'\n\n\n\n ----------------- {now} ----------------- \n\n')
logfile.flush()


def log(string, stdout=True):
    """Append one line to the experiment log file, optionally echoing it.

    Args:
        string: Message to record (a ``str``; a newline is appended).
        stdout: When true, the message is also printed to standard output.
    """
    if stdout:
        print(string)
    line = string + '\n'
    logfile.write(line)
    # Flush on every call so the file is up to date even if the kernel dies.
    logfile.flush()

In [6]:
# Class names are read one per line (surrounding whitespace stripped);
# an extra 'background' class is appended as the last label.
with open('../../metadata/ont_300/300-classes.csv', 'r') as fin:
    all_labels = list(map(str.strip, fin))
all_labels.append('background')

# Reverse lookup: label name -> index into all_labels.
label2idx = {label: idx for idx, label in enumerate(all_labels)}
    

In [7]:
# Read-only, lock-free handle on the LMDB that stores the encoded test images.
lmdb_path = '../../data/OpenImages/lmdb/test-600.lmdb'
# map_size is a byte count and must be an integer: the float literal 1e11 is
# rejected by some py-lmdb / Python combinations, so spell it as an exact int.
env = lmdb.open(lmdb_path, map_size=10 ** 11, readonly=True, lock=False)
txn = env.begin(write=False)
cursor = txn.cursor()

In [8]:
# Collection of image ids to process; each id is later UTF-8 encoded and used
# as an LMDB key. NOTE: pickle.load must only be used on trusted files.
test_ids_path = '../../../model_fusion/temp/test_subset_ws_3.pkl'
with open(test_ids_path, 'rb') as fin:
    test_img_ids = pickle.load(fin)

In [9]:
# Build a ResNet-152 and adapt it to the checkpoint loaded below.
resnet = torchvision.models.resnet152() # NOTE: resnet.eval() (called below) is essential at test time -- do not remove it.
print(resnet.fc)
# Replace the classification head: one output per label, excluding the
# 'background' entry that was appended to all_labels.
resnet.fc = nn.Linear(2048, len(all_labels)-1)

# Global pooling down to 1x1 regardless of the feature-map size, so inputs of
# varying spatial size can be fed through the network.
resnet.avgpool = nn.AdaptiveAvgPool2d((1, 1))

# Previous checkpoint path, kept for reference:
#saved_state_dict = torch.load(os.path.join('data/snapshots/','d2_rel_open_1600.pth'))
saved_state_dict = torch.load(os.path.join('../../snapshots', 'train_019', 'ckpt_5000'))
resnet.load_state_dict(saved_state_dict)
# Move to GPU 0, switch to inference mode, and make sure weights are float32.
resnet.cuda(0)
resnet.eval()
resnet.float()

Linear(in_features=2048, out_features=1000, bias=True)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [10]:
# Name of the sub-module whose output feature maps are captured for the CAMs.
finalconv_name = 'layer4'

# Buffer filled by the forward hook: one NumPy array per forward pass.
features_blobs = []
def hook_feature(module, input, output):
    """Forward hook: copy the hooked module's output to CPU and store it in ``features_blobs``."""
    activation = output.data.cpu().numpy()
    features_blobs.append(activation)

final_conv = getattr(resnet, finalconv_name)
final_conv.register_forward_hook(hook_feature)

# Weights used to project feature maps onto class scores: the second-to-last
# parameter tensor of the network (presumably fc.weight, followed by fc.bias).
params = list(resnet.parameters())
weight_softmax = np.squeeze(params[-2].cpu().data.numpy())



In [11]:
def preproc(im, target_size=256, max_size=1024):
    """Rescale an image, preserving aspect ratio, for network input.

    The image is scaled so that its shorter side becomes ``target_size``;
    if that would push the longer side beyond ``max_size``, the scale is
    reduced so the longer side equals ``max_size`` instead.

    Args:
        im: Image array whose first two dimensions are (height, width).
        target_size: Desired length of the shorter side, in pixels.
        max_size: Upper bound on the length of the longer side, in pixels.

    Returns:
        The resized image (bilinear interpolation).
    """
    height, width = im.shape[0:2]
    short_side = min(height, width)
    long_side = max(height, width)

    im_scale = float(target_size) / float(short_side)
    # Prevent the longer side from exceeding max_size.
    if np.round(im_scale * long_side) > max_size:
        im_scale = float(max_size) / float(long_side)

    return cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                      interpolation=cv2.INTER_LINEAR)

In [12]:
# Run the classifier over every test image and keep, per image, the hooked
# feature maps plus the class indices/probabilities sorted by descending score.
#
# NOTE: preproc() preserves aspect ratio, so images generally differ in size and
# np.stack() below needs equal shapes -- keep batch_size at 1 unless the inputs
# are made uniform.
batch_size = 1
model_output = {}
batch = []
batch_keys = []
img_shape = {}

# Per-channel mean / std in RGB order (the standard torchvision ImageNet values).
channel_mean = np.asarray([0.485, 0.456, 0.406])
channel_std = np.asarray([0.229, 0.224, 0.225])
num_images = len(test_img_ids)

for i, imgid in enumerate(test_img_ids):
    decoded_rgb = None
    try:
        imgbin = txn.get(imgid.encode('utf-8'))
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        imgbgr = cv2.imdecode(np.frombuffer(imgbin, dtype='uint8'), cv2.IMREAD_COLOR)

        # Original image size as (width, height); postprocess() maps boxes back to it.
        img_shape[imgid] = (imgbgr.shape[1], imgbgr.shape[0])

        imgbgr = preproc(imgbgr)
        decoded_rgb = imgbgr[:, :, [2, 1, 0]]  # BGR -> RGB
    except Exception as ex:
        # Missing key or undecodable image: log and skip this image. We do NOT
        # `continue` here so that a pending batch is still flushed when the
        # failing image happens to be the last one.
        log(str(ex))

    if decoded_rgb is not None:
        imgrgb = decoded_rgb.astype(float) / 255.0
        imgrgb = (imgrgb - channel_mean) / channel_std
        batch.append(imgrgb.transpose((2, 0, 1)))  # HWC -> CHW
        batch_keys.append(imgid)

    is_last = (i + 1) == num_images
    if batch and (len(batch) == batch_size or is_last):
        batch = np.stack(batch)

        # Reset the buffer filled by the forward hook. This rebinds the
        # module-level name the hook appends to, so it must stay at top level.
        features_blobs = []

        with torch.no_grad():
            inp = torch.from_numpy(batch).float().cuda(0)
            outputs = resnet(inp)
            # Append a constant logit of 1 as the extra 'background' class.
            outputs = torch.cat([outputs, torch.ones(batch.shape[0], 1).cuda(0)], dim=1)
            h_x = F.softmax(outputs, dim=1)
            class_probs, class_idx = h_x.sort(1, True)
            # Tensors live on the GPU: move them to the CPU before converting.
            class_idx = class_idx.cpu().numpy()
            class_probs = class_probs.cpu().numpy()

        for ii in range(len(batch_keys)):
            model_output[batch_keys[ii]] = {
                'features_blobs': features_blobs[0][ii],
                'sorted_labels': class_idx[ii],
                'sorted_probs': class_probs[ii],
            }

        batch = []
        batch_keys = []
        log(f'Processed {i + 1} out of {len(test_img_ids)} images.')



Processed 1 out of 5657 images.
Processed 2 out of 5657 images.
Processed 3 out of 5657 images.
Processed 4 out of 5657 images.
Processed 5 out of 5657 images.
Processed 6 out of 5657 images.
Processed 7 out of 5657 images.
Processed 8 out of 5657 images.
Processed 9 out of 5657 images.
Processed 10 out of 5657 images.
Processed 11 out of 5657 images.
Processed 12 out of 5657 images.
Processed 13 out of 5657 images.
Processed 14 out of 5657 images.
Processed 15 out of 5657 images.
Processed 16 out of 5657 images.
Processed 17 out of 5657 images.
Processed 18 out of 5657 images.
Processed 19 out of 5657 images.
Processed 20 out of 5657 images.
Processed 21 out of 5657 images.
Processed 22 out of 5657 images.
Processed 23 out of 5657 images.
Processed 24 out of 5657 images.
Processed 25 out of 5657 images.
Processed 26 out of 5657 images.
Processed 27 out of 5657 images.
Processed 28 out of 5657 images.
Processed 29 out of 5657 images.
Processed 30 out of 5657 images.
Processed 31 out of

In [13]:
# Post-processing thresholds used by postprocess().
# A box is dropped when more than this fraction of its area lies inside another box.
ioa_thr = 0.9
# Maximum number of boxes (highest score first) kept per class before overlap filtering.
topk_boxes = 20
# Passed to peak_local_max as threshold_rel when locating peaks in the activation map.
rel_peak_thr = 0.7
# A peak's region is the connected area where the heatmap exceeds peak_value * rel_rel_thr.
rel_rel_thr = 0.7


In [14]:
def _suppress_contained_boxes(bboxes, thr):
    """Return the set of box indices to drop because they are mostly covered by another box.

    Each box is compared with every earlier, not-yet-dropped box. With
    ``ioa = intersection / own area``: the box with the larger ioa is dropped
    when that ioa exceeds ``thr`` (the later box on ties).
    """
    removed = set()
    for cur in range(len(bboxes)):
        b1 = bboxes[cur]
        area1 = (b1[2] - b1[0]) * (b1[3] - b1[1])
        for prev in range(cur):
            if prev in removed:
                continue
            b2 = bboxes[prev]
            area2 = (b2[2] - b2[0]) * (b2[3] - b2[1])
            isec = (max(min(b1[2], b2[2]) - max(b1[0], b2[0]), 0) *
                    max(min(b1[3], b2[3]) - max(b1[1], b2[1]), 0))
            # Degenerate (zero-area) boxes get ioa 0 instead of a 0/0 NaN.
            ioa1 = isec / area1 if area1 > 0 else 0.0
            ioa2 = isec / area2 if area2 > 0 else 0.0
            if ioa1 > thr and ioa1 >= ioa2:
                removed.add(cur)
            elif ioa2 > thr and ioa2 >= ioa1:
                removed.add(prev)
    return removed


def postprocess(imgid):
    """Turn one image's class activation maps into scored bounding boxes.

    For every class ranked above 'background' (classes are visited in
    descending probability and the loop stops at 'background'):
      1. build the activation map from ``weight_softmax`` and the stored
         feature maps, normalised to [0, 1];
      2. find local peaks (``rel_peak_thr``) and, for each, take the bounding
         box of the connected heatmap region above ``peak * rel_rel_thr`` at
         the original image resolution;
      3. keep the ``topk_boxes`` best-scoring boxes and drop boxes that are
         mostly contained in another one (``ioa_thr``).

    Reads the module-level ``model_output``, ``img_shape``, ``all_labels``,
    ``weight_softmax`` and the threshold constants.

    Args:
        imgid: Key into ``model_output`` / ``img_shape``.

    Returns:
        ``(imgid, detections)`` where ``detections`` is a list of dicts with
        keys 'label', 'score', 'bbox' (min_x, min_y, max_x, max_y in pixels),
        'bbox_normalized' (same, divided by image width/height) and 'model'.
    """
    entry = model_output[imgid]
    features_blob = entry['features_blobs']
    class_idx = entry['sorted_labels']
    class_probs = entry['sorted_probs']

    nc, h, w = features_blob.shape
    # Loop-invariant: flatten the spatial dimensions once.
    flat_features = features_blob.reshape((nc, h * w))
    img_w, img_h = img_shape[imgid]  # stored as (width, height)

    detections = []

    for ii in range(class_idx.shape[0]):
        label = all_labels[class_idx[ii]]
        if label == 'background':
            break

        cam = weight_softmax[class_idx[ii]].dot(flat_features).reshape(h, w)
        cam = cam - np.min(cam)
        cam_max = np.max(cam)
        if not cam_max > 0:
            # Flat (or NaN) activation map: normalising would yield 0/0 and
            # NaN-scored whole-image boxes, so there is nothing to localise.
            continue
        cam = cam / cam_max

        peak_coords = peak_local_max(cam, exclude_border=False, threshold_rel=rel_peak_thr)

        # cv2.resize takes the target size as (width, height).
        heat_resized = cv2.resize(cam, img_shape[imgid])

        # Map (row, col) peak positions from the feature grid to image pixels,
        # using the centre of each feature cell.
        peak_coords_resized = ((peak_coords + 0.5) *
                               np.asarray([[img_h, img_w]]) /
                               np.asarray([[h, w]])
                              ).astype('int32')

        bboxes = []
        box_scores = []
        for pk_coord in peak_coords_resized:
            pk_pos = tuple(pk_coord)
            pk_value = heat_resized[pk_pos]
            mask = heat_resized > pk_value * rel_rel_thr
            labeled, n = ndi.label(mask)
            # Bounding box of the connected component that contains the peak.
            yy, xx = np.where(labeled == labeled[pk_pos])
            bboxes.append((np.min(xx), np.min(yy), np.max(xx), np.max(yy)))
            box_scores.append(pk_value * class_probs[ii])

        # Keep the top-k boxes by score, best first.
        order = np.argsort(-np.asarray(box_scores))[:min(topk_boxes, len(box_scores))]
        bboxes = [bboxes[k] for k in order]
        box_scores = [box_scores[k] for k in order]

        to_remove = _suppress_contained_boxes(bboxes, ioa_thr)

        for k in range(len(bboxes)):
            if k in to_remove:
                continue
            box = bboxes[k]
            detections.append({
                'label': label,
                'score': box_scores[k],
                'bbox': box,
                'bbox_normalized': np.asarray([
                    box[0] / heat_resized.shape[1],
                    box[1] / heat_resized.shape[0],
                    box[2] / heat_resized.shape[1],
                    box[3] / heat_resized.shape[0],
                ]),
                'model': 'WS'
            })

    return imgid, detections
    

In [15]:
# Post-process all images in parallel with 20 worker processes. Results arrive
# in completion order, so they are keyed by the image id each worker returns.
det_results = {}
cnt = 0
with Pool(20) as p:
    for key, val in p.imap_unordered(postprocess, model_output.keys()):
        det_results[key] = val
        cnt += 1
        # Progress report every 100 images.
        if cnt % 100 == 0:
            print(f'Postprocessed {cnt} out of {len(model_output)} images.')


Postprocessed 100 out of 5656 images.
Postprocessed 200 out of 5656 images.
Postprocessed 300 out of 5656 images.
Postprocessed 400 out of 5656 images.
Postprocessed 500 out of 5656 images.
Postprocessed 600 out of 5656 images.
Postprocessed 700 out of 5656 images.
Postprocessed 800 out of 5656 images.
Postprocessed 900 out of 5656 images.
Postprocessed 1000 out of 5656 images.
Postprocessed 1100 out of 5656 images.
Postprocessed 1200 out of 5656 images.
Postprocessed 1300 out of 5656 images.
Postprocessed 1400 out of 5656 images.
Postprocessed 1500 out of 5656 images.
Postprocessed 1600 out of 5656 images.
Postprocessed 1700 out of 5656 images.
Postprocessed 1800 out of 5656 images.
Postprocessed 1900 out of 5656 images.
Postprocessed 2000 out of 5656 images.
Postprocessed 2100 out of 5656 images.
Postprocessed 2200 out of 5656 images.
Postprocessed 2300 out of 5656 images.
Postprocessed 2400 out of 5656 images.
Postprocessed 2500 out of 5656 images.
Postprocessed 2600 out of 5656 ima

In [16]:
# Persist the detections (image id -> list of detection dicts) as a pickle;
# save_obj appends the '.pkl' extension to the given path.
save_obj(det_results, f'../../results/det_results_{exp_name}')