In [None]:
import os
import cv2 
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from skimage import transform
from dataset_loader import DatasetLoader
from annotations_loader import AnnotationsLoader

def display_img(img, BGR=False):
    """Show an image in a new 150-dpi matplotlib figure.

    Parameters
    ----------
    img : str or array-like
        Either a path to an image file (read with ``cv2.imread``) or image
        data that ``np.array`` can turn into something ``plt.imshow`` accepts.
    BGR : bool, default False
        Pass True when ``img`` is a 3- or 4-channel array in OpenCV's BGR
        channel order so that it is converted to RGB before display.
        Images loaded from a path are always treated as BGR, because that is
        what ``cv2.imread`` returns. The flag is ignored for images without a
        3- or 4-entry channel axis (e.g. grayscale), which ``cv2.cvtColor``
        cannot convert with ``COLOR_BGR2RGB``.

    Raises
    ------
    OSError
        If ``img`` is a path and ``cv2.imread`` cannot read it.
    """
    if isinstance(img, str):
        loaded = cv2.imread(img)
        if loaded is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f'cv2.imread could not read image file: {img!r}')
        # cv2.imread yields BGR data, so request the RGB conversion.
        display_img(loaded, BGR=True)
        return

    pixels = np.array(img)
    plt.figure(dpi=150)
    has_color_axis = pixels.ndim == 3 and pixels.shape[2] in (3, 4)
    if BGR and has_color_axis:
        pixels = cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)
    plt.imshow(pixels)

In [None]:
# Open the dataset and pick one video through its (dataset, video name) key.
videos_loader = DatasetLoader()
video = videos_loader[('MS', '2016-09-16_IDMSSM28_BOSCC_vid')]

# The video name is the file's base name up to its first dot.
video_filename = os.path.basename(video.filepath)
video_name = video_filename.split('.')[0]
# NOTE(review): current_ds is presumably set by the lookup above - confirm in DatasetLoader.
ds = videos_loader.current_ds

# Output root and the two sub-directories handed to the annotations loader.
home_dir = '/proj/brizk/output/'
faces_dir = os.path.join(home_dir, 'retinaface')
attention_dir = os.path.join(home_dir, 'attentiontarget')

annotations = AnnotationsLoader(
    ds,
    video_name,
    faces_dir=faces_dir,
    attention_dir=attention_dir,
)

### Retrieving one image frame and its annotations

In [None]:
annotation = next(annotations)
df = annotations.faces_file

# Enlarge every face box by 10% of its ORIGINAL width/height on each side.
# The pads are computed once, before any column is modified; computing the
# right/bottom pad after left/top were already moved would make the box grow
# by 11% on that side instead of 10%.
# NOTE: this edits annotations.faces_file in place, so re-running the cell
# pads the boxes again (and next(annotations) advances to another entry).
PAD_RATIO = 0.1
pad_x = (df['right'] - df['left']) * PAD_RATIO
pad_y = (df['bottom'] - df['top']) * PAD_RATIO
df['left'] -= pad_x
df['right'] += pad_x
df['top'] -= pad_y
df['bottom'] += pad_y

df.head(10)

In [None]:
# Fetch the annotated frame from the video and convert it from BGR to RGB.
print('current frame before query', video.current_frame_num)
frame_num = annotation['frame']
frame_bgr = video[frame_num]
frame_img = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
print('queried frame_num', frame_num)
print('current frame', video.current_frame_num)
height, width, _ = frame_img.shape

faces_per_frame = annotation['faces'].reset_index()
attention_per_frame = annotation['attention']

# One int32 [left, top, right, bottom] array per face row.
box_columns = ('left', 'top', 'right', 'bottom')
head_boxes = [
    np.array(
        [faces_per_frame.loc[row, column] for column in box_columns]
    ).astype(np.int32)
    for row in faces_per_frame.index
]

# The following cells work with the first face only.
head_box = head_boxes[0]
print(head_boxes)
display_img(frame_img)

### Visualizing attention target over one image

In [None]:
# A commented-out alternative used to unpickle the attention data from
# /proj/brizk/output/attentiontarget/{ds}/{video_name}.pkl; the values now come
# from attention_per_frame, built in the previous cell.

# Attention entry to visualize (head_box was taken from head_boxes[0]).
i = 0
attention_entry = attention_per_frame[i]
observation_coordinates = attention_entry['observation_coordinates']
raw_hm = attention_entry['raw_hm']
inout = attention_entry['inout']
# Resize the heatmap to the frame size, then subtract the in/out value.
resized_hm = transform.resize(raw_hm, (height, width))
norm_map = resized_hm - inout

box_left, box_top, box_right, box_bottom = head_box
marker_radius = height / 50.0
green_8bit = (0, 255, 0)
green_mpl = (0, 1, 0)

# Draw the head box and the observed point directly into the frame pixels.
cv2.rectangle(frame_img, (box_left, box_top), (box_right, box_bottom), green_8bit, 4)
cv2.circle(frame_img, observation_coordinates, int(marker_radius), green_8bit, 4)
print('Observation is at', observation_coordinates)

display_img(frame_img)
ax = plt.gca()

# Same head box and observed point again, this time as matplotlib artists.
rect = patches.Rectangle(
    (box_left, box_top),
    box_right - box_left,
    box_bottom - box_top,
    linewidth=2,
    edgecolor=green_mpl,
    facecolor='none',
)
ax.add_patch(rect)

circ = patches.Circle(
    (observation_coordinates[0], observation_coordinates[1]),
    marker_radius,
    facecolor=green_mpl,
    edgecolor='none',
)
ax.add_patch(circ)

# Line from the observed point to the center of the head box.
line_xs = (observation_coordinates[0], (box_left + box_right) / 2)
line_ys = (observation_coordinates[1], (box_top + box_bottom) / 2)
plt.plot(line_xs, line_ys, '-', color=(0,1,0,1))

# Semi-transparent heatmap overlay.
plt.imshow(norm_map, cmap='jet', alpha=0.2, vmin=0, vmax=255)

### Generating Bounding Box from Heatmap using NumPy and OpenCV (cv2) only

In [None]:
img_copy = frame_img.copy()

# Dedicated names: binding `min` / `max` here would shadow the Python builtins
# for every cell executed afterwards in the same kernel.
hm_min = np.min(norm_map)
hm_max = np.max(norm_map)
print(f'norm_map min {hm_min} max {hm_max}')

# Grayscale then Otsu's threshold.
# np.clip zeroes the negative values and saturates anything above 255, so the
# uint8 cast cannot wrap around.
gray = np.clip(norm_map, 0, 255)
print(gray.shape)
gray = gray.astype(np.uint8)
print(gray.shape)
print(f'unint8 conversion - min {np.min(gray)} max {np.max(gray)}')
thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
print(f'thresh - min {np.min(thresh)} max {np.max(thresh)}')
print(thresh.shape)


# Find contours. findContours returns 2 or 3 values depending on the OpenCV
# version; the contour list is first in the 2-value form, second otherwise.
cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    print('contour shape', c.shape)
    x, y, w, h = cv2.boundingRect(c)
    # Outline the contour's bounding box on the mask and on the frame copy.
    cv2.rectangle(thresh, (x, y), (x + w, y + h), (155, 0, 0), 10)
    cv2.rectangle(img_copy, (x, y), (x + w, y + h), (155, 0, 0), 10)
    print('rect bounds', x, y, w, h)

# thresh and norm_map are single-channel, so they are shown without the
# BGR->RGB conversion (cv2.cvtColor only accepts 3/4-channel input for it).
display_img(thresh)
display_img(img_copy)
display_img(norm_map)

### Generating Bounding Box from Heatmap using /proj/rash/CAM-Python/BBoxGenerator

In [None]:
## Here is the code to generate the bounding box from the heatmap
# 
# to reproduce the ILSVRC localization result, you need to first generate
# the heatmap for each testing image by merging the heatmap from the
# 10-crops (it is exactly what the demo code is doing), then resize the merged heatmap back to the original size of
# that image. Then use this bbox generator to generate the bbox from the resized heatmap.
#
# The source code of the bbox generator is also released. Probably you need
# to install the correct version of OpenCV to compile it.
#
# Special thanks to Hui Li for helping on this code.
#
# Bolei Zhou, April 19, 2016

import os
import numpy as np
import cv2

import subprocess
import sys

REPO_PATH = '/proj/rash/CAM-Python'
if REPO_PATH not in sys.path:  # do not grow sys.path each time the cell is re-run
    sys.path.append(REPO_PATH)

from py_map2jpg import py_map2jpg


def im2double(im):
    """Return ``im`` cast to float and min-max normalized to [0.0, 1.0]."""
    return cv2.normalize(im.astype('float'), None, 0.0, 1.0, cv2.NORM_MINMAX)


bbox_threshold = [20, 100, 110]  # parameters for the bbox generator
curParaThreshold = str(bbox_threshold[0])+' '+str(bbox_threshold[1])+' '+str(bbox_threshold[2])+' '
curHeatMapFile = 'heatmap_6.jpg'
curBBoxFile = 'heatmap_6.txt'

# The external generator reads the heatmap from disk.
if not cv2.imwrite(curHeatMapFile, norm_map):
    raise OSError(f'cv2.imwrite could not write {curHeatMapFile!r}')

# Run the external bbox generator with an argument list (no shell string
# building). check=True raises CalledProcessError on a non-zero exit status
# instead of silently continuing with a missing or stale bbox file.
dt_box_cmd = [
    os.path.join(REPO_PATH, 'bboxgenerator', 'dt_box'),
    curHeatMapFile,
    *(str(value) for value in bbox_threshold),
    curBBoxFile,
]
subprocess.run(dt_box_cmd, check=True)

# Collect every integer in the file. Overwriting the list on each line would
# leave it undefined for an empty file and empty after a trailing blank line.
with open(curBBoxFile) as f:
    items = [int(token) for line in f for token in line.split()]

# The integers are consumed in groups of four: x, y, w, h (same grouping as
# the strided slices 0::4, 1::4, 2::4, 3::4). reshape raises ValueError if the
# count is not a multiple of four rather than silently mis-pairing values.
xywh = np.array(items, dtype=int).reshape(-1, 4)
corner_ax, corner_ay = xywh[:, 0], xywh[:, 1]
corner_bx, corner_by = corner_ax + xywh[:, 2], corner_ay + xywh[:, 3]

# Order the corners so each row is [x_min, y_min, x_max, y_max].
boxData_formulate = np.stack(
    [
        np.minimum(corner_ax, corner_bx),
        np.minimum(corner_ay, corner_by),
        np.maximum(corner_ax, corner_bx),
        np.maximum(corner_ay, corner_by),
    ],
    axis=1,
)

curHeatMap = cv2.imread(curHeatMapFile)
if curHeatMap is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise OSError(f'cv2.imread could not read {curHeatMapFile!r}')
curImg = frame_img

# Colorize the heatmap and blend it with the frame (20% frame, 70% heatmap).
curHeatMap = im2double(curHeatMap)
curHeatMap = py_map2jpg(curHeatMap, None, 'jet')
curHeatMap = im2double(curImg)*0.2+im2double(curHeatMap)*0.7

for box in boxData_formulate:  # for each bbox
    print(box[:2])
    print(box[2:])
    # Plain Python ints for the corner points handed to OpenCV.
    top_left = tuple(int(value) for value in box[:2])
    bottom_right = tuple(int(value) for value in box[2:])
    cv2.rectangle(curHeatMap, top_left, bottom_right, (255, 0, 0), 3)
    # One figure per box; each shows all boxes drawn so far.
    display_img(curHeatMap)

### Visualizing all attentions of one frame in a single image

In [None]:
annotation = annotations[5000]

# Fetch the annotated frame from the video and convert it from BGR to RGB.
print('current frame before query', video.current_frame_num)
frame_num = annotation['frame']
frame_img = cv2.cvtColor(video[frame_num], cv2.COLOR_BGR2RGB)
print('queried frame_num', frame_num)
print('current frame', video.current_frame_num)
height, width, _ = frame_img.shape

faces_per_frame = annotation['faces'].reset_index()
attention_per_frame = annotation['attention']

# First figure: frame plus matplotlib overlays added inside the loop.
display_img(frame_img)
ax = plt.gca()  # the loop opens no new figure, so the axes can be fetched once
for i in faces_per_frame.index:
    head_box = np.array(
        [faces_per_frame.loc[i, 'left'], faces_per_frame.loc[i, 'top'],
         faces_per_frame.loc[i, 'right'], faces_per_frame.loc[i, 'bottom']]
    ).astype(np.int32)

    observation_coordinates = attention_per_frame[i]['observation_coordinates']
    raw_hm = attention_per_frame[i]['raw_hm']
    inout = attention_per_frame[i]['inout']
    # Resize the heatmap to the frame size, then subtract the in/out value.
    norm_map = transform.resize(raw_hm, (height, width)) - inout

    # Actual center of the head box. This name previously held the box
    # width/height and was never used.
    face_center = [(head_box[0] + head_box[2]) / 2, (head_box[1] + head_box[3]) / 2]

    # Draw the head box and the observed point directly into the frame pixels.
    cv2.rectangle(frame_img, (head_box[0], head_box[1]), (head_box[2], head_box[3]), (0, 255, 0), 4)
    cv2.circle(frame_img, observation_coordinates, int(height/50.0), (0, 255, 0), 4)
    print('Observation is at', observation_coordinates)

    # Same head box and observed point as matplotlib artists on the first figure.
    ax.add_patch(patches.Rectangle(
        (head_box[0], head_box[1]), head_box[2]-head_box[0], head_box[3]-head_box[1],
        linewidth=2, edgecolor=(0, 1, 0), facecolor='none'))
    ax.add_patch(patches.Circle(
        (observation_coordinates[0], observation_coordinates[1]), height/50.0,
        facecolor=(0, 1, 0), edgecolor='none'))

    ax.imshow(norm_map, cmap='jet', alpha=0.2, vmin=0, vmax=255)

    # Grayscale then Otsu's threshold. np.clip zeroes negatives and saturates
    # values above 255 so the uint8 cast cannot wrap around.
    gray = np.clip(norm_map, 0, 255).astype(np.uint8)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    # Find contours (the return arity differs between OpenCV versions).
    cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)
        cv2.rectangle(frame_img, (x, y), (x + w, y + h), (255, 0, 0), 10)
        print('rect bounds', x, y, w, h)

    # Line from the observed point to the head-box center.
    ax.plot((observation_coordinates[0], face_center[0]),
            (observation_coordinates[1], face_center[1]),
            '-', color=(0, 1, 0, 1))

# Second figure: the frame with everything drawn into its pixels by OpenCV.
display_img(frame_img)

### Looping over frame annotations (targeting only frames with at least 2 attentions)

In [None]:

# NOTE(review): this cell reuses frame_img, faces_per_frame, attention_per_frame,
# height and width left in the kernel by an earlier cell, and frame_img may
# already carry drawings from that cell. No loop over frames is implemented
# here yet - TODO confirm the intended scope.

# First figure: frame plus matplotlib overlays added inside the loop.
display_img(frame_img)
ax = plt.gca()  # the loop opens no new figure, so the axes can be fetched once
for i in faces_per_frame.index:
    head_box = np.array(
        [faces_per_frame.loc[i, 'left'], faces_per_frame.loc[i, 'top'],
         faces_per_frame.loc[i, 'right'], faces_per_frame.loc[i, 'bottom']]
    ).astype(np.int32)

    observation_coordinates = attention_per_frame[i]['observation_coordinates']
    raw_hm = attention_per_frame[i]['raw_hm']
    inout = attention_per_frame[i]['inout']
    # Resize the heatmap to the frame size, then subtract the in/out value.
    norm_map = transform.resize(raw_hm, (height, width)) - inout

    # Actual center of the head box. This name previously held the box
    # width/height and was never used.
    face_center = [(head_box[0] + head_box[2]) / 2, (head_box[1] + head_box[3]) / 2]

    # Draw the head box and the observed point directly into the frame pixels.
    cv2.rectangle(frame_img, (head_box[0], head_box[1]), (head_box[2], head_box[3]), (0, 255, 0), 4)
    cv2.circle(frame_img, observation_coordinates, int(height/50.0), (0, 255, 0), 4)
    print('Observation is at', observation_coordinates)

    # Same head box and observed point as matplotlib artists on the first figure.
    ax.add_patch(patches.Rectangle(
        (head_box[0], head_box[1]), head_box[2]-head_box[0], head_box[3]-head_box[1],
        linewidth=2, edgecolor=(0, 1, 0), facecolor='none'))
    ax.add_patch(patches.Circle(
        (observation_coordinates[0], observation_coordinates[1]), height/50.0,
        facecolor=(0, 1, 0), edgecolor='none'))

    ax.imshow(norm_map, cmap='jet', alpha=0.2, vmin=0, vmax=255)

    # Grayscale then Otsu's threshold. np.clip zeroes negatives and saturates
    # values above 255 so the uint8 cast cannot wrap around.
    gray = np.clip(norm_map, 0, 255).astype(np.uint8)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    # Find contours (the return arity differs between OpenCV versions).
    cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)
        cv2.rectangle(frame_img, (x, y), (x + w, y + h), (255, 0, 0), 10)
        print('rect bounds', x, y, w, h)

    # Line from the observed point to the head-box center.
    ax.plot((observation_coordinates[0], face_center[0]),
            (observation_coordinates[1], face_center[1]),
            '-', color=(0, 1, 0, 1))

# Second figure: the frame with everything drawn into its pixels by OpenCV.
display_img(frame_img)