In [None]:
%matplotlib inline

import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image
from torchvision import transforms

import sys
REPO_PATH = '/proj/brizk/attention-target-detection'
sys.path.append(REPO_PATH)
from config import input_resolution, output_resolution
from utils import imutils, evaluation
from model import ModelSpatial
import torch

from scipy.misc import imresize
import matplotlib.pyplot as plt
import matplotlib.patches as patches


from dataset_loader import DatasetLoader, RetinafaceInferenceGenerator

def display_img(img):
    """Show an image inline in a new matplotlib figure.

    Parameters
    ----------
    img : str or array-like
        Either the path of an image file, or image data that ``np.array``
        can convert (for example a PIL image or an RGB ``ndarray``).

    Raises
    ------
    FileNotFoundError
        If ``img`` is a path that OpenCV cannot read.
    """
    if isinstance(img, str):
        loaded = cv2.imread(img)
        if loaded is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise FileNotFoundError('Could not read image: ' + img)
        # OpenCV decodes files as BGR while matplotlib expects RGB; without
        # this conversion the red and blue channels are swapped on screen.
        img = cv2.cvtColor(loaded, cv2.COLOR_BGR2RGB)
    plt.figure(dpi=100)
    plt.imshow(np.array(img))

In [None]:
# Root folder of the pipeline outputs; the face detections are read from its
# 'retinaface' sub-folder.
home_dir = '/proj/brizk/output/'
faces_dir = os.path.join(home_dir, 'retinaface')

# Take one (dataset, detections file name) pair from the generator.
faces_files = RetinafaceInferenceGenerator(faces_dir)
ds, filename = next(faces_files)

faces_dir, ds, filename

In [None]:
# Name of the detections file up to its first '.'.
video_name = filename.split(".")[0]

column_names = ['frame', 'confidence', 'left', 'top', 'right', 'bottom']

# The file has no header row; only its first six columns are loaded.
df = pd.read_csv(
    os.path.join(faces_dir, ds, filename),
    header=None, names=column_names, usecols=range(6)
)

# Enlarge every box by 10% of its ORIGINAL width/height on each side.
# The padding is computed once, before any edge moves: updating 'left' first
# and then deriving the 'right' padding from the already-shifted 'left' (and
# likewise for 'top'/'bottom') would grow the far edge by 11% instead of 10%.
pad_x = (df['right'] - df['left']) * 0.1
pad_y = (df['bottom'] - df['top']) * 0.1
df['left'] -= pad_x
df['right'] += pad_x
df['top'] -= pad_y
df['bottom'] += pad_y

df.head()

In [None]:
# Build the dataset loader and look up the video for the detections file
# selected earlier; the loader is indexed with a (dataset, video name) tuple.
videos_ds = DatasetLoader()
video = videos_ds[(ds, video_name)]

In [None]:
# Row of the detections table to visualise, and the frame number it refers to.
i = 1500
frame_num = df.loc[i, 'frame']

# Fetch that frame from the video object and report where the video is now.
frame_img = video[frame_num]
print(video.current_frame_num)

# The frame is converted BGR -> RGB before being wrapped in a PIL image.
frame_rgb = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB)
frame_raw = Image.fromarray(frame_rgb)
width, height = frame_raw.size

frame_raw

In [None]:
def _get_transform():
    """Build the preprocessing pipeline for images fed to the model.

    Returns
    -------
    transforms.Compose
        Resizes to ``input_resolution`` x ``input_resolution``, converts to a
        tensor, then normalises each channel with the fixed mean/std below.
    """
    return transforms.Compose([
        transforms.Resize((input_resolution, input_resolution)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

In [None]:
# set up data transformation
test_transforms = _get_transform()

head_box = np.array([df.loc[i,'left'], df.loc[i,'top'], df.loc[i,'right'], df.loc[i,'bottom']]).astype(np.int32)
head_img = frame_raw.crop((head_box)) # head crop
head = test_transforms(head_img) # transform inputs
frame = test_transforms(frame_raw)
head_channel = imutils.get_head_box_channel(head_box[0], head_box[1], head_box[2], head_box[3], width, height,
                                            resolution=input_resolution).unsqueeze(0)
head = head.unsqueeze(0).cuda()
frame = frame.unsqueeze(0).cuda()
head_channel = head_channel.unsqueeze(0).cuda()

print(head.shape)
head_img


In [None]:
model = ModelSpatial()

# Start from the model's own state dict and overwrite it with the entries
# stored under the checkpoint's 'model' key, then load the merged result.
pretrained_dict = torch.load(os.path.join(REPO_PATH, 'model_demo.pt'))['model']
model_dict = model.state_dict()
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)

# Move to the GPU and switch to evaluation mode (same as train(False)).
model.cuda()
model.eval()

In [None]:
with torch.no_grad():
    # Forward pass; the second output is not used in this notebook.
    raw_hm, _, inout = model(frame, head_channel, head)

# Heatmap as a numpy array, scaled by 255, with singleton dimensions removed.
raw_hm = (raw_hm.cpu().detach().numpy() * 255).squeeze()

# Pass inout through a sigmoid, then invert and scale it to 0..255 so that it
# can be subtracted from the heatmap.
inout = inout.cpu().detach().numpy()
inout = (1 - 1 / (1 + np.exp(-inout))) * 255

# Heatmap resized to the frame size, lowered everywhere by the inout term.
norm_map = imresize(raw_hm, (height, width)) - inout

In [None]:
plt.figure(dpi=100)
plt.imshow(frame_raw)
ax = plt.gca()

# Outline of the head box in green.
rect = patches.Rectangle(
    (head_box[0], head_box[1]),
    head_box[2] - head_box[0],
    head_box[3] - head_box[1],
    linewidth=2, edgecolor=(0, 1, 0), facecolor='none',
)
ax.add_patch(rect)

# Heatmap peak, normalised by the output resolution and scaled to the frame.
pred_x, pred_y = evaluation.argmax_pts(raw_hm)
norm_p = [pred_x/output_resolution, pred_y/output_resolution]
target_x = norm_p[0]*width
target_y = norm_p[1]*height

circ = patches.Circle((target_x, target_y), height/50.0,
                      facecolor=(0, 1, 0), edgecolor='none')
ax.add_patch(circ)

# Line from the predicted point to the centre of the head box.
head_cx = (head_box[0]+head_box[2])/2
head_cy = (head_box[1]+head_box[3])/2
plt.plot((target_x, head_cx), (target_y, head_cy), '-', color=(0, 1, 0, 1))

# Semi-transparent heatmap on top of the frame.
plt.imshow(norm_map, cmap='jet', alpha=0.2, vmin=0, vmax=255)


In [None]:
# Copy of the frame as an array so OpenCV can draw on it.
out_img = np.array(frame_raw)

# Head box in green, 4 px thick.
box_top_left = (head_box[0], head_box[1])
box_bottom_right = (head_box[2], head_box[3])
cv2.rectangle(out_img, box_top_left, box_bottom_right, (0, 255, 0), 4)

# Heatmap peak converted to integer pixel coordinates of the frame.
pred_x, pred_y = evaluation.argmax_pts(raw_hm)
norm_p = [pred_x/output_resolution, pred_y/output_resolution]
observation_coordinates = (int(norm_p[0]*width), int(norm_p[1]*height))
cv2.circle(out_img, observation_coordinates, int(height/50.0), (0, 255, 0), 4)

print('Observation is at', observation_coordinates)

display_img(out_img)

In [None]:
o_x = observation_coordinates[0]
o_y = observation_coordinates[1]
print((o_x, o_y))
print(norm_map.shape)

# norm_map has shape (height, width), i.e. it is indexed [row, col] == [y, x],
# so the neighbourhood around the observation is sliced with y first.
# The lower bounds are clamped at 0 because a negative slice start would wrap
# around to the other end of the array.
step = 1
snippet = norm_map[max(o_y - step, 0):o_y + step + 1,
                   max(o_x - step, 0):o_x + step + 1]
print(snippet.shape)
print(snippet)

# Negative values here are expected: norm_map is the resized heatmap MINUS the
# inout term ((1 - sigmoid(inout)) * 255, which is never negative), so any
# pixel whose heatmap value is smaller than that term ends up below zero.
# NOTE(review): the peak was located on raw_hm before resizing, so the pixel
# read here only approximately corresponds to it; confirm if exactness matters.
print(norm_map[o_y, o_x])


In [None]:
# Keep only the pixels whose map value reaches half of the 0..255 range.
thres = 255/2.0
mask = norm_map.copy()
print(mask[o_y, o_x])

mask[mask < thres] = 0
mask = mask/255.0

# Trailing axis of size 1 so the mask broadcasts over the colour channels.
mask = mask.reshape((mask.shape[0], mask.shape[1], 1))

# The product is a float array still in the 0..255 range. imshow interprets
# float RGB data as 0..1 and clips everything above 1, which renders the
# masked frame almost fully saturated. Casting back to uint8 keeps the values
# in the integer 0..255 range that imshow displays as intended.
masked_frame = (np.asarray(frame_raw) * mask).astype(np.uint8)
display_img(masked_frame)

In [None]:
# Shape of the overlay map; it was produced by resizing to (height, width).
norm_map.shape

In [None]:
import pickle

# Stored attention-target results; the path points at one specific video.
src_dir = '/proj/brizk/output/attentiontarget/MS/2016-09-16_IDMSSM28_BOSCC_vid.pkl'
with open(src_dir, 'rb') as f:
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    attention_target_obj = pickle.load(f)

# Take the coordinates and raw heatmap stored under entry 0.
first_entry = attention_target_obj[0]
observation_coordinates = first_entry['observation_coordinates']
raw_hm = first_entry['raw_hm']

# NOTE(review): inout is not read from the pickle; it still holds the value
# left over from the model's forward pass earlier. Confirm this is intended.
norm_map = imresize(raw_hm, (height, width)) - inout

In [None]:
# Fresh copy of the frame to draw the stored observation on.
out_img = np.array(frame_raw)

# Head box and observation point, both green and 4 px thick.
box_top_left = (head_box[0], head_box[1])
box_bottom_right = (head_box[2], head_box[3])
cv2.rectangle(out_img, box_top_left, box_bottom_right, (0, 255, 0), 4)
cv2.circle(out_img, observation_coordinates, int(height/50.0), (0, 255, 0), 4)

print('Observation is at', observation_coordinates)

display_img(out_img)

In [None]:

plt.figure(dpi=100)
plt.imshow(frame_raw)
ax = plt.gca()

# Outline of the head box in green.
rect = patches.Rectangle(
    (head_box[0], head_box[1]),
    head_box[2] - head_box[0],
    head_box[3] - head_box[1],
    linewidth=2, edgecolor=(0, 1, 0), facecolor='none',
)
ax.add_patch(rect)

# Filled dot at the stored observation point.
obs_x = observation_coordinates[0]
obs_y = observation_coordinates[1]
circ = patches.Circle((obs_x, obs_y), height/50.0,
                      facecolor=(0, 1, 0), edgecolor='none')
ax.add_patch(circ)

# Line from the observation point to the centre of the head box.
head_cx = (head_box[0]+head_box[2])/2
head_cy = (head_box[1]+head_box[3])/2
plt.plot((obs_x, head_cx), (obs_y, head_cy), '-', color=(0, 1, 0, 1))

# Semi-transparent heatmap on top of the frame.
plt.imshow(norm_map, cmap='jet', alpha=0.2, vmin=0, vmax=255)
