In [1]:
import sys,os,time,cv2
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

import ops
from resnet_tsm import resnet18 as resnet
from utils import get_dtstr, emo2txt, imread_to_rgb, crop_img
import face_recognition
import moviepy.editor as mpe

# --- model -------------------------------------------------------------
# Instantiate the network (resnet18 from resnet_tsm) on the GPU, then load
# the saved parameters stored under the checkpoint's 'model_state_dict' key.
net = resnet()
net = net.cuda()
weight_file = ops.weight_path+'/resnet18_tsm_weights.tar'
ckpt = torch.load(weight_file)
net.load_state_dict(ckpt['model_state_dict'])
net.eval()   # evaluation mode: the notebook only runs inference

# --- input -------------------------------------------------------------
# DB_PATH/<epi_sel>/ is the folder whose frame images are processed below.
DB_PATH = '/home/jhchoi/datasets2/friends/'
epi_sel = 'ep06'

# --- output ------------------------------------------------------------
# One .jsonl file per episode. The handle is intentionally left open here:
# the processing cell writes to it and closes it when it finishes.
OUT_PATH = ''.join(['../data/friends_s01_', epi_sel, '.jsonl'])
out_json = open(OUT_PATH, 'w+')


In [2]:
# Walk through the episode's frames, detect faces on the newest frame of each
# short frame window, classify every detected face with `net`, and append one
# JSON line per face to `out_json`.
num_f = len(os.listdir(os.path.join(DB_PATH, epi_sel)))

f_b_size = 4      # buffer size (number of consecutive frames fed to the net)
f_interval = 5    # frame interval (stride between processed frame indices)
f_fps = 24.       # divisor turning a frame index into seconds; assumes 24 fps footage -- TODO confirm
m = 0.5           # margin factor used when enlarging a face box into a square crop

# `out_json` is created by the setup cell and closed at the end of this cell.
# Reopen it (truncating, same mode as the setup cell) when it is already
# closed, so that re-running this cell alone does not fail on a closed file.
if out_json.closed:
    out_json = open(OUT_PATH, 'w+')

# The `with` block flushes and closes the file even when an iteration raises.
with out_json:
    for i in range(0, num_f, f_interval):
        sys.stdout.write("\r"+str(i)+'/'+str(num_f))

        # The window covers frames (i-f_b_size+1)..i. Skip it when it would
        # reach below frame 1 (assumes files are numbered from 00001.jpg --
        # TODO confirm). This covers i==0 as well as any setting where
        # f_interval < f_b_size.
        first_idx = i - f_b_size + 1
        if first_idx < 1:
            continue

        # new frame buffer: f_b_size consecutive frames ending at frame i
        f_buffer = [imread_to_rgb(os.path.join(DB_PATH, epi_sel, ('%05d'%k)+'.jpg'))
                    for k in range(first_idx, i+1)]

        # === process buffer
        # detect faces on the newest frame only; the image is scaled by 255 and
        # cast to uint8 for the detector (presumably imread_to_rgb returns
        # floats in [0,1] -- TODO confirm)
        faces_coor = np.array(face_recognition.face_locations((f_buffer[-1]*255).astype(np.uint8))).astype(int)
        if faces_coor.ndim < 2:
            # no face (an empty detection list becomes a 1-D array), continue
            continue
        num_faces = faces_coor.shape[0]
        # face_locations yields (top, right, bottom, left) per face; reorder to
        # rows (xmin,ymin,xmax,ymax), i.e. an array of shape (4, num_faces)
        faces_coor = np.array([faces_coor[:,3], faces_coor[:,0], faces_coor[:,1], faces_coor[:,2]])

        # extract faces and get emotions
        for j in range(num_faces):
            # box of face j, taken from the newest frame and reused for all
            # frames in the buffer
            coor = faces_coor[:,j]
            w = coor[2] - coor[0]
            h = coor[3] - coor[1]
            # side of the square crop: geometric mean of width and height
            # after each has been padded by m*(w+h)
            s = np.sqrt((w+(w+h)*m)*(h+(w+h)*m))

            # crop an s-by-s region centred on the box in every buffered frame,
            # then bring each crop to the 224x224 size passed to the net
            f_crops = [crop_img(im, int(coor[0]+w*0.5), int(coor[1]+h*0.5), int(s), int(s), True) for im in f_buffer]
            f_batch = [cv2.resize(f_c, (224,224)) for f_c in f_crops]
            # net forward - get emotion
            f_batch = torch.Tensor(np.array(f_batch)).cuda()
            # add a leading batch axis and move the last axis in front of the
            # two spatial axes (presumably frames x channels x H x W -- TODO confirm)
            f_batch = f_batch.unsqueeze(0).permute(0,1,4,2,3)
            with torch.no_grad():
                # average the last two entries along dim 1 of the first batch
                # item, then take the index of the largest score
                f_emo = net(f_batch)[0,-2:,:].mean(0).argmax().detach().cpu().numpy()
            # write text (format kept byte-for-byte for downstream readers)
            f_emo_txt =  emo2txt(f_emo)
            json_txt = str('{"type": "emotion", "class": "%s", "seconds": %.3f, "coordinates": [%d,%d,%d,%d], "object": "%d"}\n'%\
                           (f_emo_txt, float(i)/f_fps, coor[0],coor[1],coor[2],coor[3], j))
            out_json.write(json_txt)

29905/29907