In [1]:
import sys,os,time,cv2
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

import ops
from resnet_tsm import resnet18 as resnet
from utils import get_dtstr, emo2txt, imread_to_rgb, crop_img
import face_recognition
import moviepy.editor as mpe

# ---- data / output locations -------------------------------------------
# Root folder of the extracted episode frames; later cells read
# DB_PATH/<episode>/<5-digit frame index>.jpg from it.
# NOTE(review): absolute, machine-specific path -- adjust per environment.
DB_PATH = '/home/jhchoi/datasets2/friends/'
# Folder that receives the annotated frames and the rendered demo clips.
DEMO_PATH = '../../demo/'
# Number of frames collected in the frame buffer before inference starts.
f_b_size = 4

# ---- network -----------------------------------------------------------
# Build the ResNet-18 TSM model on the GPU, restore its trained weights and
# switch it to evaluation mode.
weights_file = ops.weight_path+'/resnet18_tsm_weights.tar'
net = resnet()
net = net.cuda()
# NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
ckpt = torch.load(weights_file)
net.load_state_dict(ckpt['model_state_dict'])
net.eval()

def clear_cache(vid_imgs):
    """Delete the temporary frame images listed in ``vid_imgs``.

    Args:
        vid_imgs: iterable of file paths to delete.

    Returns:
        None. Prints ``done`` once every path has been handled.

    Paths that no longer exist are skipped, so the cleanup can be re-run
    (e.g. after an interrupted or repeated cell execution) without raising.
    Any other failure of ``os.remove`` still propagates.
    """
    for f in vid_imgs:
        # lexists (not exists) so that dangling symlinks are still removed
        if os.path.lexists(f):
            os.remove(f)
    # call syntax is valid on both Python 2 and Python 3
    print('done')
    return


In [25]:
# Clip selection: episode folder, start time as (minutes, seconds) and
# clip length in seconds.
epi_sel = 'ep04'

f_start_ms = (4,26)
f_duration = 5

# Frame rate used to convert a time in seconds into a frame index.
SRC_FPS = 23.976

start_min, start_sec = f_start_ms
f_start_sec = start_min*60 + start_sec
f_end_sec = f_start_sec + f_duration

# Begin f_b_size frames early so the frame buffer is already filled when the
# requested start time is reached; f_end is exclusive.
f_start = int(f_start_sec*SRC_FPS) - f_b_size
f_end   = int(f_end_sec*SRC_FPS)

# Sliding-window emotion demo: walk over the frames [f_start, f_end), keep the
# most recent f_b_size frames in a buffer, detect faces on the newest frame,
# classify every face from its buffered crops and save the annotated frame.
vid_imgs = []   # paths of the annotated frames written so far, in frame order
f_buffer = []   # most recent frames, newest last

# Annotated frames are staged in DEMO_PATH/temp. cv2.imwrite does not create
# missing directories (it just reports failure), so make sure it exists.
temp_dir = os.path.join(DEMO_PATH, 'temp')
if not os.path.isdir(temp_dir):
    os.makedirs(temp_dir)

# loop-invariant drawing / cropping settings
bb_color = (1,1,0)   # images are scaled by 255 before saving, so presumably floats in [0,1]
font_size = 2
m = 0.5              # margin factor enlarging the square crop around a face box

for i, f_i in enumerate(range(f_start, f_end)):
    sys.stdout.write("\r"+str(i+1)+'/'+str(f_end-f_start))
    f_fname = str('%05d'%f_i)+'.jpg'

    # wait frame buffer: the first f_b_size frames only fill the buffer;
    # afterwards the oldest frame is dropped so it always holds f_b_size frames
    f_img = imread_to_rgb(os.path.join(DB_PATH, epi_sel, f_fname))
    f_buffer.append(f_img)
    if i < f_b_size:
        continue
    # pop(0) instead of list.remove(): remove() searches by equality, which
    # is not a meaningful operation on a list of image arrays
    f_buffer.pop(0)

    # detect faces on the newest frame
    faces_coor = np.array(face_recognition.face_locations((f_img*255).astype(np.uint8))).astype(int)
    if faces_coor.ndim < 2:
        # no face (empty detection list gives a 1-D array): skip this frame,
        # nothing is written for it
        continue
    num_faces = faces_coor.shape[0]
    # refine coordinates as (xmin,ymin,xmax,ymax); face_locations yields
    # (top, right, bottom, left) per face. Result has shape (4, num_faces).
    faces_coor = np.array([faces_coor[:,3], faces_coor[:,0], faces_coor[:,1], faces_coor[:,2]])
    # plain Python ints: some OpenCV builds reject numpy integer scalars as
    # point coordinates
    boxes = [tuple(int(v) for v in faces_coor[:,j]) for j in range(num_faces)]

    # draw bb for faces
    f_img_bb = f_img.copy()
    for (xmin, ymin, xmax, ymax) in boxes:
        f_img_bb = cv2.rectangle(f_img_bb, (xmin, ymin), (xmax, ymax), bb_color, 3)

    # extract faces and get emotions
    for (xmin, ymin, xmax, ymax) in boxes:
        # square crop centred on the face box, taken at the same location
        # from every buffered frame
        w = xmax - xmin
        h = ymax - ymin
        s = np.sqrt((w+(w+h)*m)*(h+(w+h)*m))

        f_crops = [crop_img(im, int(xmin+w*0.5), int(ymin+h*0.5), int(s), int(s), True) for im in f_buffer]
        f_batch = [cv2.resize(f_c, (224,224)) for f_c in f_crops]
        # net forward - get emotion
        # (T,224,224,C) -> (1,T,C,224,224)
        f_batch = torch.Tensor(np.array(f_batch)).cuda()
        f_batch = f_batch.unsqueeze(0).permute(0,1,4,2,3)
        with torch.no_grad():
            # average the network outputs of the last two time steps, then
            # take the highest-scoring class
            f_emo = net(f_batch)[0,-2:,:].mean(0).argmax().detach().cpu().numpy()
        # write text just below the bounding box
        f_emo_txt =  emo2txt(f_emo)
        f_img_bb = cv2.putText(f_img_bb, f_emo_txt,(xmin, ymax+font_size*12), cv2.FONT_HERSHEY_PLAIN, font_size, bb_color, 2)

    # save the annotated frame (RGB -> BGR, back to 8 bit) and remember its path
    imf_name = os.path.join(temp_dir, epi_sel+'_'+f_fname)
    if not cv2.imwrite(imf_name, (f_img_bb[:,:,[2,1,0]]*255).astype(np.uint8)):
        # imwrite signals failure via its return value; fail here rather
        # than leave a path to a non-existent file in vid_imgs
        raise IOError('could not write annotated frame: '+imf_name)
    vid_imgs.append(imf_name)
        

124/124

In [26]:
# Assemble the annotated frames into a clip (6 frames per second) and save it
# in DEMO_PATH as <episode>_<f_start>_<f_end>.mp4.
out_name = '_'.join([epi_sel, str(f_start), str(f_end)])+'.mp4'
out_path = os.path.join(DEMO_PATH, out_name)
vid_clip = mpe.ImageSequenceClip(vid_imgs, fps=6)
vid_clip.write_videofile(out_path)


t:   0%|          | 0/117 [00:00<?, ?it/s, now=None]

Moviepy - Building video ../../demo/ep04_6373_6497.mp4.
Moviepy - Writing video ../../demo/ep04_6373_6497.mp4



                                                              

Moviepy - Done !
Moviepy - video ready ../../demo/ep04_6373_6497.mp4
