In [1]:
import torch
from torch.utils.data import DataLoader 
import torchvision
import torch.nn as nn
import pandas as pd
import numpy as np
from tqdm import tqdm
import cv2

from utils.util import torch_fix_seed, get_video_name_list
from dataset import GazeImageList
from preprocess import L2CSNet_ImageTransform
from networks import L2CSNet_networks

In [2]:
# Fix RNG seeds before any data loading or model work so reruns are repeatable.
# NOTE(review): which libraries get seeded (and with what value) is decided inside
# utils.util.torch_fix_seed, called here with its defaults — confirm there.
torch_fix_seed()

In [3]:
# Inputs for the gaze dataset: the label table, the list of videos to include,
# and the test-phase image preprocessing used for L2CS-Net.
labels_csv = "/mnt/iot-qnap3/mochida/medical-care/emotionestimation/data/labels/PIMD_A/emo_and_au(video1-25).csv"
video_names = get_video_name_list('/mnt/iot-qnap3/mochida/medical-care/emotionestimation/data/labels/PIMD_A/emo_and_au(video1-25)-video_name_list.csv')
test_transform = L2CSNet_ImageTransform(phase='test')

dataset = GazeImageList(labels_path=labels_csv, video_name_list=video_names, gaze_transform=test_transform)

# One image per step, in dataset order (no shuffling), so the extracted features
# stay aligned with the image paths collected later.
dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=2)

In [4]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [5]:
#* L2CSNet (gaze estimator)
#* Built from torchvision's Bottleneck block with the ResNet50 stage layout [3, 4, 6, 3];
#* the last argument (90) is passed through to L2CS as-is.
gaze_feat_extractor = L2CSNet_networks.L2CS(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 90)

#* map_location="cpu" lets a snapshot that was saved from a GPU be read on a CPU-only
#* host; the model itself is still on the CPU here and is moved to `device` afterwards.
#* NOTE: torch.load unpickles the file, so only load snapshots from a trusted source.
gaze_feat_extractor.load_state_dict(
    torch.load(
        '/mnt/iot-qnap3/mochida/medical-care/emotionestimation/data/params/L2CSNet-snapshots/Gaze360/L2CSNet_gaze360.pkl',
        map_location="cpu",
    )
)

<All keys matched successfully>

In [6]:
# The network is used for inference only: turn off gradients for every parameter
# and move the module to the selected device.
gaze_feat_extractor = gaze_feat_extractor.requires_grad_(False).to(device)

# Switch to evaluation mode. Kept as the last expression of the cell so the
# module structure is displayed as the cell output.
gaze_feat_extractor.eval()

L2CS(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1)

In [7]:
# Run the frozen gaze network over every image, collecting for each one:
#   - the intermediate feature vector (gaze_middle_features)
#   - the raw pitch / yaw logits, separately and concatenated as [pitch | yaw]
#   - the decoded pitch / yaw angles in degrees (gaze_list)
#   - the image path (img_path_list)
# A final placeholder row for an all-black image is appended after the loop.
softmax = nn.Softmax(dim=1)

# Bin indices 0..89 as float32, created directly on `device`. (The previous
# `.cuda(device)` call raised whenever `device` was the CPU fallback.)
idx_tensor = torch.arange(90, dtype=torch.float32, device=device)

gaze_middle_features = []
gaze_logits = []
pitch_logits = []
yaw_logits = []
gaze_list = []
img_path_list = []


def _as_rows(tensor):
    """Copy a tensor to host memory and return it as nested Python lists."""
    return tensor.detach().cpu().numpy().tolist()


# The context manager closes the progress bar even if the loop raises.
with torch.no_grad(), tqdm(total=len(dataloader)) as pbar:
    for imgs, img_paths, _ in dataloader:
        imgs = imgs.to(device)

        # Forward pass.
        # NOTE(review): the output order (yaw, pitch, mid_feat) is taken from this
        # loop's original unpacking — confirm against L2CS.forward.
        yaw, pitch, mid_feat = gaze_feat_extractor(imgs)
        gaze_middle_features += _as_rows(mid_feat)
        pitch_logits += _as_rows(pitch)
        yaw_logits += _as_rows(yaw)
        gaze_logits += _as_rows(torch.concat((pitch, yaw), dim=-1))

        # Decode the 90 class probabilities into a continuous angle in degrees:
        # expected bin index * 4 - 180.
        # NOTE: only sample 0 of the batch is decoded, which matches the
        # batch_size=1 DataLoader; with a larger batch gaze_list would end up
        # shorter than img_path_list.
        pitch_predicted = torch.sum(softmax(pitch)[0] * idx_tensor) * 4 - 180
        yaw_predicted = torch.sum(softmax(yaw)[0] * idx_tensor) * 4 - 180

        # Angles are stored in degrees; no conversion to radians is applied.
        pitch_predicted = pitch_predicted.cpu().numpy()
        yaw_predicted = yaw_predicted.cpu().numpy()

        # Save outputs.
        img_path_list += img_paths
        gaze_list.append([pitch_predicted, yaw_predicted])

        pbar.update(1)

    # Placeholder row: features/logits of an all-black image, with gaze fixed to (0, 0).
    path = '/mnt/iot-qnap3/mochida/medical-care/emotionestimation/data/interim/black.jpg'
    black_img = cv2.imread(path)
    if black_img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError(f"could not read placeholder image: {path}")
    # NOTE(review): cv2.imread yields a BGR ndarray — confirm this is the input
    # format L2CSNet_ImageTransform expects (the dataset loader is not visible here).
    black_img = L2CSNet_ImageTransform(phase='test')(black_img)
    black_img = black_img.to(device)

    # Unpack in the same order as the main loop so this row's pitch/yaw logits land
    # in the same columns as every other row (it was previously pitch, yaw, mid_feat).
    yaw, pitch, mid_feat = gaze_feat_extractor(black_img.unsqueeze(0))
    gaze_middle_features += _as_rows(mid_feat)
    pitch_logits += _as_rows(pitch)
    yaw_logits += _as_rows(yaw)
    gaze_logits += _as_rows(torch.concat((pitch, yaw), dim=-1))
    gaze_list.append([0, 0])
    img_path_list.append(path)

100%|██████████| 19931/19931 [08:34<00:00, 38.71it/s]


In [8]:
# Turn the collected per-image lists into DataFrames. Feature/logit columns are
# labelled with plain integer positions.
df_path = pd.DataFrame(img_path_list, columns=["img_path"])
df_gaze = pd.DataFrame(gaze_list, columns=["pitch", "yaw"])
df_mid_gaze = pd.DataFrame(gaze_middle_features, columns=list(range(2048)))
df_pitch = pd.DataFrame(pitch_logits, columns=list(range(90)))
df_yaw = pd.DataFrame(yaw_logits, columns=list(range(90)))
df_pitchyaw = pd.DataFrame(gaze_logits, columns=list(range(180)))


def _with_img_path(frame):
    """Return `frame` with the img_path column placed in front (column-wise concat)."""
    return pd.concat([df_path, frame], axis=1)


# Note: `gaze_list` is rebound here from the raw Python list to the final table.
gaze_list = _with_img_path(df_gaze)
gaze_mid_list = _with_img_path(df_mid_gaze)
gaze_pitch_list = _with_img_path(df_pitch)
gaze_yaw_list = _with_img_path(df_yaw)
gaze_pitchyaw_list = _with_img_path(df_pitchyaw)

In [9]:
# Decoded angles (degrees) go to CSV.
gaze_list.to_csv('/mnt/iot-qnap3/mochida/medical-care/emotionestimation/data/processed/PIMD_A/gaze(deg).csv')

# Intermediate-feature export is currently disabled:
# gaze_mid_list.to_pickle('/mnt/iot-qnap3/mochida/medical-care/emotionestimation/data/processed/PIMD_A/L2CSNet_feature.pkl')

# Logit tables are pickled, one file per table.
for logits_frame, pickle_path in (
    (gaze_pitch_list, '/mnt/iot-qnap3/mochida/medical-care/emotionestimation/data/processed/PIMD_A/L2CSNet_pitch_logits.pkl'),
    (gaze_yaw_list, '/mnt/iot-qnap3/mochida/medical-care/emotionestimation/data/processed/PIMD_A/L2CSNet_yaw_logits.pkl'),
    (gaze_pitchyaw_list, '/mnt/iot-qnap3/mochida/medical-care/emotionestimation/data/processed/PIMD_A/L2CSNet_pitchyaw_logits.pkl'),
):
    logits_frame.to_pickle(pickle_path)