In [3]:
import os
import torch 
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
 
from torchvision import datasets
from torchvision.utils import save_image
import numpy as np
from torch.utils.data import Dataset, DataLoader
import pickle
import numpy as np
import pandas as pd
from ast import literal_eval
import glob
from PIL import Image, ImageDraw
import time
import cv2


In [4]:
class LSTM_posetrack(nn.Module):
    def __init__(self, args):

        super(LSTM_posetrack, self).__init__()
         
        self.pose_encoder = nn.LSTM(input_size=28, hidden_size=args.hidden_size)
        self.vel_encoder = nn.LSTM(input_size=28, hidden_size=args.hidden_size)
        
        self.pose_embedding = nn.Sequential(nn.Linear(in_features=args.hidden_size, out_features=28),
                                           nn.ReLU())
        
        self.vel_decoder = nn.LSTMCell(input_size=28, hidden_size=args.hidden_size)
        
        self.fc_vel    = nn.Linear(in_features=args.hidden_size, out_features=28)
        
        self.hardtanh = nn.Hardtanh(min_val=-1*args.hardtanh_limit,max_val=args.hardtanh_limit)
        self.relu = nn.LeakyReLU() 
        self.softmax = nn.Softmax(dim=1)
        
        self.mask_encoder = nn.LSTM(input_size=14, hidden_size=args.hidden_size)
        self.mask_decoder = nn.LSTMCell(input_size=14, hidden_size=args.hidden_size)
        self.fc_mask    = nn.Linear(in_features=args.hidden_size, out_features=14)
        
        self.sigmoid = nn.Sigmoid()
        
        self.args = args
        
    def forward(self, pose=None, vel=None, mask=None):


        _, (hidden_vel, cell_vel) = self.vel_encoder(vel.permute(1,0,2))
        hidden_vel = hidden_vel.squeeze(0)
        cell_vel = cell_vel.squeeze(0)


        _, (hidden_pose, cell_pose) = self.pose_encoder(pose.permute(1,0,2))
        hidden_pose = hidden_pose.squeeze(0)
        cell_pose = cell_pose.squeeze(0)

        
        _, (hidden_dec2, cell_dec2) = self.mask_encoder(mask.permute(1,0,2))
        hidden_dec2 = hidden_dec2.squeeze(0)
        cell_dec2 = cell_dec2.squeeze(0)
        
        outputs = []
        vel_outputs    = torch.tensor([], device=self.args.device)
        mask_outputs    = torch.tensor([], device=self.args.device)
        
        VelDec_inp = vel[:,-1,:]
        MaskDec_inp = mask[:,-1,:]
        
        hidden_dec = hidden_pose + hidden_vel
        cell_dec = cell_pose + cell_vel
        for i in range(self.args.output//self.args.skip):
            hidden_dec, cell_dec = self.vel_decoder(VelDec_inp, (hidden_dec, cell_dec))
            vel_output  = self.hardtanh(self.fc_vel(hidden_dec))
            vel_outputs = torch.cat((vel_outputs, vel_output.unsqueeze(1)), dim = 1)
            VelDec_inp  = vel_output.detach()
            
            hidden_dec2, cell_dec2 = self.mask_decoder(MaskDec_inp, (hidden_dec2, cell_dec2))
            mask_output  = self.sigmoid(self.fc_mask(hidden_dec2))
            mask_outputs = torch.cat((mask_outputs, mask_output.unsqueeze(1)), dim = 1)
            MaskDec_inp  = mask_output.detach()
                    
            
        outputs.append(vel_outputs)    
        outputs.append(mask_outputs) 
        return tuple(outputs)



In [13]:
class args():
    def __init__(self):
        self.jaad_dataset = '/data/smailait-data/JAAD/processed_annotations' #folder containing parsed jaad annotations (used when first time loading data)
        self.dtype        = 'train'
        self.from_file    = False #read dataset from csv file or reprocess data
        self.save         = True
        self.file         = '/data/smailait-data/jaad_train_16_16.csv'
        self.save_path    = '/data/smailait-data/jaad_train_16_16.csv'
        self.model_path    = '/data/smailait-data/models/multitask_pv_lstm_trained.pkl'
        self.loader_workers = 1
        self.loader_shuffle = False
        self.pin_memory     = False
        self.image_resize   = [240, 426]
        self.device         = 'cuda'
        self.batch_size     = 50
        self.n_epochs       = 1000
        self.hidden_size    = 1000
        self.hardtanh_limit = 100
        self.input  = 16
        self.output = 14
        self.stride = 16
        self.skip   = 1
        # self.task   = 'bounding_box-intention'
        self.task   = 'pose'
        self.use_scenes = False       
        self.lr = 0.01
        
args = args()

mse = nn.MSELoss()

l1e = nn.L1Loss()
bce = nn.BCELoss()
train_s_scores = []
train_pose_scores=[]
val_pose_scores=[]
train_c_scores = []
val_s_scores   = []
val_c_scores   = []
net = LSTM_posetrack(args).to(args.device)



In [20]:
class myDataset_posetrack(torch.utils.data.Dataset):
    def __init__(self, args, dtype,fname):
        
        self.args = args
        self.dtype = dtype
        self.fname=fname
        print("Loading",self.dtype)
        sequence_centric = pd.read_csv(self.fname+self.dtype+".csv")
        df = sequence_centric.copy()      
        for v in list(df.columns.values):
            print(v+' loaded')
            try:
                df.loc[:,v] = df.loc[:, v].apply(lambda x: literal_eval(x))
            except:
                continue
        sequence_centric[df.columns] = df[df.columns]
        self.data = sequence_centric.copy().reset_index(drop=True)
    
        print('*'*30)
        

    def __len__(self):
        return len(self.data)
    
    
    def __getitem__(self, index):

        seq = self.data.iloc[index]
        outputs = []

        obs = torch.tensor([seq.Pose[i] for i in range(0,self.args.input,self.args.skip)])
        obs_speed = (obs[1:] - obs[:-1])
        outputs.append(obs_speed)
        true = torch.tensor([seq.Future_Pose[i] for i in range(0,self.args.output,self.args.skip)])
        true_speed = torch.cat(((true[0]-obs[-1]).unsqueeze(0), true[1:]-true[:-1]))
        outputs.append(true_speed)
        outputs.append(obs)
        outputs.append(true)
        
        if "posetrack" in self.fname:
            obs_mask = torch.tensor([seq.Mask[i] for i in range(0,self.args.output,self.args.skip)])
            true_mask = torch.tensor([seq.Future_Mask[i] for i in range(0,self.args.output,self.args.skip)])

            outputs.append(obs_mask)
            outputs.append(true_mask)
        
        return tuple(outputs)    
    
    
def data_loader_posetrack(args,data,fname):
    dataset = myDataset_posetrack(args,data,fname)
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=args.batch_size, shuffle=args.loader_shuffle,
        pin_memory=args.pin_memory)

    return dataloader

train_loader=data_loader_posetrack(args,"train",'processed_csvs/posetrack_')
val_loader=data_loader_posetrack(args,"valid" ,'processed_csvs/posetrack_')

Loading train
Pose loaded
Future_Pose loaded
Mask loaded
Future_Mask loaded
******************************
Loading valid
Pose loaded
Future_Pose loaded
Mask loaded
Future_Mask loaded
******************************


In [22]:
def ADE_c(pred, true):
    b,n,p=pred.size()[0],pred.size()[1],pred.size()[2]
#     print(b,n,p)
    pred = torch.reshape(pred, (b,n,int(p/2),2))
    true = torch.reshape(true, (b,n,int(p/2),2))
    
    displacement=torch.sqrt((pred[:,:,:,0]-true[:,:,:,0])**2+(pred[:,:,:,1]-true[:,:,:,1])**2)
    ade = torch.mean(torch.mean(displacement,dim=1))

    return ade


def FDE_c(pred, true):
    b,n,p=pred.size()[0],pred.size()[1],pred.size()[2]
#     print(b,n,p)
    pred = torch.reshape(pred, (b,n,int(p/2),2))
    true = torch.reshape(true, (b,n,int(p/2),2))
    
    displacement=torch.sqrt((pred[:,-1,:,0]-true[:,-1,:,0])**2+(pred[:,-1,:,1]-true[:,-1,:,1])**2)

    fde = torch.mean(torch.mean(displacement,dim=1))
    
    return fde

def speed2pos(preds, obs_p):
    pred_pos = torch.zeros(preds.shape[0], preds.shape[1], preds.shape[2]).to('cuda')
    current = obs_p[:,-1,:]
    
    for i in range(preds.shape[1]):
        pred_pos[:,i,:] = current + preds[:,i,:]
        current = pred_pos[:,i,:]
        
    for i in range(preds.shape[2]):
        pred_pos[:,:,i] = torch.min(pred_pos[:,:,i], 1920*torch.ones(pred_pos.shape[0], pred_pos.shape[1], device='cuda'))
        pred_pos[:,:,i] = torch.max(pred_pos[:,:,i], torch.zeros(pred_pos.shape[0], pred_pos.shape[1], device='cuda'))
        
    return pred_pos

In [None]:
net = LSTM_posetrack(args).to(args.device)
optimizer = optim.Adam(net.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=10, 
                                                 threshold = 1e-8, verbose=True)
print('='*100)
print('Training ...')

for epoch in range(200):
    start = time.time()
    
    avg_epoch_train_s_loss = 0
    avg_epoch_val_s_loss   = 0
    avg_epoch_train_p_loss   = 0
    avg_epoch_val_p_loss     = 0 
    
    ade  = 0
    fde  = 0
    ade_train  = 0
    fde_train  = 0
    counter = 0
    
    for idx, (obs_s, target_s, obs_pose, target_pose, obs_mask, target_mask) in enumerate(train_loader):
        counter += 1        
        
        obs_s    = obs_s.to(device='cuda')
        target_s = target_s.to(device='cuda')
        obs_pose    = obs_pose.to(device='cuda')
        target_pose = target_pose.to(device='cuda')
        obs_mask    = obs_mask.to(device='cuda')
        target_mask = target_mask.to(device='cuda')
        
#         print(obs_s.shape)
        
        net.zero_grad()
    
        (speed_preds,mask_preds) = net(pose=obs_pose, vel=obs_s,mask=obs_mask)

        speed_loss  = l1e(speed_preds, target_s)
        mask_loss  = bce(mask_preds, target_mask)
    
        preds_p = speed2pos(speed_preds, obs_pose) 
        ade_train += float(ADE_c(preds_p, target_pose))
        fde_train += float(FDE_c(preds_p, target_pose))
        
        loss= 0.7*speed_loss+0.3*mask_loss
        loss.backward()

        optimizer.step()
        
    
        avg_epoch_train_s_loss += float(speed_loss)
        avg_epoch_train_p_loss += float(mask_loss)

    avg_epoch_train_s_loss /= counter
    avg_epoch_train_p_loss /= counter
    train_s_scores.append(avg_epoch_train_s_loss)
    ade_train  /= counter
    fde_train  /= counter    

    counter=0

    for idx, (obs_s, target_s, obs_pose, target_pose, obs_mask, target_mask) in enumerate(val_loader):
        counter+=1
        obs_s    = obs_s.to(device='cuda')
        target_s = target_s.to(device='cuda')
        obs_pose    = obs_pose.to(device='cuda')
        target_pose = target_pose.to(device='cuda')
        obs_mask    = obs_mask.to(device='cuda')
        target_mask = target_mask.to(device='cuda')
        
        with torch.no_grad():
            (speed_preds,mask_preds) = net(pose=obs_pose,vel=obs_s,mask=obs_mask)

            speed_loss  = l1e(speed_preds, target_s)
            mask_loss  = bce(mask_preds, target_mask)
            
            loss= 0.7*speed_loss+0.3*mask_loss
            avg_epoch_val_s_loss += float(speed_loss)
            avg_epoch_val_p_loss += float(mask_loss)
        
            preds_p = speed2pos(speed_preds, obs_pose)
            ade += float(ADE_c(preds_p, target_pose))
            fde += float(FDE_c(preds_p, target_pose))

        
    avg_epoch_val_s_loss /= counter
    avg_epoch_val_p_loss /= counter
    val_s_scores.append(avg_epoch_val_s_loss)
    
    ade  /= counter
    fde  /= counter     
   
    scheduler.step(avg_epoch_val_s_loss)
    
     
    print('e:', epoch, '| ts: %.2f'% avg_epoch_train_s_loss,  '| vs: %.2f'% avg_epoch_val_s_loss,'| tm: %.2f'% avg_epoch_train_p_loss,  '| vm: %.2f'% avg_epoch_val_p_loss, '| ade_train: %.2f'% ade_train, '| ade_val: %.2f'% ade, '| fde_train: %.2f'% fde_train,'| fde_val: %.2f'% fde,
          '| t:%.2f'%(time.time()-start))


print('='*100) 
# print('Saving ...')
# torch.save(net.state_dict(), args.model_path)
print('Done !')

Training ...
e: 0 | ts: 9.78 | vs: 11.84 | tm: 0.67 | vm: 0.47 | ade_train: 76.90 | ade_val: 90.18 | fde_train: 128.28 | fde_val: 150.90 | t:3.47
e: 1 | ts: 9.68 | vs: 11.87 | tm: 0.45 | vm: 0.43 | ade_train: 75.34 | ade_val: 90.49 | fde_train: 126.30 | fde_val: 152.07 | t:3.32
e: 2 | ts: 9.65 | vs: 11.82 | tm: 0.41 | vm: 0.43 | ade_train: 74.49 | ade_val: 89.99 | fde_train: 125.16 | fde_val: 151.50 | t:3.32
e: 3 | ts: 9.63 | vs: 11.84 | tm: 0.39 | vm: 0.37 | ade_train: 74.17 | ade_val: 89.93 | fde_train: 124.74 | fde_val: 151.52 | t:3.33
e: 4 | ts: 9.60 | vs: 11.80 | tm: 0.33 | vm: 0.33 | ade_train: 73.67 | ade_val: 89.05 | fde_train: 123.91 | fde_val: 149.93 | t:3.33
e: 5 | ts: 9.58 | vs: 11.81 | tm: 0.31 | vm: 0.33 | ade_train: 73.10 | ade_val: 89.36 | fde_train: 123.00 | fde_val: 150.51 | t:3.36
e: 6 | ts: 9.56 | vs: 11.80 | tm: 0.30 | vm: 0.31 | ade_train: 72.85 | ade_val: 89.50 | fde_train: 122.50 | fde_val: 150.81 | t:3.37
e: 7 | ts: 9.54 | vs: 11.79 | tm: 0.28 | vm: 0.32 | ade_

e: 60 | ts: 8.59 | vs: 11.79 | tm: 0.12 | vm: 0.29 | ade_train: 60.00 | ade_val: 86.87 | fde_train: 100.49 | fde_val: 148.08 | t:3.90
e: 61 | ts: 8.55 | vs: 11.81 | tm: 0.12 | vm: 0.30 | ade_train: 59.66 | ade_val: 86.67 | fde_train: 99.81 | fde_val: 147.81 | t:3.72
e: 62 | ts: 8.53 | vs: 11.81 | tm: 0.12 | vm: 0.30 | ade_train: 59.42 | ade_val: 86.75 | fde_train: 99.40 | fde_val: 148.00 | t:3.67
e: 63 | ts: 8.51 | vs: 11.81 | tm: 0.12 | vm: 0.30 | ade_train: 59.24 | ade_val: 86.98 | fde_train: 99.05 | fde_val: 148.38 | t:3.71
e: 64 | ts: 8.50 | vs: 11.81 | tm: 0.12 | vm: 0.31 | ade_train: 59.14 | ade_val: 86.94 | fde_train: 98.88 | fde_val: 148.27 | t:3.65
e: 65 | ts: 8.49 | vs: 11.83 | tm: 0.11 | vm: 0.31 | ade_train: 59.04 | ade_val: 87.05 | fde_train: 98.68 | fde_val: 148.67 | t:3.70
e: 66 | ts: 8.48 | vs: 11.84 | tm: 0.11 | vm: 0.31 | ade_train: 58.98 | ade_val: 86.91 | fde_train: 98.66 | fde_val: 148.14 | t:3.70
e: 67 | ts: 8.46 | vs: 11.84 | tm: 0.11 | vm: 0.31 | ade_train: 58.8

e: 120 | ts: 8.24 | vs: 11.93 | tm: 0.10 | vm: 0.34 | ade_train: 57.28 | ade_val: 87.56 | fde_train: 95.58 | fde_val: 149.71 | t:3.88
e: 121 | ts: 8.24 | vs: 11.93 | tm: 0.10 | vm: 0.35 | ade_train: 57.27 | ade_val: 87.57 | fde_train: 95.58 | fde_val: 149.72 | t:3.87
e: 122 | ts: 8.24 | vs: 11.93 | tm: 0.10 | vm: 0.35 | ade_train: 57.27 | ade_val: 87.58 | fde_train: 95.57 | fde_val: 149.74 | t:4.04
e: 123 | ts: 8.24 | vs: 11.93 | tm: 0.10 | vm: 0.35 | ade_train: 57.27 | ade_val: 87.58 | fde_train: 95.56 | fde_val: 149.74 | t:3.93
e: 124 | ts: 8.24 | vs: 11.93 | tm: 0.10 | vm: 0.35 | ade_train: 57.26 | ade_val: 87.59 | fde_train: 95.56 | fde_val: 149.75 | t:3.88
Epoch   126: reducing learning rate of group 0 to 1.9531e-05.
e: 125 | ts: 8.24 | vs: 11.94 | tm: 0.10 | vm: 0.35 | ade_train: 57.26 | ade_val: 87.58 | fde_train: 95.55 | fde_val: 149.76 | t:3.95


In [None]:
import json

with open("posetrack_test_in.json", "r") as read_file:
    data = json.load(read_file)

with open("posetrack_test_masks_in.json", "r") as read_file:
    data_m = json.load(read_file)
    
out_data=[]
out_mask=[]
for i in range(len(data)):
    lp=[]
    lm=[]
    for j in range(len(data[i])):
        pose=torch.tensor(data[i][j]).unsqueeze(0).to(args.device)
        mask=torch.tensor(data_m[i][j]).unsqueeze(0).to(args.device)
        vel= pose[:,1:] - pose[:,:-1]
#         print(vel.shape)
#         break
        (speed_preds,mask_preds) = net(pose=pose,vel=vel,mask=mask)
        
        preds_p = speed2pos(speed_preds, pose)
        pred=preds_p.squeeze(0)
        mask_pred=mask_preds.squeeze(0)
        lp.append(pred.tolist())
        lm.append(mask_pred.detach().cpu().numpy().round().tolist())
#         print(pred.shape)
#         break
    out_data.append(lp)
    out_mask.append(lm)
#     break
with open('posetrack_test_masks_out.json', 'w') as f:
    json.dump(out_mask, f)
with open('posetrack_test_out.json', 'w') as f:
    json.dump(out_data, f)