# PoTion Representation Extractor 

This notebook computes the PoTion [1] representation of a given video from its estimated pose heatmaps. The steps are outlined throughout the notebook.

#### References:
[1] *PoTion: Pose MoTion Representation for Action Recognition*, Choutas et al.


In [None]:
# Temporal window: number of frames aggregated into one PoTion image.
T = 8

# Repository checkout.
root_dir = '/usr/local/data01/zahra/repos/VideoFeatExtratotor'

# All dataset inputs and outputs live under one Tempuckey root.
_dataset_dir = '/usr/local/data02/zahra/datasets/Tempuckey'
videos_dir = _dataset_dir + '/videos'
frames_dir = _dataset_dir + '/frames'
op_dir = _dataset_dir + '/feats/openpose'
# PoTion outputs are stored per window size so different T values do not collide.
potion_dir = '{}/feats/potion/win_{}'.format(_dataset_dir, T)
potion_resnet_dir = '{}/feats/potion/win_{}_resnet'.format(_dataset_dir, T)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import pandas as pd
import json

from numpy import newaxis

from utils.sys_utils import *
from utils.video_utils import *

In [None]:
# Paths returned by get_files_path for the OpenPose output directory.
vids = get_files_path(op_dir)
# A video name is the first three '_'-separated tokens of the file name.
vidnames = {'_'.join(path.split('/')[-1].split('_')[:3]) for path in vids}
# need to remove video_FACEOFF_000523 from the completed videos because it may not have completed all the frames (it was the last video to be processed so it may have been incomplete)
# print(list(vids)[:3])
# print(list(vidnames)[:3])
# print(len(completed_vids))

In [None]:
file_path = '/usr/local/data02/zahra/datasets/Tempuckey/labels/tempuckey_video_info_gt_labels_split.csv'
labels_df = pd.read_csv(file_path)
# Select the rows of the test split and keep each video name up to its first '.'.
is_test_row = labels_df['split'] == 'test'
test_videos = {name.split('.')[0] for name in labels_df[is_test_row].video_name}

In [None]:
# Test videos for which OpenPose output files were found.
test_vids_with_pose = {name for name in test_videos if name in vidnames}
if len(test_vids_with_pose) == len(test_videos):
    print('all test videos have pose')

# 1. Load heatmaps for all the frames in a video 

- ### <span style="color:green"> **2.1**</span> obtain joint heatmaps 
  - returns $H_j^t[x,y]$
  - in our BODY\_25 model, each heatmap image 1, 2, ..., 25 corresponds to one joint type and contains that joint's detections for every person in the frame

In [None]:
def heatmap_files_by_vname(op_dir):
    """Group the heatmap files listed for ``op_dir`` by video name.

    Only paths containing ``'_heatmaps_'`` are kept. The video name is the
    first three ``'_'``-separated tokens of the file name.

    Returns:
        dict mapping video name -> list of heatmap file paths, in the order
        ``get_files_path`` returned them.
    """
    hm_by_vname = {}
    for path in get_files_path(op_dir):
        if '_heatmaps_' not in path:
            continue
        video = '_'.join(path.split('/')[-1].split('_')[:3])
        hm_by_vname.setdefault(video, []).append(path)

    return hm_by_vname

In [None]:
def create_colorized_heatmaps(potion_dir, vname, beg_f, end_f, heatmaps_t, win = 8):
    """Colorize ``heatmaps_t`` in consecutive windows of ``win`` frames and save each window.

    Args:
        potion_dir: directory the colorized heatmaps are written to.
        vname: video name, used as the file-name prefix.
        beg_f, end_f: frame-range labels of the source heatmap file; they are
            only copied into the output file name.
        heatmaps_t: array whose first axis indexes frames.
        win: window size T (number of frames per colorized heatmap).

    Returns:
        Number of colorized heatmap files generated and saved.
    """
    T = win
    L = heatmaps_t.shape[0]

    count = 0
    for beg_idx in range(0, L, T):
        # Clamp the end so that a trailing partial window is labelled with the
        # frames it really contains instead of an index past the last frame.
        end_idx = min(beg_idx + T, L)
        colorized_heatmap_t = get_colorized_heatmaps(heatmaps_t[beg_idx:end_idx], T)
        # np.save appends '.npy' to this path.
        output_path = '{}/{}_colorized_heatmaps_T_25_w_h_3rgb_{}_{}_{}_{}'.format(potion_dir, vname, beg_f, end_f, str(beg_idx), str(end_idx-1))
        np.save(output_path, colorized_heatmap_t)
        print('stored colorized heatmap at {}\n'.format(output_path))
        count += 1

    return count # number of colorized heatmaps generated and saved

## >> Extract Heatmaps for every frame in the videos 

In [None]:
# Heatmap files grouped by video name, and the test videos to process.
hm_by_vname = heatmap_files_by_vname(op_dir)
videos = [*test_vids_with_pose]

In [None]:
# For every video, load each of its heatmap files and store the colorized
# heatmaps window by window.
for v in videos:
    print('processing video {}'.format(v))
    # A video can have OpenPose output files without any '_heatmaps_' file,
    # in which case it is absent from hm_by_vname; skip it instead of
    # stopping the whole run with a KeyError.
    vid_hm_files = hm_by_vname.get(v, [])
    if not vid_hm_files:
        print('no heatmap files found for video {}, skipping'.format(v))
        continue
    for path_ in vid_hm_files:
        print('loading {}'.format(path_))
        try:
            heatmaps_t = np.load(path_) #, allow_pickle = True)
        except (ValueError, EOFError, OSError) as e:
            # unreadable or truncated file: report it and carry on
            print('could not load video {} heatmap from \n{} '.format(v, path_))
            print(e)
            continue

        # The frame-range labels are tokens 8 and 9 of the file name
        # (extension removed, split on '_').
        name_tokens = path_.split('/')[-1].split('.')[0].split('_')
        beg_f, end_f = name_tokens[8], name_tokens[9]
        count = create_colorized_heatmaps(potion_dir, v, beg_f, end_f, heatmaps_t, win = T)
        print('generated {} colorized heatmaps'.format(count))

        print('**************************\n')
    print('completed processing video {}\n\n-------------------------'.format(v))

# 3. Obtain PoTion representation of the original pose keypoints


- ### <span style="color:green"> **3.1**</span> obtain colorization 
 - returns a unique colorization scheme o(t) for a given number of channels c = \[2,3,..\] 

 - for c = 2 we have $o(t) = (\frac{t-1}{T-1}, 1-\frac{t-1}{T-1})$
 - for c = 3 we have
   - $t<\frac{T}{2}$:     $o(t) = (1 - \frac{t-1}{\frac{T}{2}-1} , \frac{t-1}{\frac{T}{2}-1} , 0)$
   - $t\geq \frac{T}{2}$: $o(t) = (0 , 1 - \frac{t - \frac{T}{2}}{\frac{T}{2}-1}, \frac{t-\frac{T}{2}}{\frac{T}{2}-1})$


- ### <span style="color:green"> **3.2**</span> obtain colorized heatmaps
 - returns colorized heatmaps of joint $j$ at pixel $x$ and $y$ at time $t$ using $C_j^t[x,y,c] = H_j^t[x,y] o(t)$

- ### <span style="color:green"> **3.3**</span> Aggregated Colorized Heatmaps 
 - #### <span style="color:green"> **3.3.1**</span> Sum of heatmaps for each joint j
   $$\mathcal{S}_j = \sum_{t=1}^T C_j^t $$
 - #### <span style="color:green"> **3.3.2**</span> PoTion representation
   $$\mathcal{U}_j[x,y,c] = \frac{\mathcal{S}_j[x,y,c]}{\max_{x',y'} \mathcal{S}_j[x',y',c]} $$
 - #### <span style="color:green"> **3.3.3**</span> Intensity image
   $$\mathcal{I}_j[x,y] = \sum_{c=1}^C \mathcal{U}_j[x,y,c]$$
 - #### <span style="color:green"> **3.3.4**</span> normalized PoTion representation
   $$\mathcal{N}_j[x,y,c] = \frac{\mathcal{U}_j[x,y,c]}{\epsilon + \mathcal{I}_j[x,y]}$$

## <span style="color:green"> **3.1**</span> Obtain Colorization scheme

In [None]:
def get_colorization(T,C = 3):
    """Return the colorization weights o(t) for t = 1 .. T-1.

    Args:
        T: total number of frames + 1. An odd T is rounded up to the next
            even number; all that matters is obtaining unique colours.
        C: number of colour channels, 2 or 3.

    Returns:
        Tuple ``(red, green, blue)`` of three lists with T-1 floats each
        (after rounding T up to even). For ``C == 2`` blue is all zeros.
        For ``C == 3`` the weights go from pure red at t = 1 through pure
        green at t = T/2 to pure blue at t = T-1.

    Raises:
        ValueError: if ``C`` is neither 2 nor 3.
    """
    if C not in (2, 3):
        # Previously this fell through to an UnboundLocalError on r, g, b.
        raise ValueError('C must be 2 or 3, got {}'.format(C))

    if T%2 != 0:
        T += 1

    half = T/2

    red = []
    blue = []
    green = []

    for t in range(1, T):
        if C == 2:
            frac = (t-1)/(T-1)
            r, g, b = frac, 1 - frac, 0.
        elif t <= half:
            # first half: fade red -> green. With T == 2 there is a single
            # step and half-1 is 0, so keep it pure red instead of dividing by 0.
            frac = (t-1) / (half-1) if half > 1 else 0.
            r, g, b = 1 - frac, frac, 0.
        else:
            # second half: fade green -> blue
            frac = (t - half) / (half-1)
            r, g, b = 0., 1 - frac, frac

        red.append(r)
        blue.append(b)
        green.append(g)

    return red,green,blue

In [None]:
ww = 32
red,green,blue = get_colorization(ww+2, C = 3) # T+2 to  ensure we get more than or equal unique colors for each frame 

import matplotlib.pyplot as plt
%matplotlib inline

plt.plot(red, color = 'r')
plt.plot(green, color = 'g')
plt.plot(blue, color = 'b')
plt.show()


## <span style="color:green"> **3.2**</span> Obtain Colorized Heatmaps

In [None]:
def get_colorized_heatmaps(poseHeatMaps_t, T):
    """Colorize the joint heatmaps of up to ``T`` frames: C_j^t = H_j^t * o(t).

    Args:
        poseHeatMaps_t: array indexed as ``[frame][joint]``; each entry is
            used as a 2-D single-channel heatmap. Only joints 0..24 are read.
        T: window size. At most ``T`` frames are colorized, and the colour
            scheme is generated for ``T + 2`` so every frame gets its own colour.

    Returns:
        np.ndarray of shape (frames, 25, rows, cols, 3), where frames is
        ``min(T, number of input frames)``.
    """
    print(poseHeatMaps_t.shape)

    red,green,blue = get_colorization(T + 2, C = 3)

    num_frames = poseHeatMaps_t.shape[0]

    C_t = []
    for t in range(min(T,num_frames)):
        o_t = np.array([red[t],green[t],blue[t]]) # colour of frame t
        C_j = []
        for j in range(25):
            H = np.array(poseHeatMaps_t[t][j])
            value_range = H.max()-H.min()
            # A constant heatmap (e.g. all zeros) has no range; leave it
            # unscaled rather than dividing by zero, which would put NaN/inf
            # into every sum computed from it.
            if value_range != 0:
                H = H/value_range
            # turn the single-channel image H into an RGB image using the colour of time t
            C_j.append(H[:,:, newaxis] * o_t)
        C_t.append(C_j)

    return np.array(C_t) # (T, 25, w , h, 3_rgb) => ex. (61, 25, 368, 656, 3)

In [None]:
# openpose joint index
# //     {0,  "Nose"},
# //     {1,  "Neck"},
# //     {2,  "RShoulder"},
# //     {3,  "RElbow"},
# //     {4,  "RWrist"},
# //     {5,  "LShoulder"},
# //     {6,  "LElbow"},
# //     {7,  "LWrist"},
# //     {8,  "MidHip"},
# //     {9,  "RHip"},
# //     {10, "RKnee"},
# //     {11, "RAnkle"},
# //     {12, "LHip"},
# //     {13, "LKnee"},
# //     {14, "LAnkle"},
# //     {15, "REye"},
# //     {16, "LEye"},
# //     {17, "REar"},
# //     {18, "LEar"},
# //     {19, "LBigToe"},
# //     {20, "LSmallToe"},
# //     {21, "LHeel"},
# //     {22, "RBigToe"},
# //     {23, "RSmallToe"},
# //     {24, "RHeel"},
# //     {25, "Background"}

In [None]:
def colorized_heatmaps_files_by_vname(potion_dir):
    """Group the colorized-heatmap files listed for ``potion_dir`` by video name.

    Only paths containing ``'_colorized_heatmaps_'`` are kept. The video name
    is the first three ``'_'``-separated tokens of the file name.

    Returns:
        dict mapping video name -> list of colorized-heatmap file paths.
    """
    colorized = [p for p in get_files_path(potion_dir) if '_colorized_heatmaps_' in p]
    hm_by_vname = {}
    for p in colorized:
        base_name = p.split('/')[-1]
        video = '_'.join(base_name.split('_')[:3])
        try:
            hm_by_vname[video].append(p)
        except KeyError:
            hm_by_vname[video] = [p]

    return hm_by_vname

## 3.3 Aggregated Colorized Heatmaps to Obtain PoTion

#### <span style="color:green"> **3.3.1**</span> Sum of heatmaps for each joint j
   $$\mathcal{S}_j = \sum_{t=1}^T C_j^t $$


#### <span style="color:green"> **3.3.2**</span> PoTion representation
   $$\mathcal{U}_j[x,y,c] = \frac{\mathcal{S}_j[x,y,c]}{\max_{x',y'} \mathcal{S}_j[x',y',c]} $$

#### <span style="color:green"> **3.3.3**</span> Intensity image
   $$\mathcal{I}_j[x,y] = \sum_{c=1}^C \mathcal{U}_j[x,y,c]$$


#### <span style="color:green"> **3.3.4**</span> normalized PoTion representation
   $$\mathcal{N}_j[x,y,c] = \frac{\mathcal{U}_j[x,y,c]}{\epsilon + \mathcal{I}_j[x,y]}$$

In [None]:
def get_normalized_PoTion(colorized_heatmap_t, vid_name, potion_dir, beg_f, end_f, beg_minor_f, end_minor_f):
    """Compute and save the normalized PoTion image of each of the 25 joints.

    Steps per joint j:
        3.3.1 sum the colorized heatmaps over time   -> S_j
        3.3.2 divide each channel by its spatial max -> U_j
        3.3.3 sum U_j over the channels              -> I_j
        3.3.4 N_j = U_j / (epsilon + I_j)

    Args:
        colorized_heatmap_t: array indexed as (time, joint, row, col, channel)
            with 3 colour channels.
        vid_name: video name, used as the file-name prefix.
        potion_dir: directory the PoTion images are written to.
        beg_f, end_f, beg_minor_f, end_minor_f: labels that are only copied
            into the output file names.

    Returns:
        Number of PoTion images saved (one per joint).
    """
    n_channels = 3
    epsilon = 1
    count = 0

    for j in range(25):
        # colorized heatmaps of joint j for every time step
        Cj_t = colorized_heatmap_t[:,j,:,:,:]

        ## 3.3.1 aggregate the colorized heatmap over T
        S_j = Cj_t.sum(axis=0)

        ## 3.3.2 potion: normalize each channel by its maximum
        U_j = np.zeros(list(S_j.shape))
        for c in range(n_channels):
            channel_max = S_j[:,:,c].max()
            # A channel can be entirely zero (e.g. a window too short to
            # reach the blue weights). Leave it at zero instead of computing
            # 0/0, which would make the saved image NaN.
            if channel_max != 0:
                U_j[:,:,c] = S_j[:,:,c] / channel_max

        ## 3.3.3 intensity image: sum over the channels
        I_j = np.sum(U_j, axis = 2)

        ## 3.3.4 normalized PoTion representation
        N_j = U_j/(epsilon+I_j[:,:,newaxis])

        # np.save appends '.npy', so the file ends in '.PoTion.npy'
        output_path ='{}/{}_normalized_PoTion_joint_{}_{}_{}_{}_{}.PoTion'.format(potion_dir, vid_name, j, beg_f, end_f, beg_minor_f, end_minor_f)
        print('saving norm PoTion {}'.format(output_path))
        np.save(output_path, np.array(N_j))
        count += 1
    return count

## >> Extract PoTion images with window = T for all the 25 joints (one per joint over T)

In [None]:
# Map each video name to the paths of its colorized-heatmap files found under potion_dir.
ch_files_by_vname = colorized_heatmaps_files_by_vname(potion_dir)

In [None]:
# Videos for which at least one colorized-heatmap file exists. This name was
# used by the loop below without being defined anywhere in the notebook.
videos_with_colorized_heatmap = sorted(ch_files_by_vname)

for v in videos_with_colorized_heatmap:
    print('processing video {}'.format(v))
    vid_ch_files = ch_files_by_vname[v]
    for path_ in vid_ch_files:
        print('loading {}'.format(path_))
        try:
            ColorizedHeatmaps_t = np.load(path_) #, allow_pickle = True)
        except (ValueError, EOFError, OSError) as e:
            # unreadable or truncated file: report it and carry on
            print('could not load video {} from \n{} '.format(v, path_))
            print(e)
            continue

        # create_colorized_heatmaps names its files
        #   <vname>_colorized_heatmaps_T_25_w_h_3rgb_<beg_f>_<end_f>_<beg_idx>_<end_idx>
        # so the four frame fields are the LAST four '_' tokens. The fixed
        # indices 9..12 used before started at the literal '3rgb' token and
        # shifted every field by one.
        name_tokens = path_.split('/')[-1].split('.')[0].split('_')
        beg_f, end_f, beg_minor_f, end_minor_f = name_tokens[-4:]

        count = get_normalized_PoTion(ColorizedHeatmaps_t, v, potion_dir, beg_f, end_f, beg_minor_f, end_minor_f)
        print('generated {} PoTion images'.format(count))
        print('**************************\n')
    print('completed processing video {}\n\n-------------------------'.format(v))

## Extract Resnet features from PoTion Image

In [None]:
def potion_images_by_vname(files_lst):
    """Group file paths by video name, after sorting the paths.

    The video name is the first three ``'_'``-separated tokens of the file
    name (the part of the path after the last ``'/'``).

    Args:
        files_lst: iterable of file paths.

    Returns:
        dict mapping video name -> list of its paths in sorted order.
    """
    potion_by_vname = {}
    for path in sorted(files_lst):
        base_name = path.split('/')[-1]
        video = '_'.join(base_name.split('_')[:3])
        potion_by_vname.setdefault(video, []).append(path)

    return potion_by_vname

In [None]:
## fixing frame number in previously generated filenames 
# cnt = 0
# for files_by_vid in potion_imgs_joint4_by_video.values():
#     for v in files_by_vid:
#         f_num_padded = v.split('/')[-1].split('_')[-1].split('.')[0].zfill(4)
#         path_ = '/'.join(v.split('/')[:-1])
#         name_ = '_'.join(v.split('/')[-1].split('_')[:-1])
#         new_fname = '{}/{}_{}.PoTion.npy'.format(path_, name_, f_num_padded)
#         print('mv {} {}'.format(v, new_fname))
#         cnt+=1 

In [None]:
from PIL import Image
from torchvision import transforms
import torch

potion_imgs_all_videos = get_files_path(potion_dir)
# Keep the joint-4 files whose second-to-last '.'-separated part is 'PoTion'.
potion_imgs_all_videos_joint4 = [
    path for path in potion_imgs_all_videos
    if 'joint_4' in path and path.split('.')[-2] == 'PoTion'
]

In [None]:
# Group the joint-4 PoTion files by video name so that the ResNet features can be
# extracted one video at a time.
potion_imgs_joint4_by_video = potion_images_by_vname(potion_imgs_all_videos_joint4)

In [None]:
def get_feat_vector(model, img):
    """Run ``img`` through ``model`` and return the softmax of its output.

    The image is resized to 256, centre-cropped to 224, converted to a tensor
    and normalized per channel before being fed to the model as a batch of one.

    Args:
        model: network to evaluate; it is switched to eval mode.
        img: image accepted by the torchvision transforms (the notebook passes
            a PIL image).

    Returns:
        1-D tensor: softmax over the model output for the single input image.
    """
    model.eval()

    steps = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225]),
    ]
    pipeline = transforms.Compose(steps)

    # add the batch dimension expected by the model
    batch = pipeline(img).unsqueeze(0)

    with torch.no_grad():
        scores = model(batch)

    first_item = scores[0]
    return torch.nn.functional.softmax(first_item, dim=0)


In [None]:
# Pretrained ResNet-152 used to turn every PoTion image into a vector.
model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet152', pretrained=True)

# One output file per video: the stacked vectors of its joint-4 PoTion images.
for k,potion_imgs in potion_imgs_joint4_by_video.items():
    feat_vecs = []

    for path_ in potion_imgs:
        with open(path_,'rb') as f:
            img = np.load(f)

        # Min-max scale to [0, 1] before the uint8 conversion. Subtracting the
        # minimum is a no-op when it is 0 and otherwise keeps the result from
        # exceeding 1 (which would overflow uint8). A constant image has no
        # range, so it becomes a black image instead of dividing by zero.
        lowest, highest = img.min(), img.max()
        if highest != lowest:
            img = (img - lowest) / (highest - lowest)
        else:
            img = np.zeros(img.shape)
        img = Image.fromarray(np.uint8(img*255))

        feat_vec = get_feat_vector(model, img)
        feat_vecs.append(feat_vec)

    feat_vecs = np.array([t.numpy() for t in feat_vecs])
    output_path = '{}/{}_PoTion_joint_4_resnet_feats.npy'.format(potion_resnet_dir, k)
    print('{}\n shape: {}'.format(output_path, feat_vecs.shape))
    np.save(output_path, feat_vecs)

In [None]:
# for a single video
# feat_vecs = []

# model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet152', pretrained=True)
    
# for path_ in potion_imgs:
#     with open(path_,'rb') as f:
#         img = np.load(f)
#     img = img / (img.max() - img.min()) 
#     img = Image.fromarray(np.uint8(img*255))
    
#     feat_vec = get_feat_vector(model, img)
#     feat_vecs.append(feat_vec)

# feat_vecs = np.array([t.numpy() for t in feat_vecs])

# output_path = '{}/video_FACEOFF_000478_PoTion_joint_4_resnet_feats.npy'.format(potion_dir)
# np.save(output_path, feat_vecs)