# Video Actor Synchrony and Causality (VASC)
## RAEng: Measuring Responsive Caregiving Project
### Caspar Addyman, 2020
### https://github.com/infantlab/VASC

# Step 2: Reorganise the OpenPose JSON wire frame data

This script uses output from [OpenPose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) human figure recognition neural network to create labeled wireframes for each figure in each frame of a video. It uses the Python API version of OpenPose.

Points are labelled as follows:
```
COCO Output Format
Nose – 0, Neck – 1, Right Shoulder – 2, Right Elbow – 3, Right Wrist – 4,
Left Shoulder – 5, Left Elbow – 6, Left Wrist – 7, Right Hip – 8,
Right Knee – 9, Right Ankle – 10, Left Hip – 11, Left Knee – 12,
Left Ankle – 13, Right Eye – 14, Left Eye – 15, Right Ear – 16,
Left Ear – 17, Background – 18
```

The `write_json` flag saves the people pose data using a custom JSON writer. Each JSON file has a `people` array of objects, where each object has:

> An array pose_keypoints_2d containing the body part locations and detection confidence formatted as x1,y1,c1,x2,y2,c2,.... The coordinates x and y can be normalized to the range [0,1], [-1,1], [0, source size], [0, output size], etc., depending on the flag keypoint_scale (see flag for more information), while c is the confidence score in the range [0,1].

In [1]:
import os
import math
import glob
import json
import cv2  #computervision toolkit
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
# Number of body keypoints in the COCO layout described above.
nPoints = 18

# COCO Output Format: short keypoint names, listed in index order.
keypointsMapping = [
    'Nose', 'Neck',
    'R-Sho', 'R-Elb', 'R-Wr',
    'L-Sho', 'L-Elb', 'L-Wr',
    'R-Hip', 'R-Knee', 'R-Ank',
    'L-Hip', 'L-Knee', 'L-Ank',
    'R-Eye', 'L-Eye', 'R-Ear', 'L-Ear',
]

# Pairs of keypoint indices that are joined by a line when drawing a skeleton.
POSE_PAIRS = [
    [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7],
    [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13],
    [1, 0], [0, 14], [14, 16], [0, 15], [15, 17],
    [2, 17], [5, 16],
]

# index of pafs corresponding to the POSE_PAIRS
# e.g for POSE_PAIR(1,2), the PAFs are located at indices (31,32) of output, Similarly, (1,5) -> (39,40) and so on.
mapIdx = [
    [31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
    [19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
    [47, 48], [49, 50], [53, 54], [51, 52], [55, 56],
    [37, 38], [45, 46],
]

# Colour table, looked up by position when drawing.
pointcolors = [
    [0, 100, 255], [0, 100, 255], [0, 255, 255], [0, 100, 255], [0, 255, 255], [0, 100, 255],
    [0, 255, 0], [255, 200, 100], [255, 0, 255], [0, 255, 0], [255, 200, 100], [255, 0, 255],
    [0, 0, 255], [255, 0, 0], [200, 200, 0], [255, 0, 0], [200, 200, 0], [0, 0, 0],
]

#will colour each person 0:9 a different colour to help us keep track
personcolours = [
    [255, 0, 0], [0, 255, 0], [0, 0, 255],
    [0, 255, 255], [255, 0, 255], [255, 255, 0],
    [128, 255, 255], [255, 128, 255], [255, 255, 128],
    [0, 0, 0],
]

#useful to have the indices of the x & y coords and the confidence scores
#recall that we get them in the order [x0,y0,c0,x1,y1,c1,x2,etc]
# so each kind of value recurs every third slot, starting at offset 0, 1 or 2.
xs = list(range(0, 3 * nPoints, 3))
ys = list(range(1, 3 * nPoints, 3))
cs = list(range(2, 3 * nPoints, 3))

#### Where are the JSON files?

This routine needs to know where to find the processed videos and what their base names are. The base names are listed in the `processed.json` file we created.

In [3]:
# where's the project folder?

# Paths are assembled with os.path.join so that the platform's own separator
# is used and no separator is ever doubled. Joining a final "" keeps a
# trailing separator on the two folder names, so code that appends a file
# name directly to them keeps working.
jupwd = os.path.join(os.getcwd(), "")
projectpath = os.path.join(os.getcwd(), "..", "lookit", "")

# locations of videos and output
videos_in = projectpath
videos_out_openpose   = os.path.join(projectpath, "out", "openpose")
videos_out_timeseries = os.path.join(projectpath, "out", "timeseries")
videos_out_analyses   = os.path.join(projectpath, "out", "analyses")

# Echo the resolved locations so they can be checked by eye.
print(videos_in)
print(videos_out_openpose)
print(videos_out_timeseries)
print(videos_out_analyses)

C:\Users\cas\OneDrive - Goldsmiths College\Projects\Measuring Responsive Caregiving\VASC\..\lookit\
C:\Users\cas\OneDrive - Goldsmiths College\Projects\Measuring Responsive Caregiving\VASC\..\lookit\\out\openpose
C:\Users\cas\OneDrive - Goldsmiths College\Projects\Measuring Responsive Caregiving\VASC\..\lookit\\out\timeseries
C:\Users\cas\OneDrive - Goldsmiths College\Projects\Measuring Responsive Caregiving\VASC\..\lookit\\out\analyses


In [4]:
#retrieve the list of base names of processed videos.
# os.path.join supplies the right separator for the current platform instead
# of assuming a Windows backslash.
with open(os.path.join(videos_out_openpose, 'processed.json')) as json_file:
    processed = json.load(json_file)

print(processed)

['lookit.01', 'lookit.02', 'lookit.03', 'lookit.04', 'lookit.05', 'lookit.06', 'lookit.07', 'lookit.08', 'lookit.09', 'lookit.10']


### Extract all the numeric data from the json files

We loop through the list of names in `processed` and search for all json files associated with that name. We then extract all the coordinates and confidence scores for all identified people in each frame and store them in one big multidimensional padded array.

```
1st dimension - number of videos
2nd dimension - max number of frames
3rd dimension - max number of people
4th dimension - number of values (per person) output by openpose
```

For example, if we had the following videos 

```
video1 - 200 frames  - 3 people (max) 
video2 - 203 frames  - 2 people (max) 
video3 - 219 frames  - 4 people (max) 
```

then we'd create a `3 x 219 x 4 x 75` array.

In [None]:
for vidbasename in processed:
    #use glob to get all the individual json files for this video.
    # The pattern is built from the loop variable so each pass picks up the
    # files of the video being visited. glob returns matches in arbitrary
    # order, so they are sorted to keep the frames in sequence.
    alljson = sorted(glob.glob(os.path.join(videos_out_openpose, vidbasename + "*.json")))

In [11]:
#use glob to get all the individual json files.
# glob makes no promise about the order of its results, so sort them: the
# per-frame file names carry a zero-padded frame number (for example
# ..._000000000181_keypoints.json), which makes name order equal frame order.
alljson = sorted(glob.glob(os.path.join(videos_out_openpose, processed[0] + "*.json")))

In [12]:
# Display the search pattern that is handed to glob for the first video.
"\\".join([videos_out_openpose, processed[0] + "*.json"])

'C:\\Users\\cas\\OneDrive - Goldsmiths College\\Projects\\Measuring Responsive Caregiving\\VASC\\..\\lookit\\\\out\\openpose\\lookit.01*.json'

In [14]:
# Sizes of the store for one video.
nframes = len(alljson)  # one JSON file was found per frame
maxpeople = 10  # deliberately generous upper bound on people per frame
# Number of values openpose returns per person.
# NOTE(review): 75 = 25 x 3, which looks like (x, y, confidence) for 25 points
# rather than for the 18 points named in keypointsMapping - confirm which
# body model produced these files.
ncoords = 75

# Zero-filled containers: every value for every frame/person slot, plus a
# per-frame count of how many people were actually detected.
keypoints_list = np.zeros((nframes, maxpeople, ncoords))
npeople = np.zeros(nframes)
print("This video has {0} frames.".format(nframes))

This video has 162 frames.


In [15]:
# We will first combine all the data into one giant numpy array. Then we will use this for everything that comes after.
# such as normalising.. confirming that person 1 is same individual all the way through etc.

# The array only has room for a fixed number of people per frame; anyone
# beyond that is left out (with a message) instead of raising an IndexError.
capacity = keypoints_list.shape[1]

for i, frame in enumerate(alljson):
    with open(frame, "r") as read_file:
        data = json.load(read_file)

    people = data["people"]
    if len(people) > capacity:
        print("Frame {0}: {1} people detected, keeping the first {2}.".format(i, len(people), capacity))
        people = people[:capacity]

    for j, p in enumerate(people):
        keypoints = p["pose_keypoints_2d"]
        keypoints_list[i,j,:] = keypoints

    # Record how many people were stored for this frame, so the count can be
    # used safely as a loop bound over keypoints_list.
    npeople[i] = len(people)


In [16]:
# Largest number of people found in any single frame.
print(npeople.max())


4.0


In [None]:
#Let's pick a frame and draw the wireframe skeleton
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib

In [None]:
# Which frame, and which person within it, to inspect in the cells below.
# NOTE(review): 1400 is larger than the 162 frames reported for the video
# loaded above - confirm which video these indices are meant for.
thisframe, thisperson = 1400, 0

In [None]:
# Open the video, jump to the chosen frame and read it.
cap = cv2.VideoCapture("attachment_avoidant_AGRT6VjnTm8_360p.mp4")
try:
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1

    print(total_frames)

    framenumber = thisframe
    cap.set(cv2.CAP_PROP_POS_FRAMES,framenumber) # Where frame_no is the frame you want
    ret, frame = cap.read() # Read the frame
    #cv2.imwrite("frame%#05d.jpg" % (framenumber+1), frame)
finally:
    # When everything done, release the capture (also when something above fails)
    cap.release()
    cv2.destroyAllWindows()

# read() reports failure through its first return value; stop here with a
# clear message rather than passing an empty frame on to the plotting call.
if not ret:
    raise IOError("Could not read frame {0} from the video.".format(framenumber))

plt.figure(figsize=[14,10])
# OpenCV delivers BGR; convert to RGB for matplotlib.
plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

In [None]:
# Show the people count for the chosen frame, then every stored value for the
# chosen person in that frame, one item per line.
print(npeople[thisframe], keypoints_list[thisframe, thisperson, :], sep="\n")



In [17]:
def getkeypointcoord(keypointlist,index):
    """Return the (x, y) position of one keypoint as a tuple of ints.

    Args:
        keypointlist: flat sequence laid out as [x0, y0, c0, x1, y1, c1, ...].
        index: which keypoint to fetch.
    Returns:
        (x, y) with both values truncated by int(), ready for plotting.
    """
    # Each keypoint occupies three slots, so its x value sits at 3 * index.
    start = 3 * index
    return tuple(int(value) for value in keypointlist[start:start + 2])

def averagePoint(keypointList,indices):
    """Function to find the "centre of mass" for this person.
    It will take the average of the positive keypoint values; zeros are
    skipped because a value of 0 marks a point that was not located.
    Args:
        keypointList: 1d array of keypoints.
        indices: a set of indices to average over.
    Returns:
        Average of the positive values at `indices`, or 0.0 when there are
        none (so an empty person does not cause a division by zero).
    """
    # Read from the argument that was passed in, not from a notebook global.
    located = [keypointList[i] for i in indices if keypointList[i] > 0]
    if not located:
        return 0.0
    return sum(located) / len(located)

def diffKeypoints(keypoints1,keypoints2,indices):
    """Function to find how far apart one set of points is from another.
    This is useful for seeing if we have same person labelled correctly
    from one frame to next. If either point of a pair is out of frame
    (loc == 0) then we don't include that pair.
    Args:
        keypoints1: 1st array of keypoints.
        keypoints2: 2nd array of keypoints.
        indices: a set of indices to compare over.
    Returns:
        A list with one entry per index: keypoints1[i] - keypoints2[i] when
        both values are positive, otherwise None.
    """
    out = []
    for i in indices:
        first, second = keypoints1[i], keypoints2[i]
        # Apply the same "> 0" test to both values so the two inputs are
        # treated alike.
        if first > 0 and second > 0:
            out.append(first - second)
        else:
            out.append(None)
    return out

In [None]:
# Text settings for the person labels. They are set in this cell so that it
# can be run without first running a cell further down the notebook.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = .7

# Draw on a copy so the frame that was read from the video stays untouched.
frameClone = frame.copy()
for pers in range(int(npeople[thisframe])):
    keypoints = keypoints_list[thisframe,pers,:]

    # A small dot on every keypoint that was located (0,0 means "not found").
    for i in range(nPoints):
        coords = getkeypointcoord(keypoints,i)
        if sum(coords) > 0:
            cv2.circle(frameClone,coords, 2, [0,0,255], -1, cv2.LINE_AA)

    # A line for each limb whose two ends were both located. Pairing the limb
    # list with the colour list stops at the shorter of the two, so neither
    # list can be indexed past its end.
    for limb, limbcolour in zip(POSE_PAIRS, pointcolors):
        A = getkeypointcoord(keypoints,limb[0])
        B = getkeypointcoord(keypoints,limb[1])
        if sum(A) > 0 and sum(B) > 0:
            cv2.line(frameClone, (A[0], A[1]), (B[0], B[1]), limbcolour, 2, cv2.LINE_AA)

    # Average x, y and confidence for this person.
    avx = averagePoint(keypoints,xs)
    avy = averagePoint(keypoints,ys)
    print(avx)
    print(avy)
    print(averagePoint(keypoints,cs))

    # Mark the average position and write the person's index just above it.
    cgloc  = tuple((int(avx),int(avy)))
    txtloc = tuple((int(avx),int(avy) - 30))
    cv2.circle(frameClone,cgloc, 2, [0,0,0], -1, cv2.LINE_AA)
    cv2.putText(frameClone, str(pers), txtloc, font, fontScale, personcolours[pers])

plt.figure(figsize=[15,15])
# Reverse the channel order (BGR -> RGB) for matplotlib.
plt.imshow(frameClone[:,:,[2,1,0]])

In [None]:
# Wipe the current figure, then show the annotated frame in a fresh one.
plt.clf()

plt.figure(figsize=(15, 15))
# Put the three colour channels in reverse order (BGR -> RGB) before display.
rgb_view = frameClone[:, :, [2, 1, 0]]
plt.imshow(rgb_view)


In [None]:
# Display the number of people recorded for the chosen frame.
npeople[thisframe]

In [None]:
def averagePoint(keypointList,indices):

In [None]:


# Text-drawing settings for OpenCV.
font = cv2.FONT_HERSHEY_SIMPLEX  # typeface
fontScale = .7  # size multiplier
color = (255, 0, 0)  # blue, written in BGR order

# Show the annotated frame, channels reversed from BGR to RGB.
plt.figure(figsize=(15, 15))
plt.imshow(frameClone[:, :, [2, 1, 0]])

In [None]:
# Exploratory cell: load the keypoint file of a single frame and print each
# person's raw values.
with open("attachment_avoidant_AGRT6VjnTm8_360p_000000000181_keypoints.json", "r") as read_file:
    data = json.load(read_file)
    

# NOTE(review): the next lines rebind notebook-wide names. In particular
# keypoints_list stops referring to any array built in an earlier cell.
detected_keypoints = []
keypoints_list = np.zeros((1,18*3))
keypoint_id = 0
threshold = 0.1

for p in data["people"]:
    # Flat list of values for this person, read three at a time below.
    keypoints = p["pose_keypoints_2d"]        
    keypoint_id = 0
    keypoints_with_id = []
    # keypoints_list now simply points at this person's raw list.
    keypoints_list = keypoints
    for part in range(nPoints):
        # One {name: {x, y, conf}} entry per named keypoint.
        thisone =  {keypointsMapping[part] : {"x" : keypoints[keypoint_id], "y" : keypoints[keypoint_id + 1], "conf" : keypoints[keypoint_id + 2]}}
        keypoints_with_id.append(thisone)
        # Step to the next (x, y, conf) triple.
        keypoint_id += 3
    #print(keypoints_with_id)
    print(keypoints_list)



In [None]:
# Play the video in a window until it ends or 'q' is pressed.
cap = cv2.VideoCapture("attachment_avoidant_AGRT6VjnTm8_360p.mp4")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        # read() signals "no frame" (e.g. end of the video) through ret;
        # stop instead of handing an empty frame to imshow.
        if not ret:
            break
        cv2.imshow('frame',frame)
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # When everything done, release the capture (also if an error occurred)
    cap.release()
    cv2.destroyAllWindows()

In [None]:


# Open the video, jump to one frame and show it in a window.
# The capture is left open when this cell succeeds.
cap = cv2.VideoCapture("attachment_avoidant_AGRT6VjnTm8_360p.mp4")
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1

print(total_frames)

framenumber = 1000
cap.set(cv2.CAP_PROP_POS_FRAMES,framenumber) # Where frame_no is the frame you want
ret, frame = cap.read() # Read the frame
# read() reports failure through ret; stop with a clear message rather than
# passing an empty frame to imshow.
if not ret:
    cap.release()
    raise IOError("Could not read frame {0} from the video.".format(framenumber))
#cv2.imwrite("frame%#05d.jpg" % (framenumber+1), frame)

cv2.imshow('babies', frame)
# Wait for a key press before continuing.
cv2.waitKey(0)

In [None]:
# cv2.imshow('babies', frame)

# For every body part: scale its map to the frame size, extract keypoints from
# it, and give each keypoint a running id.
# NOTE(review): `output` and `getKeypoints` are not defined in the cells shown
# here - confirm where they come from before running this cell.
target_size = (frame.shape[1], frame.shape[0])  # (width, height)
for part in range(nPoints):
    probMap = cv2.resize(output[0,part,:,:], target_size)
    keypoints = getKeypoints(probMap, threshold)
    print("Keypoints - {} : {}".format(keypointsMapping[part], keypoints))

    keypoints_with_id = []
    for found in keypoints:
        keypoints_with_id.append(found + (keypoint_id,))
        keypoints_list = np.vstack([keypoints_list, found])
        keypoint_id += 1

    detected_keypoints.append(keypoints_with_id)

In [None]:
# Release the video capture and close any OpenCV windows that are open.
cap.release()
cv2.destroyAllWindows()

In [None]:
import cv2
import time
import os


In [None]:
# NOTE(review): video_to_frames is not defined or imported in the cells shown
# here - confirm where it comes from. Presumably it writes the frames of the
# video into the given folder; verify against its definition.
video_to_frames("attachment_avoidant_AGRT6VjnTm8_360p.mp4", "./frames/")