# Video Actor Synchrony and Causality (VASC)
## RAEng: Measuring Responsive Caregiving Project
### Caspar Addyman, 2020
### https://github.com/infantlab/VASC

# Step 3: Analyse the data using SyncPy

This script uses output from a human figure recognition neural network to create labeled wireframes for each figure in each frame of a video. 
In this step we start with a clean numpy array of all [OpenPose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) data from all pairs of individuals. We then use [SyncPy](https://github.com/syncpy/SyncPy) for data analysis. SyncPy was developed by Giovanna Varni, Mohamed Chetouani and colleagues at the Institut des Systèmes Intelligents et de Robotique (ISIR) at the Université Pierre et Marie Curie (UPMC), Paris 6, France.  

A technical paper is found here: [Varni et al. (2015)](https://dl.acm.org/doi/10.1145/2823513.2823520)
 

**NOTE:**
At present (June 2020) the official version of SyncPy is written in Python2 format. Therefore, we have forked it and created a Python3 compatible version at https://github.com/InfantLab/SyncPy



In [None]:
import sys
import os
import json
import numpy as np       
import pandas as pd
import pyarrow.parquet as pq

import matplotlib.pyplot as plt
%matplotlib inline

import logging
import ipywidgets as widgets  #let's us add buttons and sliders to this page.
from ipycanvas import Canvas

import vasc #a module of our own functions (found in vasc.py in this folder)

# Turn on debugging: fetch the root logger and set its level to INFO.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# IPython magic: automatically enter the pdb debugger when a cell raises an uncaught exception.
%pdb on

In [None]:
# Folder this notebook is running from, with a trailing Windows path separator.
jupwd = f"{os.getcwd()}\\"

# Root folder of the project being analysed. Swap the folder name to switch
# project (for example "lookit" instead of "SpeakNSign").
projectpath = f"{jupwd}..\\SpeakNSign\\"

# Input videos sit in the project root; everything generated goes under "out"
# (this could instead point at an absolute location such as E:\SpeakNSign\out).
videos_in = projectpath
videos_out = f"{projectpath}out"
videos_out_openpose, videos_out_timeseries, videos_out_analyses = (
    f"{videos_out}\\{subfolder}"
    for subfolder in ("openpose", "timeseries", "analyses")
)

# Echo the resolved output folders, one per line, as a quick sanity check.
print(videos_out_openpose, videos_out_timeseries, videos_out_analyses, sep="\n")

### 3.1 Load the clean data as a DataFrame

Reload the clean data file created in step 2. 

In [None]:
# Retrieve the list of base names of processed videos from clean.json.
clean_json = videos_out + '\\clean.json'
try:
    with open(clean_json) as json_file:
        videos = json.load(json_file)
except FileNotFoundError:
    # Only a genuinely missing file is reported as "not found". `videos` is
    # still defined (empty) so later cells that loop over it do not hit a NameError.
    videos = []
    print("File clean.json not found.")
except json.JSONDecodeError as err:
    # The file exists but is not valid JSON: say so instead of claiming it is missing.
    videos = []
    print("File clean.json could not be parsed: " + str(err))
else:
    print("Existing clean.json found..")

In [None]:
# Load the cleaned time series from its parquet file into a pandas DataFrame.
print('reading parquet file:')

# Read, convert and order the columns by label in one chain; sorted column
# names help with indexing later on.
df = (
    pq.read_table(videos_out_timeseries + '\\cleandata.parquet')
    .to_pandas()
    .sort_index(axis="columns")
)
print(df.head())

Next we set all 0 values to the missing value `np.nan` to enable interpolation.
Then we use pandas' built-in `interpolate` method. 

In [None]:
# Range of row labels (frames) to analyse: a subset rather than all the data.
first = 501
last = 8500

# Mark zeros as missing (NaN) so they can be interpolated later, then keep
# only the rows whose index label lies between `first` and `last`.
df = df.replace(0.0, np.nan).truncate(before=first, after=last)

In [None]:
# Fill the NaN gaps (the former zeros) using pandas' DataFrame.interpolate with its default settings.
# NOTE(review): with the defaults, NaNs before a column's first valid value are presumably left unfilled — confirm.
df = df.interpolate()

In [None]:
# Display the (rows, columns) dimensions of the DataFrame.
df.shape

In [None]:
# Preview the first rows of the DataFrame.
print(df.head())

We create a dictionary of the subsets of OpenPose coordinates we want to average and then call `mean` on the Pandas dataframe. e.g.

```
meanpoints = {
               "headx" : [0, 3, 45, 48, 51, 54],
               "heady" : [1, 4, 46, 49, 52, 55],
               "allx" :  [0, 3, 6, 9, ...],
               "ally" :  [1, 4, 7, 10, ...]
             }
```

Then we call the `vasc.averageCoordinateTimeSeries` function to average across sets of coordinates for a given set of videos and people.

In:
```
videos = "All"
people = "Both"
df2 = vasc.averageCoordinateTimeSeries(df,meanpoints,videos,people)
df2.head()
```

Out:
```
person      infant                                          parent   
avgs         headx       heady          xs          ys       headx   
501     565.996600  369.840600  534.895615  398.482538  471.686200   
502     567.231800  369.887600  534.354198  398.706552  471.849400   
503     567.228600  370.159600  534.444328  398.678133  471.711600   
504     566.912600  369.857000  535.369536  398.551636  472.309400
...            ...         ...         ...         ...         ...
```


In [None]:
# Sets of coordinates to average, keyed by the name of the resulting averaged
# series. The coordinate lists themselves come from the vasc module.
meanpoints = {
    "headx": vasc.headx,
    "heady": vasc.heady,
    "xs": vasc.xs,
    "ys": vasc.ys,
}

vids = "All"
people = ["infant", "parent"]

# Average each coordinate set for the chosen videos and people.
df2 = vasc.averageCoordinateTimeSeries(df, meanpoints, vids, people)

# head must be *called*: a bare `df2.head` only references the bound method
# rather than returning the first rows.
df2.head()

### 3.2 Data analysis

We need to let Jupyter know where to find SyncPy and then import the functions we need.


In [None]:
# Make the local SyncPy checkout importable by placing its Methods folder and
# its source folder, in that order, at the very front of the module search path.
sys.path[:0] = ['..\\SyncPy\\src\\Methods', '..\\SyncPy\\src\\']

In [None]:
""" Import wanted module with every parent packages """
import Methods.DataFrom2Persons.Univariate.Continuous.Linear.Correlation as Correlation

""" Import Utils modules """
from Methods.utils import Standardize
from Methods.utils import ResampleAndInterpolate
from Methods.utils.ExtractSignal import ExtractSignalFromCSV

In [None]:
# Plot the input signals.
# x-axis values: one entry per frame label from `first` to `last` inclusive.
n = np.arange(first, last + 1)

# Two stacked panels sharing the frame axis: infant on top, parent underneath.
plt.ion()
f, axarr = plt.subplots(2, sharex=True)
infant_ax, parent_ax = axarr
infant_ax.set_title('Infant')
parent_ax.set_title('Parent')
parent_ax.set_xlabel('Frames')   # only the bottom panel is given an x-axis label

# Which video and which pair of participants to show.
vid = 'SS003'
part = ["infant", "parent"]

# A single column could be picked with e.g. df2[(vid, part[0], 'headx')];
# selecting several columns at once needs .loc with a tuple of labels in the
# last position of the column key.
head_columns = ('headx', 'heady')
infant = df2.loc[:, (vid, part[0], head_columns)]
parent = df2.loc[:, (vid, part[1], head_columns)]

infant_ax.plot(n, infant, label="i")
parent_ax.plot(n, parent, label="p", color='b')
for _panel in (infant_ax, parent_ax):
    _panel.legend(loc='best')

plt.show()
print(type(infant))

In [None]:
""" Define class attributes of the wanted method """

# Parameters passed to SyncPy's Correlation method.
tau_max = 999                       # the maximum lag at which correlation should be computed (in samples)
plot = True                         # plot of the correlation function
standardization = True              # standardization of the time series to mean 0 and variance 1
corr_tau_max = True                 # return of the maximum of correlation and its lag
corr_coeff = True                   # computation of the correlation coefficient (Pearson's version)
scale = True                        # scale factor to have correlation in [-1,1]

# Instantiate the class with its attributes.
print("\n")

try:
    c = Correlation.Correlation(tau_max, plot, standardization, corr_tau_max, corr_coeff, scale)
except TypeError as err:
    print("TypeError in Correlation constructor : \n" + str(err))
    sys.exit(-1)
except ValueError as err:
    print("ValueError in Correlation constructor : \n" + str(err))
    sys.exit(-1)
except Exception as e:
    print("Exception in Correlation constructor : \n" + str(e))
    sys.exit(-1)

print("An instance the class is now created with the following parameters:\n" +
      "tau max = " + str(tau_max) + "\n" +
      "plot = " + str(plot) + "\n" +
      "standardization= " + str(standardization) + "\n" +
      "corr_tau_max = " + str(corr_tau_max) + "\n" +
      "corr_coeff =" + str(corr_coeff) + "\n" +
      "scale =" + str(scale))

# Compute the method and get the result.
print("\n")
print("Computing...")

try:
    # Correlation is a two-person (DataFrom2Persons) measure, so both signals
    # must be supplied. The original passed only `infant`, leaving nothing to
    # correlate against.
    res = c.compute([infant, parent])
except TypeError as err:
    print("TypeError in Correlation computation : \n" + str(err))
    sys.exit(-1)
except ValueError as err:
    print("ValueError in Correlation computation : \n" + str(err))
    sys.exit(-1)
except Exception as e:
    print("Exception in Correlation computation : \n" + str(e))
    sys.exit(-1)

# Display the result.
print("\n")
print("**************************************** \n")
print('Correlation complete result :')
print("****************************************\n")
print("Correlation function array:")
print(res['corr_funct'])
print("Maximum value of the correlation %f and lag (in samples) %d:" %(res['max_corr'],res['t_max']))
print("Pearson's correlation coefficient %f:" %(res['corr_coeff']))

In [None]:
# Loop through the videos and calculate the correlation for each infant-parent pair.
for vid in videos:
    # Averaged head x/y series for each member of this video's pair.
    infant = df2.loc[:, (vid, 'infant', ('headx', 'heady'))]
    parent = df2.loc[:, (vid, 'parent', ('headx', 'heady'))]
    try:
        res = c.compute([infant, parent])
    except TypeError as err:
        print("TypeError in Correlation computation : \n" + str(err))
        sys.exit(-1)
    except ValueError as err:
        print("ValueError in Correlation computation : \n" + str(err))
        sys.exit(-1)
    except Exception as e:
        print("Exception in Correlation computation : \n" + str(e))
        sys.exit(-1)

    # Report each video's results once (the original printed them twice).
    print(vid)
    print("Correlation function array:")
    print(res['corr_funct'])
    print("Maximum value of the correlation %f and lag (in samples) %d:" %(res['max_corr'],res['t_max']))
    print("Pearson's correlation coefficient %f:" %(res['corr_coeff']))



In [None]:

https://towardsdatascience.com/granger-causality-and-vector-auto-regressive-model-for-time-series-forecasting-3226a64889a6

https://www.machinelearningplus.com/time-series/time-series-analysis-python/