# Processing action units 

In [15]:
import pandas as pd
import pickle

## Loading the file 

In [16]:
# Absolute location of the pickled temporary action-unit data.
# NOTE(review): this path is machine-specific; the notebook only runs where the file exists.
filepath = "/Users/dionnespaltman/Desktop/V3/action_units_temp.pkl"

# Read the pickled dataset into `action_units` with pandas
action_units = pd.read_pickle(filepath)

In [3]:
# List the column labels of the loaded data
print(action_units.columns)

Index(['Frame', 'Face_id', 'Confidence', 'Success', ' AU01_r', ' AU02_r',
       ' AU04_r', ' AU05_r', ' AU06_r', ' AU07_r', ' AU09_r', ' AU10_r',
       ' AU12_r', ' AU14_r', ' AU15_r', ' AU17_r', ' AU20_r', ' AU23_r',
       ' AU25_r', ' AU26_r', ' AU45_r', ' AU01_c', ' AU02_c', ' AU04_c',
       ' AU05_c', ' AU06_c', ' AU07_c', ' AU09_c', ' AU10_c', ' AU12_c',
       ' AU14_c', ' AU15_c', ' AU17_c', ' AU20_c', ' AU23_c', ' AU25_c',
       ' AU26_c', ' AU28_c', ' AU45_c', 'ID', 'Timeframe'],
      dtype='object')


In [4]:
# Preview the first five rows
display(action_units.head(5))

Unnamed: 0,Frame,Face_id,Confidence,Success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,...,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,ID,Timeframe
103,104,0.0,0.88,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
104,105,0.0,0.98,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
111,112,0.0,0.98,1.0,0.31,0.59,0.0,0.0,0.31,0.31,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
114,115,0.0,0.98,1.0,0.0,0.12,0.0,0.0,0.32,0.06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
153,154,0.0,0.98,1.0,1.26,1.67,0.0,0.0,0.39,0.26,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"


The temporary data contains 184 unique IDs (IDs from 147 to 287 are missing). 

In [5]:
# Number of distinct values in the ID column
print(len(action_units['ID'].unique()))

184


In [13]:
# Tally how many rows carry each distinct value of the "Timeframe" column
timeframe_counts = action_units['Timeframe'].value_counts()

# Show the tally
print(timeframe_counts)

[4, 5, 6]       2118735
[3, 4, 5, 6]     285556
[7]              269554
[1]              163222
[3]              134364
[2]               78159
[4]               68762
[4, 5]            40338
[4, 6]            15713
Name: Timeframe, dtype: int64


## Missing values 

In [8]:
# Per-column count of missing entries in the raw data
nan_counts_au_df = action_units.isnull().sum()
print(nan_counts_au_df)

Frame              0
Face_id            0
Confidence         0
Success            0
 AU01_r       243394
 AU02_r       243394
 AU04_r       243394
 AU05_r       243394
 AU06_r       243394
 AU07_r       243394
 AU09_r       243394
 AU10_r       243394
 AU12_r       243394
 AU14_r       243394
 AU15_r       243394
 AU17_r       243394
 AU20_r       243394
 AU23_r       243394
 AU25_r       243394
 AU26_r       243394
 AU45_r       243394
 AU01_c       243394
 AU02_c       243394
 AU04_c       243394
 AU05_c       243394
 AU06_c       243394
 AU07_c       243394
 AU09_c       243394
 AU10_c       243394
 AU12_c       243394
 AU14_c       243394
 AU15_c       243394
 AU17_c       243394
 AU20_c       243394
 AU23_c       243394
 AU25_c       243394
 AU26_c       243394
 AU28_c       243394
 AU45_c       243394
ID                 0
Timeframe          0
dtype: int64


## Dealing with missing values - MICE

Link: https://www.machinelearningplus.com/machine-learning/mice-imputation/?utm_content=cmp-true

In [17]:
# need to enable iterative imputer explicitly since it's still experimental
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [18]:
# Configure the iterative imputer: at most 10 iterations, fixed seed for repeatable results
imputer = IterativeImputer(max_iter=10, random_state=100)

In [19]:
# Preview the first ten rows before imputation
action_units.head(10)

Unnamed: 0,Frame,Face_id,Confidence,Success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,...,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,ID,Timeframe
0,104,0.0,0.88,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
1,105,0.0,0.98,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
2,112,0.0,0.98,1.0,0.31,0.59,0.0,0.0,0.31,0.31,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
3,115,0.0,0.98,1.0,0.0,0.12,0.0,0.0,0.32,0.06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
4,154,0.0,0.98,1.0,1.26,1.67,0.0,0.0,0.39,0.26,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"
5,155,0.0,0.98,1.0,1.5,2.48,0.0,0.0,0.6,0.26,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"
6,156,0.0,0.88,1.0,0.88,2.77,0.0,0.0,0.59,0.02,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"
7,157,0.0,0.98,1.0,0.34,2.22,0.0,0.0,0.73,0.0,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"
8,160,0.0,0.98,1.0,0.0,1.3,0.0,0.0,0.19,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,80,"[4, 5, 6]"
9,161,0.0,0.98,1.0,0.0,1.79,0.0,0.0,0.25,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,80,"[4, 5, 6]"


In [20]:
# Column labels as a plain Python list (makes stray whitespace in the names visible)
columns = action_units.columns.tolist()
print(columns)

['Frame', 'Face_id', 'Confidence', 'Success', 'AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', ' AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c', 'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c', 'AU20_c', 'AU23_c', 'AU25_c', 'AU26_c', ' AU28_c', 'AU45_c', 'ID', 'Timeframe']


In [21]:
# Use Numeric Features
# Select every action-unit column (intensity `_r` and presence `_c`) by pattern
# rather than from a hand-typed list: some labels carry a stray leading space
# (' AU45_r', ' AU28_c'), so literal names are easy to get wrong and would
# raise a KeyError as soon as the padding of the labels changes.
au_columns = [col for col in action_units.columns if str(col).strip().startswith('AU')]
action_units_train = action_units.loc[:, au_columns]
action_units_train.head(10)

Unnamed: 0,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,AU09_r,AU10_r,AU12_r,AU14_r,...,AU12_c,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c
0,0.0,0.0,0.0,0.0,1.34,0.22,0.57,1.56,1.52,0.54,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.34,0.22,0.57,1.56,1.52,0.54,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.31,0.59,0.0,0.0,0.31,0.31,0.23,0.21,0.62,0.12,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.12,0.0,0.0,0.32,0.06,0.1,0.6,0.5,0.29,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.26,1.67,0.0,0.0,0.39,0.26,0.0,1.22,0.64,0.18,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
5,1.5,2.48,0.0,0.0,0.6,0.26,0.0,1.89,1.02,0.55,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
6,0.88,2.77,0.0,0.0,0.59,0.02,0.0,1.84,1.22,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
7,0.34,2.22,0.0,0.0,0.73,0.0,0.0,1.88,1.51,1.44,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0
8,0.0,1.3,0.0,0.0,0.19,0.0,0.04,1.18,0.69,0.72,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
9,0.0,1.79,0.0,0.0,0.25,0.0,0.12,1.57,0.99,0.88,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0


Running everything up until the imputer was very fast. Running the imputer took around 9 minutes (on the temp data!). 

In [22]:
# Fit the imputer on the action-unit feature matrix
# NOTE(review): the NaN summary printed earlier reports the same missing count (243394)
# for every AU column; if those are the same rows, they have no observed AU values
# to condition on — confirm the iterative model adds anything over a simple fill.
imputer.fit(action_units_train)

In [23]:
# Apply the fitted imputer; the result is a NumPy array (see the output below), not a DataFrame
action_units_imputed = imputer.transform(action_units_train)
# Peek at the first ten imputed rows
action_units_imputed[:10]

array([[0.  , 0.  , 0.  , 0.  , 1.34, 0.22, 0.57, 1.56, 1.52, 0.54, 0.11,
        1.96, 1.08, 1.3 , 0.78, 0.  , 0.65, 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 1.34, 0.22, 0.57, 1.56, 1.52, 0.54, 0.11,
        1.96, 1.08, 1.3 , 0.78, 0.  , 0.65, 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  ],
       [0.31, 0.59, 0.  , 0.  , 0.31, 0.31, 0.23, 0.21, 0.62, 0.12, 0.  ,
        0.04, 0.49, 0.15, 0.  , 0.27, 0.38, 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  ],
       [0.  , 0.12, 0.  , 0.  , 0.32, 0.06, 0.1 , 0.6 , 0.5 , 0.29, 0.2 ,
        0.36, 0.91, 0.75, 0.55, 0.  , 0.36, 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  ],
       [1.26, 1.67, 0.  , 0.

In [24]:
# Replace with imputed values
# Reuse the column labels of the frame that was passed to the imputer, so the
# write-back always targets exactly the columns (and column order) that were
# imputed, instead of repeating a 35-name literal that has to be kept in sync.
action_units.loc[:, action_units_train.columns] = action_units_imputed
action_units.head(10)

Unnamed: 0,Frame,Face_id,Confidence,Success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,...,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,ID,Timeframe
0,104,0.0,0.88,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
1,105,0.0,0.98,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
2,112,0.0,0.98,1.0,0.31,0.59,0.0,0.0,0.31,0.31,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
3,115,0.0,0.98,1.0,0.0,0.12,0.0,0.0,0.32,0.06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
4,154,0.0,0.98,1.0,1.26,1.67,0.0,0.0,0.39,0.26,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"
5,155,0.0,0.98,1.0,1.5,2.48,0.0,0.0,0.6,0.26,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"
6,156,0.0,0.88,1.0,0.88,2.77,0.0,0.0,0.59,0.02,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"
7,157,0.0,0.98,1.0,0.34,2.22,0.0,0.0,0.73,0.0,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"
8,160,0.0,0.98,1.0,0.0,1.3,0.0,0.0,0.19,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,80,"[4, 5, 6]"
9,161,0.0,0.98,1.0,0.0,1.79,0.0,0.0,0.25,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,80,"[4, 5, 6]"


In [25]:
# Re-count missing entries per column after writing the imputed values back
nan_counts_au_df = action_units.isnull().sum()
print(nan_counts_au_df)

Frame         0
Face_id       0
Confidence    0
Success       0
AU01_r        0
AU02_r        0
AU04_r        0
AU05_r        0
AU06_r        0
AU07_r        0
AU09_r        0
AU10_r        0
AU12_r        0
AU14_r        0
AU15_r        0
AU17_r        0
AU20_r        0
AU23_r        0
AU25_r        0
AU26_r        0
 AU45_r       0
AU01_c        0
AU02_c        0
AU04_c        0
AU05_c        0
AU06_c        0
AU07_c        0
AU09_c        0
AU10_c        0
AU12_c        0
AU14_c        0
AU15_c        0
AU17_c        0
AU20_c        0
AU23_c        0
AU25_c        0
AU26_c        0
 AU28_c       0
AU45_c        0
ID            0
Timeframe     0
dtype: int64


## Saving the action units file with the imputed values 

In [26]:
# Saving the imputed frame is currently disabled; uncomment one of the lines below to write it out.
# action_units.to_pickle("/Users/dionnespaltman/Desktop/V3/action_units_temp_imputed.pkl")

# action_units.to_csv("/Users/dionnespaltman/Desktop/V3/action_units_temp_imputed.csv", index=False)

## TS Fresh requirements

In [27]:
# Necessary imports
!pip install tsfresh

import pandas as pd
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters
from sklearn.preprocessing import StandardScaler
from tsfresh.feature_extraction import ComprehensiveFCParameters




## Action units

In [18]:
# Reshape to long format: one row per (ID, Frame, intensity measurement).
# The `_r` intensity columns are picked by pattern rather than by literal name:
# the labels are inconsistently padded (some start with a space), and a list of
# hard-coded ' AUxx_r' names raises a KeyError whenever that padding differs.
au_intensity_columns = [
    col for col in action_units.columns
    if str(col).strip().startswith('AU') and str(col).strip().endswith('_r')
]
au_long_format = pd.melt(action_units, id_vars=['ID', 'Frame'],
                         value_vars=au_intensity_columns,
                         var_name='Measurement', value_name='Value')

In [19]:
# Show the long-format frame
display(au_long_format)

Unnamed: 0,ID,Frame,Measurement,Value
0,80,104,AU01_r,0.00
1,80,105,AU01_r,0.00
2,80,112,AU01_r,0.31
3,80,115,AU01_r,0.00
4,80,154,AU01_r,1.26
...,...,...,...,...
53964846,25,2196,AU45_r,0.00
53964847,25,2197,AU45_r,0.00
53964848,25,2198,AU45_r,0.00
53964849,25,2199,AU45_r,0.00


## Missing values 

In [20]:
# Per-column count of missing entries in the long-format frame
nan_counts = au_long_format.isnull().sum()
print(nan_counts)

ID                   0
Frame                0
Measurement          0
Value          4137698
dtype: int64


## ...

## ...

## ...

## ...

## ...