This code is the main script of Elizabeth Soskin's Thesis.
Research topic: Facial expression patterns analysis for the identification of autism in children during child evaluation

for questions please contact: lizaokop@post.bgu.ac.il

# Needed packages

In [1]:
import pandas as pd
import numpy as np
import os
import glob
import datetime
from scipy.interpolate import interp1d

# Paths - Data Location & Outputs

### Main data folder with all txts from affectivas output

In [2]:
# Where txt data files output from Affectiva stored
directory = r"C:\Users\liza\txt_data"

### Here all outputs will be saved:

In [3]:
# Where output will be saved
Output_directory = r"C:\Users\liza\Final Outputs"

## Diagnosis file: (including ID and diagnosis group)

In [4]:
# main list with all ID's and diagnosis
diagnosis = r"C:\Users\liza\Diagnosis.xlsx"

### Creating the main file

In [5]:
df_main = pd.read_excel(diagnosis)

In [6]:
df_main = df_main[ ['ID'] + ['Group']]

In [7]:
df_main['ID'] = pd.to_numeric(df_main['ID'])

In [8]:
df_main.head()

Unnamed: 0,ID,Group
0,673249918,0
1,1021408222,0
2,1019778562,0
3,1021093597,0
4,669779629,0


### Affectivas columns as an output

In [9]:
info = ['Name', 'FrameIndex']
Feelings = ['Smile', 'Anger', 'Sadness', 'Disgust', 'Surprise', 'Fear']
AUs = ['Brow Furrow', 'Brow Raise', 'Lip Corner Depressor', 'InnerBrowRaise', 'EyeClosure', 'NoseWrinkle', 'UpperLipRaise', 'LipSuck', 'LipPress', 'MouthOpen', 'ChinRaise', 'Smirk', 'LipPucker', 'Cheek Raise', 'Dimpler', 'Eye Widen', 'Lid Tighten', 'Lip Stretch', 'Jaw Drop']
Head = ['Pitch', 'Yaw', 'Roll']
Rest = ['Engagement', 'Valence', 'Attention', 'Interocular Distance', 'Joy', 'Contempt']

In [10]:
len(Feelings) + len(AUs) + len(Head)

28

# Preprocess

## Loading the data

### Choose wanted columns

In [11]:
selected_cols = info + Feelings + AUs + Head

### Combine files from folder

In [12]:
def To_DF(path):
    df = pd.read_csv(path, skiprows=[0, 1, 2, 3, 4], delimiter="\t", usecols=selected_cols)
    return df

df_all_data = []

for filename in os.listdir(directory):
    if filename.endswith(".txt"): 
        new_df = To_DF(os.path.join(directory, filename))
        df_all_data.append(new_df)
        #print(os.path.join(directory, filename))
    
df_Original = pd.concat(df_all_data, ignore_index=True, sort=True)

### Change name to ID and delete the name column

In [13]:
df_Original['ID'] = df_Original['Name'].str.split('_').str[0]
del df_Original["Name"]

### Re-order the columns in the df

In [14]:
df_Original = df_Original[ ['ID'] + ['FrameIndex']+ [ col for col in df_Original.columns if (col != 'ID' and col != 'FrameIndex') ] ]

### The Number of children imported:

In [15]:
df_Original["ID"].nunique()

44

### Size of the df:

In [16]:
print("Total frames: ", df_Original.shape[0])
print("Total columns: ", df_Original.shape[1])

Total frames:  4090779
Total columns:  30


## Running interpolation for future feature extraction

In [17]:
df = df_Original.copy()

In [18]:
df['MediaTime'] = ""
'''
in the original interpolation code, there was a media time, but we don't really need it, 
in this section, I'm not even importing it from the main data,
I didn't want to change all the following code because it considers the columns numbers,
so I'm just adding an empty column, we don't use it so it's not a big deal.
'''

"\nin the original interpolation code, there was a media time, but we don't really need it, \nin this section, I'm not even importing it from the main data,\nI didn't want to change all the following code because it considers the columns numbers,\nso I'm just adding an empty column, we don't use it so it's not a big deal.\n"

In [19]:
df = df[ ['ID'] +['MediaTime'] +['FrameIndex'] +['Anger'] +['Disgust'] +['Fear'] +['Sadness'] +['Smile'] +['Surprise']]

In [20]:
df.fillna(0, inplace=True)

In [21]:
df.to_csv("input_intr.csv")

In [22]:
df

Unnamed: 0,ID,MediaTime,FrameIndex,Anger,Disgust,Fear,Sadness,Smile,Surprise
0,10212252882,,0,0.0,0.0,0.0,0.0,0.0,0.0
1,10212252882,,1,0.0,0.0,0.0,0.0,0.0,0.0
2,10212252882,,2,0.0,0.0,0.0,0.0,0.0,0.0
3,10212252882,,3,0.0,0.0,0.0,0.0,0.0,0.0
4,10212252882,,4,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
4090774,1015608034,,91654,0.0,0.0,0.0,0.0,0.0,0.0
4090775,1015608034,,91655,0.0,0.0,0.0,0.0,0.0,0.0
4090776,1015608034,,91656,0.0,0.0,0.0,0.0,0.0,0.0
4090777,1015608034,,91657,0.0,0.0,0.0,0.0,0.0,0.0


### Interpulation:

In [23]:
look_for_sequence_of = 5              # min of valid frames before using interp
max_hole_size = 10                    # max of missing frames allowd
Counter = 0

f_input = open("input_intr.csv", "r") # input file
first_line = f_input.readline()      

print("--")
whole_file = []

--


In [24]:
# csv to list
for line in f_input:
    l = line.split(',')
    whole_file.append(l)

In [25]:
# list of all missing frames (0.0)
def create_list_of_holes():
    mid_sequence = False
    current_index = 0
    sequence_start_index = 0
    sequence_end_index = 0

    holes = []

    # create list of all the holes
    for li in whole_file:
        current_index = int(li[0])      # index is in the first coulumn

        # li[4] = "Angry" position
        if li[4] == '0.0': # searching for the zeros
            if not mid_sequence:
                mid_sequence = True
                sequence_start_index = current_index
        else:
            if mid_sequence:
                mid_sequence = False
                sequence_end_index = current_index
                holes.append([sequence_start_index, sequence_end_index - 1])

    return holes

In [26]:
# intrp 
def fix_hole(start, stop, col):
    global Counter
    #print(f"fixing hole in {start, stop}")
    lowest_index_with_data = start - 1
    
    #    search for segment frame
    while whole_file[lowest_index_with_data][4] != '0.0':
        lowest_index_with_data -= 1

    lowest_index_with_data += 1
    highest_index_with_data = stop + 1
    while whole_file[highest_index_with_data][4] != '0.0':
        highest_index_with_data += 1

    highest_index_with_data -= 1
    #     --
    
    # append valid values of the segment
    x = np.array(range(lowest_index_with_data, start))
    x = np.append(x, range(stop+1, highest_index_with_data+1))
    
    # empty list in the size of x
    y = np.zeros_like(x, dtype=float)

    # fill y from above with original data
    for index, val in enumerate(x):
        y[index] = whole_file[val][col]

    
    #f = interp1d(x, y)                 # linear interp
    f2 = interp1d(x, y, kind='cubic')

    xnew = np.array(range(lowest_index_with_data, highest_index_with_data + 1)) # array in the original size including zeros

    offset = start - lowest_index_with_data # where am I in the original file
    
    
    for i in range(stop+1 - start): # number of zeros
        data_to_write = str(f2(xnew)[offset+i])
        if col == 9:
            data_to_write += "\n"
            
        whole_file[start+i][col] = data_to_write
    
    Counter +=1

In [27]:
# validate that we have 'look_for_sequence_of' valid franes before zeros (5 frames)
def check_for_predecessors(index, file):
    test_passed = True
    
    for i in range(look_for_sequence_of):
        if file[index-i-1][4] == '0.0':
            test_passed = False
    return test_passed

In [28]:
# --------------------------------------------------- starting here:
list_of_holes = create_list_of_holes()    
#print(f"list of current holes = {list_of_holes}")
print(f"the length of this list is = {len(list_of_holes)}")

the length of this list is = 51597


In [29]:
# check the holes - if it needs fixing
# list of holes looks like this : [start, end], [start, end], [start, end]....
# hole[0] = start index     hole[1] = end index
for hole in list_of_holes:
    if hole[0] - look_for_sequence_of < 0:  #deal with negetive indexes- if first seq is nulls - ignore it
        continue
    if hole[1] - hole[0] + 1 > max_hole_size:   # if hole is too big, skip
        continue

    x = check_for_predecessors(hole[0], whole_file) # validate that prev frames contain values
    if x: # test passed
        for col in range(4,10): # fix all emotions
            fix_hole(hole[0], hole[1], col)

In [30]:
# Fix if interpulation completed values over 100 or less then 0
for line in whole_file:
    for i in range(4, 10):
        if float(line[i]) > 100:
            line[i] = '100'
        if float(line[i]) < 0:
            line[i] = '0'
        
        if i == 9:
            if '\n' not in line[i]:
                line[i] += '\n'

In [31]:
print(Counter)
with open('inter_fixed_file.csv', 'w') as f:
    f.write(first_line[:])
    for line in whole_file:
        f.write(','.join(line[:]))

121590


In [32]:
Validation_After_interpulation = len(whole_file)

# Delete short segments:

In [33]:
Counter = 0
length_to_del = 5

f_input = open("inter_fixed_file.csv", "r") # input file
first_line = f_input.readline()      

#print("--")
whole_file = []

In [34]:
# csv to list
#line_length_array = []

for line in f_input:
    l = line.split(',')
    #line_length_array.append(len(l))
    whole_file.append(l)

#print(max(line_length_array))

In [35]:
len(whole_file)

4090779

In [36]:
# list of all missing frames (0.0)
def create_list_of_segmnts():
    mid_sequence = False
    current_index = 0
    sequence_start_index = 0
    sequence_end_index = 0

    segments = []

    # create list of all the holes
    for li in whole_file:
        current_index = int(li[0])      #index is first column
        row_sum = float(li[4]) +float(li[5]) +float(li[6]) +float(li[7]) +float(li[8]) +float(li[9])
        # li[4] = "Angry" position
        if  row_sum > 0: # searching for the values
            if not mid_sequence:
                mid_sequence = True
                sequence_start_index = current_index
        else:
            if mid_sequence:
                mid_sequence = False
                sequence_end_index = current_index
                segments.append([sequence_start_index, sequence_end_index - 1])

    return segments

In [37]:
# delete short seg 
def fix_seg(start, stop, col):
    global Counter
    #print(f"fixing hole in {start, stop}")
    
    for i in range(stop+1 - start): # number of values to change
        data_to_write = "0.0"
        if col == 9:
            data_to_write += "\n"
        
        #print("start+i: ",start+i)
        #print("col: ", col)
        len(whole_file)
        
        whole_file[start+i][col] = data_to_write
    Counter +=1


In [38]:
list_of_seg = create_list_of_segmnts()
#print(f"list of current Seg = {list_of_seg}")
#print(f"the length of this list is = {len(list_of_seg)}")

In [39]:
print(f"the length of this list is = {len(list_of_seg)}")
#8906382

the length of this list is = 31332


In [40]:
for seg in list_of_seg:
    if seg[1] - seg[0] +1 >= length_to_del:   # if seg is biger than 5 frames, skip
        continue
    else:
        for col in range(4,10): # fix all emotions
            fix_seg(seg[0], seg[1], col)

In [41]:
print(Counter)
with open('Full_Fixed_File.csv', 'w') as f:
    f.write(first_line[:])
    for line in whole_file:
        f.write(','.join(line[:]))

110136


In [42]:
Validation_After_Deleting = len(whole_file)

### Validation:

In [43]:
Validation = Validation_After_interpulation - Validation_After_Deleting
Validation
# Zero is good. if not Zero, not good - need to check the code. 

0

# 

# Main - Extracting Features:

In [44]:
df = df_Original.copy()

## 1. Percent of valid frames

In [45]:
df = df_Original.copy()

In [46]:
df1 = df.groupby('ID').count()[['FrameIndex']]

In [47]:
df = df.fillna(0)

In [48]:
df['row_sum'] = df['Anger'] + df['Disgust'] + df['Fear'] + df['Sadness'] + df['Smile'] +  df['Surprise']

In [49]:
df = df.drop(df[df.row_sum < 0.00000001].index)

In [50]:
df2 = df.groupby('ID').count()[['FrameIndex']]

In [51]:
Result = pd.merge(df1 ,df2 , how='outer', on='ID')
Result['Frames_Percentage'] = Result['FrameIndex_y']/Result['FrameIndex_x']
Result = Result.rename(columns={"FrameIndex_x": "Total_frames_num", "FrameIndex_y": "Valid_frames_num"})

In [52]:
Result = Result.reset_index()

In [53]:
Result['ID'] = pd.to_numeric(Result['ID'])

In [54]:
df_main = pd.merge(df_main ,Result , how='outer', on='ID')

In [55]:
df_main.head()

Unnamed: 0,ID,Group,Total_frames_num,Valid_frames_num,Frames_Percentage
0,673249918,0,85736,16471,0.192113
1,1021408222,0,90365,19580,0.216677
2,1019778562,0,111063,21325,0.192008
3,1021093597,0,82627,46000,0.556719
4,669779629,0,104995,24678,0.23504


## 2. Movement of the head

In [56]:
df = df_Original.copy()

In [57]:
df.fillna(0, inplace=True)

In [58]:
df['row_sum'] = df['Anger'] + df['Disgust'] + df['Fear'] + df['Sadness'] + df['Smile'] +  df['Surprise']

In [59]:
# creating new shifted column for all relevant columns
df['Shift_rowSum'] = df['row_sum'].shift(1)
df['Shift_Pitch'] = df['Pitch'].shift(1)
df['Shift_Roll'] = df['Roll'].shift(1)
df['Shift_Yaw'] = df['Yaw'].shift(1)

In [60]:
# creating the diff between two frames
df['Diff_Pitch'] = abs(df['Pitch'] - df['Pitch'].shift(1))
df['Diff_Roll'] = abs(df['Roll'] - df['Roll'].shift(1))
df['Diff_Yaw'] = abs(df['Yaw'] - df['Yaw'].shift(1))

In [61]:
# copy relevant to new df
df1 = df[df['Diff_Pitch'].notna()]

In [62]:
# remove null rows
df1 = df1.drop(df1[df1.row_sum < 0.0000001].index) 
df1 = df1.drop(df1[df1.Shift_rowSum < 0.0000001].index)

In [63]:
df1 = df1[ ['ID'] + ['Diff_Pitch']+ ['Diff_Roll']+ ['Diff_Yaw'] ] 

In [64]:
f_head_move = df1.groupby('ID').mean()
f_head_move.head()

Unnamed: 0_level_0,Diff_Pitch,Diff_Roll,Diff_Yaw
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1010598340,0.977104,1.243577,1.294176
10120119131,1.353724,1.541759,1.349941
1015608034,0.986076,1.095488,1.025045
1016155105,1.424711,1.397242,1.176961
1018172959,1.387216,1.452423,1.36588


In [65]:
f_head_move = f_head_move.reset_index()

In [66]:
f_head_move['ID'] = pd.to_numeric(f_head_move['ID'])

In [67]:
df_main = pd.merge(df_main ,f_head_move , how='outer', on='ID')

In [68]:
df_main.head()

Unnamed: 0,ID,Group,Total_frames_num,Valid_frames_num,Frames_Percentage,Diff_Pitch,Diff_Roll,Diff_Yaw
0,673249918,0,85736,16471,0.192113,1.412543,1.420279,1.432545
1,1021408222,0,90365,19580,0.216677,1.54034,1.761364,1.485873
2,1019778562,0,111063,21325,0.192008,1.482536,1.816111,1.490298
3,1021093597,0,82627,46000,0.556719,1.124373,1.295388,1.134773
4,669779629,0,104995,24678,0.23504,1.338867,1.550114,1.51838


## 3. Child distance from the >group/NT< average - This is only for smile

Vector analysis purpose - take all frames above the bar (75), average them per child. Get average vector per group, Then automatically calculate the distance between groups.

### This section will only prepare the files for the excel sheet for calculation

In [69]:
df = df_Original.copy()

In [70]:
df["ID"].nunique()

44

In [71]:
df = df.dropna(subset=['Brow Furrow', 'Brow Raise'])

In [72]:
df['Line_sum'] = df['Anger'] + df['Disgust'] + df['Fear'] + df['Sadness'] + df['Smile'] + df['Surprise']
df['Line'] = np.where(df.Line_sum > 0.000001, 1, 0) 
del df['Line_sum']

In [73]:
df_bars = df.copy()
bar_input = 75

In [74]:
# Set indicatior if value is above bar
#Anger_Name = "Anger" + str(bar_input)
#Disgust_Name = "Disgust" + str(bar_input)
#Fear_Name = "Fear" + str(bar_input)
#Sadness_Name = "Sadness" + str(bar_input)
Smile_Name = "Smile" + str(bar_input)
#Surprise_Name = "Surprise" + str(bar_input)

#df_bars[Anger_Name] = np.where(df_bars.Anger > bar_input , 1, 0) 
#df_bars[Disgust_Name] = np.where(df_bars.Disgust > bar_input , 1, 0) 
#df_bars[Fear_Name] = np.where(df_bars.Fear > bar_input , 1, 0) 
#df_bars[Sadness_Name] = np.where(df_bars.Sadness > bar_input , 1, 0) 
df_bars[Smile_Name] = np.where(df_bars.Smile > bar_input , 1, 0) 
#df_bars[Surprise_Name] = np.where(df_bars.Surprise > bar_input , 1, 0) 

In [75]:
# Keep only frames with values above bar (split to df by feeling)
#df_bars_Anger = df_bars.loc[lambda df_bars: df_bars[Anger_Name] > 0, :]
#df_bars_Disgust = df_bars.loc[lambda df_bars: df_bars[Disgust_Name] > 0, :]
#df_bars_Fear = df_bars.loc[lambda df_bars: df_bars[Fear_Name] > 0, :]
#df_bars_Sadness = df_bars.loc[lambda df_bars: df_bars[Sadness_Name] > 0, :]
df_bars_Smile = df_bars.loc[lambda df_bars: df_bars[Smile_Name] > 0, :]
#df_bars_Surprise = df_bars.loc[lambda df_bars: df_bars[Surprise_Name] > 0, :]

In [76]:
#df_Output_Anger = df_bars_Anger.groupby('ID').agg([np.average]).iloc[:,1:36]
#df_Output_Disgust = df_bars_Disgust.groupby('ID').agg([np.average]).iloc[:,1:36]
#df_Output_Fear = df_bars_Fear.groupby('ID').agg([np.average]).iloc[:,1:36]
#df_Output_Sadness = df_bars_Sadness.groupby('ID').agg([np.average]).iloc[:,1:36]
df_Output_Smile = df_bars_Smile.groupby('ID').agg([np.average]).iloc[:,1:36]
#df_Output_Surprise = df_bars_Surprise.groupby('ID').agg([np.average]).iloc[:,1:36]

In [77]:
df_Output_Smile.columns = df_Output_Smile.columns.get_level_values(0)

In [78]:
df_Output_Smile = df_Output_Smile.reset_index()

In [79]:
df_Output_Smile.head()

Unnamed: 0,ID,Anger,Brow Furrow,Brow Raise,Cheek Raise,ChinRaise,Dimpler,Disgust,Eye Widen,EyeClosure,...,Pitch,Roll,Sadness,Smile,Smirk,Surprise,UpperLipRaise,Yaw,Line,Smile75
0,1010598340,0.000137,0.036076,0.286698,50.442457,3.691158,13.716715,0.026448,0.000941,44.051035,...,-9.239663,1.920795,2e-05,95.047114,1.253576,1.314404,0.078069,-10.003702,1,1
1,10120119131,0.000195,0.035389,2.447218,22.191078,5.086548,7.351269,0.025905,1.496303,44.627526,...,-16.785078,5.489743,3.5e-05,89.739162,0.93622,2.351371,0.227903,-10.078008,1,1
2,1015608034,0.000351,0.014344,4.29285,32.144834,8.224106,12.80282,0.00903,1.69814,54.338198,...,-17.665457,-1.820972,2e-05,94.462501,0.592566,3.420287,0.06995,-12.735274,1,1
3,1016155105,0.000498,1.823084,0.428427,43.890558,9.138868,10.026663,0.413206,1.404502,41.318782,...,-8.008497,2.522965,0.003817,91.661194,2.104376,8.567728,0.739487,8.330293,1,1
4,1018172959,7.7e-05,0.044616,0.000341,33.431636,1.66925,9.592309,0.089414,0.120411,44.379836,...,-11.479736,7.021477,0.000162,93.758109,1.752549,1.593197,0.049914,-10.702538,1,1


In [80]:
df_diag = pd.read_excel(diagnosis)

In [81]:
del df_Output_Smile["Line"]
del df_Output_Smile["Smile75"]

In [82]:
# --for BU, all
#df_Output_Smile = df_Output_Smile[['ID'] + ['Smile'] + ['Anger'] + ['Sadness'] + ['Disgust'] + ['Surprise'] + ['Fear'] + ['Brow Furrow'] + ['Brow Raise'] + ['Lip Corner Depressor'] + ['InnerBrowRaise'] + ['EyeClosure'] + ['NoseWrinkle'] + ['UpperLipRaise'] + ['LipSuck'] + ['LipPress'] + ['MouthOpen'] + ['ChinRaise'] + ['Smirk'] + ['LipPucker'] + ['Cheek Raise'] + ['Dimpler'] + ['Eye Widen'] + ['Lid Tighten'] + ['Lip Stretch'] + ['Jaw Drop'] + ['Pitch'] + ['Yaw'] + ['Roll']]

In [83]:
# Only AUs
df_Output_Smile = df_Output_Smile[['ID'] + ['Brow Furrow'] + ['Brow Raise'] + ['Lip Corner Depressor'] + ['InnerBrowRaise'] + ['EyeClosure'] + ['NoseWrinkle'] + ['UpperLipRaise'] + ['LipSuck'] + ['LipPress'] + ['MouthOpen'] + ['ChinRaise'] + ['Smirk'] + ['LipPucker'] + ['Cheek Raise'] + ['Dimpler'] + ['Eye Widen'] + ['Lid Tighten'] + ['Lip Stretch'] + ['Jaw Drop']]

In [84]:
df_Output_Smile['ID'] = pd.to_numeric(df_Output_Smile['ID'])

In [85]:
with pd.ExcelWriter(Output_directory + "//" + 'Vectors_input_75.xlsx') as writer:
    df_diag.to_excel(writer, sheet_name='Diagnosis', index=False)
    #df_Output_Anger.to_excel(writer, sheet_name='V_Anger', index=True)
    #df_Output_Disgust.to_excel(writer, sheet_name='V_Disgust', index=True)
    #df_Output_Fear.to_excel(writer, sheet_name='V_Fear', index=True)
    #df_Output_Sadness.to_excel(writer, sheet_name='V_Sadness', index=True)
    df_Output_Smile.to_excel(writer, sheet_name='V_Smile', index=False)
    #df_Output_Surprise.to_excel(writer, sheet_name='V_Surprise', index=True)

## V2- automatic calc

In [86]:
# Merge with diagnosis
df_AU_Smile_Ave = pd.merge(df_Output_Smile ,df_diag , how='outer', on='ID')

In [87]:
# re-order
df_AU_Smile_Ave = df_AU_Smile_Ave[['ID'] + ['Group'] + ['Brow Furrow'] + ['Brow Raise'] + ['Lip Corner Depressor'] + ['InnerBrowRaise'] + ['EyeClosure'] + ['NoseWrinkle'] + ['UpperLipRaise'] + ['LipSuck'] + ['LipPress'] + ['MouthOpen'] + ['ChinRaise'] + ['Smirk'] + ['LipPucker'] + ['Cheek Raise'] + ['Dimpler'] + ['Eye Widen'] + ['Lid Tighten'] + ['Lip Stretch'] + ['Jaw Drop']]

In [88]:
#Group to 2 vectors per group
df_AU_Mean_vector = df_AU_Smile_Ave.copy()
del df_AU_Mean_vector["ID"]
df_AU_Mean_vector = df_AU_Mean_vector.groupby(["Group"]).mean()

In [89]:
# reset index
df_AU_Mean_vector = df_AU_Mean_vector.reset_index(level='Group')

### ASD-ASD , TD-TD

In [90]:
# Merge original average AU calc with average vector per group
df_dis = pd.merge(df_AU_Smile_Ave ,df_AU_Mean_vector , how='left', on='Group')

In [91]:
# (xi-yi)^2
df_dis["Brow Furrow"] = pow((df_dis["Brow Furrow_x"] - df_dis["Brow Furrow_y"]),2)
df_dis["Brow Raise"] = pow((df_dis["Brow Raise_x"] - df_dis["Brow Raise_y"]),2)
df_dis["Lip Corner Depressor"] = pow((df_dis["Lip Corner Depressor_x"] - df_dis["Lip Corner Depressor_y"]),2)
df_dis["InnerBrowRaise"] = pow((df_dis["InnerBrowRaise_x"] - df_dis["InnerBrowRaise_y"]),2)
df_dis["EyeClosure"] = pow((df_dis["EyeClosure_x"] - df_dis["EyeClosure_y"]),2)
df_dis["NoseWrinkle"] = pow((df_dis["NoseWrinkle_x"] - df_dis["NoseWrinkle_y"]),2)
df_dis["UpperLipRaise"] = pow((df_dis["UpperLipRaise_x"] - df_dis["UpperLipRaise_y"]),2)
df_dis["LipSuck"] = pow((df_dis["LipSuck_x"] - df_dis["LipSuck_y"]),2)
df_dis["LipPress"] = pow((df_dis["LipPress_x"] - df_dis["LipPress_y"]),2)
df_dis["MouthOpen"] = pow((df_dis["MouthOpen_x"] - df_dis["MouthOpen_y"]),2)
df_dis["ChinRaise"] = pow((df_dis["ChinRaise_x"] - df_dis["ChinRaise_y"]),2)
df_dis["Smirk"] = pow((df_dis["Smirk_x"] - df_dis["Smirk_y"]),2)
df_dis["LipPucker"] = pow((df_dis["LipPucker_x"] - df_dis["LipPucker_y"]),2)
df_dis["Cheek Raise"] = pow((df_dis["Cheek Raise_x"] - df_dis["Cheek Raise_y"]),2)
df_dis["Dimpler"] = pow((df_dis["Dimpler_x"] - df_dis["Dimpler_y"]),2)
df_dis["Eye Widen"] = pow((df_dis["Eye Widen_x"] - df_dis["Eye Widen_y"]),2)
df_dis["Lid Tighten"] = pow((df_dis["Lid Tighten_x"] - df_dis["Lid Tighten_y"]),2)
df_dis["Lip Stretch"] = pow((df_dis["Lip Stretch_x"] - df_dis["Lip Stretch_y"]),2)
df_dis["Jaw Drop"] = pow((df_dis["Jaw Drop_x"] - df_dis["Jaw Drop_y"]),2)

In [92]:
AUs_list = ['ID', 'Brow Furrow', 'Brow Raise', 'Lip Corner Depressor', 'InnerBrowRaise', 'EyeClosure', 'NoseWrinkle', 'UpperLipRaise', 'LipSuck', 'LipPress', 'MouthOpen', 'ChinRaise', 'Smirk', 'LipPucker', 'Cheek Raise', 'Dimpler', 'Eye Widen', 'Lid Tighten', 'Lip Stretch', 'Jaw Drop']
df_dis = df_dis[AUs_list]

In [93]:
column_list = list(df_dis)
column_list.remove("ID")

In [94]:
# Sum((xi-yi)^2)
df_dis["AUs_sum"] = (df_dis[column_list].sum(axis=1))
# sqrt(Sum((xi-yi)^2))
df_dis["Sqrt_AUs_sum"] = np.sqrt(df_dis[column_list].sum(axis=1))

In [95]:
df_dis_output = df_dis[['ID', 'Sqrt_AUs_sum']]

In [96]:
df_dis_output = df_dis_output.rename(columns={"Sqrt_AUs_sum":"Euclidean_distance_ASD_NT"})

In [97]:
df_dis_output.head()

Unnamed: 0,ID,Euclidean_distance_ASD_NT
0,1010598340,15.601441
1,10120119131,30.56821
2,1015608034,33.574491
3,1016155105,34.223811
4,1018172959,24.550748


In [98]:
# Merge to Main
df_dis_output['ID'] = pd.to_numeric(df_dis_output['ID'])
df_main = pd.merge(df_main ,df_dis_output , how='outer', on='ID')

### ASD-NT, NT-NT

In [99]:
#Stay with TD vector only
df_AU_Mean_vector = df_AU_Mean_vector.drop(df_AU_Mean_vector[df_AU_Mean_vector.Group != 0].index)

In [100]:
# "Force" ASD to be TD only for the merge
df_AU_Smile_Ave["Group"].replace({1: 0}, inplace=True)

In [101]:
# Merge original average AU calc with average vector per group
df_dis = pd.merge(df_AU_Smile_Ave ,df_AU_Mean_vector , how='left', on='Group')

In [102]:
# (xi-yi)^2
df_dis["Brow Furrow"] = pow((df_dis["Brow Furrow_x"] - df_dis["Brow Furrow_y"]),2)
df_dis["Brow Raise"] = pow((df_dis["Brow Raise_x"] - df_dis["Brow Raise_y"]),2)
df_dis["Lip Corner Depressor"] = pow((df_dis["Lip Corner Depressor_x"] - df_dis["Lip Corner Depressor_y"]),2)
df_dis["InnerBrowRaise"] = pow((df_dis["InnerBrowRaise_x"] - df_dis["InnerBrowRaise_y"]),2)
df_dis["EyeClosure"] = pow((df_dis["EyeClosure_x"] - df_dis["EyeClosure_y"]),2)
df_dis["NoseWrinkle"] = pow((df_dis["NoseWrinkle_x"] - df_dis["NoseWrinkle_y"]),2)
df_dis["UpperLipRaise"] = pow((df_dis["UpperLipRaise_x"] - df_dis["UpperLipRaise_y"]),2)
df_dis["LipSuck"] = pow((df_dis["LipSuck_x"] - df_dis["LipSuck_y"]),2)
df_dis["LipPress"] = pow((df_dis["LipPress_x"] - df_dis["LipPress_y"]),2)
df_dis["MouthOpen"] = pow((df_dis["MouthOpen_x"] - df_dis["MouthOpen_y"]),2)
df_dis["ChinRaise"] = pow((df_dis["ChinRaise_x"] - df_dis["ChinRaise_y"]),2)
df_dis["Smirk"] = pow((df_dis["Smirk_x"] - df_dis["Smirk_y"]),2)
df_dis["LipPucker"] = pow((df_dis["LipPucker_x"] - df_dis["LipPucker_y"]),2)
df_dis["Cheek Raise"] = pow((df_dis["Cheek Raise_x"] - df_dis["Cheek Raise_y"]),2)
df_dis["Dimpler"] = pow((df_dis["Dimpler_x"] - df_dis["Dimpler_y"]),2)
df_dis["Eye Widen"] = pow((df_dis["Eye Widen_x"] - df_dis["Eye Widen_y"]),2)
df_dis["Lid Tighten"] = pow((df_dis["Lid Tighten_x"] - df_dis["Lid Tighten_y"]),2)
df_dis["Lip Stretch"] = pow((df_dis["Lip Stretch_x"] - df_dis["Lip Stretch_y"]),2)
df_dis["Jaw Drop"] = pow((df_dis["Jaw Drop_x"] - df_dis["Jaw Drop_y"]),2)

In [103]:
AUs_list = ['ID', 'Brow Furrow', 'Brow Raise', 'Lip Corner Depressor', 'InnerBrowRaise', 'EyeClosure', 'NoseWrinkle', 'UpperLipRaise', 'LipSuck', 'LipPress', 'MouthOpen', 'ChinRaise', 'Smirk', 'LipPucker', 'Cheek Raise', 'Dimpler', 'Eye Widen', 'Lid Tighten', 'Lip Stretch', 'Jaw Drop']
df_dis = df_dis[AUs_list]

In [104]:
column_list = list(df_dis)
column_list.remove("ID")

In [105]:
# Sum((xi-yi)^2)
df_dis["AUs_sum"] = (df_dis[column_list].sum(axis=1))
# sqrt(Sum((xi-yi)^2))
df_dis["Sqrt_AUs_sum"] = np.sqrt(df_dis[column_list].sum(axis=1))

In [106]:
df_dis_output = df_dis[['ID', 'Sqrt_AUs_sum']]

In [107]:
df_dis_output = df_dis_output.rename(columns={"Sqrt_AUs_sum":"Euclidean_distance_NT_NT"})

In [108]:
df_dis_output.head()

Unnamed: 0,ID,Euclidean_distance_NT_NT
0,1010598340,15.601441
1,10120119131,33.458771
2,1015608034,23.786411
3,1016155105,48.831206
4,1018172959,23.749915


In [109]:
# Merge to Main
df_dis_output['ID'] = pd.to_numeric(df_dis_output['ID'])
df_main = pd.merge(df_main ,df_dis_output , how='outer', on='ID')

## 4. % Of frames with some emotion above the threshold of 75 (Ilan's measure)

In [110]:
df = df_Original.copy()

In [111]:
df.shape[0]

4090779

In [112]:
df = df[ ['ID'] + ['FrameIndex']+ ['Anger'] + ['Disgust'] + ['Fear'] + ['Sadness'] + ['Smile'] + ['Surprise']]

In [113]:
df.fillna(0, inplace=True)

In [114]:
df['row_sum'] = df['Anger'] + df['Disgust'] + df['Fear'] + df['Sadness'] + df['Smile'] +  df['Surprise']

In [115]:
df_No_zeros = df.drop(df[df.row_sum < 0.000001].index) # remove zeros
del df_No_zeros["row_sum"]

In [116]:
#deleted lines:
df.shape[0] - df_No_zeros.shape[0]

2980224

In [117]:
def Bars(df_input, bar_input):

    df_bars = df_input.copy()
    
    #create column names
    Anger_Name = "Anger" + str(bar_input)
    Disgust_Name = "Disgust" + str(bar_input)
    Fear_Name = "Fear" + str(bar_input)
    Sadness_Name = "Sadness" + str(bar_input)
    Smile_Name = "Smile" + str(bar_input)
    Surprise_Name = "Surprise" + str(bar_input)
    Sum_Name = "Sum" + str(bar_input)
    
    #check if row meets bar
    df_bars[Anger_Name] = np.where(df_bars.Anger > bar_input , 1, 0) 
    df_bars[Disgust_Name] = np.where(df_bars.Disgust > bar_input , 1, 0) 
    df_bars[Fear_Name] = np.where(df_bars.Fear > bar_input , 1, 0) 
    df_bars[Sadness_Name] = np.where(df_bars.Sadness > bar_input , 1, 0) 
    df_bars[Smile_Name] = np.where(df_bars.Smile > bar_input , 1, 0) 
    df_bars[Surprise_Name] = np.where(df_bars.Surprise > bar_input , 1, 0) 
    
    #Check if one of the emotions in the frame meets bar and separate them into new df
    df_bars[Sum_Name] = df_bars[Anger_Name] + df_bars[Disgust_Name] + df_bars[Fear_Name] + df_bars[Sadness_Name] + df_bars[Smile_Name] + df_bars[Surprise_Name]
    df_bars1 = df_bars.loc[lambda df_bars: df_bars[Sum_Name] > 0, :]
    df_Output = df_bars1.groupby('ID').agg([np.sum]).iloc[:,7:15] #summerize by ID number of frames that met the bar for ID
    
    return df_Output

In [118]:
#df_Bar25 = Bars(df_No_zeros , 25)
#df_Bar50 = Bars(df_No_zeros , 50)
df_Bar75 = Bars(df_No_zeros , 75)

In [119]:
df_Bar75.columns = df_Bar75.columns.get_level_values(0)

In [120]:
df_Bar75.head()

Unnamed: 0_level_0,Anger75,Disgust75,Fear75,Sadness75,Smile75,Surprise75,Sum75
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1010598340,0,23,0,3,4832,0,4858
10120119131,0,348,0,0,1262,432,2042
1015608034,0,12,8,0,3441,70,3531
1016155105,0,289,0,300,2323,848,3760
1018172959,3,40,0,2,2735,40,2820


# Combine:

In [121]:
df_Frames = df_No_zeros.groupby('ID').count()[['FrameIndex']]

In [122]:
df_Frames = df_Frames.rename(columns={"FrameIndex": "Valid_frames_num"})

In [123]:
df_Frames.head()

Unnamed: 0_level_0,Valid_frames_num
ID,Unnamed: 1_level_1
1010598340,15253
10120119131,35817
1015608034,56963
1016155105,29700
1018172959,11876


In [124]:
Output = pd.merge(df_Frames ,df_Bar75 , how='outer', on='ID')

In [125]:
Output.head()

Unnamed: 0_level_0,Valid_frames_num,Anger75,Disgust75,Fear75,Sadness75,Smile75,Surprise75,Sum75
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1010598340,15253,0,23,0,3,4832,0,4858
10120119131,35817,0,348,0,0,1262,432,2042
1015608034,56963,0,12,8,0,3441,70,3531
1016155105,29700,0,289,0,300,2323,848,3760
1018172959,11876,3,40,0,2,2735,40,2820


In [126]:
Output2 = Output.iloc[:,1:8].apply(lambda x: x/Output["Valid_frames_num"], axis=0)

In [127]:
Output2 = Output2.reset_index()

In [128]:
Output2.head()

Unnamed: 0,ID,Anger75,Disgust75,Fear75,Sadness75,Smile75,Surprise75,Sum75
0,1010598340,0.0,0.001508,0.0,0.000197,0.31679,0.0,0.318495
1,10120119131,0.0,0.009716,0.0,0.0,0.035235,0.012061,0.057012
2,1015608034,0.0,0.000211,0.00014,0.0,0.060408,0.001229,0.061988
3,1016155105,0.0,0.009731,0.0,0.010101,0.078215,0.028552,0.126599
4,1018172959,0.000253,0.003368,0.0,0.000168,0.230296,0.003368,0.237454


# Marge to main

In [129]:
Output2['ID'] = pd.to_numeric(Output2['ID'])

In [130]:
df_main = pd.merge(df_main ,Output2 , how='outer', on='ID')

In [131]:
df_main.head()

Unnamed: 0,ID,Group,Total_frames_num,Valid_frames_num,Frames_Percentage,Diff_Pitch,Diff_Roll,Diff_Yaw,Euclidean_distance_ASD_NT,Euclidean_distance_NT_NT,Anger75,Disgust75,Fear75,Sadness75,Smile75,Surprise75,Sum75
0,673249918,0,85736,16471,0.192113,1.412543,1.420279,1.432545,27.920872,27.920872,0.0,0.001336,0.0,0.000121,0.076134,0.047295,0.124886
1,1021408222,0,90365,19580,0.216677,1.54034,1.761364,1.485873,46.611333,46.611333,0.001685,0.000919,0.0,0.0,0.089632,0.002809,0.095046
2,1019778562,0,111063,21325,0.192008,1.482536,1.816111,1.490298,22.355514,22.355514,0.001032,0.002626,0.0,0.000188,0.110762,0.003189,0.117796
3,1021093597,0,82627,46000,0.556719,1.124373,1.295388,1.134773,21.763416,21.763416,0.000304,0.003826,0.0,0.0,0.127652,0.008065,0.139848
4,669779629,0,104995,24678,0.23504,1.338867,1.550114,1.51838,18.300407,18.300407,0.0,0.001499,0.000932,0.0,0.221169,0.001661,0.225261


#     

## 5. Average amount of Smiles above 75 & Average duration of Smiles above 75

In [132]:
df = pd.read_csv('Full_Fixed_File.csv') # read the file after interpolation

In [133]:
df

Unnamed: 0.1,Unnamed: 0,ID,MediaTime,FrameIndex,Anger,Disgust,Fear,Sadness,Smile,Surprise
0,0,10212252882,,0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,10212252882,,1,0.0,0.0,0.0,0.0,0.0,0.0
2,2,10212252882,,2,0.0,0.0,0.0,0.0,0.0,0.0
3,3,10212252882,,3,0.0,0.0,0.0,0.0,0.0,0.0
4,4,10212252882,,4,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
4090774,4090774,1015608034,,91654,0.0,0.0,0.0,0.0,0.0,0.0
4090775,4090775,1015608034,,91655,0.0,0.0,0.0,0.0,0.0,0.0
4090776,4090776,1015608034,,91656,0.0,0.0,0.0,0.0,0.0,0.0
4090777,4090777,1015608034,,91657,0.0,0.0,0.0,0.0,0.0,0.0


In [134]:
df.shape

(4090779, 10)

In [135]:
df['Smile_75'] = np.where(df.Smile > 75, 1, 0) # create bar=75 column for smile

In [136]:
# two help columns to couns the smile number and seg of smile
df['Smile_75_num'] = ""   # row[11]
df['Smile_75_seq'] = ""   # row[12]

In [137]:
#df_smile_time = 
df.groupby('ID').sum().iloc[:,9:10]/30 # amount of time(in sec) child smile during the video 

Unnamed: 0_level_0,Smile_75
ID,Unnamed: 1_level_1
664255171,103.5
664308115,28.033333
666238693,28.633333
666682207,513.566667
666778936,425.766667
666789877,179.2
666796231,29.1
666804703,90.833333
666830041,92.466667
666853903,53.733333


In [138]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4090779 entries, 0 to 4090778
Data columns (total 13 columns):
 #   Column        Dtype  
---  ------        -----  
 0   Unnamed: 0    int64  
 1   ID            int64  
 2   MediaTime     float64
 3   FrameIndex    int64  
 4   Anger         float64
 5   Disgust       float64
 6   Fear          float64
 7   Sadness       float64
 8   Smile         float64
 9   Surprise      float64
 10  Smile_75      int32  
 11  Smile_75_num  object 
 12  Smile_75_seq  object 
dtypes: float64(7), int32(1), int64(3), object(2)
memory usage: 390.1+ MB


In [139]:
array = df.to_numpy() # transforming the df to arrey

In [140]:
ID_num = array[0][1] # first ID is the first child in the array
frame_count = 0 # for the smiles
smile_num = 1 # starting with first smile

for row in array:
    if row[1] != ID_num: # new kid
        frame_count = 0
        smile_num = 1
        ID_num = row[1]
        
    if row[10] < 1: #not a smile
        if frame_count != 0:  #we were in the middle of a smile - need to reset
            frame_count = 0
            smile_num = smile_num +1
        row[11] = 0
        row[12] = 0
    
    else: #smile
        row[11] = smile_num
        frame_count = frame_count +1 # so the frame counter will start from 1
        row[12] = frame_count

In [141]:
# create new df with rellevant columns
df_smilez= pd.DataFrame({'ID': array[:, 1], 'Smile': array[:, 8], 'Smile_75': array[:, 10], 'Smile_num': array[:, 11], 'Smile_duration': array[:, 12]})

In [142]:
df_smilez.shape

(4090779, 5)

In [143]:
df_smilez.head()

Unnamed: 0,ID,Smile,Smile_75,Smile_num,Smile_duration
0,10212252882,0.0,0,0,0
1,10212252882,0.0,0,0,0
2,10212252882,0.0,0,0,0
3,10212252882,0.0,0,0,0
4,10212252882,0.0,0,0,0


### Smiles Count

In [144]:
df_count = df_smilez.groupby('ID').max()

In [145]:
df_count.reset_index(inplace=True)

In [146]:
df_count.head()

Unnamed: 0,ID,Smile,Smile_75,Smile_num,Smile_duration
0,664255171,99.96245,1.0,98.0,172.0
1,664308115,99.97299,1.0,35.0,93.0
2,666238693,100.0,1.0,32.0,214.0
3,666682207,100.0,1.0,229.0,682.0
4,666778936,100.0,1.0,284.0,703.0


In [147]:
df_count = df_count[['ID'] + ['Smile_num']]

In [148]:
df_count.head()

Unnamed: 0,ID,Smile_num
0,664255171,98.0
1,664308115,35.0
2,666238693,32.0
3,666682207,229.0
4,666778936,284.0


Comb with results

In [149]:
df_main['ID'] = pd.to_numeric(df_main['ID'])
df_count['ID'] = pd.to_numeric(df_count['ID'])

In [150]:
df_main = pd.merge(df_main ,df_count , how='outer', on='ID')

In [151]:
df_main.head()

Unnamed: 0,ID,Group,Total_frames_num,Valid_frames_num,Frames_Percentage,Diff_Pitch,Diff_Roll,Diff_Yaw,Euclidean_distance_ASD_NT,Euclidean_distance_NT_NT,Anger75,Disgust75,Fear75,Sadness75,Smile75,Surprise75,Sum75,Smile_num
0,673249918,0,85736,16471,0.192113,1.412543,1.420279,1.432545,27.920872,27.920872,0.0,0.001336,0.0,0.000121,0.076134,0.047295,0.124886,36.0
1,1021408222,0,90365,19580,0.216677,1.54034,1.761364,1.485873,46.611333,46.611333,0.001685,0.000919,0.0,0.0,0.089632,0.002809,0.095046,93.0
2,1019778562,0,111063,21325,0.192008,1.482536,1.816111,1.490298,22.355514,22.355514,0.001032,0.002626,0.0,0.000188,0.110762,0.003189,0.117796,90.0
3,1021093597,0,82627,46000,0.556719,1.124373,1.295388,1.134773,21.763416,21.763416,0.000304,0.003826,0.0,0.0,0.127652,0.008065,0.139848,145.0
4,669779629,0,104995,24678,0.23504,1.338867,1.550114,1.51838,18.300407,18.300407,0.0,0.001499,0.000932,0.0,0.221169,0.001661,0.225261,108.0


### Average duration of smiles

df_smilez.groupby('ID')

In [152]:
df_smilez["Smile_duration"] = pd.to_numeric(df_smilez["Smile_duration"])

In [153]:
df_smilez_du = df_smilez[ ['ID'] + ["Smile_duration"]]

In [154]:
df_smilez_du.shape

(4090779, 2)

In [155]:
df_smilez_du = df_smilez_du.drop(df_smilez_du[df_smilez_du.Smile_duration == 0].index)

In [156]:
df_smilez_du.shape

(150584, 2)

In [157]:
df_du = df_smilez_du.groupby('ID').mean()

In [158]:
df_du.reset_index(inplace=True)

In [159]:
df_du.head()

Unnamed: 0,ID,Smile_duration
0,664255171,36.251208
1,664308115,22.13912
2,666238693,41.49709
3,666682207,100.41267
4,666778936,74.397636


Comb with results

In [160]:
df_main['ID'] = pd.to_numeric(df_main['ID'])
df_du['ID'] = pd.to_numeric(df_du['ID'])

In [161]:
df_main = pd.merge(df_main ,df_du , how='outer', on='ID')

#    

# Flatness

In [162]:
df = df_Original.copy()

In [163]:
df.fillna(0, inplace=True)

In [164]:
df['row_sum'] = df['Anger'] + df['Disgust'] + df['Fear'] + df['Sadness'] + df['Smile'] +  df['Surprise']

In [165]:
df_No_zeros = df.drop(df[df.row_sum < 0.000001].index) # remove zeros
del df_No_zeros["row_sum"]

In [166]:
#deleted lines:
df.shape[0] - df_No_zeros.shape[0]

2980224

In [167]:
def Bars_below(df_input, bar_input):

    df_bars = df_input.copy()
    
    #create column names
    Anger_Name = "Anger" + str(bar_input)
    Disgust_Name = "Disgust" + str(bar_input)
    Fear_Name = "Fear" + str(bar_input)
    Sadness_Name = "Sadness" + str(bar_input)
    Smile_Name = "Smile" + str(bar_input)
    Surprise_Name = "Surprise" + str(bar_input)
    Sum_Name = "Sum"
    
    #check if row meets bar
    df_bars[Anger_Name] = np.where(df_bars.Anger < bar_input , 1, 0) 
    df_bars[Disgust_Name] = np.where(df_bars.Disgust < bar_input , 1, 0) 
    df_bars[Fear_Name] = np.where(df_bars.Fear < bar_input , 1, 0) 
    df_bars[Sadness_Name] = np.where(df_bars.Sadness < bar_input , 1, 0) 
    df_bars[Smile_Name] = np.where(df_bars.Smile < bar_input , 1, 0) 
    df_bars[Surprise_Name] = np.where(df_bars.Surprise < bar_input , 1, 0) 
    
    #Check if one of the emotions in the frame meets bar and separate them into new df
    df_bars[Sum_Name] = df_bars[Anger_Name] + df_bars[Disgust_Name] + df_bars[Fear_Name] + df_bars[Sadness_Name] + df_bars[Smile_Name] + df_bars[Surprise_Name]
    df_bars1 = df_bars.loc[lambda df_bars: df_bars[Sum_Name] > 0, :]
    #df_Output = df_bars1.groupby('ID').agg([np.sum]).iloc[:,7:15] #summerize by ID number of frames that met the bar for ID
    
    df_bars1 = df_bars1.drop(df_bars1[df_bars1.Sum != 6].index)
    
    del df_bars1[Anger_Name]
    del df_bars1[Disgust_Name]
    del df_bars1[Fear_Name]
    del df_bars1[Sadness_Name]
    del df_bars1[Smile_Name]
    del df_bars1[Surprise_Name]
    del df_bars1["Sum"]
    
    return df_bars1

In [168]:
# Set bar of flatness here
bar = 10
df_Bar_Flatt = Bars_below(df_No_zeros , bar)

In [169]:
df_Output_flat_Count = df_Bar_Flatt.groupby('ID').count()

In [170]:
df_Output_flat_Count.reset_index(inplace=True)

In [171]:
df_Output_flat_Count = df_Output_flat_Count[ ["ID"]+ ["FrameIndex"]]

In [172]:
del df_Bar_Flatt["FrameIndex"]

In [173]:
df_Output_flat = df_Bar_Flatt.groupby('ID').mean()

In [174]:
df_Output_flat.columns = df_Output_flat.columns.get_level_values(0)

In [175]:
df_Output_flat.reset_index(inplace=True)

In [176]:
df_main2 = pd.read_excel(diagnosis)

In [177]:
df_main2['ID'] = pd.to_numeric(df_main2['ID'])
df_Output_flat['ID'] = pd.to_numeric(df_Output_flat['ID'])
df_Output_flat_Count['ID'] = pd.to_numeric(df_Output_flat_Count['ID'])

In [178]:
df_Output_flat = pd.merge(df_Output_flat ,df_main2 , how='outer', on='ID')

In [179]:
df_Output_flat = pd.merge(df_Output_flat ,df_Output_flat_Count , how='outer', on='ID')

In [180]:
df_Output_flat = df_Output_flat[['ID'] +['Diagnosis'] + ['Group'] +['FrameIndex'] + ['Smile'] + ['Anger'] + ['Sadness'] + ['Disgust'] + ['Surprise'] + ['Fear'] + ['Brow Furrow'] + ['Brow Raise'] + ['Lip Corner Depressor'] + ['InnerBrowRaise'] + ['EyeClosure'] + ['NoseWrinkle'] + ['UpperLipRaise'] + ['LipSuck'] + ['LipPress'] + ['MouthOpen'] + ['ChinRaise'] + ['Smirk'] + ['LipPucker'] + ['Cheek Raise'] + ['Dimpler'] + ['Eye Widen'] + ['Lid Tighten'] + ['Lip Stretch'] + ['Jaw Drop'] + ['Pitch'] + ['Yaw'] + ['Roll']]

In [181]:
df_Output_flat = df_Output_flat.rename(columns={"FrameIndex": "Frames under bar"})

# Extra columns

In [182]:
df_main["Valid_frames_min"] = df_main["Valid_frames_num"]/(30*60)

In [183]:
df_main["Smile_frequency"] = df_main["Smile_num"]/df_main["Valid_frames_min"]

In [184]:
df_main.head()

Unnamed: 0,ID,Group,Total_frames_num,Valid_frames_num,Frames_Percentage,Diff_Pitch,Diff_Roll,Diff_Yaw,Euclidean_distance_ASD_NT,Euclidean_distance_NT_NT,...,Disgust75,Fear75,Sadness75,Smile75,Surprise75,Sum75,Smile_num,Smile_duration,Valid_frames_min,Smile_frequency
0,673249918,0,85736,16471,0.192113,1.412543,1.420279,1.432545,27.920872,27.920872,...,0.001336,0.0,0.000121,0.076134,0.047295,0.124886,36.0,44.715596,9.150556,3.934187
1,1021408222,0,90365,19580,0.216677,1.54034,1.761364,1.485873,46.611333,46.611333,...,0.000919,0.0,0.0,0.089632,0.002809,0.095046,93.0,22.904528,10.877778,8.54954
2,1019778562,0,111063,21325,0.192008,1.482536,1.816111,1.490298,22.355514,22.355514,...,0.002626,0.0,0.000188,0.110762,0.003189,0.117796,90.0,28.934487,11.847222,7.596717
3,1021093597,0,82627,46000,0.556719,1.124373,1.295388,1.134773,21.763416,21.763416,...,0.003826,0.0,0.0,0.127652,0.008065,0.139848,145.0,41.36036,25.555556,5.673913
4,669779629,0,104995,24678,0.23504,1.338867,1.550114,1.51838,18.300407,18.300407,...,0.001499,0.000932,0.0,0.221169,0.001661,0.225261,108.0,67.052708,13.71,7.877462


# Export main file

In [185]:
with pd.ExcelWriter(Output_directory + "//" + 'Main.xlsx') as writer:  
        df_main.to_excel(writer, sheet_name='Main', index=False)
        df_Bar75.to_excel(writer, sheet_name='Bars', index=True)
        df_Output_flat.to_excel(writer, sheet_name='Flatness', index=False)