# Processing action units + TS Fresh

In [1]:
import pandas as pd
import pickle

# Loading the file 

In [50]:
filepath = "/Users/dionnespaltman/Desktop/V4/action_units/clean_df.pkl"

# Load the dataset
action_units = pd.read_pickle(filepath)

In [51]:
nan_counts_au_df = action_units.isna().sum()
print(nan_counts_au_df)

Frame              0
Face_id            0
Confidence         0
Success            0
AU01_r        243394
AU02_r        243394
AU04_r        243394
AU05_r        243394
AU06_r        243394
AU07_r        243394
AU09_r        243394
AU10_r        243394
AU12_r        243394
AU14_r        243394
AU15_r        243394
AU17_r        243394
AU20_r        243394
AU23_r        243394
AU25_r        243394
AU26_r        243394
AU45_r        243394
AU01_c        243394
AU02_c        243394
AU04_c        243394
AU05_c        243394
AU06_c        243394
AU07_c        243394
AU09_c        243394
AU10_c        243394
AU12_c        243394
AU14_c        243394
AU15_c        243394
AU17_c        243394
AU20_c        243394
AU23_c        243394
AU25_c        243394
AU26_c        243394
AU28_c        243394
AU45_c        243394
ID                 0
Stage              0
dtype: int64


In [95]:
print(action_units.columns)

Index(['Frame', 'Face_id', 'Confidence', 'Success', 'AU01_r', 'AU02_r',
       'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r', 'AU12_r',
       'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r',
       'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c',
       'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c', 'AU20_c',
       'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c', 'ID', 'Stage'],
      dtype='object')


In [96]:
display(action_units.head(5))

Unnamed: 0,Frame,Face_id,Confidence,Success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,...,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,ID,Stage
103,104,0.0,0.88,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
104,105,0.0,0.98,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
111,112,0.0,0.98,1.0,0.31,0.59,0.0,0.0,0.31,0.31,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
114,115,0.0,0.98,1.0,0.0,0.12,0.0,0.0,0.32,0.06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
153,154,0.0,0.98,1.0,1.26,1.67,0.0,0.0,0.39,0.26,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"


When I made this notebook, I still thought I would get more data. However, we have to work with only the data I already received. This is data from 184 unique participants (missing IDs from 147 to 287). 

In [54]:
print(len(action_units['ID'].unique()))

184


Here I check information from what stages is available. At first, I wanted to use the recordings from the waiting room to add them as a feature. However, this is not possible, since there is so little data from these timepoints. So I am only going to use data from stage 4, 5 and 6 which is when the participants are in the donation chair. 

In [97]:
# Count occurrences of each value in the "Stage" column
stage_counts = action_units['Stage'].value_counts()

print(stage_counts)

[4, 5, 6]       2118735
[3, 4, 5, 6]     285556
[7]              269554
[1]              163222
[3]              134364
[2]               78159
[4]               68762
[4, 5]            40338
[4, 6]            15713
Name: Stage, dtype: int64


# Missing values 

There are quite some missing values. This is possible when participants look away from the camera. Then naturally OpenFace wasn't able to get any information from their faces. 

In [56]:
nan_counts_au_df = action_units.isna().sum()
print(nan_counts_au_df)

Frame              0
Face_id            0
Confidence         0
Success            0
AU01_r        243394
AU02_r        243394
AU04_r        243394
AU05_r        243394
AU06_r        243394
AU07_r        243394
AU09_r        243394
AU10_r        243394
AU12_r        243394
AU14_r        243394
AU15_r        243394
AU17_r        243394
AU20_r        243394
AU23_r        243394
AU25_r        243394
AU26_r        243394
AU45_r        243394
AU01_c        243394
AU02_c        243394
AU04_c        243394
AU05_c        243394
AU06_c        243394
AU07_c        243394
AU09_c        243394
AU10_c        243394
AU12_c        243394
AU14_c        243394
AU15_c        243394
AU17_c        243394
AU20_c        243394
AU23_c        243394
AU25_c        243394
AU26_c        243394
AU28_c        243394
AU45_c        243394
ID                 0
Stage              0
dtype: int64


# Dealing with missing values - MICE

Because we're missing about 7 percent of the action units, it would not be smart to simply delete these rows. We will use MICE to deal with the missing values. 
Link: https://www.machinelearningplus.com/machine-learning/mice-imputation/?utm_content=cmp-true 

In [57]:
# need to enable iterative imputer explicitly since its still experimental
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [58]:
# Define imputer
imputer = IterativeImputer(random_state=100, max_iter=10)

In [59]:
print(list(action_units.columns))

['Frame', 'Face_id', 'Confidence', 'Success', 'AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', 'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c', 'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c', 'AU20_c', 'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c', 'ID', 'Stage']


In [60]:
# Use Numeric Features
action_units_train = action_units.loc[:, ['AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', 'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c', 'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c', 'AU20_c', 'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c']]
action_units_train.head(5)

Unnamed: 0,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,AU09_r,AU10_r,AU12_r,AU14_r,...,AU12_c,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c
103,0.0,0.0,0.0,0.0,1.34,0.22,0.57,1.56,1.52,0.54,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
104,0.0,0.0,0.0,0.0,1.34,0.22,0.57,1.56,1.52,0.54,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
111,0.31,0.59,0.0,0.0,0.31,0.31,0.23,0.21,0.62,0.12,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
114,0.0,0.12,0.0,0.0,0.32,0.06,0.1,0.6,0.5,0.29,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
153,1.26,1.67,0.0,0.0,0.39,0.26,0.0,1.22,0.64,0.18,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0


Running everything up until the imputer was very fast. Running the imputer took around 6 minutes. 

In [61]:
# fit on the dataset
imputer.fit(action_units_train)

In [99]:
action_units_imputed = imputer.transform(action_units_train)
# action_units_imputed[:10]

In [63]:
# Replace with imputed values
action_units.loc[:, ['AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r', 'AU12_r', 
                     'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', 'AU45_r', 'AU01_c', 
                     'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c', 'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 
                     'AU15_c', 'AU17_c', 'AU20_c', 'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c']] = action_units_imputed
action_units.head(5)

Unnamed: 0,Frame,Face_id,Confidence,Success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,...,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,ID,Stage
103,104,0.0,0.88,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
104,105,0.0,0.98,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
111,112,0.0,0.98,1.0,0.31,0.59,0.0,0.0,0.31,0.31,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
114,115,0.0,0.98,1.0,0.0,0.12,0.0,0.0,0.32,0.06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
153,154,0.0,0.98,1.0,1.26,1.67,0.0,0.0,0.39,0.26,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"


In [100]:
num_unique_ids = action_units['ID'].nunique()
print(num_unique_ids)

184


Then we will double check if everything went correctly. We should not have any missing values anymore. This is important to be able to implement TS Fresh. 

In [64]:
nan_counts_au_df = action_units.isna().sum()
print(nan_counts_au_df)

Frame         0
Face_id       0
Confidence    0
Success       0
AU01_r        0
AU02_r        0
AU04_r        0
AU05_r        0
AU06_r        0
AU07_r        0
AU09_r        0
AU10_r        0
AU12_r        0
AU14_r        0
AU15_r        0
AU17_r        0
AU20_r        0
AU23_r        0
AU25_r        0
AU26_r        0
AU45_r        0
AU01_c        0
AU02_c        0
AU04_c        0
AU05_c        0
AU06_c        0
AU07_c        0
AU09_c        0
AU10_c        0
AU12_c        0
AU14_c        0
AU15_c        0
AU17_c        0
AU20_c        0
AU23_c        0
AU25_c        0
AU26_c        0
AU28_c        0
AU45_c        0
ID            0
Stage         0
dtype: int64


# Saving the action units file with the imputed values 

In [65]:
action_units.to_pickle("/Users/dionnespaltman/Desktop/V4/action_units/imputed_df.pkl")

# Loading action units file with the imputed values 

In [66]:
imputed_df = pd.read_pickle("/Users/dionnespaltman/Desktop/V4/action_units/imputed_df.pkl")

In [102]:
display(imputed_df.head(5))

Unnamed: 0,Frame,Face_id,Confidence,Success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,...,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,ID,Stage
103,104,0.0,0.88,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
104,105,0.0,0.98,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
111,112,0.0,0.98,1.0,0.31,0.59,0.0,0.0,0.31,0.31,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
114,115,0.0,0.98,1.0,0.0,0.12,0.0,0.0,0.32,0.06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
153,154,0.0,0.98,1.0,1.26,1.67,0.0,0.0,0.39,0.26,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"


# Creating file with just Stage = [4, 5, 6]

In [68]:
# Count occurrences of Stage values in action_units DataFrame
stage_counts = imputed_df['Stage'].value_counts()
print(stage_counts)

[4, 5, 6]       2118735
[3, 4, 5, 6]     285556
[7]              269554
[1]              163222
[3]              134364
[2]               78159
[4]               68762
[4, 5]            40338
[4, 6]            15713
Name: Stage, dtype: int64


In [69]:
imputed_df_456 = imputed_df[imputed_df['Stage'].apply(lambda x: set(x) == set([4, 5, 6]))]
display(imputed_df_456.head(5))

Unnamed: 0,Frame,Face_id,Confidence,Success,AU01_r,AU02_r,AU04_r,AU05_r,AU06_r,AU07_r,...,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,ID,Stage
103,104,0.0,0.88,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
104,105,0.0,0.98,1.0,0.0,0.0,0.0,0.0,1.34,0.22,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
111,112,0.0,0.98,1.0,0.31,0.59,0.0,0.0,0.31,0.31,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
114,115,0.0,0.98,1.0,0.0,0.12,0.0,0.0,0.32,0.06,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,80,"[4, 5, 6]"
153,154,0.0,0.98,1.0,1.26,1.67,0.0,0.0,0.39,0.26,...,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,80,"[4, 5, 6]"


Okay, so we had data from 184 participants. But we only have information from stages 4, 5 and 6 from 104 IDs. 

In [104]:
num_unique_ids = imputed_df_456['ID'].nunique()
print(num_unique_ids)

104


In [105]:
# Count occurrences of stage values in the dataframe
stage_counts = imputed_df_456['Stage'].value_counts()
print(stage_counts)

[4, 5, 6]    2118735
Name: Stage, dtype: int64


# TS Fresh requirements

Now we'll start to process the action units data using TS Fresh. First we import TS Fresh. 

In [106]:
# Necessary imports
!pip install tsfresh

import pandas as pd
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters
from sklearn.preprocessing import StandardScaler
from tsfresh.feature_extraction import ComprehensiveFCParameters



# Appling TS Fresh on action units 

In [72]:
print(list(imputed_df_456.columns))

['Frame', 'Face_id', 'Confidence', 'Success', 'AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', 'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c', 'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c', 'AU20_c', 'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c', 'ID', 'Stage']


In [74]:
au_long_format = pd.melt(imputed_df_456, id_vars=['ID', 'Frame'],
                                   var_name='Measurement', value_name='Value',
                                   value_vars=['AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 
                                               'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 
                                               'AU25_r', 'AU26_r', 'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 
                                               'AU06_c', 'AU07_c', 'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 
                                               'AU17_c', 'AU20_c', 'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c'])

In [75]:
display(au_long_format)

Unnamed: 0,ID,Frame,Measurement,Value
0,80,104,AU01_r,0.00
1,80,105,AU01_r,0.00
2,80,112,AU01_r,0.31
3,80,115,AU01_r,0.00
4,80,154,AU01_r,1.26
...,...,...,...,...
74155720,27,21069.0,AU45_c,1.00
74155721,27,21070.0,AU45_c,1.00
74155722,27,21071.0,AU45_c,1.00
74155723,27,21072.0,AU45_c,1.00


In [91]:
num_unique_ids = au_long_format['ID'].nunique()
print(num_unique_ids)

104


In [76]:
nan_counts = au_long_format.isna().sum()
print(nan_counts)

ID             0
Frame          0
Measurement    0
Value          0
dtype: int64


In [107]:
# Convert 'ID' and 'Frame' columns to integer data type
au_long_format['ID'] = au_long_format['ID'].astype(int)
au_long_format['Frame'] = au_long_format['Frame'].astype(int)

# # Convert 'Measurement' column to string data type
au_long_format['Measurement'] = au_long_format['Measurement'].astype(str)

In [108]:
column_types = au_long_format.dtypes
print(column_types)

ID               int64
Frame            int64
Measurement     object
Value          float64
dtype: object


We will use the following six intensity level characteristics: sum, variance, standard deviation, maximum, mean, and mean-root square values. 
More information about the settings in TS Fresh can be found here: https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.sum_values 

In [80]:
# Specificeer de gewenste kenmerken
settings = {
    'sum_values': {},
    'variance': {},
    'standard_deviation': {},
    'maximum': {},
    'minimum': {},
    'median': {},
    'mean': {},
    'mean_abs_change': {}, 
    'agg_linear_trend': [{'attr': 'slope', 'f_agg': 'max', 'chunk_len': 5}, {'attr': 'slope', 'f_agg': 'min', 'chunk_len': 5}] 
}

# Extract features met aangepaste instellingen
extracted_features_au = extract_features(
    au_long_format,
    column_id='ID',  # Identifies the time series
    column_sort='Frame',  # Orders the time points
    column_kind='Measurement',  # Distinguishes between different variables if necessary
    column_value='Value',  # The actual measurements
    default_fc_parameters=settings
)

Feature Extraction: 100%|██████████| 20/20 [00:53<00:00,  2.68s/it]


In [111]:
display(extracted_features_au.head(5))

num_unique_ids = extracted_features_au.index.nunique()
print(num_unique_ids)

Unnamed: 0,AU10_c__sum_values,AU10_c__variance,AU10_c__standard_deviation,AU10_c__maximum,AU10_c__minimum,AU10_c__median,AU10_c__mean,AU10_c__mean_abs_change,"AU10_c__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""max""","AU10_c__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""min""",...,AU09_r__sum_values,AU09_r__variance,AU09_r__standard_deviation,AU09_r__maximum,AU09_r__minimum,AU09_r__median,AU09_r__mean,AU09_r__mean_abs_change,"AU09_r__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""max""","AU09_r__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""min"""
23,7873.0,0.24875,0.498748,1.0,0.0,1.0,0.53536,0.024685,-1.7e-05,-1.3e-05,...,1772.86,0.109496,0.330902,4.98,0.0,0.0,0.120554,0.019213,-3.2e-05,-2.2e-05
24,16831.0,0.235917,0.485713,1.0,0.0,1.0,0.618673,0.010881,5.3e-05,5.3e-05,...,3197.59,0.063365,0.251725,2.59,0.0,0.0,0.117537,0.024182,-2.9e-05,-1.9e-05
25,3506.0,0.168213,0.410138,1.0,0.0,0.0,0.214015,0.01111,-4.3e-05,-2.7e-05,...,2275.65,0.153114,0.391297,4.1,0.0,0.0,0.138912,0.024858,4.8e-05,3.3e-05
26,867.0,0.046539,0.215729,1.0,0.0,0.0,0.048933,0.003612,-3.1e-05,-2.7e-05,...,1755.79,0.052783,0.229745,3.25,0.0,0.0,0.099096,0.016996,-1.3e-05,-1.2e-05
27,200.0,0.009414,0.097026,1.0,0.0,0.0,0.009504,0.002281,-5e-06,-2e-06,...,20877.08,0.779602,0.882951,3.45,-4.43,1.05,0.992115,0.150508,8.9e-05,9.5e-05


104


# Saving the extracted features to pkl

In [82]:
extracted_features_au.to_pickle('/Users/dionnespaltman/Desktop/V4/extracted_features.pkl')

# Understanding extracted features 

In [84]:
display(extracted_features_au)

Unnamed: 0,AU10_c__sum_values,AU10_c__variance,AU10_c__standard_deviation,AU10_c__maximum,AU10_c__minimum,AU10_c__median,AU10_c__mean,AU10_c__mean_abs_change,"AU10_c__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""max""","AU10_c__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""min""",...,AU09_r__sum_values,AU09_r__variance,AU09_r__standard_deviation,AU09_r__maximum,AU09_r__minimum,AU09_r__median,AU09_r__mean,AU09_r__mean_abs_change,"AU09_r__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""max""","AU09_r__agg_linear_trend__attr_""slope""__chunk_len_5__f_agg_""min"""
23,7873.0,0.248750,0.498748,1.0,0.0,1.0,0.535360,0.024685,-0.000017,-0.000013,...,1772.86,0.109496,0.330902,4.98,0.00,0.00,0.120554,0.019213,-0.000032,-0.000022
24,16831.0,0.235917,0.485713,1.0,0.0,1.0,0.618673,0.010881,0.000053,0.000053,...,3197.59,0.063365,0.251725,2.59,0.00,0.00,0.117537,0.024182,-0.000029,-0.000019
25,3506.0,0.168213,0.410138,1.0,0.0,0.0,0.214015,0.011110,-0.000043,-0.000027,...,2275.65,0.153114,0.391297,4.10,0.00,0.00,0.138912,0.024858,0.000048,0.000033
26,867.0,0.046539,0.215729,1.0,0.0,0.0,0.048933,0.003612,-0.000031,-0.000027,...,1755.79,0.052783,0.229745,3.25,0.00,0.00,0.099096,0.016996,-0.000013,-0.000012
27,200.0,0.009414,0.097026,1.0,0.0,0.0,0.009504,0.002281,-0.000005,-0.000002,...,20877.08,0.779602,0.882951,3.45,-4.43,1.05,0.992115,0.150508,0.000089,0.000095
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140,4704.0,0.148649,0.385550,1.0,0.0,0.0,0.181643,0.010967,0.000057,0.000048,...,2324.06,0.044894,0.211882,3.71,0.00,0.00,0.089742,0.014612,-0.000008,-0.000007
142,9082.0,0.238788,0.488660,1.0,0.0,0.0,0.394116,0.020049,0.000013,0.000019,...,3068.82,0.144935,0.380703,4.70,0.00,0.00,0.133172,0.022507,0.000057,0.000042
144,252.0,0.013292,0.115292,1.0,0.0,0.0,0.013474,0.001925,0.000002,0.000001,...,2307.91,0.123643,0.351629,5.00,0.00,0.00,0.123398,0.018496,-0.000025,-0.000019
145,10297.0,0.220715,0.469804,1.0,0.0,0.0,0.328873,0.011115,0.000052,0.000050,...,3768.23,0.065670,0.256262,3.08,0.00,0.00,0.120352,0.016477,0.000025,0.000020


In [86]:
indexes = extracted_features_au.index.tolist()
print(len(indexes))

104
