forked from dimstudio/SharpFlow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
feature_extraction.py
53 lines (46 loc) · 2.21 KB
/
feature_extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
import pandas as pd
from tsfresh import *
import pickle
import os
def extract_df_with_features(tensor, annotations, attributes, target_classes, folder):
    """Return a per-interval tsfresh feature table, computing it once and caching it.

    The table is pickled to ``{folder}/features/extracted_features_N<k>.pkl``
    where ``k`` is ``len(annotations)``. If that file already exists it is
    loaded and returned without touching ``tensor``.

    Args:
        tensor: Array whose shape unpacks into exactly three dimensions. Axis 0
            is used as the interval id and axis 1 as the ordered samples inside
            an interval; the remaining axis is flattened into columns.
        annotations: Object supporting ``len()`` and column lookup for
            ``target_classes`` and ``'recordingID'`` (a pandas DataFrame in
            practice). Rows are matched to intervals by pandas index
            alignment - assumes its index is 0..m-1, TODO confirm with caller.
        attributes: Column names for the flattened trailing axis of ``tensor``;
            there must be one name per resulting column.
        target_classes: Column label(s) copied from ``annotations`` into the
            feature table.
        folder: Base directory; the cache lives in its ``features/`` subfolder.

    Returns:
        The feature DataFrame with the ``target_classes`` columns and a
        ``recordingID`` column appended.
    """
    folder_features = f"{folder}/features/"
    # NOTE(review): the cache key is only the number of annotation rows, so a
    # different data set with the same row count will silently reuse this file.
    file_features = f"{folder_features}extracted_features_N{len(annotations)}.pkl"

    if os.path.exists(file_features):
        # NOTE: unpickling can execute arbitrary code - only point `folder`
        # at directories whose contents are trusted.
        with open(file_features, "rb") as cache_file:
            return pickle.load(cache_file)

    # Unpacking into three names keeps the original requirement of a 3-D input.
    m, n, _ = tensor.shape

    # Long format expected by tsfresh: one row per (interval, sample), with the
    # interval id in the first column followed by the attribute values.
    interval_ids = np.repeat(np.arange(m), n)
    out_arr = np.column_stack((interval_ids, tensor.reshape(m * n, -1)))
    out_df = pd.DataFrame(out_arr, columns=["interval", *attributes])
    # Position of each sample inside its interval, used as the sort key.
    out_df["time"] = out_df.groupby(["interval"]).cumcount()

    ef_df = extract_features(
        out_df,
        column_id="interval",
        column_sort="time",
        column_kind=None,
        column_value=None,
    )
    ef_df[target_classes] = annotations[target_classes]
    ef_df.index = ef_df.index.astype("int64")

    # Treat infinities as missing, then discard every column with a gap.
    ef_df = ef_df.replace([np.inf, -np.inf], np.nan).dropna(axis=1)
    # NOTE(review): the dropna above already removes any column containing a
    # NaN, so the two steps below look redundant; they are kept so behaviour
    # is unchanged - confirm which filtering was actually intended.
    extracted_features = ef_df.dropna(axis=1, how="all")
    # bfill()/ffill() replace the deprecated fillna(method=...) spelling.
    extracted_features = extracted_features.bfill().ffill()
    extracted_features.loc[:, "recordingID"] = annotations["recordingID"]

    os.makedirs(folder_features, exist_ok=True)
    with open(file_features, "wb") as cache_file:
        pickle.dump(extracted_features, cache_file)
    return extracted_features
def extract_basic_features(tensor, annotations, attributes):
    """Compute simple per-interval summary features for every attribute.

    Each registered reducer is called as ``reducer(tensor, axis=1)`` and its
    result becomes one block of columns named ``<attribute><suffix>``.

    Args:
        tensor: Array reduced along axis 1. The reducer output must be 2-D
            with one column per entry of ``attributes`` - presumably the input
            is (intervals, samples, attributes); confirm with caller.
        annotations: Table providing the ``'recordingID'`` and ``'mistake'``
            columns, matched to the feature rows by pandas index alignment.
        attributes: Iterable of attribute names (strings) used to build the
            feature column names.

    Returns:
        DataFrame with one column per (attribute, reducer) pair followed by
        ``recordingID`` and ``mistake``.
    """
    # Column suffix -> reducer. Further reducers (for example np.mean under a
    # '_mean' suffix) can be registered here.
    dict_functions = {'_gradient': gradient_mean}

    # Build one frame per reducer and join them in a single concat instead of
    # repeatedly concatenating onto a growing frame.
    frames = [
        pd.DataFrame(
            reducer(tensor, axis=1),
            columns=[name + suffix for name in attributes],
        )
        for suffix, reducer in dict_functions.items()
    ]
    df = pd.concat(frames, axis=1)

    df.loc[:, 'recordingID'] = annotations['recordingID']
    df.loc[:, 'mistake'] = annotations['mistake']
    return df
def gradient_mean(x, axis):
    """Return the mean of the numerical gradient of ``x`` along ``axis``.

    The gradient is taken with ``np.gradient`` along ``axis`` and then
    averaged over that same axis, so the result has that axis removed.
    """
    slopes = np.gradient(x, axis=axis)
    return slopes.mean(axis=axis)