In [None]:
from tsfresh.examples import load_driftbif
from tsfresh.feature_extraction import ComprehensiveFCParameters, extract_features
from tsfresh.feature_extraction.settings import get_combiner_functions, get_simple_functions
import pandas as pd
import pprint
import timeit
from tqdm import tqdm
import matplotlib.pylab as plt
import seaborn as sns

In [None]:
# Simulate the benchmark input with tsfresh's drift-bifurcation example generator.
# NOTE(review): the arguments are presumably (number of samples, series length) — confirm
# against the load_driftbif documentation. The class labels are not needed for timing.
X, _ = load_driftbif(10, 1000)
# the "dimension" column is removed so that it is not passed on to the feature extraction
X = X.drop(columns="dimension")
X.head()

In [None]:
# Get the comprehensive tsfresh feature-calculation settings.
# The benchmark iterates over settings.items(), which yields pairs of
# (feature calculator name, parameter list or None).
settings = ComprehensiveFCParameters()

In [None]:
# Benchmark every tsfresh feature calculator on its own: for each entry of `settings`
# run extract_features restricted to that single calculator and record how long it takes.
#
# Resulting columns of `res` (indexed by the feature calculator name):
#   feature - name of the feature calculator
#   n_samp  - number of time series the timing is normalised by (n_ts)
#   length  - nominal length of a time series (l_ts)
#   n_fs    - number of parameter combinations of the calculator (1 if it takes none)
#   t_abs   - mean runtime of one extraction over X, per parameter combination [s]
#   t_1ts   - t_abs additionally divided by the number of time series [s]
#
# NOTE(review): X is created with load_driftbif(10, 1000) while n_ts is 20; presumably
# the 20 counts 10 ids times 2 dimensions/columns — confirm that n_ts and l_ts match X.
n_ts = 20    # number of time series used for normalisation
l_ts = 1000  # nominal length of each time series
n_ti = 3     # number of timeit repetitions per feature calculator

columns = ["feature", "n_samp", "length", "n_fs", "t_abs", "t_1ts"]
records = []

for f, param in tqdm(settings.items()):
    fc_dict = {f: param}

    t_total = timeit.timeit(lambda: extract_features(timeseries_container=X,
                                                     column_id="id",
                                                     n_jobs=0,
                                                     default_fc_parameters=fc_dict,
                                                     disable_progressbar=True),
                            number=n_ti)
    # timeit.timeit returns the cumulative time of all `number` executions,
    # so divide by n_ti to obtain the runtime of a single extraction.
    t = t_total / float(n_ti)

    # calculators without parameters (None) or with an empty list count as one feature
    n_fs = len(param) if param else 1

    records.append({"feature": f,
                    "n_samp": n_ts,
                    "length": l_ts,
                    "n_fs": n_fs,
                    "t_abs": t / n_fs,
                    "t_1ts": t / (n_ts * n_fs)})

# Build the frame once instead of enlarging it cell by cell; keep the feature name as
# index so that rows can still be addressed/dropped by name later on.
res = pd.DataFrame(records, columns=columns)
res.index = res["feature"].values

In [None]:
# display the collected timings (one row per feature calculator)
res

In [None]:
# make sure the feature names are plain strings, then order the rows alphabetically by name
res = res.assign(feature=res["feature"].astype(str)).sort_values(by="feature")

In [None]:
# Bar chart of t_abs: the runtime per feature for extracting over the whole container X.
# t_abs is NOT divided by the number of time series, so the title must not claim
# "for 1 time series" (that quantity is the separate column t_1ts).
fig, ax = plt.subplots(figsize=(6, 20))
sns.barplot(y="feature", x="t_abs", data=res, ax=ax)
ax.set_title("Runtime per feature for all {} time series of length {}".format(n_ts, l_ts))
ax.set_xlabel("t_abs [s]")  # timeit reports seconds
plt.show()

In [None]:
# Bar chart of t_1ts (runtime per feature, normalised by the number of time series),
# one horizontal bar per feature calculator.
fig, ax = plt.subplots(figsize=(6, 20))
sns.barplot(data=res, x="t_1ts", y="feature", ax=ax)
ax.set_title("Runtime of aggregate features for 1 time series of length 1000")
plt.show()

`sample_entropy` and `approximate_entropy` account for most of the runtime, so we create the following plots without them.

In [None]:
# Same chart of t_1ts as before, but with the two entropy calculators removed
# (rows are dropped by their index label, i.e. the feature name).
fig, ax = plt.subplots(figsize=(6, 20))
sns.barplot(data=res.drop(index=["sample_entropy", "approximate_entropy"]),
            x="t_1ts",
            y="feature",
            ax=ax)
ax.set_title("Runtime of aggregate features for 1 time series of length 1000")
plt.show()

In [None]:
# Per-feature runtimes (t_1ts) sorted from fastest to slowest, to see how many
# features cause 90% of the runtime. `res` itself is left untouched.
r = res["t_1ts"].sort_values(ascending=True)
r

In [None]:
# share of each feature in the summed runtime, expressed in percent
r.div(r.sum()).mul(100)