# Time Course Data Analysis with TSFresh
This notebook applies TSFresh to generate new features from the MBR time course data.

tsfresh==0.17
statsmodels==0.12
numpy==1.21.5
pandas==1.3.5

In [1]:
!python -V

Python 3.7.0


In [None]:
# Import the libraries used for feature extraction
import numpy as np
import pandas as pd
from tsfresh import extract_features

In [None]:
# Read the time course table and discard the "Proj" and "eye" columns
# before handing the remaining columns to TSFresh.
df = (
    pd.read_csv("../data/10types_waveform_mean_spread.csv")
    .drop(columns=["Proj", "eye"])
)

In [None]:
# Extract features using TSFresh.
# `column_id` names the column that identifies each individual time series
# ("group.cmp") and `column_sort` names the column used to order the samples
# within a series ("t").
# NOTE(review): presumably every other column left in `df` is treated as a
# value column to extract features from -- confirm against the CSV schema.
tsfresh_features = extract_features(df, column_id='group.cmp', column_sort="t")

## Handle NaN and inf values with `impute`

In [None]:
from tsfresh.utilities.dataframe_functions import impute
# Run TSFresh's `impute` on the extracted feature table so that later steps
# do not have to deal with NaN / inf entries.
# NOTE(review): tsfresh documents `impute` as operating in place on its
# argument, so `tsf_features_impute` presumably refers to the same object as
# `tsfresh_features` -- confirm if the un-imputed features are needed later.
tsf_features_impute = impute(tsfresh_features)

## Remove or replace unwanted characters from feature names

In [None]:
def _sanitize_feature_names(names):
    """Return `names` with quotes/brackets removed and separators turned into '-'.

    The substitutions are literal (non-regex) and applied in the listed order;
    ', ' must be handled before ',' so a comma followed by a space collapses
    into a single '-'.
    """
    substitutions = (
        ('"', ''),
        ('(', ''),
        (')', ''),
        ('[', ''),
        (']', ''),
        ('{', ''),
        ('}', ''),
        (', ', '-'),
        (',', '-'),
        (':', '-'),
    )
    for unwanted, replacement in substitutions:
        names = names.str.replace(unwanted, replacement, regex=False)
    return names


tsf_features_impute.columns = _sanitize_feature_names(tsf_features_impute.columns)


In [None]:
# Name the index "group.cmp" so the index column is labelled in the CSV,
# then write the cleaned TSFresh feature table to disk.
tsf_features_impute.index.name = "group.cmp"
tsf_features_impute.to_csv(
    path_or_buf="../processed_data/tsfreshfeatures.csv",
)

## Merge TBFs (TSFresh features) with PDFs (predefined features)

In [None]:
# Load the predefined features (PDFs); the first CSV column is used as the
# row index.
PDFs = pd.read_csv(
    "../../calculation_of_predefined_features/processed_data/4types_waveform_PDFs.csv",
    index_col=0,
)

In [None]:
# Display the (rows, columns) dimensions of the predefined-feature table.
PDFs.shape

In [None]:
# Combine the predefined features with the TSFresh features by matching row
# indexes; only index values present in both tables are kept (inner join).
all_features = PDFs.merge(
    tsf_features_impute,
    how="inner",
    left_index=True,
    right_index=True,
)

In [None]:
# Save the merged feature table (predefined + TSFresh features) to disk.
all_features.to_csv(
    path_or_buf="../processed_data/allfeatures.csv",
)