In [1]:
import numpy as np
import pandas as pd
from tsfresh.feature_extraction.extraction import extract_features
from tsfresh.feature_extraction.settings import PickableSettings

In [2]:
# Number of consecutive samples that form one segment (one 'id' group).
SEGMENT_LENGTH = 150

# Load the two filtered recordings.
data_113 = pd.DataFrame(np.loadtxt('../dataset/20151026_113_filtered'))
data_114 = pd.DataFrame(np.loadtxt('../dataset/20151026_114_filtered'))

# Segments are cut from the concatenated rows, so each recording has to hold
# a whole number of segments; otherwise one segment would mix samples from
# both recordings.
assert len(data_113) % SEGMENT_LENGTH == 0, 'recording 113 has a partial segment'
assert len(data_114) % SEGMENT_LENGTH == 0, 'recording 114 has a partial segment'

# Stack the recordings. ignore_index=True gives the result a unique 0..N-1
# row index; without it the second recording repeats the row labels of the
# first, so any label-based lookup would match rows from both recordings.
data = pd.concat((data_113, data_114), ignore_index=True)

# Insert id and time columns for tsfresh: 'id' numbers the segments,
# 'time' is the sample position inside its segment.
row_number = np.arange(len(data))
data.insert(0, 'id', row_number // SEGMENT_LENGTH)
data.insert(1, 'time', row_number % SEGMENT_LENGTH)

# Display the data frame
data

Unnamed: 0,id,time,0,1,2,3
0,0,0,0.000018,-0.000029,0.000022,-0.000018
1,0,1,0.000093,-0.000140,0.000099,-0.000091
2,0,2,0.000241,-0.000349,0.000233,-0.000242
3,0,3,0.000454,-0.000644,0.000404,-0.000475
4,0,4,0.000735,-0.001011,0.000601,-0.000793
...,...,...,...,...,...,...
44995,599,145,-0.002147,-0.001350,-0.010993,-0.011334
44996,599,146,-0.002148,-0.001354,-0.010979,-0.011327
44997,599,147,-0.002152,-0.001365,-0.011003,-0.011351
44998,599,148,-0.002160,-0.001381,-0.011057,-0.011403


In [3]:
# Label every segment (one 'id' group) by how much of a sharp wave it holds,
# looking at channels 0 and 1 only:
#   2 - exceeds the positive AND the negative threshold (full sharp wave)
#   1 - exceeds exactly one of them (partial sharp wave)
#   0 - exceeds neither (no sharp wave)
# Grouping by 'id' replaces slicing a hard-coded 600 x 150 rows, so there is
# exactly one label per segment whatever the length of the input is.
SHARP_WAVE_THRESHOLD = 0.01

per_segment = data.groupby('id')[[0, 1]]

# Largest / smallest sample of each segment across both channels.
has_peak = per_segment.max().max(axis=1) > SHARP_WAVE_THRESHOLD
has_valley = per_segment.min().min(axis=1) < -SHARP_WAVE_THRESHOLD

# Adding the two boolean flags yields the 0 / 1 / 2 label directly.
labels = (has_peak.astype(np.int8) + has_valley.astype(np.int8)).to_numpy()

labels

array([2, 2, 0, 2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 2, 2, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0,
       2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 0, 0, 0, 0,
       0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 2, 2, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,

In [4]:
# Ask tsfresh for two calculators only: fft_aggregated (aggtype "variance")
# and variation_coefficient.
settings = PickableSettings({
    "fft_aggregated": [{"aggtype": "variance"}],
    "variation_coefficient": None,
})

# Columns kept for the labelled data set, in output order: fft_aggregated
# for all four channels, variation_coefficient for channels 0 and 1.
selected_columns = [
    '0__fft_aggregated__aggtype_"variance"',
    '1__fft_aggregated__aggtype_"variance"',
    '2__fft_aggregated__aggtype_"variance"',
    '3__fft_aggregated__aggtype_"variance"',
    '0__variation_coefficient',
    '1__variation_coefficient',
]

# One feature row per 'id', with samples ordered by 'time'.
features = extract_features(
    data,
    column_id='id',
    column_sort='time',
    default_fc_parameters=settings,
)
features = features.loc[:, selected_columns]

# Append the segment labels as the last column.
features.insert(len(selected_columns), 'tag', labels)

features

Feature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]

Feature Extraction: 100%|██████████| 40/40 [00:02<00:00, 13.79it/s]


Unnamed: 0,"0__fft_aggregated__aggtype_""variance""","1__fft_aggregated__aggtype_""variance""","2__fft_aggregated__aggtype_""variance""","3__fft_aggregated__aggtype_""variance""",0__variation_coefficient,1__variation_coefficient,tag
0,258.540334,225.830256,268.197584,157.348701,0.367043,-0.352182,2
1,248.334049,207.411161,208.322011,16.387826,0.251688,-0.198657,2
2,318.449336,146.853375,155.680580,266.675919,0.487016,-0.124477,0
3,406.152553,395.207476,384.945073,350.962024,23.239893,-3.298146,2
4,284.099995,270.710357,124.164417,56.658498,1.167463,0.918186,2
...,...,...,...,...,...,...,...
595,144.485130,188.718498,362.784131,373.213058,-0.119729,-0.229562,1
596,163.770544,258.427574,286.320711,289.334815,-0.153575,-0.245348,1
597,207.974158,304.725255,150.804803,158.369773,-0.207339,-0.672677,1
598,253.368547,73.079415,362.213614,359.876559,-0.298976,-0.900108,1


In [5]:
# Write the labelled features as space-separated text without header or
# index: rows 0-299 go to the 113 file, the remaining rows to the 114 file.
outputs = (
    (features.iloc[:300], '../dataset/20151026_113_labeled.txt'),
    (features.iloc[300:], '../dataset/20151026_114_labeled.txt'),
)
for part, path in outputs:
    part.to_csv(path, sep=' ', header=False, index=False)