In [2]:
import numpy as np
import pandas as pd
import h5py
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import MinimalFCParameters

## Extracción de datos

In [None]:
def features_from_file(filename, groups, n_jobs=4,
                       excluded_features=("length", "absolute_maximum", "median")):
    """Extract per-sample tsfresh summary features from an HDF5 file.

    Every dataset named in ``groups`` is loaded and treated as a 2-D matrix
    with one row per sample and one column per time step. The matrices are
    reshaped into a single long-format frame (``id``, ``time``, ``group``,
    ``value``) and passed to ``tsfresh.extract_features`` using
    ``MinimalFCParameters`` minus the calculators in ``excluded_features``.

    Parameters
    ----------
    filename : str or path-like
        HDF5 file to open read-only.
    groups : iterable of str
        Names of the datasets to read; each one becomes a tsfresh "kind".
    n_jobs : int, optional
        Forwarded to ``extract_features`` (default 4).
    excluded_features : iterable of str, optional
        Calculator names removed from the minimal settings before
        extraction. Names not present in the settings are ignored.

    Returns
    -------
    data : pandas.DataFrame
        One row per sample, ordered by sample id, with a fresh
        ``0..n_samples-1`` index.
    target : numpy.ndarray
        Flattened contents of the ``'y'`` dataset, or an empty array when
        the file has no ``'y'``.

    Raises
    ------
    ValueError
        If ``groups`` is empty or the datasets disagree on the number of
        samples (first dimension).
    """
    with h5py.File(filename, 'r') as f:
        group_dict = {group: f[group][:] for group in groups}
        target = f['y'][:] if 'y' in f else np.array([])

    if not group_dict:
        raise ValueError("groups must name at least one dataset")

    n_samples = next(iter(group_dict.values())).shape[0]
    for group, matrix in group_dict.items():
        if matrix.shape[0] != n_samples:
            raise ValueError(
                f"dataset {group!r} has {matrix.shape[0]} samples, expected {n_samples}"
            )

    # Build one long-format frame per dataset: a row per (sample id, time step).
    sample_ids = np.arange(n_samples)
    long_frames = []
    for group, matrix in group_dict.items():
        wide = pd.DataFrame(matrix)
        wide["id"] = sample_ids
        melted = wide.melt(id_vars=['id'], var_name='time', value_name='value')
        melted['group'] = group
        long_frames.append(melted[['id', 'time', 'group', 'value']])

    df_long = pd.concat(long_frames, ignore_index=True)
    # melt() stores the former column labels as objects; tsfresh sorts on this column.
    df_long["time"] = df_long["time"].astype(int)

    extraction_settings = MinimalFCParameters()
    # Depending on the tsfresh version the minimal set may not contain every
    # name (e.g. "absolute_maximum"), so a missing key must not raise.
    for feature_name in excluded_features:
        extraction_settings.pop(feature_name, None)

    features = extract_features(df_long, column_id="id", column_sort="time",
                                column_kind="group", column_value="value",
                                default_fc_parameters=extraction_settings,
                                n_jobs=n_jobs)

    impute(features)  # replaces NaN/inf in place

    # Order rows by sample id BEFORE dropping the id index: resetting first would
    # turn the sort into a no-op and leave row/label alignment to whatever order
    # tsfresh returned.
    data = features.sort_index().reset_index(drop=True)
    target = np.asarray(target).ravel()

    return data, target

In [5]:
# Locations of the train and test input files.
train_filename, test_filename = 'dataset/train.h5', 'dataset/test.h5'

# Dataset names to read: each sensor crossed with the x/y/z axes, sensor-major
# (body_acc_x, body_acc_y, body_acc_z, body_gyro_x, ..., total_acc_z).
groups = [f'{sensor}_{axis}'
          for sensor in ('body_acc', 'body_gyro', 'total_acc')
          for axis in ('x', 'y', 'z')]

In [11]:
X_train, y_train = features_from_file(train_filename, groups)

# pd.concat(axis=1) aligns on the index and pads with NaN, so a label/sample
# count mismatch (or a file without 'y') would otherwise end up silently in the CSV.
assert len(y_train) == len(X_train), (
    f"expected {len(X_train)} labels, got {len(y_train)}"
)

# Attach the labels positionally by reusing the feature frame's index.
train = pd.concat([X_train, pd.Series(y_train, name='y', index=X_train.index)], axis=1)
train.to_csv('dataset/train_preprocessed.csv', index=False)

Feature Extraction: 100%|██████████| 20/20 [00:20<00:00,  1.04s/it]


In [13]:
# Only the feature matrix is kept for the test split; the second return value is discarded.
X_test, _ = features_from_file(filename=test_filename, groups=groups)
X_test.to_csv(path_or_buf='dataset/test_preprocessed.csv', index=False)

Feature Extraction: 100%|██████████| 20/20 [00:09<00:00,  2.12it/s]


## Preprocesamiento de datos

### Targets
- 1: WALKING
- 2: WALKING_UPSTAIRS
- 3: WALKING_DOWNSTAIRS
- 4: SITTING
- 5: STANDING
- 6: LAYING


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns