Importing packages

In [None]:
import numpy as np
import pandas as pd
import featuretools as ft
import utils
from tqdm import tqdm
from sklearn.cluster import KMeans
from featuretools.primitives import make_agg_primitive
import featuretools.variable_types as vtypes
from tsfresh.feature_extraction.feature_calculators import (number_peaks, mean_abs_change,
                                                            cid_ce, last_location_of_maximum, length)

Loading data

In [None]:
# Relative path to the training recordings file; it is resolved against the
# notebook's current working directory.
data_path = 'train_FD004.txt'
# Read the file with the `load_data` helper from the imported `utils` module.
# Later cells select columns from `data` by name, so it is used as a DataFrame.
data = utils.load_data(data_path)

# Preview the first rows as a quick sanity check of the load.
data.head()


Creating cutoff times

In [None]:
# Number of cutoff-time tables to generate.
splits = 5

# One call to utils.make_cutoff_times per split; tqdm wraps the range purely
# to show a progress bar while the tables are built.
cutoff_time_list = [utils.make_cutoff_times(data) for _ in tqdm(range(splits))]

# Inspect the first table.
cutoff_time_list[0].head()


Making entitysets

In [None]:
nclusters = 50  # number of KMeans clusters used to group the operational settings


def make_entityset(data, nclusters, kmeans=None, random_state=None):
    """Cluster the operational settings and build the featuretools EntitySet.

    Parameters
    ----------
    data : pandas.DataFrame
        Recordings table. It must provide the columns
        ``operational_setting_1``, ``operational_setting_2``,
        ``operational_setting_3``, ``index``, ``time`` and ``engine_no``.
        NOTE: the frame is modified in place -- a ``settings_clusters`` column
        is added (or overwritten) with the predicted cluster of every row.
    nclusters : int
        Number of clusters to fit. Only used when ``kmeans`` is ``None``.
    kmeans : object with a ``predict`` method, optional
        Already-fitted clustering model to reuse (for example the one returned
        by an earlier call), so that the same cluster assignment is applied to
        another data set. When ``None`` a new ``KMeans`` model is fitted on
        the three operational-setting columns of ``data``.
    random_state : int or None, optional
        Forwarded to ``KMeans`` when a new model is fitted, so the clustering
        can be made reproducible. The default ``None`` leaves KMeans unseeded.

    Returns
    -------
    tuple
        ``(es, kmeans)``: the EntitySet holding the ``recordings``, ``engines``
        and ``settings_clusters`` entities, and the clustering model that
        produced the ``settings_clusters`` column.
    """
    settings = data[['operational_setting_1', 'operational_setting_2', 'operational_setting_3']]

    # Compare against None explicitly: the truthiness of a model object is not
    # a reliable "was a model supplied?" signal.
    if kmeans is None:
        kmeans = KMeans(n_clusters=nclusters, random_state=random_state).fit(settings)

    # In-place write kept on purpose for backward compatibility with callers
    # that read the column from `data` afterwards.
    data['settings_clusters'] = kmeans.predict(settings)

    es = ft.EntitySet('Dataset')
    es.entity_from_dataframe(dataframe=data,
                             entity_id='recordings',
                             index='index',
                             time_index='time')

    # One derived entity per engine ...
    es.normalize_entity(base_entity_id='recordings',
                        new_entity_id='engines',
                        index='engine_no')

    # ... and one per operational-settings cluster.
    es.normalize_entity(base_entity_id='recordings',
                        new_entity_id='settings_clusters',
                        index='settings_clusters')

    return es, kmeans
# Fit the clustering on the training data and keep the fitted model so that
# later calls can reuse the same cluster assignment.
es, kmeans = make_entityset(data, nclusters)
# A bare `es` expression is only rendered when it is the last line of a cell,
# so print the summary explicitly before drawing the entity diagram.
print(es)
es.plot()


Creating features

In [None]:
# Custom aggregation primitive wrapping tsfresh's `cid_ce`; the second
# positional argument is fixed to False for every call.
# NOTE(review): per the tsfresh docs that argument is the `normalize` flag --
# confirm against the installed version.
Complexity = make_agg_primitive(
    lambda series: cid_ce(series, False),
    name="complexity",
    input_types=[vtypes.Numeric],
    return_type=vtypes.Numeric,
)

# Deep feature synthesis for the `engines` entity, evaluated at the first set
# of cutoff times. Returns the feature matrix and the feature definitions; the
# definitions are reused later to compute matrices for the other cutoff sets.
fm, features = ft.dfs(
    entityset=es,
    target_entity='engines',
    cutoff_time=cutoff_time_list[0],
    agg_primitives=['last', 'max', Complexity],
    trans_primitives=[],
    max_depth=3,
    chunk_size=.26,
    verbose=True,
)

# Persist the feature matrix, then preview its first rows.
fm.to_csv('advanced_fm.csv')
fm.head()

Making predictions

In [None]:
# The matrix already computed for cutoff_time_list[0] is the first entry.
fm_list = [fm]
# NOTE: this rebinds `splits` (5 when the cutoff times were generated), so only
# cutoff_time_list[1] .. cutoff_time_list[3] are consumed below and the last
# generated cutoff table stays unused.
splits=4
# make_entityset receives the same data and the already-fitted kmeans on every
# iteration, so the entityset is built once here instead of once per split.
split_es = make_entityset(data, nclusters, kmeans=kmeans)[0]
for i in tqdm(range(1, splits)):
    # Re-evaluate the saved feature definitions at the i-th set of cutoff times.
    fm = ft.calculate_feature_matrix(entityset=split_es,
                                     features=features,
                                     chunk_size=.26,
                                     cutoff_time=cutoff_time_list[i])
    fm_list.append(fm)