In [1]:
import os
import warnings

import pandas as pd
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt

from tqdm import tqdm_notebook

from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters
from tsfresh.utilities.dataframe_functions import impute

In [2]:
# Names of the time-series columns that are cast to float and handed to
# tsfresh. The list is written as concatenated groups (by name prefix) purely
# for readability; the resulting order is significant and must not change.
ts_cols = (
    # supply_* / return_* measurement columns
    [
        'supply_flow',
        'supply_pressure',
        'return_temperature',
        'return_conductivity',
        'return_turbidity',
        'return_flow',
    ]
    # tank_level_* columns
    + [
        'tank_level_pre_rinse',
        'tank_level_caustic',
        'tank_level_acid',
        'tank_level_clean_water',
    ]
    # tank_temperature_* columns
    + [
        'tank_temperature_pre_rinse',
        'tank_temperature_caustic',
        'tank_temperature_acid',
    ]
    # tank_concentration_* columns
    + [
        'tank_concentration_caustic',
        'tank_concentration_acid',
    ]
    # remaining supply_* / return_* columns and object_low_level
    + [
        'supply_pump',
        'supply_pre_rinse',
        'supply_caustic',
        'return_caustic',
        'supply_acid',
        'return_acid',
        'supply_clean_water',
        'return_recovery_water',
        'return_drain',
        'object_low_level',
    ]
)

In [3]:
# Load the training measurements: the first CSV column becomes the index and
# the 'timestamp' column is parsed into datetimes.
train_df = pd.read_csv(
    'data/train_values.csv',
    index_col=0,
    parse_dates=['timestamp'],
)

  mask |= (ar1 == a)


In [4]:
# Build the "<process_id>-<phase>" key that is later used as the tsfresh
# column_id, so features are computed per process and phase.
train_df['process_phase'] = (
    train_df['process_id']
    .astype(str)
    .add('-')
    .add(train_df['phase'])
)

In [10]:
# Extract tsfresh features one process at a time and write each result to
# features/train_<process_id>.csv. After the loop, `train_features` holds the
# feature table of the last process that was handled.
extraction_settings = EfficientFCParameters()

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before the first write.
os.makedirs('features', exist_ok=True)

# Kept for the progress message total (and for any later cell that reads it).
processes = set(train_df['process_id'])

# One groupby pass hands out each process's rows directly, instead of
# re-scanning the whole frame with a boolean mask once per process id.
process_groups = train_df.groupby('process_id')

# Silence warnings only while extracting, rather than switching them off for
# the remainder of the kernel session.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for i, (process, subset_df) in enumerate(process_groups):
        print('process {}/{}'.format(i+1, len(processes)))
        # astype() returns a new frame, so adding the id column below never
        # writes into a slice of train_df (the chained-assignment pitfall).
        process_ts_df = subset_df[ts_cols].astype(float)
        process_ts_df['process_phase'] = subset_df['process_phase']
        train_features = extract_features(process_ts_df,
                                          column_id='process_phase',
                                          impute_function=impute,
                                          default_fc_parameters=extraction_settings,
                                          show_warnings=False)
        train_features.to_csv('features/train_{}.csv'.format(process))

process 1/5021



Feature Extraction:   0%|          | 0/10 [00:00<?, ?it/s][A
Feature Extraction:  10%|█         | 1/10 [00:01<00:14,  1.59s/it][A
Feature Extraction:  20%|██        | 2/10 [00:01<00:06,  1.18it/s][A
Feature Extraction:  30%|███       | 3/10 [00:03<00:07,  1.07s/it][A
Feature Extraction:  40%|████      | 4/10 [00:03<00:05,  1.12it/s][A
Feature Extraction:  50%|█████     | 5/10 [00:05<00:05,  1.02s/it][A
Feature Extraction:  60%|██████    | 6/10 [00:05<00:03,  1.09it/s][A
Feature Extraction:  70%|███████   | 7/10 [00:06<00:02,  1.11it/s][A
Feature Extraction:  80%|████████  | 8/10 [00:06<00:01,  1.24it/s][A
Feature Extraction:  90%|█████████ | 9/10 [00:06<00:00,  1.32it/s][A
Feature Extraction: 100%|██████████| 10/10 [00:07<00:00,  1.37it/s][A
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_1'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_2'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_3'
 'object_low_level__max_langevin_fixed_point

process 2/5021



Feature Extraction:   0%|          | 0/10 [00:00<?, ?it/s][A
Feature Extraction:  10%|█         | 1/10 [00:03<00:28,  3.15s/it][A
Feature Extraction:  20%|██        | 2/10 [00:03<00:13,  1.69s/it][A
Feature Extraction:  30%|███       | 3/10 [00:08<00:19,  2.78s/it][A
Feature Extraction:  40%|████      | 4/10 [00:08<00:12,  2.11s/it][A
Feature Extraction:  50%|█████     | 5/10 [00:11<00:11,  2.33s/it][A
Feature Extraction:  70%|███████   | 7/10 [00:13<00:05,  1.98s/it][A
Feature Extraction:  80%|████████  | 8/10 [00:13<00:03,  1.75s/it][A
Feature Extraction:  90%|█████████ | 9/10 [00:15<00:01,  1.70s/it][A
Feature Extraction: 100%|██████████| 10/10 [00:15<00:00,  1.58s/it][A
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_1'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_2'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_3'
 'object_low_level__max_langevin_fixed_point__m_3__r_30'
 'return_acid__friedrich_coefficients__m_3__r_30__coeff_0

process 3/5021



Feature Extraction:   0%|          | 0/10 [00:00<?, ?it/s][A
Feature Extraction:  10%|█         | 1/10 [00:01<00:10,  1.15s/it][A
Feature Extraction:  30%|███       | 3/10 [00:02<00:05,  1.39it/s][A
Feature Extraction:  40%|████      | 4/10 [00:02<00:03,  1.73it/s][A
Feature Extraction:  50%|█████     | 5/10 [00:03<00:03,  1.56it/s][A
Feature Extraction:  70%|███████   | 7/10 [00:04<00:01,  1.69it/s][A
Feature Extraction:  90%|█████████ | 9/10 [00:05<00:00,  1.75it/s][A
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_1'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_2'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_3'
 'object_low_level__max_langevin_fixed_point__m_3__r_30'
 'return_acid__friedrich_coefficients__m_3__r_30__coeff_0'
 'return_acid__friedrich_coefficients__m_3__r_30__coeff_1'
 'return_acid__friedrich_coefficients__m_3__r_30__coeff_2'
 'return_acid__friedrich_coefficients__m_3__r_30__coeff_3'
 'return_acid__max_langevin_fixe

process 4/5021



Feature Extraction:   0%|          | 0/10 [00:00<?, ?it/s][A
Feature Extraction:  10%|█         | 1/10 [00:00<00:08,  1.09it/s][A
Feature Extraction:  30%|███       | 3/10 [00:01<00:04,  1.68it/s][A
Feature Extraction:  40%|████      | 4/10 [00:01<00:02,  2.11it/s][A
Feature Extraction:  50%|█████     | 5/10 [00:02<00:02,  1.82it/s][A
Feature Extraction:  60%|██████    | 6/10 [00:02<00:01,  2.00it/s][A
Feature Extraction:  70%|███████   | 7/10 [00:03<00:01,  1.84it/s][A
Feature Extraction:  80%|████████  | 8/10 [00:04<00:01,  1.97it/s][A
Feature Extraction:  90%|█████████ | 9/10 [00:04<00:00,  1.84it/s][A
Feature Extraction: 100%|██████████| 10/10 [00:05<00:00,  1.96it/s][A
 'object_low_level__fft_coefficient__coeff_99__attr_"angle"'
 'object_low_level__fft_coefficient__coeff_99__attr_"imag"'
 'object_low_level__fft_coefficient__coeff_99__attr_"real"'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_0'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_

process 5/5021



Feature Extraction:   0%|          | 0/10 [00:00<?, ?it/s][A
Feature Extraction:  10%|█         | 1/10 [00:03<00:30,  3.41s/it][A
Feature Extraction:  20%|██        | 2/10 [00:03<00:14,  1.82s/it][A
Feature Extraction:  30%|███       | 3/10 [00:09<00:21,  3.14s/it][A
Feature Extraction:  40%|████      | 4/10 [00:10<00:15,  2.53s/it][A
Feature Extraction:  50%|█████     | 5/10 [00:16<00:16,  3.28s/it][A
Feature Extraction:  60%|██████    | 6/10 [00:16<00:11,  2.79s/it][A
Feature Extraction:  70%|███████   | 7/10 [00:18<00:07,  2.65s/it][A
Feature Extraction:  80%|████████  | 8/10 [00:18<00:04,  2.36s/it][A
Feature Extraction:  90%|█████████ | 9/10 [00:20<00:02,  2.24s/it][A
Feature Extraction: 100%|██████████| 10/10 [00:20<00:00,  2.07s/it][A
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_1'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_2'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_3'
 'object_low_level__max_langevin_fixed_point

process 6/5021



Feature Extraction:   0%|          | 0/10 [00:00<?, ?it/s][A
Feature Extraction:  10%|█         | 1/10 [00:04<00:39,  4.40s/it][A
Feature Extraction:  20%|██        | 2/10 [00:04<00:18,  2.32s/it][A
Feature Extraction:  30%|███       | 3/10 [00:11<00:27,  3.88s/it][A
Feature Extraction:  50%|█████     | 5/10 [00:14<00:14,  2.87s/it][A
Feature Extraction:  60%|██████    | 6/10 [00:14<00:09,  2.43s/it][A
Feature Extraction:  70%|███████   | 7/10 [00:16<00:06,  2.32s/it][A
Feature Extraction:  80%|████████  | 8/10 [00:16<00:04,  2.09s/it][A
Feature Extraction:  90%|█████████ | 9/10 [00:18<00:02,  2.09s/it][A
Feature Extraction: 100%|██████████| 10/10 [00:19<00:00,  1.93s/it][A
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_1'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_2'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_3'
 'object_low_level__max_langevin_fixed_point__m_3__r_30'
 'return_acid__friedrich_coefficients__m_3__r_30__coeff_0

process 7/5021



Feature Extraction:   0%|          | 0/10 [00:00<?, ?it/s][A
Feature Extraction:  10%|█         | 1/10 [00:03<00:27,  3.08s/it][A
Feature Extraction:  20%|██        | 2/10 [00:03<00:13,  1.68s/it][A
Feature Extraction:  30%|███       | 3/10 [00:08<00:20,  2.93s/it][A
Feature Extraction:  40%|████      | 4/10 [00:08<00:13,  2.25s/it][A
Feature Extraction:  50%|█████     | 5/10 [00:11<00:11,  2.39s/it][A
Feature Extraction:  70%|███████   | 7/10 [00:14<00:06,  2.03s/it][A
Feature Extraction:  80%|████████  | 8/10 [00:14<00:03,  1.79s/it][A
Feature Extraction:  90%|█████████ | 9/10 [00:15<00:01,  1.73s/it][A
Feature Extraction: 100%|██████████| 10/10 [00:16<00:00,  1.61s/it][A
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_1'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_2'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_3'
 'object_low_level__max_langevin_fixed_point__m_3__r_30'
 'return_acid__friedrich_coefficients__m_3__r_30__coeff_0

process 8/5021



Feature Extraction:   0%|          | 0/10 [00:00<?, ?it/s][A
Feature Extraction:  10%|█         | 1/10 [00:04<00:36,  4.06s/it][A
Feature Extraction:  20%|██        | 2/10 [00:04<00:17,  2.16s/it][A
Feature Extraction:  30%|███       | 3/10 [00:10<00:25,  3.67s/it][A
Feature Extraction:  40%|████      | 4/10 [00:11<00:17,  2.92s/it][A
Feature Extraction:  50%|█████     | 5/10 [00:13<00:13,  2.78s/it][A
Feature Extraction:  60%|██████    | 6/10 [00:14<00:09,  2.36s/it][A
Feature Extraction:  70%|███████   | 7/10 [00:15<00:06,  2.25s/it][A
Feature Extraction:  80%|████████  | 8/10 [00:16<00:04,  2.05s/it][A
Feature Extraction:  90%|█████████ | 9/10 [00:18<00:02,  2.04s/it][A
Feature Extraction: 100%|██████████| 10/10 [00:19<00:00,  1.90s/it][A
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_1'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_2'
 'object_low_level__friedrich_coefficients__m_3__r_30__coeff_3'
 'object_low_level__max_langevin_fixed_point

process 9/5021


KeyboardInterrupt: 

In [8]:
# Display the feature table currently bound to `train_features`, i.e. the
# result of the most recent extract_features call (indexed by process_phase).
train_features

variable,object_low_level__abs_energy,object_low_level__absolute_sum_of_changes,"object_low_level__agg_autocorrelation__f_agg_""mean""","object_low_level__agg_autocorrelation__f_agg_""median""","object_low_level__agg_autocorrelation__f_agg_""var""","object_low_level__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""intercept""","object_low_level__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""rvalue""","object_low_level__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""slope""","object_low_level__agg_linear_trend__f_agg_""max""__chunk_len_10__attr_""stderr""","object_low_level__agg_linear_trend__f_agg_""max""__chunk_len_50__attr_""intercept""",...,tank_temperature_pre_rinse__time_reversal_asymmetry_statistic__lag_1,tank_temperature_pre_rinse__time_reversal_asymmetry_statistic__lag_2,tank_temperature_pre_rinse__time_reversal_asymmetry_statistic__lag_3,tank_temperature_pre_rinse__value_count__value_-inf,tank_temperature_pre_rinse__value_count__value_0,tank_temperature_pre_rinse__value_count__value_1,tank_temperature_pre_rinse__value_count__value_inf,tank_temperature_pre_rinse__value_count__value_nan,tank_temperature_pre_rinse__variance,tank_temperature_pre_rinse__variance_larger_than_standard_deviation
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
24579-caustic,1.0,2.0,-0.009122,-0.008991,2e-06,0.116667,-0.123718,-0.007143,0.01589,0.333333,...,-0.81873,-1.711551,-2.893883,0.0,0.0,0.0,0.0,0.0,0.000899,0.0
24579-final_rinse,3.0,4.0,0.001759,-0.005809,0.002712,0.025641,0.11547,0.004103,0.007204,0.333333,...,-2.523108,-4.909794,-7.694428,0.0,0.0,0.0,0.0,0.0,0.003641,0.0
24579-pre_rinse,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,...,5.077151,11.038199,19.912541,0.0,0.0,0.0,0.0,0.0,0.000219,0.0


In [None]:
# Load the test measurements with the same options as the training file: the
# first CSV column becomes the index and 'timestamp' is parsed into datetimes.
test_df = pd.read_csv(
    'data/test_values.csv',
    index_col=0,
    parse_dates=['timestamp'],
)

In [None]:
# `test_ts_df` is not defined in any visible cell, so the original call raised
# NameError on a fresh kernel. Build it from test_df in the same layout used
# for training: float time-series columns plus a "<process_id>-<phase>" id.
# NOTE(review): assumes test_values.csv carries the same columns as
# train_values.csv (ts_cols, 'process_id', 'phase') — confirm before running.
test_ts_df = test_df[ts_cols].astype(float)
test_ts_df['id'] = test_df['process_id'].astype(str) + '-' + test_df['phase']

# Same extraction settings as the training features so the columns line up.
test_features = extract_features(test_ts_df, column_id='id', impute_function=impute,
                                 default_fc_parameters=extraction_settings)