In [1]:
from pathlib import Path

import pandas as pd
from tqdm import trange
from tsfresh.feature_extraction import extract_features


In [2]:
# Load one split purely as a schema reference for building the column lists.
# Assumes every split shares the columns of split_1 — TODO confirm.
data = pd.read_pickle("splits/split_1.pkl")

# Columns handled as categorical; every other value column counts as numerical.
cat = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_66', 'D_68']

# Columns that are not time-series values. "idx" is a bookkeeping column that
# is dropped from each split before feature extraction, so it must not be
# listed as a numerical variable either.
non_feature_cols = {"customer_ID", "S_2", "idx"}

cat_vars = [col for col in data if col in cat]
variables = [col for col in data if col not in non_feature_cols]
num_vars = [col for col in variables if col not in cat_vars]


In [3]:
# tsfresh feature settings, keyed by calculator name. A value of None selects
# a calculator without parameters; a list holds one dict per parameter set.

# Settings applied to categorical columns.
cat_feats = dict(mean=None)

# Settings applied to numerical columns.
num_feats = dict(
    variance=None,
    mean=None,
    mean_change=None,
    cid_ce=[dict(normalize=False)],
    binned_entropy=[dict(max_bins=5)],
)

In [4]:
# Map each column name to the feature settings of its group: categorical
# columns are inserted first, numerical columns after them. All columns of a
# group reference the same settings dict object.
kind_to_fc_parameters = {col: cat_feats for col in cat_vars}
kind_to_fc_parameters.update((col, num_feats) for col in num_vars)

In [6]:
# Create the output directory up front: without it, to_pickle would fail only
# after a split's feature extraction has already finished, discarding the work.
Path("feat_splits").mkdir(parents=True, exist_ok=True)

# Extract features one split at a time. The raw split and its feature frame
# are both released before the next split is loaded.
for i in trange(7, 11):
    data = pd.read_pickle(f"splits/split_{i}.pkl")
    data["S_2"] = pd.to_datetime(data["S_2"])
    # Copy the index into a regular "customer_ID" column (used as column_id
    # below), then replace the index with a plain positional one.
    data["customer_ID"] = data.index
    data.reset_index(drop=True, inplace=True)
    # "idx" is removed so it is not treated as a time-series value column.
    data.drop(columns="idx", inplace=True)

    features = extract_features(
        timeseries_container=data,
        column_id="customer_ID",
        column_sort="S_2",
        kind_to_fc_parameters=kind_to_fc_parameters,
        n_jobs=16
    )

    # Release the raw split before casting the features to float32.
    del data
    features = features.astype("float32")
    features.to_pickle(f"feat_splits/feat_split_{i}.pkl")
    del features

  0%|                                                                                            | 0/4 [00:00<?, ?it/s]
Feature Extraction:   0%|                                                                       | 0/80 [00:00<?, ?it/s][A
Feature Extraction:   1%|▊                                                              | 1/80 [00:18<24:09, 18.34s/it][A
Feature Extraction:   2%|█▌                                                             | 2/80 [00:20<11:37,  8.94s/it][A
Feature Extraction:   4%|██▎                                                            | 3/80 [00:23<07:40,  5.98s/it][A
Feature Extraction:   5%|███▏                                                           | 4/80 [00:25<05:53,  4.65s/it][A
Feature Extraction:   6%|███▉                                                           | 5/80 [00:27<04:39,  3.73s/it][A
Feature Extraction:   8%|████▋                                                          | 6/80 [00:30<04:10,  3.39s/it][A
Feature Extraction:

Feature Extraction:  61%|█████████████████████████████████████▉                        | 49/80 [02:47<01:52,  3.63s/it][A
Feature Extraction:  62%|██████████████████████████████████████▊                       | 50/80 [02:51<01:45,  3.51s/it][A
Feature Extraction:  64%|███████████████████████████████████████▌                      | 51/80 [02:54<01:46,  3.66s/it][A
Feature Extraction:  65%|████████████████████████████████████████▎                     | 52/80 [02:58<01:43,  3.71s/it][A
Feature Extraction:  66%|█████████████████████████████████████████                     | 53/80 [03:02<01:38,  3.64s/it][A
Feature Extraction:  68%|█████████████████████████████████████████▊                    | 54/80 [03:05<01:34,  3.65s/it][A
Feature Extraction:  69%|██████████████████████████████████████████▋                   | 55/80 [03:09<01:33,  3.72s/it][A
Feature Extraction:  70%|███████████████████████████████████████████▍                  | 56/80 [03:13<01:29,  3.72s/it][A
Feature Extracti

Feature Extraction:  21%|█████████████▏                                                | 17/80 [01:03<03:18,  3.14s/it][A
Feature Extraction:  22%|█████████████▉                                                | 18/80 [01:06<03:21,  3.25s/it][A
Feature Extraction:  24%|██████████████▋                                               | 19/80 [01:09<03:10,  3.12s/it][A
Feature Extraction:  25%|███████████████▌                                              | 20/80 [01:13<03:17,  3.29s/it][A
Feature Extraction:  26%|████████████████▎                                             | 21/80 [01:16<03:09,  3.20s/it][A
Feature Extraction:  28%|█████████████████                                             | 22/80 [01:19<03:06,  3.22s/it][A
Feature Extraction:  29%|█████████████████▊                                            | 23/80 [01:22<02:54,  3.05s/it][A
Feature Extraction:  30%|██████████████████▌                                           | 24/80 [01:25<02:56,  3.15s/it][A
Feature Extracti