In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import pymfe.mfe
import tspymfe._embed

In [2]:
# Note: only the groups that have at least one meta-feature which can be
# extracted from an unsupervised dataset are used.
groups = (
    "general",
    "statistical",
    "info-theory",
    "complexity",
    "itemset",
    "concept",
)

# Summary functions requested for the extracted meta-features.
summary = ("mean", "sd", "max", "min")

# Extractor configured with every feature of the selected groups.
extractor = pymfe.mfe.MFE(groups=groups, summary=summary, features="all")

   Please use only the updated version available at: https://github.com/ealcobaca/pymfe


In [3]:
# Load the train and test subsamples; the first line of each file is the
# header and rows are indexed by the "timeseries_id" column.
data_train = pd.read_csv(
    "../2_exploring_subsample/subsample_train.csv",
    index_col="timeseries_id",
    header=0,
)
data_test = pd.read_csv(
    "../2_exploring_subsample/subsample_test.csv",
    index_col="timeseries_id",
    header=0,
)

In [4]:
# Sanity check: the train subsample must have more rows than the test one.
assert len(data_train) > len(data_test)

# Preview the first five rows of the training frame.
data_train.head(n=5)

Unnamed: 0_level_0,category,inst_ind,datapoints
timeseries_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
e0b36e39-3872-11e8-8680-0242ac120002,Beta noise,25254,"0.73617,0.99008,0.71331,0.87094,0.75527,0.9912..."
81db0cf2-3883-11e8-8680-0242ac120002,Relative humidity,14878,"95.5,79,86.75,8.75,62.75,98.75,79.74,44.75,92...."
380eb353-387a-11e8-8680-0242ac120002,RR,6577,"0.6328,0.6328,0.625,0.6328,0.625,0.625,0.6172,..."
f33f461c-3871-11e8-8680-0242ac120002,Tremor,27821,"-0.6,1.5,1.5,0.1,0.9,0.6,0.3,-0.2,0.7,1,0.1,1...."
7bcad309-3874-11e8-8680-0242ac120002,Noisy sinusoids,14226,"0.38553,0.2014,1.8705,0.47883,0.33958,0.009558..."


In [5]:
# Note: at most the last `size_threshold` (2048) observations of each
# time-series are used.
# NOTE(review): this note previously said 1024 while the constant is 2048 --
# confirm which window size is intended.
size_threshold = 2048

# One embedding-dimension slot and one embedding-lag slot per training series,
# zero-initialised; both are rows of a single (2, n_series) uint16 array.
embed_dims, embed_lags = np.zeros((2, len(data_train)), dtype=np.uint16)

# Note: the extractor is fitted on a small dummy dataset (8 rows x 2 columns)
# only to obtain the meta-feature names, which become the result columns.
mtf_names = extractor.fit(
    np.arange(16).reshape(8, 2),
    suppress_warnings=True,
).extract(suppress_warnings=True)[0]

# Empty result table: one row per training series, one column per meta-feature.
results = pd.DataFrame(columns=mtf_names, index=data_train.index)

In [6]:
# Report how many meta-features are produced per dataset, then list their names.
print(
    "Number of meta-features per dataset:",
    len(mtf_names),
    sep=" ",
)
print(mtf_names)

Number of meta-features per dataset: 111
['attr_conc.max', 'attr_conc.mean', 'attr_conc.min', 'attr_conc.sd', 'attr_ent.max', 'attr_ent.mean', 'attr_ent.min', 'attr_ent.sd', 'attr_to_inst', 'cat_to_num', 'cohesiveness.max', 'cohesiveness.mean', 'cohesiveness.min', 'cohesiveness.sd', 'cor.max', 'cor.mean', 'cor.min', 'cor.sd', 'cov.max', 'cov.mean', 'cov.min', 'cov.sd', 'eigenvalues.max', 'eigenvalues.mean', 'eigenvalues.min', 'eigenvalues.sd', 'g_mean.max', 'g_mean.mean', 'g_mean.min', 'g_mean.sd', 'h_mean.max', 'h_mean.mean', 'h_mean.min', 'h_mean.sd', 'inst_to_attr', 'iq_range.max', 'iq_range.mean', 'iq_range.min', 'iq_range.sd', 'kurtosis.max', 'kurtosis.mean', 'kurtosis.min', 'kurtosis.sd', 'mad.max', 'mad.mean', 'mad.min', 'mad.sd', 'max.max', 'max.mean', 'max.min', 'max.sd', 'mean.max', 'mean.mean', 'mean.min', 'mean.sd', 'median.max', 'median.mean', 'median.min', 'median.sd', 'min.max', 'min.mean', 'min.min', 'min.sd', 'nr_attr', 'nr_bin', 'nr_cat', 'nr_cor_attr', 'nr_inst', 'nr

In [7]:
# Embed each time-series and extract its meta-features into `results`.
# Only the first 10 training rows are processed here; the remaining entries of
# `results`, `embed_lags` and `embed_dims` are left untouched by this loop.
# Each row unpacks positionally into three columns; only the third one (the
# comma-separated observations string) is used in the loop body.
for i, (cls, _, vals) in enumerate(data_train.iloc[:10, :].values):
    # Parse the observations and keep at most the last `size_threshold` of them.
    ts = np.asarray(vals.split(",")[-size_threshold:], dtype=float)

    # Embedding lag for this series (helper is given max_nlags=16).
    # NOTE(review): presumably an estimated time-delay -- confirm against tspymfe.
    embed_lags[i] = tspymfe._embed.embed_lag(ts=ts, max_nlags=16)

    # Embedding dimension for this series, computed with the lag stored above.
    # The lag is read back from `embed_lags`, so the helper receives the
    # array's scalar type rather than the helper's raw return value.
    # NOTE(review): judging by the name this is Cao's method -- confirm.
    embed_dims[i] = tspymfe._embed.ft_emb_dim_cao(ts=ts,
                                                  lag=embed_lags[i],
                                                  dims=16,
                                                  tol_threshold=0.2)

    # Build the embedded representation of the series from the stored
    # dimension and lag; this is what the extractor is fitted on.
    ts_embed = tspymfe._embed.embed_ts(ts=ts,
                                       dim=embed_dims[i],
                                       lag=embed_lags[i])

    # Fit on the embedded series and extract; element 1 of the result is
    # written positionally into row i of `results`.
    # NOTE(review): assumes the extracted values come in the same order as the
    # column names of `results` for every series -- TODO confirm.
    extractor.fit(ts_embed, suppress_warnings=True)
    res = extractor.extract(suppress_warnings=True)
    results.iloc[i, :] = res[1]



array([ 1,  5,  1,  1,  5,  5,  1,  7,  7, 13,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0