In [14]:
from EAExperiment import EAExperiment
import pandas as pd
import functools
from skfeature.function.statistical_based import CFS
import numpy as np
import math

In [51]:
def create_propositional_table_dataframe(df, w, target, h):
    """Return an empty DataFrame carrying the lag/ahead column layout.

    Columns are laid out oldest lag first: every column of ``df`` suffixed
    with ``_lag{w-1}``, then ``_lag{w-2}``, ... down to ``_lag0``; followed,
    for each name in ``target``, by ``_ahead1`` ... ``_ahead{h}``.

    Parameters
    ----------
    df: {pandas DataFrame}
        frame whose column names are used as the base names
    w: {int}
        window size (number of lags)
    target: {iterable of str}
        names of the target columns
    h: {int}
        horizon (number of ahead steps per target)

    Returns
    -------
    dataframe: {pandas DataFrame}
        frame with the generated columns and no rows
    """
    lag_names = [
        name + "_lag" + str(lag)
        for lag in range(w - 1, -1, -1)
        for name in df.columns
    ]
    ahead_names = [
        t + "_ahead" + str(step)
        for t in target
        for step in range(1, h + 1)
    ]
    return pd.DataFrame(columns=lag_names + ahead_names)
    
def create_propositional_table(df, w, h, target):
    """Flatten sliding windows of ``df`` into a lag/ahead table.

    Each output row holds ``w`` consecutive rows of ``df`` flattened in
    row-major order (oldest observation first, so its columns are suffixed
    ``_lag{w-1}`` and the newest ``_lag0``), followed by the next ``h``
    values of every column named in ``target`` (``_ahead1`` ... ``_ahead{h}``).
    The row is labelled with the index value of the newest observation in
    its window.

    The table is assembled from a list of rows and built in one step rather
    than by enlarging an empty DataFrame row by row, so the cost is linear in
    the number of rows and the dtype follows the input data.

    Parameters
    ----------
    df: {pandas DataFrame}
        input series, one row per time step
    w: {int}
        window size (number of lagged rows per output row), at least 1
    h: {int}
        horizon (number of future values per target)
    target: {iterable of str}
        names of the columns of ``df`` to forecast; may be empty

    Returns
    -------
    dataframe: {pandas DataFrame}
        one row per window; empty (columns only) when ``df`` has fewer than
        ``w + h`` rows

    Raises
    ------
    ValueError
        if ``w`` is smaller than 1
    """
    if w < 1:
        raise ValueError("window size w must be at least 1")

    columns = [
        name + "_lag" + str(lag)
        for lag in range(w - 1, -1, -1)
        for name in df.columns
    ]
    columns.extend(
        t + "_ahead" + str(step) for t in target for step in range(1, h + 1)
    )

    n_rows = len(df) - w - h + 1
    if n_rows <= 0:
        # Not enough observations for a single window plus its horizon.
        return pd.DataFrame(columns=columns)

    # Pull the raw arrays once instead of slicing the DataFrame per window.
    values = df.values
    target_values = [df[t].values for t in target]

    rows = []
    indexes = []
    for i in range(n_rows):
        pieces = [values[i:i + w].ravel()]
        pieces.extend(tv[i + w:i + w + h] for tv in target_values)
        rows.append(np.concatenate(pieces))
        # Label the row with the most recent observation of its window.
        indexes.append(df.index[i + w - 1])

    dataframe = pd.DataFrame(np.vstack(rows), columns=columns)
    return dataframe.set_index(pd.Series(indexes))

In [52]:
def merit_calculation(X, y):
    """
    This function calculates the merit of X given class labels y, where
    merits = (k * rcf)/sqrt(k+k*(k-1)*rff)
    rcf = (1/k)*sum(su(fi,y)) for all fi in X
    rff = (1/(k*(k-1)))*sum(su(fi,fj)) for all fi and fj in X

    Input
    ----------
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels

    Output
    ----------
    merits: {float}
        merit of a feature subset X
    """
    # su_calculation is not brought into scope by the notebook's import cell,
    # so calling this function raised NameError; import it from the
    # scikit-feature package the notebook already depends on.
    from skfeature.utility.mutual_information import su_calculation

    _, n_features = X.shape
    rcf = 0
    rff = 0
    for i in range(n_features):
        fi = X[:, i]
        rcf += su_calculation(fi, y)
        # Visit each unordered feature pair once ...
        for j in range(i + 1, n_features):
            rff += su_calculation(fi, X[:, j])
    # ... and double the sum so it covers both (i, j) and (j, i).
    rff *= 2

    return rcf / np.sqrt(n_features + rff)

In [72]:
def _evaluation_func(data, horizont, target, create_propositional_table, individuals):
    """Score every individual by its mean CFS merit over the forecast horizon.

    An individual is indexed like an array: all elements but the last form a
    column mask over ``data.columns`` and the last element is the window size.
    The mask entry of ``target`` is set to 1 on the individual itself, so the
    caller's individuals are modified in place.

    For each individual the selected columns are expanded with
    ``create_propositional_table``; the last ``horizont`` columns of the result
    are used as targets and the rest as features. ``CFS.merit_calculation`` is
    evaluated (and printed) once per target column; NaN merits contribute
    nothing, while the divisor stays the total number of target columns.

    Returns a list with one fitness value per individual, in input order.
    """
    scores = []
    for genome in individuals:
        # The target column is always part of the selection (in-place write).
        genome[list(data.columns).index(target)] = 1

        column_mask = genome[:-1].astype(bool)
        lag_window = genome[-1]
        table = create_propositional_table(
            data.loc[:, column_mask], lag_window, horizont, [target]
        )

        X = table.iloc[:, :-horizont]
        Y = table.iloc[:, -horizont:]
        n_targets = len(Y.columns)

        score = 0
        for ahead_col in Y.columns:
            merits = CFS.merit_calculation(X.values, Y[ahead_col].values)
            print("Merit :"+str(merits)+" for columns: "+str(X.columns))
            if math.isnan(merits):
                continue
            score += merits/n_targets
        scores.append(score)

    return scores
    
# Pre-bind the dataset, horizon, target and table builder so that the resulting
# callable only takes the list of individuals.
# NOTE(review): `data`, `horizont` and `target` are assigned in a cell that
# appears further down in this notebook; that cell has to be run before this line.
evaluation_func = functools.partial(_evaluation_func, data, horizont, target, create_propositional_table)

In [165]:
# Build the lag/ahead table for target "PESO_20" (window 12, horizon 6) from the
# full dataset and write it to 'propositional_DP_PESO20.arff'.
# NOTE(review): window, horizon and target are hard-coded here instead of
# reusing the `max_window_size`, `horizont` and `target` configuration variables.
propositional_filtered = create_propositional_table(data, 12, 6, ["PESO_20"])
import arff
arff.dump('propositional_DP_PESO20.arff'
      , propositional_filtered.values
      , relation='DP'
      , names=propositional_filtered.columns)

In [170]:
# Experiment configuration: input data and search parameters.
data = pd.read_csv('data.csv')
data = data.set_index("PERIODO")  # use the "PERIODO" column as the row index
horizont = 6  # horizon handed to _evaluation_func: number of "_ahead" columns per target
target = 'PESO_7'  # column of `data` used as the forecasting target
n_gen = 50  # passed to EAExperiment.start_search_ga; presumably the number of generations - confirm
pop_size = 12  # passed to EAExperiment.start_search_ga; presumably the population size - confirm
max_window_size = 12  # passed to EAExperiment.start_search_ga; presumably the largest window size searched - confirm

In [103]:
# Boolean mask over the table columns: True where the column name contains
# `target` as a substring.
selected_columns = [target in col for col in propositional_filtered.columns]

In [104]:
# Display the full mask (one entry per column of the table).
selected_columns

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 F

In [144]:
# Run CFS with every column except the last 6 as features and the 4th column
# from the end as the label.
# NOTE(review): assumes CFS.cfs returns a 2-tuple - confirm against the
# installed skfeature version.
f, m = CFS.cfs(propositional_filtered.iloc[:, :-6].values, propositional_filtered.iloc[:, -4].values)

In [147]:
# Display the first value returned by CFS.cfs.
f

array([1., 1., 1., 1., 1., 1.])

In [145]:
# Preview the first rows of the columns addressed positionally by `f`.
propositional_filtered.iloc[:, f].head()

Unnamed: 0,PESO_2_lag11,VALOR_2_lag11,PESO_3_lag11,VALOR_3_lag11,PESO_4_lag11,VALOR_4_lag11
Dic/14,20480.0,161590.43,1762044.0,7451234.36,241076.0,307944.95
Ene/15,28357.0,209912.23,2766260.0,9851902.35,187323.0,358730.54
Feb/15,39998.0,236089.89,2333843.0,9376101.44,444362.0,497602.91
Mar/15,116501.0,375152.33,3097984.0,10280540.18,409998.0,421632.82
Abr/15,185644.0,504055.77,3963593.0,12011060.64,685920.0,583072.45


In [111]:

# Write the current `propositional_filtered` table to 'propositional_DP.arff'
# (relation name 'DP', attribute names taken from the table columns).
import arff
arff.dump('propositional_DP.arff'
      , propositional_filtered.values
      , relation='DP'
      , names=propositional_filtered.columns)

In [74]:
# Run the search with `evaluation_func` as the fitness function.
# The two return values are used below as the individuals (`pop`) and their
# fitness values (`fits`).
eaExperiment = EAExperiment(evaluation_func)
pop, fits = eaExperiment.start_search_ga(n_gen, pop_size, max_window_size, data)

Merit :1.0003539046209036 for columns: Index(['NUM_OPERACIONES_2_lag0', 'VALOR_4_lag0', 'NUM_OPERACIONES_4_lag0',
       'PESO_5_lag0', 'PESO_6_lag0', 'VALOR_6_lag0', 'PESO_7_lag0',
       'VALOR_8_lag0', 'NUM_OPERACIONES_9_lag0', 'PESO_10_lag0',
       'NUM_OPERACIONES_10_lag0', 'PESO_11_lag0', 'VALOR_11_lag0',
       'PESO_12_lag0', 'VALOR_12_lag0', 'VALOR_13_lag0', 'PESO_14_lag0',
       'PESO_15_lag0', 'NUM_OPERACIONES_16_lag0', 'NUM_OPERACIONES_17_lag0',
       'PESO_18_lag0', 'VALOR_19_lag0', 'NUM_OPERACIONES_19_lag0',
       'PESO_20_lag0', 'NUM_OPERACIONES_20_lag0', 'PESO_21_lag0',
       'VALOR_21_lag0', 'NUM_OPERACIONES_21_lag0', 'VALOR_22_lag0',
       'NUM_OPERACIONES_22_lag0', 'PESO_23_lag0', 'w_med_lag0', 'hr_lag0',
       'w_racha_speed_lag0'],
      dtype='object')
Merit :1.000213014434646 for columns: Index(['VALOR_2_lag0', 'NUM_OPERACIONES_2_lag0', 'PESO_3_lag0',
       'NUM_OPERACIONES_3_lag0', 'PESO_4_lag0', 'VALOR_4_lag0',
       'NUM_OPERACIONES_4_lag0', 'PESO_6_l

      dtype='object')
Merit :1.000213014434646 for columns: Index(['VALOR_2_lag0', 'NUM_OPERACIONES_2_lag0', 'PESO_3_lag0',
       'NUM_OPERACIONES_3_lag0', 'PESO_4_lag0', 'VALOR_4_lag0',
       'NUM_OPERACIONES_4_lag0', 'PESO_6_lag0', 'VALOR_6_lag0', 'PESO_7_lag0',
       'NUM_OPERACIONES_7_lag0', 'PESO_8_lag0', 'VALOR_9_lag0',
       'NUM_OPERACIONES_9_lag0', 'VALOR_10_lag0', 'NUM_OPERACIONES_10_lag0',
       'PESO_12_lag0', 'PESO_13_lag0', 'VALOR_13_lag0',
       'NUM_OPERACIONES_13_lag0', 'PESO_14_lag0', 'VALOR_14_lag0',
       'PESO_17_lag0', 'VALOR_17_lag0', 'PESO_18_lag0', 'VALOR_18_lag0',
       'NUM_OPERACIONES_18_lag0', 'PESO_19_lag0', 'VALOR_19_lag0',
       'PESO_20_lag0', 'NUM_OPERACIONES_20_lag0', 'PESO_21_lag0',
       'PESO_22_lag0', 'VALOR_22_lag0', 'NUM_OPERACIONES_22_lag0',
       'NUM_OPERACIONES_23_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0',
       'e_lag0', 'w_racha_dir_lag0'],
      dtype='object')
Merit :1.000213014434646 for columns: Index(['VALOR_2_lag0', 'N

      dtype='object', length=330)
Merit :1.0039886578906703 for columns: Index(['VALOR_1_lag3', 'VALOR_2_lag3', 'VALOR_3_lag3', 'PESO_5_lag3',
       'NUM_OPERACIONES_5_lag3', 'NUM_OPERACIONES_6_lag3', 'PESO_7_lag3',
       'NUM_OPERACIONES_8_lag3', 'VALOR_9_lag3', 'NUM_OPERACIONES_9_lag3',
       ...
       'NUM_OPERACIONES_19_lag0', 'PESO_20_lag0', 'NUM_OPERACIONES_21_lag0',
       'NUM_OPERACIONES_23_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0',
       'e_lag0', 'w_racha_speed_lag0', 'PIB_lag0'],
      dtype='object', length=156)
Merit :1.0012598738807028 for columns: Index(['NUM_OPERACIONES_1_lag7', 'PESO_2_lag7', 'NUM_OPERACIONES_2_lag7',
       'PESO_3_lag7', 'NUM_OPERACIONES_3_lag7', 'PESO_4_lag7', 'PESO_6_lag7',
       'VALOR_6_lag7', 'PESO_7_lag7', 'VALOR_7_lag7',
       ...
       'VALOR_15_lag0', 'NUM_OPERACIONES_15_lag0', 'PESO_18_lag0',
       'NUM_OPERACIONES_18_lag0', 'VALOR_19_lag0', 'PESO_21_lag0',
       'VALOR_21_lag0', 'NUM_OPERACIONES_22_lag0', 'PESO_23_lag0', 'PIB

      dtype='object', length=248)
Merit :1.0019140802598079 for columns: Index(['VALOR_1_lag8', 'VALOR_2_lag8', 'VALOR_3_lag8',
       'NUM_OPERACIONES_3_lag8', 'VALOR_4_lag8', 'NUM_OPERACIONES_5_lag8',
       'PESO_7_lag8', 'NUM_OPERACIONES_7_lag8', 'VALOR_9_lag8', 'PESO_10_lag8',
       ...
       'VALOR_20_lag0', 'PESO_21_lag0', 'VALOR_21_lag0', 'PESO_22_lag0',
       'VALOR_22_lag0', 'PESO_23_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0',
       'w_racha_speed_lag0'],
      dtype='object', length=297)
Merit :1.0012598738807028 for columns: Index(['NUM_OPERACIONES_1_lag7', 'PESO_2_lag7', 'NUM_OPERACIONES_2_lag7',
       'PESO_3_lag7', 'NUM_OPERACIONES_3_lag7', 'PESO_4_lag7', 'PESO_6_lag7',
       'VALOR_6_lag7', 'PESO_7_lag7', 'VALOR_7_lag7',
       ...
       'VALOR_15_lag0', 'NUM_OPERACIONES_15_lag0', 'PESO_18_lag0',
       'NUM_OPERACIONES_18_lag0', 'VALOR_19_lag0', 'PESO_21_lag0',
       'VALOR_21_lag0', 'NUM_OPERACIONES_22_lag0', 'PESO_23_lag0', 'PIB_lag0'],
      dtype='object'

      dtype='object', length=407)
Merit :1.0023331702498155 for columns: Index(['PESO_1_lag8', 'VALOR_1_lag8', 'VALOR_2_lag8', 'NUM_OPERACIONES_2_lag8',
       'VALOR_3_lag8', 'NUM_OPERACIONES_3_lag8', 'NUM_OPERACIONES_4_lag8',
       'PESO_5_lag8', 'VALOR_5_lag8', 'NUM_OPERACIONES_5_lag8',
       ...
       'PESO_19_lag0', 'NUM_OPERACIONES_19_lag0', 'VALOR_21_lag0',
       'NUM_OPERACIONES_21_lag0', 'PESO_22_lag0', 'NUM_OPERACIONES_23_lag0',
       'hr_lag0', 'e_lag0', 'w_racha_speed_lag0', 'PIB_lag0'],
      dtype='object', length=378)
Merit :1.0019140802598079 for columns: Index(['VALOR_1_lag8', 'VALOR_2_lag8', 'VALOR_3_lag8',
       'NUM_OPERACIONES_3_lag8', 'VALOR_4_lag8', 'NUM_OPERACIONES_5_lag8',
       'PESO_7_lag8', 'NUM_OPERACIONES_7_lag8', 'VALOR_9_lag8', 'PESO_10_lag8',
       ...
       'VALOR_20_lag0', 'PESO_21_lag0', 'VALOR_21_lag0', 'PESO_22_lag0',
       'VALOR_22_lag0', 'PESO_23_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0',
       'w_racha_speed_lag0'],
      dtype='o

      dtype='object', length=273)
Merit :1.0031723591741801 for columns: Index(['NUM_OPERACIONES_1_lag10', 'NUM_OPERACIONES_3_lag10', 'PESO_4_lag10',
       'NUM_OPERACIONES_4_lag10', 'VALOR_5_lag10', 'NUM_OPERACIONES_6_lag10',
       'PESO_7_lag10', 'PESO_8_lag10', 'VALOR_8_lag10', 'VALOR_9_lag10',
       ...
       'PESO_20_lag0', 'NUM_OPERACIONES_20_lag0', 'VALOR_22_lag0',
       'NUM_OPERACIONES_22_lag0', 'PESO_23_lag0', 'NUM_OPERACIONES_23_lag0',
       'w_med_lag0', 'p_mes_lag0', 'w_racha_dir_lag0', 'w_racha_speed_lag0'],
      dtype='object', length=352)
Merit :1.003846418811664 for columns: Index(['VALOR_1_lag3', 'PESO_2_lag3', 'NUM_OPERACIONES_2_lag3', 'PESO_3_lag3',
       'VALOR_4_lag3', 'PESO_5_lag3', 'NUM_OPERACIONES_5_lag3',
       'NUM_OPERACIONES_6_lag3', 'PESO_7_lag3', 'NUM_OPERACIONES_8_lag3',
       ...
       'VALOR_19_lag0', 'NUM_OPERACIONES_19_lag0', 'NUM_OPERACIONES_21_lag0',
       'NUM_OPERACIONES_23_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0',
       'e_lag0'

      dtype='object', length=273)
Merit :1.0031723591741801 for columns: Index(['NUM_OPERACIONES_1_lag10', 'NUM_OPERACIONES_3_lag10', 'PESO_4_lag10',
       'NUM_OPERACIONES_4_lag10', 'VALOR_5_lag10', 'NUM_OPERACIONES_6_lag10',
       'PESO_7_lag10', 'PESO_8_lag10', 'VALOR_8_lag10', 'VALOR_9_lag10',
       ...
       'PESO_20_lag0', 'NUM_OPERACIONES_20_lag0', 'VALOR_22_lag0',
       'NUM_OPERACIONES_22_lag0', 'PESO_23_lag0', 'NUM_OPERACIONES_23_lag0',
       'w_med_lag0', 'p_mes_lag0', 'w_racha_dir_lag0', 'w_racha_speed_lag0'],
      dtype='object', length=352)
Merit :1.0022433895360394 for columns: Index(['VALOR_1_lag8', 'VALOR_2_lag8', 'VALOR_3_lag8',
       'NUM_OPERACIONES_3_lag8', 'VALOR_4_lag8', 'NUM_OPERACIONES_5_lag8',
       'PESO_7_lag8', 'NUM_OPERACIONES_7_lag8', 'VALOR_9_lag8', 'PESO_10_lag8',
       ...
       'VALOR_20_lag0', 'PESO_21_lag0', 'VALOR_21_lag0', 'PESO_22_lag0',
       'VALOR_22_lag0', 'PESO_23_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0',
       'w_racha_spee

      dtype='object', length=396)
Merit :1.0022433895360394 for columns: Index(['VALOR_1_lag8', 'VALOR_2_lag8', 'VALOR_3_lag8',
       'NUM_OPERACIONES_3_lag8', 'VALOR_4_lag8', 'NUM_OPERACIONES_5_lag8',
       'PESO_7_lag8', 'NUM_OPERACIONES_7_lag8', 'VALOR_9_lag8', 'PESO_10_lag8',
       ...
       'VALOR_20_lag0', 'PESO_21_lag0', 'VALOR_21_lag0', 'PESO_22_lag0',
       'VALOR_22_lag0', 'PESO_23_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0',
       'w_racha_speed_lag0'],
      dtype='object', length=324)
Merit :1.0031723591741801 for columns: Index(['NUM_OPERACIONES_1_lag10', 'NUM_OPERACIONES_3_lag10', 'PESO_4_lag10',
       'NUM_OPERACIONES_4_lag10', 'VALOR_5_lag10', 'NUM_OPERACIONES_6_lag10',
       'PESO_7_lag10', 'PESO_8_lag10', 'VALOR_8_lag10', 'VALOR_9_lag10',
       ...
       'PESO_20_lag0', 'NUM_OPERACIONES_20_lag0', 'VALOR_22_lag0',
       'NUM_OPERACIONES_22_lag0', 'PESO_23_lag0', 'NUM_OPERACIONES_23_lag0',
       'w_med_lag0', 'p_mes_lag0', 'w_racha_dir_lag0', 'w_racha_spee

      dtype='object', length=168)
Merit :1.0050214164264442 for columns: Index(['VALOR_1_lag3', 'VALOR_2_lag3', 'VALOR_3_lag3', 'PESO_5_lag3',
       'NUM_OPERACIONES_5_lag3', 'NUM_OPERACIONES_6_lag3', 'PESO_7_lag3',
       'NUM_OPERACIONES_8_lag3', 'VALOR_9_lag3', 'NUM_OPERACIONES_9_lag3',
       ...
       'NUM_OPERACIONES_20_lag0', 'VALOR_21_lag0', 'NUM_OPERACIONES_23_lag0',
       'tm_mes_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0', 'e_lag0',
       'w_racha_speed_lag0', 'PIB_lag0'],
      dtype='object', length=144)
Merit :1.0025808569319805 for columns: Index(['VALOR_1_lag3', 'PESO_2_lag3', 'NUM_OPERACIONES_3_lag3', 'PESO_4_lag3',
       'NUM_OPERACIONES_4_lag3', 'VALOR_5_lag3', 'PESO_7_lag3', 'PESO_8_lag3',
       'VALOR_8_lag3', 'VALOR_9_lag3',
       ...
       'VALOR_19_lag0', 'NUM_OPERACIONES_19_lag0', 'NUM_OPERACIONES_21_lag0',
       'NUM_OPERACIONES_23_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0',
       'e_lag0', 'w_racha_speed_lag0', 'PIB_lag0'],
      dtype='object', 

      dtype='object', length=168)
Merit :1.0029377165862416 for columns: Index(['NUM_OPERACIONES_1_lag6', 'VALOR_2_lag6', 'VALOR_3_lag6', 'PESO_5_lag6',
       'NUM_OPERACIONES_5_lag6', 'NUM_OPERACIONES_6_lag6', 'PESO_7_lag6',
       'NUM_OPERACIONES_8_lag6', 'VALOR_9_lag6', 'NUM_OPERACIONES_9_lag6',
       ...
       'VALOR_18_lag0', 'VALOR_19_lag0', 'NUM_OPERACIONES_19_lag0',
       'VALOR_21_lag0', 'PESO_23_lag0', 'VALOR_23_lag0',
       'NUM_OPERACIONES_23_lag0', 'tm_mes_lag0', 'p_mes_lag0',
       'w_racha_speed_lag0'],
      dtype='object', length=266)
Merit :1.0025808569319805 for columns: Index(['VALOR_1_lag3', 'PESO_2_lag3', 'NUM_OPERACIONES_3_lag3', 'PESO_4_lag3',
       'NUM_OPERACIONES_4_lag3', 'VALOR_5_lag3', 'PESO_7_lag3', 'PESO_8_lag3',
       'VALOR_8_lag3', 'VALOR_9_lag3',
       ...
       'VALOR_19_lag0', 'NUM_OPERACIONES_19_lag0', 'NUM_OPERACIONES_21_lag0',
       'NUM_OPERACIONES_23_lag0', 'w_med_lag0', 'p_mes_lag0', 'hr_lag0',
       'e_lag0', 'w_racha_speed_lag0'

      dtype='object', length=418)
Merit :1.0029377165862416 for columns: Index(['NUM_OPERACIONES_1_lag6', 'VALOR_2_lag6', 'VALOR_3_lag6', 'PESO_5_lag6',
       'NUM_OPERACIONES_5_lag6', 'NUM_OPERACIONES_6_lag6', 'PESO_7_lag6',
       'NUM_OPERACIONES_8_lag6', 'VALOR_9_lag6', 'NUM_OPERACIONES_9_lag6',
       ...
       'VALOR_18_lag0', 'VALOR_19_lag0', 'NUM_OPERACIONES_19_lag0',
       'VALOR_21_lag0', 'PESO_23_lag0', 'VALOR_23_lag0',
       'NUM_OPERACIONES_23_lag0', 'tm_mes_lag0', 'p_mes_lag0',
       'w_racha_speed_lag0'],
      dtype='object', length=266)
Merit :1.0030201795558236 for columns: Index(['NUM_OPERACIONES_1_lag10', 'PESO_2_lag10', 'NUM_OPERACIONES_3_lag10',
       'PESO_4_lag10', 'NUM_OPERACIONES_4_lag10', 'VALOR_5_lag10',
       'NUM_OPERACIONES_6_lag10', 'PESO_7_lag10', 'PESO_8_lag10',
       'VALOR_8_lag10',
       ...
       'PESO_20_lag0', 'NUM_OPERACIONES_20_lag0', 'VALOR_22_lag0',
       'NUM_OPERACIONES_22_lag0', 'PESO_23_lag0', 'NUM_OPERACIONES_23_lag0',
       

In [56]:
# Position of the highest fitness (first occurrence on ties), then show that fitness.
max_idx = fits.index(max(fits))
fits[max_idx]

1.0043394373031238

In [57]:
# Column names of `data` switched on in the best individual; its last element
# is left out of the mask (it is printed as the window size further down).
columns = data.loc[:, list(map(bool,pop[max_idx][:-1]))].columns

In [141]:
# Pairwise correlation between all columns of the lag/ahead table.
propositional_filtered.corr()

Unnamed: 0,PESO_1_lag11,VALOR_1_lag11,NUM_OPERACIONES_1_lag11,PESO_2_lag11,VALOR_2_lag11,NUM_OPERACIONES_2_lag11,PESO_3_lag11,VALOR_3_lag11,NUM_OPERACIONES_3_lag11,PESO_4_lag11,...,e_lag0,w_racha_dir_lag0,w_racha_speed_lag0,PIB_lag0,PESO_7_ahead1,PESO_7_ahead2,PESO_7_ahead3,PESO_7_ahead4,PESO_7_ahead5,PESO_7_ahead6
PESO_1_lag11,1.000000,0.829936,0.313673,0.289904,0.285200,0.107381,-0.098946,0.175361,0.219853,0.008324,...,0.115022,0.054995,-0.120679,-0.028847,-0.010143,-0.051043,0.129289,0.029765,0.145906,0.371348
VALOR_1_lag11,0.829936,1.000000,0.509463,0.435289,0.336528,0.163002,-0.038204,0.102369,0.260336,-0.018974,...,0.066357,0.115940,0.046180,-0.084759,-0.103623,-0.127082,0.203272,0.054825,0.159351,0.348513
NUM_OPERACIONES_1_lag11,0.313673,0.509463,1.000000,0.085252,0.076278,-0.166515,0.034071,-0.108575,0.111927,0.048656,...,-0.010671,-0.026861,0.032481,0.021372,-0.167699,-0.055886,0.156389,0.083958,0.113587,0.032505
PESO_2_lag11,0.289904,0.435289,0.085252,1.000000,0.804540,0.250921,0.000329,-0.010346,-0.064000,0.053519,...,-0.014133,0.161770,-0.168478,-0.169449,-0.154317,-0.082509,0.053530,-0.095337,0.143667,0.101637
VALOR_2_lag11,0.285200,0.336528,0.076278,0.804540,1.000000,0.221670,-0.018642,0.113094,0.025593,-0.045967,...,-0.139851,0.247460,-0.165983,-0.138805,-0.034778,-0.072501,0.068177,-0.060121,0.170427,0.098877
NUM_OPERACIONES_2_lag11,0.107381,0.163002,-0.166515,0.250921,0.221670,1.000000,0.169703,0.398857,0.343613,0.004531,...,0.232436,0.175875,0.181786,-0.541907,-0.090453,-0.001978,0.015733,0.030319,0.173395,0.383775
PESO_3_lag11,-0.098946,-0.038204,0.034071,0.000329,-0.018642,0.169703,1.000000,0.555363,0.286979,0.076430,...,0.346426,-0.182139,0.263516,-0.009244,0.013199,-0.123132,-0.111242,-0.348869,-0.093018,0.281902
VALOR_3_lag11,0.175361,0.102369,-0.108575,-0.010346,0.113094,0.398857,0.555363,1.000000,0.526509,0.000120,...,0.266629,-0.014826,0.096402,-0.493113,0.367503,0.136389,0.329454,-0.074425,0.231822,0.444368
NUM_OPERACIONES_3_lag11,0.219853,0.260336,0.111927,-0.064000,0.025593,0.343613,0.286979,0.526509,1.000000,-0.173767,...,0.294026,0.004943,0.267462,-0.412510,0.102467,-0.187277,0.080753,0.080553,0.141032,0.402300
PESO_4_lag11,0.008324,-0.018974,0.048656,0.053519,-0.045967,0.004531,0.076430,0.000120,-0.173767,1.000000,...,0.232083,-0.296233,-0.035031,0.127813,-0.031156,-0.060207,0.017927,0.065053,0.172670,-0.074688


In [58]:
# Sum of the mask part of the best individual (the number of selected columns
# when the mask holds only 0/1 values).
sum(pop[max_idx][:-1])

41

In [59]:
# Report the selected column names and the last element of the best individual
# (its window size).
print("Variables: "+str(columns.values))
print("Window size: "+str(pop[max_idx][-1]))

Variables: ['PESO_1' 'VALOR_1' 'NUM_OPERACIONES_1' 'PESO_2' 'VALOR_2' 'VALOR_3'
 'NUM_OPERACIONES_3' 'VALOR_4' 'NUM_OPERACIONES_4' 'VALOR_5'
 'NUM_OPERACIONES_5' 'PESO_6' 'NUM_OPERACIONES_6' 'PESO_7' 'PESO_8'
 'NUM_OPERACIONES_8' 'PESO_9' 'VALOR_10' 'PESO_11' 'NUM_OPERACIONES_11'
 'NUM_OPERACIONES_12' 'PESO_13' 'VALOR_13' 'NUM_OPERACIONES_13' 'PESO_14'
 'NUM_OPERACIONES_14' 'PESO_16' 'VALOR_16' 'PESO_17' 'VALOR_17' 'PESO_18'
 'VALOR_18' 'PESO_19' 'VALOR_21' 'VALOR_22' 'NUM_OPERACIONES_22'
 'VALOR_23' 'w_med' 'hr' 'w_racha_dir' 'PIB']
Window size: 6


In [11]:
# Join the selected column names with "','" between them. The outer quotes are
# not added, so the result needs a leading and trailing ' to form a quoted list.
'\',\''.join(columns.values)

"NUM_OPERACIONES_1','NUM_OPERACIONES_5','NUM_OPERACIONES_8','NUM_OPERACIONES_9','NUM_OPERACIONES_11','NUM_OPERACIONES_13','NUM_OPERACIONES_14','NUM_OPERACIONES_15','NUM_OPERACIONES_18','NUM_OPERACIONES_22','w_med','hr','w_racha_dir','PIB"

In [139]:
# Pairwise correlation between the original (non-lagged) columns.
data.corr()

Unnamed: 0,PESO_1,VALOR_1,NUM_OPERACIONES_1,PESO_2,VALOR_2,NUM_OPERACIONES_2,PESO_3,VALOR_3,NUM_OPERACIONES_3,PESO_4,...,VALOR_23,NUM_OPERACIONES_23,tm_mes,w_med,p_mes,hr,e,w_racha_dir,w_racha_speed,PIB
PESO_1,1.000000,0.836053,0.352986,0.070787,0.183930,0.083861,-0.108815,0.099991,0.219211,-0.000747,...,0.078411,0.068706,0.074919,-0.071386,-0.056982,0.139034,0.153949,0.022594,-0.086566,-0.024413
VALOR_1,0.836053,1.000000,0.523462,0.133375,0.234757,0.119496,0.007015,0.083893,0.257803,-0.041609,...,0.051718,0.022489,0.108314,0.034067,-0.093375,0.124913,0.158619,-0.066904,-0.011168,-0.033183
NUM_OPERACIONES_1,0.352986,0.523462,1.000000,-0.045279,0.046350,-0.172097,0.052839,-0.198993,0.183576,-0.004576,...,0.145471,-0.260857,0.125859,0.201253,-0.165883,0.097128,0.139177,-0.091827,-0.015337,0.206297
PESO_2,0.070787,0.133375,-0.045279,1.000000,0.769356,0.411278,-0.072183,-0.060525,-0.066445,0.070275,...,-0.133842,0.106588,-0.087412,-0.016318,-0.062043,-0.014189,-0.128363,-0.068723,-0.062639,-0.216650
VALOR_2,0.183930,0.234757,0.046350,0.769356,1.000000,0.344714,-0.020514,0.026922,0.028583,-0.046764,...,0.006462,-0.019082,-0.103257,0.051032,-0.077647,0.020188,-0.115240,-0.100972,0.064820,-0.042135
NUM_OPERACIONES_2,0.083861,0.119496,-0.172097,0.411278,0.344714,1.000000,0.149702,0.340925,0.284750,0.052155,...,-0.424724,0.370107,0.285344,0.216154,-0.037898,0.111842,0.338715,-0.245420,0.028330,-0.483368
PESO_3,-0.108815,0.007015,0.052839,-0.072183,-0.020514,0.149702,1.000000,0.470673,0.257691,0.061115,...,-0.051571,-0.082354,0.319156,-0.013646,-0.262535,-0.148332,0.285995,-0.134043,-0.178454,0.099226
VALOR_3,0.099991,0.083893,-0.198993,-0.060525,0.026922,0.340925,0.470673,1.000000,0.397458,0.090174,...,-0.320049,0.491846,0.287079,0.018597,-0.196348,-0.222754,0.259723,-0.177295,-0.108505,-0.479160
NUM_OPERACIONES_3,0.219211,0.257803,0.183576,-0.066445,0.028583,0.284750,0.257691,0.397458,1.000000,-0.114858,...,-0.118067,0.073881,0.335565,0.197194,-0.223593,0.202600,0.395775,-0.366158,-0.097836,-0.250859
PESO_4,-0.000747,-0.041609,-0.004576,0.070275,-0.046764,0.052155,0.061115,0.090174,-0.114858,1.000000,...,-0.023522,0.056272,0.238984,0.086082,-0.138154,-0.075511,0.226773,0.058018,-0.091003,-0.089940


In [172]:
# Collect the attribute names of the six ARFF files
# 'propositional_DP_PESO20_a1.arff' ... 'propositional_DP_PESO20_a6.arff'.
#
# scipy's ARFF reader is imported under an alias: a plain
# `from scipy.io import arff` rebinds the notebook-level name `arff`, which the
# export cells use for the third-party `arff` package and its `dump` function
# (scipy.io.arff has no `dump`), so re-running an export cell afterwards failed.
from scipy.io import arff as scipy_arff
import pandas as pd

all_columns = np.array([])
for i in range(6):
    # loadarff returns (data, meta); only the data records are needed here.
    df_arff = scipy_arff.loadarff('propositional_DP_PESO20_a'+str(i+1)+".arff")
    df = pd.DataFrame(df_arff[0])
    all_columns = np.append(all_columns,df.columns.values)

In [173]:
# Keep only the feature attributes: drop every name containing 'ahead'.
# This rebinds `columns`, which earlier held the columns of the best individual.
columns = [col for col in all_columns if 'ahead' not in col]

In [178]:
# Display the collected feature names.
columns

['NUM_OPERACIONES_13_lag11',
 'hr_lag11',
 'NUM_OPERACIONES_12_lag10',
 'NUM_OPERACIONES_13_lag10',
 'p_mes_lag10',
 'w_racha_dir_lag10',
 'w_racha_speed_lag10',
 'NUM_OPERACIONES_13_lag9',
 'NUM_OPERACIONES_22_lag9',
 'w_racha_dir_lag9',
 'NUM_OPERACIONES_2_lag8',
 'NUM_OPERACIONES_3_lag8',
 'NUM_OPERACIONES_8_lag8',
 'NUM_OPERACIONES_16_lag8',
 'NUM_OPERACIONES_17_lag8',
 'NUM_OPERACIONES_20_lag8',
 'hr_lag8',
 'NUM_OPERACIONES_1_lag7',
 'NUM_OPERACIONES_3_lag7',
 'NUM_OPERACIONES_6_lag7',
 'NUM_OPERACIONES_8_lag7',
 'PESO_13_lag7',
 'NUM_OPERACIONES_17_lag7',
 'NUM_OPERACIONES_18_lag7',
 'NUM_OPERACIONES_19_lag7',
 'NUM_OPERACIONES_21_lag7',
 'NUM_OPERACIONES_22_lag7',
 'w_med_lag7',
 'p_mes_lag7',
 'NUM_OPERACIONES_13_lag6',
 'NUM_OPERACIONES_22_lag6',
 'NUM_OPERACIONES_3_lag5',
 'NUM_OPERACIONES_17_lag5',
 'NUM_OPERACIONES_18_lag5',
 'w_med_lag5',
 'VALOR_10_lag4',
 'NUM_OPERACIONES_10_lag4',
 'PESO_12_lag4',
 'NUM_OPERACIONES_17_lag4',
 'NUM_OPERACIONES_18_lag4',
 'w_racha_dir_la

In [154]:
# Write the selected columns of the lag/ahead table to CSV.
# NOTE(review): the file name says PESO8 while the other exports in this
# notebook refer to PESO20 - confirm which target this table belongs to.
propositional_filtered.loc[:, columns].to_csv("selected_propositional_DP_PESO8.csv")

In [174]:
# Strip the last "_"-separated token (the "_lagN" suffix) from each selected
# name and de-duplicate, giving the underlying column names.
columns_events = set(["_".join(col.split("_")[:-1]) for col in columns])

In [177]:
# Export the original (non-lagged) columns behind the selected features.
# A set is not accepted as a .loc indexer by current pandas and has no stable
# iteration order; sorting yields a list and a reproducible column order.
data.loc[:, sorted(columns_events)].to_csv("event_table_selected_PESO20.csv")

In [175]:
# Number of columns in the original dataset.
len(data.columns)

77

In [176]:
# Number of distinct underlying columns among the selected features.
len(columns_events)

42