In [1]:
%%javascript
/** Below because this notebook is pretty large */
var rto = 200;
console.log('NB: Increase require timeout to ' + rto + ' seconds');
window.requirejs.config({waitSeconds: rto});

<IPython.core.display.Javascript object>

## Importacion de Librerías y Configuración de Consola

In [2]:
import imblearn
from imblearn.pipeline import Pipeline

import time 
import os
import janitor
import pandas as pd
import numpy as np

from pathlib import Path


In [3]:
# Especificamos el directorio base, retrocediendo una carpeta
BASE = Path('..')
# Y, a partir de allí, definimos las carpetas que debieran existir
RAW = BASE/'raw'
NOTEBOOKS = BASE/'notebooks'
DATA = BASE/'data'
REFERENCIAS = BASE/'referencias'
PERFORMANCE = BASE/'performance'

In [4]:
# Creamos todos los directorios que definimos previamente, si no existieran aún.
path_list = [RAW, NOTEBOOKS, DATA, REFERENCIAS, PERFORMANCE]
for path_i in path_list:
    if not os.path.exists(path_i):
        os.makedirs(path_i)

In [5]:
#Tomo el tiempo al inicio de la ejecucion
start_time=time.time()

In [6]:
#Seteamos para que no utilice notacion cientifica
pd.options.display.float_format = '{:.9f}'.format
#Seteo para que el máximo de columnas que muestra al levantar una base sean 500
pd.set_option('display.max_columns',500)
#Estos códigos hacen que la visualización de la consola abarque toda la pantalla (sin los recortes a los costados). Tambien hacen que al mostrar dataframes podamos ver todas las columnas que tiene.
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))


In [7]:
# Codigo para poder imprimir multiples outputs en una misma línea
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [8]:
# Definiremos el directorio en el que estamos trabajando. 
scripts=os.getcwd()  #Obtenemos la direccion actual
os.chdir('..') # Nos vamos una carpeta atrás
raiz=os.getcwd() # Esta direccion la guardaremos como "raiz" 
os.chdir(raiz+'\\raw') 
raw=os.getcwd()
os.chdir(raiz+'\\raw')
outputs=os.getcwd()

In [9]:
from datetime import date
today = date.today()
Today_Date = str(today)

# Importacion de Archivos

In [10]:
# Lee file de merval
df_merval = pd.read_pickle(DATA/'df_merval.pkl')
# Lee file de acciones argentinas
dict_acciones_arg = np.load(DATA/'dict_acciones_arg.npy',allow_pickle='TRUE').item()
# Lee file de dolar blue
df_db = pd.read_pickle(DATA/'df_dolarblue.pkl')

In [11]:
df_merval.tail(2)
df_db.tail(2)

Unnamed: 0,date,open,high,low,close,volume,ticker
4655,2021-02-02,49576.890625,50612.80859375,49576.890625,49686.828125,0,merval
4656,2021-02-03,49686.828125,50377.96875,49672.7890625,49857.55078125,0,merval


Unnamed: 0,date,venta_dblue
4742,2021-02-03,153.0
4743,2021-02-04,153.0


In [12]:
dict_acciones_arg['ggal'].tail(2)

Unnamed: 0,date,open,high,low,close,volume,ticker
4744,2021-02-02,120.0,122.0,118.5,119.199996948,1310376,ggal
4745,2021-02-03,119.050003052,121.400001526,118.199996948,118.349998474,1047763,ggal


# Creación de funciones ad hoc

In [13]:
# Función que genera la VD binaria para distintas ventanas temporales. 
# Para cada horizonte, se exige que el rendimiento porcentual haya superado el threshold.

def WINDOW_VDS(df_input, list_windows=[1, 5, 10, 22], threshold_vd=0.008):

    list_names =[]
    for window in list_windows:
        name = str(window) +'d'
        list_names.append(name)
        # calculamos el máximo rendimiento alcanzado en la ventana temporal posterior a cada día
        df_input['up_' + str(name)] =  (df_input['close'].rolling(window=window, min_periods=1).max().shift(-window) - df_input['close']) / df_input['close']    
        df_input['vd_' + str(name)] =  np.where(  df_input['up_' + str(name)] > threshold_vd , 1 , 0)
    
    # vemos cuales columnas auxiliares hay que quitar (todas las que dicen 'up_')
    list_to_drop = ['up_' + str(name) for name in list_names]
    df_input.drop(columns=list_to_drop, inplace=True)
    return df_input


In [14]:
# Raw Attributes Generator
def ATTRIBUTES_GENERATOR(df):
    df_input = df.copy()
    # ratios respecto al valor open
    df_input['ratio_ho'] = df_input['high'] / df_input['open']
    df_input['ratio_lo'] = df_input['low'] / df_input['open']
    df_input['ratio_co'] = df_input['close'] / df_input['open']
    # ratios respecto al valor close
    df_input['ratio_hc'] = df_input['high'] / df_input['close']
    df_input['ratio_lc'] = df_input['low'] / df_input['close']
    # ratio high low
    df_input['ratio_hl'] = df_input['high'] / df_input['low']
    
    # ratio todo respecto a su media
    df_input['ratio_o_mean'] = df_input['open'] / (df_input['high'].mean())
    df_input['ratio_c_mean'] = df_input['close'] / (df_input['high'].mean())
    df_input['ratio_l_mean'] = df_input['low'] / (df_input['high'].mean())
    df_input['ratio_h_mean'] = df_input['high'] / (df_input['high'].mean())
    
    # ratio todo respecto a la media del close
    df_input['ratio_o_meanc'] = df_input['open'] / (df_input['close'].mean())
    df_input['ratio_c_meanc'] = df_input['close'] / (df_input['close'].mean())
    df_input['ratio_l_meanc'] = df_input['low'] / (df_input['close'].mean())
    df_input['ratio_h_meanc'] = df_input['high'] / (df_input['close'].mean())
    
    return df_input
    
# Rezagos y Tendencias!    
def TRENDS_GENERATOR(df, list_windows=[5, 10, 12, 26, 120, 252], cols_to_exclude=['date','ticker']): 
    df_input = df.copy()
    # Tomamos las columnas que utilizaremos (todas menos las que hay que excluir)
    cols_to_include = list(set(df_input.columns).difference(set(cols_to_exclude)))
#     print(cols_to_include)
    
    # Tomamos los rezagos como una variable nueva
    for col_i in cols_to_include: 
        # En cada fila vamos a poner los valores que sucedieron hace 1, 2 y 5 días.
        df_input[col_i + '_1'] = df_input[col_i].shift(1)
        df_input[col_i + '_2'] = df_input[col_i].shift(2)
        df_input[col_i + '_5'] = df_input[col_i].shift(5)
        # Calculamos los promedios de los ultimos n dias (incluyendo la fila en cuestion).
        # el for está anidado para poder tomar, para cada variable, todas las windows que queremos
        for window_i in list_windows:
            df_input[col_i + '_m' + str(window_i) + 'd'] = df_input[col_i].rolling(window=window_i, min_periods=window_i).mean()
        

    
    # Una vez generadas las variables rezagadas (o promedios de los ult. n dias) definimos cuales fueron los sufijos utilizados
    
    # Comenzamos las comparaciones entre variables (para no utilizar nominales en el modelo final)
    # Primero compararemos cada variable original contra su versión rezagada (1, 2 y 5 días)
    list_fixed = ['_1','_2','_5']
    for col_i in cols_to_include:
        for suffix in list_fixed:
            df_input['var_orig_'+ col_i + '_vs' + suffix ] = (df_input[col_i] / df_input[col_i + suffix]) - 1
    
    # Luego compararemos los promedios de los ultimos n dias entre ellos de forma ascendente (5d vs 10d ...252d, y luego 10d vs 12d...252d, etc)
    list_w_suff = ['_m'+str(x)+'d' for x in list_windows]
    for index_a in np.arange(0,len(list_w_suff)):
        for index_b in np.arange(index_a, len(list_w_suff)):
            if index_a < index_b:
                df_input['var_means_' + col_i + list_w_suff[index_a] + list_w_suff[index_b]] = (df_input[col_i + list_w_suff[index_a]] / df_input[col_i + list_w_suff[index_b]]) - 1 
        
    # Por ultimo, solo nos quedamos con las variaciones generadas, no con los promedios auxiliares que hicimos (por ser nominales)
    # es decir, tomamos las columnas originales (cols_to_include) y las que tengan 'var' en su comienzo (primeros 3 caracteres)
    
    cols_to_keep = [element for element in df_input.columns.tolist() if 'var' in element[:3]]
    # agregamos las columnas originales a la lista
    cols_to_keep.extend(cols_to_include)
    cols_to_keep.extend(cols_to_exclude)
    # vemos las cols to drop (es mas rapido esto que filtrar el dataframe)
    cols_to_drop = list(set(df_input.columns).difference(set(cols_to_keep)))
    # dropeamos
    df_input.drop(columns=cols_to_drop, inplace=True)
    
    return df_input
    

In [15]:
def DROP_NZV(df, threshold=0.995, cols_to_exclude=['date','ticker']):
    # copiamos el input en otro dataframe
    df_input = df.copy()
    # generamos lista vacía que luego rellenaremos
    list_to_drop = []
    
    cols_to_include= list(set(df_input.columns).difference(set(cols_to_exclude)))
    
    # iteramos en cada columna
    for col_i in cols_to_include:
        # si su valor más repetido (porcentualmente) supera al threshold, entonces agregaremos esta variable a la lista que hay que remover
        if list(pd.DataFrame(df_input[col_i].value_counts(dropna=False, normalize=True))[col_i])[0] > threshold:
            list_to_drop.append(col_i)
    # del input removemos las columnas que superaron el threshold
    df_input.drop(columns=list_to_drop, inplace=True)
    return df_input

In [16]:
def DOLLARIZE(df_a_dolarizar, df_dolar, cols_to_exclude=['date','ticker','volume']):
    df_input = df_a_dolarizar.copy()
    # guardamos las columnas originales con las que viene el dataframe
#     cols_input = df_input.columns.tolist()
    cols_to_include = list(set(df_input.columns).difference(cols_to_exclude))
    
    # cruzamos el dataframe con el de dolar
    df_input = pd.merge(df_input, df_dolar, how='left', on='date')
    for col_i in cols_to_include:
        df_input[col_i + '_usd'] = df_input[col_i] / df_input['venta_dblue']
    
    # Ahora quitaremos las columnas que hemos dolarizado (de nada nos sirven en pesos)
    # agregaremos a la lista a la variable de venta_dblue, que ya fue utilizada
    cols_to_include.append('venta_dblue')
    df_input.drop(columns=cols_to_include, inplace=True)
    
    cols_wo_usd = [x.replace('_usd','') for x in df_input.columns.tolist()]
    
    df_input.columns = cols_wo_usd
    
    return df_input

In [17]:
class Feature_Engine(): 
#     def __init__(self):
        
    def fit_transform(self, df, df_dolar, list_windows=[5, 10, 12, 26, 120, 252], cols_to_exclude=['ticker', 'date']):
        """ Basicamente es un pipeline de preprocesamiento pero fijo:
                1) Drop_NZV
                2) Dolariza
                3) Interpola posibles huecos de dolarizacion (ej: si no tenemos el dólar para esa fecha)
                4) Genera atributos entre columnas
                5) Genera atributos comparando entre filas
                6) Drop_NZV (nuevamente)
        - df: dataframe de input
        - df_dolar: dataframe con datos de dolar
        - list_windows: ventanas temporales para calcular los atributos históricos (los que son entre filas)
        - cols_to_exclude: aquí irá la data no-numérica, como la fecha y el ticker
        
        """
        df_input = df.copy()
        df_input.set_index(cols_to_exclude, inplace=True)
        
        self.list_windows = list_windows
        self.cols_to_exclude = cols_to_exclude
        self.df_dolar = df_dolar
        
        # Aplicamos las funciones para el tratamiento de datos (suerte de Pipeline...la ventaja de que fuera pipeline sería poder cambiarle los pasos, pero aún no nos importa)
        # quitamos variables cuyo valor más repetido sea el 99.5% de los valores totales
        self.df_input_dnzv = DROP_NZV(df = df_input, threshold=0.995, cols_to_exclude=self.cols_to_exclude)
        # dolarizamos variables
        self.df_input_usd = DOLLARIZE(df_a_dolarizar = self.df_input_dnzv, df_dolar = self.df_dolar)
        # por si faltara alguna fecha para dolarizar, deberemos interpolar los valores faltantes. 
        self.df_input_usd.interpolate(method ='linear', limit_direction ='forward',inplace=True)
        # generamos atributos (compara entre columnas)
        self.df_input_ag = ATTRIBUTES_GENERATOR(self.df_input_usd)
        # generamos tendencias (variaciones) respecto a valores históricos
        self.df_input_tg = TRENDS_GENERATOR(self.df_input_ag, list_windows=self.list_windows, cols_to_exclude=self.cols_to_exclude)
        # quitamos las columnas que posean "poca varianza" nuevamente
        self.df_attributes = DROP_NZV(self.df_input_tg).copy()
        
        return self.df_attributes
    
    def multiple_vds(self, list_windows=[1, 5, 10, 22], threshold_vd=0.008):
        """ Genera distintas VDs según si la cotización 'close' superó el threshold de rendimiento en algún momento de la ventana temporal.
            - list_windows: ventanas temporales en las cuales ver si se superó el threshold
            - list_names: sufijo que se colocará a cada ventana
            - threshold: rendimiento mínimo exigido
        
        """
        self.list_w_vds = list_windows
        self.threshold_vd = threshold_vd
        
        # generamos multiples vds
        self.df_vds = WINDOW_VDS(self.df_attributes, self.list_w_vds , self.threshold_vd)
        return self.df_vds
        
    def period_filter(self, df,  date_to_start = '2002-01-11', date_to_end = Today_Date):
        df_aux = df
        self.df_period_filter = df_aux[(df_aux['date']>= date_to_start)& (df_aux['date']<= date_to_end)]
        return self.df_period_filter

# Transformacion de dataframes

## Dolar Blue: Interpolación para missings

In [18]:
df_db.sort_values('date',ascending=True, inplace=True)
df_db.interpolate(method ='linear', limit_direction ='forward',inplace=True)
df_db

Unnamed: 0,date,venta_dblue
4649,2002-01-11,1.700000000
4648,2002-01-14,1.680000000
4647,2002-01-15,1.950000000
4646,2002-01-16,1.870000000
4645,2002-01-17,1.970000000
...,...,...
4739,2021-01-29,153.000000000
4740,2021-02-01,153.000000000
4741,2021-02-02,153.000000000
4742,2021-02-03,153.000000000


## Sumamos datos de merval a todas las acciones

In [19]:
# Quitamos ticker y volume, que son irrelevantes
df_merval.drop(columns=['ticker','volume'], inplace=True)
# agregamos merv a todas las variables restantes
df_merval.columns = ['merv_'+str(x) for x in df_merval.columns.tolist() ]
# mostramos
df_merval.head()
# vemos si tiene nans 
df_merval.isna().sum()

Unnamed: 0,merv_date,merv_open,merv_high,merv_low,merv_close
0,2002-01-17,322.970001221,375.829986572,318.920013428,371.200012207
1,2002-01-18,377.149993896,422.299987793,377.119995117,414.859985352
2,2002-01-21,414.859985352,500.589996338,414.859985352,470.549987793
3,2002-01-22,480.859985352,482.540008545,441.209991455,445.880004883
4,2002-01-23,438.670013428,453.230010986,423.230010986,438.890014648


merv_date     0
merv_open     0
merv_high     0
merv_low      0
merv_close    0
dtype: int64

In [20]:
# Cruzamos los datos:
# comenzamos armando un diccionario vacío
dict_acciones_arg_merv = {}
# vamos por todos los tickers de las acciones que tenemos
for key_i in dict_acciones_arg.keys():
    # copiamos la data en un dataframe auxiliar para cada accion
    df_aux = dict_acciones_arg[key_i].copy()
    # cruzamos la data de la accion con la de merval
    df_aux = pd.merge(df_aux, df_merval, how='left', left_on='date', right_on='merv_date')
    # quitamos la columna de fecha del merval, ya que es un dato repetido o nan
    df_aux.drop(columns=['merv_date'], inplace=True)
    # guardamos ese cruce en el dictionary que teníamos vacío
    dict_acciones_arg_merv[key_i] = df_aux.copy()
dict_acciones_arg_merv['ggal']

Unnamed: 0,date,open,high,low,close,volume,ticker,merv_open,merv_high,merv_low,merv_close
0,2002-01-11,0.499000013,0.499000013,0.499000013,0.480228692,0,ggal,,,,
1,2002-01-14,0.499000013,0.499000013,0.499000013,0.480228692,0,ggal,,,,
2,2002-01-15,0.499000013,0.499000013,0.499000013,0.480228692,0,ggal,,,,
3,2002-01-16,0.499000013,0.499000013,0.499000013,0.480228692,0,ggal,,,,
4,2002-01-17,0.481999993,0.519999981,0.449999988,0.463868231,4919000,ggal,322.970001221,375.829986572,318.920013428,371.200012207
...,...,...,...,...,...,...,...,...,...,...,...
4741,2021-01-28,117.599998474,120.699996948,117.599998474,119.500000000,808116,ggal,49970.410156250,50523.589843750,49344.109375000,49630.089843750
4742,2021-01-29,120.449996948,120.449996948,115.050003052,115.699996948,1551028,ggal,49630.089843750,49999.929687500,48202.859375000,48257.140625000
4743,2021-02-01,116.099998474,119.300003052,116.099998474,119.150001526,874855,ggal,48257.140625000,49773.820312500,48257.140625000,49576.890625000
4744,2021-02-02,120.000000000,122.000000000,118.500000000,119.199996948,1310376,ggal,49576.890625000,50612.808593750,49576.890625000,49686.828125000


# Transformamos el dictionary de acciones

In [21]:
# Inicializamos la clase para las transformaciones
FE = Feature_Engine()

In [22]:
dict_aa_ok = {}
for key_i in dict_acciones_arg_merv.keys():
    df_aux = dict_acciones_arg_merv[key_i].copy()
    
    df_aux_atr = FE.fit_transform( df= df_aux, 
                                   df_dolar = df_db, 
                                   list_windows = [5, 10, 12, 26, 120, 252],
                                   cols_to_exclude=['date','ticker']
                                 )
    df_aux_vds = FE.multiple_vds(threshold_vd=0.008)
    df_aux_ok = FE.period_filter(df_aux_vds, date_to_start='2010-01-01')
    dict_aa_ok[key_i] = df_aux_ok.copy()
dict_aa_ok['ggal'] 

Unnamed: 0,date,volume,high,low,merv_close,merv_open,merv_high,open,close,merv_low,ratio_ho,ratio_lo,ratio_co,ratio_hc,ratio_lc,ratio_hl,ratio_o_mean,ratio_c_mean,ratio_l_mean,ratio_h_mean,ratio_o_meanc,ratio_c_meanc,ratio_l_meanc,ratio_h_meanc,var_orig_high_vs_1,var_orig_high_vs_2,var_orig_high_vs_5,var_orig_ratio_c_meanc_vs_1,var_orig_ratio_c_meanc_vs_2,var_orig_ratio_c_meanc_vs_5,var_orig_merv_close_vs_1,var_orig_merv_close_vs_2,var_orig_merv_close_vs_5,var_orig_merv_open_vs_1,var_orig_merv_open_vs_2,var_orig_merv_open_vs_5,var_orig_open_vs_1,var_orig_open_vs_2,var_orig_open_vs_5,var_orig_ratio_h_mean_vs_1,var_orig_ratio_h_mean_vs_2,var_orig_ratio_h_mean_vs_5,var_orig_ratio_hl_vs_1,var_orig_ratio_hl_vs_2,var_orig_ratio_hl_vs_5,var_orig_ratio_l_meanc_vs_1,var_orig_ratio_l_meanc_vs_2,var_orig_ratio_l_meanc_vs_5,var_orig_ratio_co_vs_1,var_orig_ratio_co_vs_2,var_orig_ratio_co_vs_5,var_orig_merv_low_vs_1,var_orig_merv_low_vs_2,var_orig_merv_low_vs_5,var_orig_ratio_o_mean_vs_1,var_orig_ratio_o_mean_vs_2,var_orig_ratio_o_mean_vs_5,var_orig_ratio_l_mean_vs_1,var_orig_ratio_l_mean_vs_2,var_orig_ratio_l_mean_vs_5,var_orig_ratio_o_meanc_vs_1,var_orig_ratio_o_meanc_vs_2,var_orig_ratio_o_meanc_vs_5,var_orig_ratio_ho_vs_1,var_orig_ratio_ho_vs_2,var_orig_ratio_ho_vs_5,var_orig_volume_vs_1,var_orig_volume_vs_2,var_orig_volume_vs_5,var_orig_low_vs_1,var_orig_low_vs_2,var_orig_low_vs_5,var_orig_merv_high_vs_1,var_orig_merv_high_vs_2,var_orig_merv_high_vs_5,var_orig_ratio_lc_vs_1,var_orig_ratio_lc_vs_2,var_orig_ratio_lc_vs_5,var_orig_close_vs_1,var_orig_close_vs_2,var_orig_close_vs_5,var_orig_ratio_lo_vs_1,var_orig_ratio_lo_vs_2,var_orig_ratio_lo_vs_5,var_orig_ratio_c_mean_vs_1,var_orig_ratio_c_mean_vs_2,var_orig_ratio_c_mean_vs_5,var_orig_ratio_h_meanc_vs_1,var_orig_ratio_h_meanc_vs_2,var_orig_ratio_h_meanc_vs_5,var_orig_ratio_hc_vs_1,var_orig_ratio_hc_vs_2,var_orig_ratio_hc_vs_5,var_means_ratio_hc_m5d_m10d,var_means_ratio_hc_m5d_m12d,var_means_ratio_hc_m5d_m26d,var_means_ratio_hc_m5d_m120d,var_means_ratio_hc_m5d_m252d,var_means_ratio_hc_m10d_m12d,var_means_ratio_hc_m10d_m26d,var_means_ratio_hc_m10d_m120d,var_means_ratio_hc_m10d_m252d,var_means_ratio_hc_m12d_m26d,var_means_ratio_hc_m12d_m120d,var_means_ratio_hc_m12d_m252d,var_means_ratio_hc_m26d_m120d,var_means_ratio_hc_m26d_m252d,var_means_ratio_hc_m120d_m252d,vd_1d,vd_5d,vd_10d,vd_22d
2040,2010-01-04,1485340,0.569948199,0.559585514,617.743520786,603.577735387,620.108838650,0.564766857,0.543521599,603.577735387,1.009174303,0.990825697,0.962382252,1.048621066,1.029555247,1.018518500,0.419731845,0.403942478,0.415881098,0.423582592,0.437637360,0.421174428,0.433622342,0.441652377,0.006864982,0.013824871,0.062427740,0.006928423,0.013953522,0.073616128,0.013550909,0.027474118,0.048225536,0.002931579,0.005880397,0.033029294,0.006928400,0.013953474,0.057904771,0.006864982,0.013824871,0.062427740,-0.002457888,-0.004949757,-0.001258540,0.009345841,0.018868020,0.063766533,0.000000023,0.000000047,0.014851391,0.005255392,0.010566314,0.033621242,0.006928400,0.013953474,0.057904771,0.009345841,0.018868020,0.063766533,0.006928400,0.013953474,0.057904771,-0.000062981,-0.000126834,0.004275403,inf,0.412978543,0.030992810,0.009345841,0.018868020,0.063766533,0.013867758,0.028125554,0.050246907,0.002400785,0.004846868,-0.009174225,0.006928423,0.013953522,0.073616128,0.002400808,0.004846914,0.005540916,0.006928423,0.013953522,0.073616128,0.006864982,0.013824871,0.062427740,-0.000063005,-0.000126880,-0.010421218,0.000296576,-0.000447137,-0.001902745,-0.004416376,-0.025400220,-0.000743493,-0.002198670,-0.004711555,-0.025689177,-0.001456259,-0.003971014,-0.024964245,-0.002518423,-0.023542270,-0.021076927,1,1,1,1
2041,2010-01-05,2624534,0.588082897,0.564766857,622.215025907,617.849715633,624.715000607,0.564766857,0.558480945,617.849715633,1.041284363,1.000000000,0.988869899,1.053004408,1.011255374,1.041284363,0.419731845,0.415060187,0.419731845,0.437060207,0.437637360,0.432766412,0.437637360,0.455704939,0.031818151,0.038901564,0.088518863,0.027523000,0.034642114,0.084561553,0.007238449,0.020887446,0.049482501,0.023645637,0.026646536,0.057223623,0.000000000,0.006928400,0.065859039,0.031818151,0.038901564,0.088518863,0.022351939,0.019839112,0.021259682,0.009259250,0.018691627,0.065859039,0.027523000,0.027523024,0.017546893,0.023645637,0.029025296,0.057223623,0.000000000,0.006928400,0.065859039,0.009259250,0.018691627,0.065859039,0.000000000,0.006928400,0.065859039,0.031818151,0.031753165,0.021259682,0.766958407,inf,1.486557416,0.009259250,0.018691627,0.065859039,0.007427990,0.021398757,0.051157139,-0.017774542,-0.015416430,-0.017244309,0.027523000,0.034642114,0.084561553,0.009259250,0.011682288,0.000000000,0.027523000,0.034642114,0.084561553,0.031818151,0.038901564,0.088518863,0.004180101,0.004116834,0.003648764,-0.000301162,0.001109249,-0.001328423,-0.003735009,-0.024737572,0.001410837,-0.001027570,-0.003434881,-0.024443771,-0.002434971,-0.004838891,-0.025818183,-0.002409788,-0.023440288,-0.021081302,0,0,0,0
2042,2010-01-06,3233258,0.567010322,0.548969102,608.881433231,618.649512222,618.829912009,0.561855687,0.533278824,604.976819225,1.009174303,0.977064243,0.949138429,1.063253022,1.029422277,1.032863817,0.417568279,0.396330100,0.407991034,0.421399177,0.435381497,0.413237310,0.425395693,0.439375819,-0.035832661,-0.005154639,0.065208740,-0.045126196,-0.018845204,0.040999257,-0.021429236,-0.014345901,0.016382600,0.001294484,0.024970730,0.048365668,-0.005154639,-0.005154639,0.055525034,-0.035832661,-0.005154639,0.065208740,-0.008086692,0.014084493,0.032863817,-0.027972171,-0.018971922,0.031315768,-0.040178663,-0.013761500,-0.013761660,-0.020834996,0.002317985,0.025195874,-0.005154639,-0.005154639,0.055525034,-0.027972171,-0.018971922,0.031315768,-0.005154639,-0.005154639,0.055525034,-0.030836975,0.000000000,0.009174303,0.231936031,1.176779727,inf,-0.027972171,-0.018971922,0.031315768,-0.009420437,-0.002062423,0.029963792,0.017964704,-0.000129152,-0.009302110,-0.045126196,-0.018845204,0.040999257,-0.022935757,-0.013888875,-0.022935757,-0.045126196,-0.018845204,0.040999257,-0.035832661,-0.005154639,0.065208740,0.009732736,0.013953522,0.023256004,0.002000183,0.003796019,0.002390622,0.000864050,-0.020182609,0.001792251,0.000389660,-0.001133865,-0.022138511,-0.001400082,-0.002920881,-0.023887949,-0.001522931,-0.022519396,-0.021028489,1,1,1,1
2043,2010-01-07,2773188,0.568475465,0.547803588,617.428927951,610.454770268,618.297172763,0.552971603,0.544603974,602.563287306,1.028037355,0.990654104,0.984867885,1.043832752,1.005875121,1.037735928,0.410965673,0.404746893,0.407124830,0.422488063,0.428497228,0.422013159,0.424492537,0.440511157,0.002583979,-0.033341272,0.011205169,0.021236826,-0.024847707,0.015972719,0.014038028,-0.007692032,0.026950865,-0.013246179,-0.011968842,0.017341182,-0.015812039,-0.020885173,-0.007223119,0.002583979,-0.033341272,0.011205169,0.004717089,-0.003407749,0.013824871,-0.002123095,-0.030035878,-0.002583979,0.037644095,-0.004047058,0.023364603,-0.003989462,-0.024741337,0.008867830,-0.015812039,-0.020885173,-0.007223119,-0.002123095,-0.030035878,-0.002583979,-0.015812039,-0.020885173,-0.007223119,0.018691570,-0.012721797,0.018562366,-0.142293006,0.056640150,1.638086323,-0.002123095,-0.030035878,-0.002583979,-0.000860882,-0.010273209,0.025121856,-0.022874147,-0.005320370,-0.018264957,0.021236826,-0.024847707,0.015972719,0.013908871,-0.009345896,0.004672893,0.021236826,-0.024847707,0.015972719,0.002583979,-0.033341272,0.011205169,-0.018264957,-0.008709989,-0.004692596,0.002061142,0.002479223,0.001467824,-0.000109566,-0.021116104,0.000417221,-0.000592097,-0.002166243,-0.023129572,-0.001008898,-0.002582387,-0.023536973,-0.001575078,-0.022550827,-0.021008839,0,0,0,0
2044,2010-01-08,1289394,0.560723534,0.547803588,607.948322937,614.506432191,614.506432191,0.560723534,0.532170044,603.268708616,1.000000000,0.976958438,0.949077419,1.053654823,1.029376970,1.023584997,0.416726868,0.395506060,0.407124830,0.416726868,0.434504192,0.412378117,0.424492537,0.434504192,-0.013636350,-0.011087607,-0.009431222,-0.022831140,-0.002079175,-0.014101475,-0.015354974,-0.001532499,-0.002520375,0.006637121,-0.006696975,0.021091187,0.014018678,-0.002015025,-0.000280480,-0.013636350,-0.011087607,-0.009431222,-0.013636350,-0.008983585,0.002504264,0.000000000,-0.002123095,-0.011905671,-0.036340373,-0.000064279,-0.013824872,0.001170701,-0.002823432,0.004740709,0.014018678,-0.002015025,-0.000280480,0.000000000,-0.002123095,-0.011905671,0.014018678,-0.002015025,-0.000280480,-0.027272701,-0.009090900,-0.009153309,-0.535049914,-0.601209059,inf,0.000000000,-0.002123095,-0.011905671,-0.006130936,-0.006986540,0.004707915,0.023364580,-0.000044012,0.002227210,-0.022831140,-0.002079175,-0.014101475,-0.013824871,-0.000108288,-0.011628452,-0.022831140,-0.002079175,-0.014101475,-0.013636350,-0.011087607,-0.009431222,0.009409622,-0.009027202,0.004737052,0.001617527,0.002266517,0.003041707,0.000719708,-0.020239508,0.000647942,0.001421880,-0.000896369,-0.021821738,0.000773437,-0.001543311,-0.022455130,-0.002314958,-0.023210615,-0.020944142,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4741,2021-01-28,808116,0.781735732,0.761658021,321.438405724,323.642552825,327.225322822,0.761658021,0.773963731,319.586200615,1.026360532,1.000000000,1.016156476,1.010041815,0.984100406,1.026360532,0.566060353,0.575205894,0.566060353,0.580982005,0.590208156,0.599743840,0.590208156,0.605766357,0.004064230,0.025658306,0.022648001,0.003286292,0.024943454,0.051727690,-0.009383467,0.013758565,0.034489625,0.007624020,0.063582890,0.005250562,-0.001316830,0.057105424,0.006085121,0.004064230,0.025658306,0.022648001,-0.015584157,-0.029748328,-0.017103819,0.019959437,0.057105424,0.040443559,0.004609191,-0.030424563,0.045366507,0.012598433,0.050252545,0.032722126,-0.001316830,0.057105424,0.006085121,0.019959437,0.057105424,0.040443559,-0.001316830,0.057105424,0.006085121,0.005388155,-0.029748328,0.016462702,-0.232430112,-0.305203263,-0.579546850,0.019959437,0.057105424,0.040443559,0.003467665,0.030175228,0.013531243,0.016618532,0.031379263,-0.010729137,0.003286292,0.024943454,0.051727690,0.021304321,0.000000000,0.034150627,0.003286292,0.024943454,0.051727690,0.004064230,0.025658306,0.022648001,0.000775390,0.000697455,-0.027649447,-0.004223340,-0.003505105,-0.003820514,-0.009832505,-0.012499951,0.000721281,0.000404535,-0.005632955,-0.008311714,-0.000316518,-0.006349656,-0.009026484,-0.006035048,-0.008712724,-0.002693934,0,0,0,0
4742,2021-01-29,1551028,0.787254882,0.751960804,315.406147876,324.379672181,326.796926062,0.787254882,0.756209130,315.051368464,1.000000000,0.955168169,0.960564549,1.041054452,0.994382075,1.046936060,0.585083810,0.562010765,0.558853431,0.585083810,0.610043142,0.585985815,0.582693791,0.610043142,0.007060123,0.011153047,0.060550590,-0.022939835,-0.019728930,0.043047090,-0.018766450,-0.027973822,0.015111103,0.002277572,0.009918956,0.043955543,0.033606763,0.032245679,0.077296154,0.007060123,0.011153047,0.060550590,0.020047077,0.004150504,0.021621517,-0.012731720,0.006973599,0.038105181,-0.054708038,-0.050351007,-0.031791689,-0.014189700,-0.001770035,0.020415001,0.033606763,0.032245679,0.077296154,-0.012731720,0.006973599,0.038105181,0.033606763,0.032245679,0.077296154,-0.025683501,-0.020433733,-0.015544067,0.919313564,0.473207298,0.052067231,-0.012731720,0.006973599,0.038105181,-0.001309180,0.002153945,0.038789111,0.010447785,0.027239944,-0.004737954,-0.022939835,-0.019728930,0.043047090,-0.044831831,-0.024482621,-0.036379015,-0.022939835,-0.019728930,0.043047090,0.007060123,0.011153047,0.060550590,0.030704310,0.031503507,0.016781121,-0.002912390,-0.002068585,-0.000263381,-0.006600790,-0.009257279,0.000846270,0.002656747,-0.003699173,-0.006363421,0.001808947,-0.004541599,-0.007203595,-0.006339079,-0.008996267,-0.002674140,1,0,0,0
4743,2021-02-01,874855,0.779738582,0.758823519,324.031964869,315.406147876,325.319087010,0.758823519,0.778758180,315.406147876,1.027562486,1.000000000,1.026270483,1.001258930,0.974401989,1.027562486,0.563953766,0.578769104,0.563953766,0.579497734,0.588011703,0.603459055,0.588011703,0.604218768,-0.009547480,-0.002554763,0.059575048,0.029818536,0.006194669,0.081801182,0.027348284,0.008068604,0.064862610,-0.027663646,-0.025449079,0.015111103,-0.036114559,-0.003721489,0.038390079,-0.009547480,-0.002554763,0.059575048,-0.018505022,0.001171084,-0.006450933,0.009126427,-0.003721489,0.066454676,0.068403456,0.009953199,0.041806161,0.001126100,-0.013079578,0.038305332,-0.036114559,-0.003721489,0.038390079,0.009126427,-0.003721489,0.066454676,-0.036114559,-0.003721489,0.038390079,0.027562486,0.001171084,0.020401745,-0.435951511,0.082585916,-0.092553139,0.009126427,-0.003721489,0.066454676,-0.004522194,-0.005825453,0.034955064,-0.020092967,-0.009855109,-0.014186069,0.029818536,0.006194669,0.081801182,0.046936060,0.000000000,0.027027027,0.029818536,0.006194669,0.081801182,-0.009547480,-0.002554763,0.059575048,-0.038226168,-0.008695566,-0.020545489,-0.005176693,-0.005281734,-0.004072931,-0.009735595,-0.013191108,-0.000105588,0.001109505,-0.004582625,-0.008056120,0.001215221,-0.004477510,-0.007951372,-0.005685822,-0.009155467,-0.003489486,0,0,0,0
4744,2021-02-02,1310376,0.797385621,0.774509804,324.750510621,324.031964869,330.802670547,0.784313725,0.779084947,324.031964869,1.016666667,0.987500000,0.993333308,1.023489959,0.994127542,1.029535865,0.582897957,0.579011956,0.575611733,0.592612923,0.607764042,0.603712267,0.600166992,0.617893443,0.022631994,0.012868436,0.046191382,0.000419601,0.030250649,0.031725370,0.002217515,0.029626445,0.024204344,0.027348284,-0.001071915,0.064862610,0.033591745,-0.003735965,0.088549284,0.022631994,0.012868436,0.046191382,0.001920447,-0.016620113,-0.026746583,0.020671848,0.029986935,0.074942418,-0.032094049,0.034114063,-0.052201507,0.027348284,0.028505181,0.064862610,0.033591745,-0.003735965,0.088549284,0.020671848,0.029986935,0.074942418,0.033591745,-0.003735965,0.088549284,-0.010603559,0.016666667,-0.038912250,0.497820782,-0.155156451,0.126626584,0.020671848,0.029986935,0.074942418,0.016856015,0.012257595,0.041437483,0.020243753,-0.000255971,0.041888131,0.000419601,0.030250649,0.031725370,-0.012500000,0.033849360,-0.012500000,0.000419601,0.030250649,0.031725370,0.022631994,0.012868436,0.046191382,0.022203077,-0.016871829,0.014021184,-0.001728102,-0.002790788,-0.001069599,-0.006630978,-0.010508640,-0.001064526,0.000659642,-0.004911363,-0.008795739,0.001726006,-0.003850937,-0.007739451,-0.005567333,-0.009449148,-0.003903547,0,0,0,0


In [23]:
# # Generamos los atributos para merval
# df_merval_atr = FE.fit_transform(df= df_merval, 
#                                  df_dolar = df_db, 
#                                  list_windows= [5, 10, 12, 26, 120, 252] ,
#                                  cols_to_exclude=['date','ticker'])
# # df_merval_vds = FE.multiple_vds(threshold_vd=0.008)
# # filtramos los periodos
# df_merval_ok = FE.period_filter(df_merval_atr, date_to_start='2010-01-01')
# df_merval_ok.head()

# Exportación

In [24]:
# Exportamos este dictionary, que será el "model input" (colocamos sufijo "mi_")d
np.save(DATA/'mi_dict_acciones_arg',dict_aa_ok)

In [26]:
# Tomo el tiempo al final de la ejecucion
end_time=time.time()
print('El script demora', (end_time-start_time)/60, 'minutos')

El script demora 11.211720915635427 minutos
