### Análise de lag

Este notebook tem por finalidade analisar a quantidade apropriada de "lag" para as bases de dados utilizadas.

A análise será feita considerando os períodos de:
* 1 dia
* 2 dias
* 5 dias
* 10 dias

### Importação

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score

from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor

### Funções

In [2]:
def read(path, open_ = False):
    """Load a dataset CSV and normalise its price column names.

    The first CSV column is used as the index. The 'Alvo' column is renamed
    to 'Close 0' and the 'Abertura' column to 'Open 0'.

    Parameters
    ----------
    path : str or file-like
        Passed straight to ``pd.read_csv``.
    open_ : bool, default False
        When falsy, the 'Open 0' column is dropped from the result.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with renamed (and possibly dropped) columns.
    """
    column_names = {'Alvo': 'Close 0', 'Abertura': 'Open 0'}
    frame = pd.read_csv(path, index_col=0).rename(columns=column_names)

    if open_:
        return frame

    # Opening prices were not requested, so discard them.
    return frame.drop(columns='Open 0')

In [3]:
def lag(df, size, open_ = False):
    """Append lagged price columns to ``df`` and expose 'Close 0' as 'Target'.

    For every step ``i`` from 1 to ``int(size)`` a 'Close i' column is added,
    built by shifting 'Close i-1' down by one row. When ``open_`` is truthy
    the same is done for 'Open i' from 'Open i-1'. Afterwards 'Close 0' is
    renamed to 'Target' and every row containing a missing value is removed.

    Note: ``df`` is modified in place and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Close 0' (and 'Open 0' when ``open_`` is truthy).
    size : int or int-convertible
        Number of lag steps to generate.
    open_ : bool, default False
        Whether to lag the 'Open' series as well.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, mutated.
    """
    prefixes = ('Close', 'Open') if open_ else ('Close',)

    for step in range(1, int(size) + 1):
        for prefix in prefixes:
            # Each lag is derived from the previous one, shifted one row down.
            df[f'{prefix} {step}'] = df[f'{prefix} {step - 1}'].shift(1)

    df.rename(columns={'Close 0': 'Target'}, inplace=True)
    # The first rows have no history for the lagged columns; drop them.
    df.dropna(inplace=True)

    return df

In [4]:
def X_Y(df):
    """Split ``df`` into a feature frame and a target list.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``df`` without the 'Target' column and
        ``y`` is the 'Target' column converted to a plain Python list.
    """
    features = df.drop(columns='Target')
    target = df['Target'].tolist()
    return features, target

In [5]:
def training(df, cv=5):
    """Cross-validate a linear SVR on ``df`` and report the fold scores.

    ``df`` is split with ``X_Y`` into features and target, then a
    ``SVR(kernel='linear', C=1)`` is evaluated with ``cross_val_score``.
    The best and the mean fold score are printed and returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a 'Target' column plus the feature columns.
    cv : int or cross-validation splitter, default 5
        Forwarded to ``cross_val_score``. The default keeps the previous
        behaviour. NOTE(review): the rows appear to be time-ordered lagged
        prices, so a time-aware splitter (e.g. scikit-learn's
        ``TimeSeriesSplit``) may be more appropriate than plain folds —
        confirm before comparing lag sizes.

    Returns
    -------
    tuple
        ``(max_, avg_)``: the highest and the average fold score.
    """
    X, y = X_Y(df)

    mod = SVR(kernel='linear', C=1)

    scores = cross_val_score(mod, X, y, cv=cv)

    # Array reductions propagate NaN fold scores consistently, whereas the
    # built-in max() gives an order-dependent answer when a NaN is present.
    max_ = scores.max()
    avg_ = scores.mean()
    print("MAX: ", max_)
    print("AVG: ", avg_)
    return max_, avg_

In [6]:
# Dataset file stems to evaluate; each is read from 'Datasets/Acoes/<stem>.csv'.
datasets = ["FB_1"]

# Lag window lengths (in days) to compare.
size = [1, 2, 5, 10]

# Collected scores, keyed by "<dataset>-<lag size>".
resultados = dict()

In [None]:
# Evaluate every (dataset, lag size) combination and store its scores.
for data in datasets:
    for sz in size:

        nome = f'{data}-{sz}'
        file = f'Datasets/Acoes/{data}.csv'

        # A fresh frame is read for each lag size because lag() mutates
        # the frame it receives.
        df = lag(read(file), sz)
        max_, avg_ = training(df)

        resultados[nome] = dict(Max=max_, Avg=avg_)

        print(nome)

In [None]:
# Remaining dataset names, previously left as a dangling list fragment that
# raised a SyntaxError when the cell was executed. Kept as a valid list so
# they can be appended to `datasets` when the full comparison is run.
extra_datasets = [
    'FB_3', 'FB_5',
    'PBR_1', 'PBR_3', 'PBR_5',
    'AAPL_1', 'AAPL_3', 'AAPL_5',
    'TSLA_1', 'TSLA_3', 'TSLA_5',
]