## Estudo de ações

### Bibliotecas

In [33]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import os
import pickle


### Busca e tratamento de dados

In [37]:
# Load the raw table from the semicolon-separated, UTF-8 encoded CSV.
df = pd.read_csv(
    './dados/dados.csv',
    sep=';',
    encoding='utf-8',
)

In [38]:
# Preview the first three rows of the freshly loaded frame.
df.head(3)

Unnamed: 0,TICKER,PRECO,DY,P/L,P/VP,P/ATIVOS,MARGEM BRUTA,MARGEM EBIT,MARG. LIQUIDA,P/EBIT,...,PATRIMONIO / ATIVOS,PASSIVOS / ATIVOS,GIRO ATIVOS,CAGR RECEITAS 5 ANOS,CAGR LUCROS 5 ANOS,LIQUIDEZ MEDIA DIARIA,VPA,LPA,PEG Ratio,VALOR DE MERCADO
0,AALR3,700,,-315,82,30,3010,160,-2281,4476,...,37,62,42,13,,"641.433,39",858,-222,-3,"828.049.712,00"
1,ABCB4,2461,641.0,601,102,10,2550,1706,1550,546,...,10,91,11,1870,1780.0,"15.402.170,61",2415,409,-33,"5.830.997.322,56"
2,ABEV3,1271,575.0,1328,222,145,5030,2259,1829,1075,...,65,34,60,1072,1572.0,"341.222.476,32",573,96,88,"200.231.221.039,64"


In [39]:
def _br_para_float(texto):
    """Rewrite a '1.234,56'-style string as '1234.56'.

    Strings without a comma are returned untouched, so values that are
    already plain numbers (or were filled with 0) pass through as-is.
    """
    if ',' not in texto:
        return texto
    return texto.replace('.', '').replace(',', '.')


# Missing values are replaced by 0 before any conversion takes place.
df.fillna(0, inplace=True)

# Every column except the ticker symbol is converted to float.
colunas_numericas = [nome for nome in df.columns if nome != 'TICKER']
for nome in colunas_numericas:
    como_texto = df[nome].astype(str)
    df[nome] = como_texto.map(_br_para_float).astype(float)


In [40]:
# Count the remaining missing values per column after the cleaning step.
df.isna().sum()

TICKER                    0
PRECO                     0
DY                        0
P/L                       0
P/VP                      0
P/ATIVOS                  0
MARGEM BRUTA              0
MARGEM EBIT               0
MARG. LIQUIDA             0
P/EBIT                    0
EV/EBIT                   0
DIVIDA LIQUIDA / EBIT     0
DIV. LIQ. / PATRI.        0
PSR                       0
P/CAP. GIRO               0
P. AT CIR. LIQ.           0
LIQ. CORRENTE             0
ROE                       0
ROA                       0
ROIC                      0
PATRIMONIO / ATIVOS       0
PASSIVOS / ATIVOS         0
GIRO ATIVOS               0
CAGR RECEITAS 5 ANOS      0
CAGR LUCROS 5 ANOS        0
 LIQUIDEZ MEDIA DIARIA    0
 VPA                      0
 LPA                      0
 PEG Ratio                0
 VALOR DE MERCADO         0
dtype: int64

### Definição alvo e previsores

In [41]:
# Target variable: the PRECO column of the cleaned frame.
alvo = df['PRECO']

In [42]:
# Predictor columns, listed explicitly. The last five names carry a leading
# space, exactly as they appear in the frame's column index.
colunas_previsoras = [
    'DY',
    'P/L',
    'P/VP',
    'P/ATIVOS',
    'MARGEM BRUTA',
    'MARGEM EBIT',
    'MARG. LIQUIDA',
    'P/EBIT',
    'EV/EBIT',
    'DIVIDA LIQUIDA / EBIT',
    'DIV. LIQ. / PATRI.',
    'PSR',
    'P/CAP. GIRO',
    'P. AT CIR. LIQ.',
    'LIQ. CORRENTE',
    'ROE',
    'ROA',
    'ROIC',
    'PATRIMONIO / ATIVOS',
    'PASSIVOS / ATIVOS',
    'GIRO ATIVOS',
    'CAGR RECEITAS 5 ANOS',
    'CAGR LUCROS 5 ANOS',
    ' LIQUIDEZ MEDIA DIARIA',
    ' VPA',
    ' LPA',
    ' PEG Ratio',
    ' VALOR DE MERCADO',
]
previsores = df[colunas_previsoras]

In [43]:
# Preview the first two rows of the predictor frame.
previsores.head(2)

Unnamed: 0,DY,P/L,P/VP,P/ATIVOS,MARGEM BRUTA,MARGEM EBIT,MARG. LIQUIDA,P/EBIT,EV/EBIT,DIVIDA LIQUIDA / EBIT,...,PATRIMONIO / ATIVOS,PASSIVOS / ATIVOS,GIRO ATIVOS,CAGR RECEITAS 5 ANOS,CAGR LUCROS 5 ANOS,LIQUIDEZ MEDIA DIARIA,VPA,LPA,PEG Ratio,VALOR DE MERCADO
0,0.0,-3.15,0.82,0.3,30.1,1.6,-22.81,44.76,85.66,40.9,...,0.37,0.62,0.42,0.13,0.0,641433.39,8.58,-2.22,-0.03,828049700.0
1,6.41,6.01,1.02,0.1,25.5,17.06,15.5,5.46,5.46,0.0,...,0.1,0.91,0.11,18.7,17.8,15402170.61,24.15,4.09,-0.33,5830997000.0


In [44]:
# Preview the first two values of the target series.
alvo.head(2)

0     7.00
1    24.61
Name: PRECO, dtype: float64

In [45]:
# Show the predictor and target shapes, one per line.
print(previsores.shape, alvo.shape, sep='\n')

(620, 28)
(620,)


### Escalonamento

In [47]:
# Standardize the predictors with StandardScaler (default settings).
scaler = StandardScaler()
previsores_standard_scaling = scaler.fit_transform(previsores)

# Rescale the predictors with MinMaxScaler (default settings).
min_max_scaler = MinMaxScaler()
previsores_min_scaling = min_max_scaler.fit_transform(previsores)

# NOTE(review): both scalers are fitted on the full dataset. If a train/test
# split happens downstream, the test rows have already influenced the scaling
# statistics. The fitted scalers are also not persisted, so new data cannot be
# transformed the same way later. Confirm whether this is intended.

# Create the output folder. exist_ok=True replaces the separate
# "exists?" check, so there is no window between the check and the creation.
os.makedirs('variaveis', exist_ok=True)

# Persist the scaled predictors, the target and the unscaled predictors,
# one pickle file per object, named after the variable it holds.
artefatos = {
    'previsores_standard_scaling': previsores_standard_scaling,
    'previsores_min_scaling': previsores_min_scaling,
    'alvo': alvo,
    'previsores': previsores,
}
for nome, objeto in artefatos.items():
    with open(f'variaveis/{nome}.pickle', 'wb') as f:
        pickle.dump(objeto, f)