In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, root_mean_squared_error

In [2]:
def print_metrics(y_pred, y_true):
    """Print MAE, MSE and RMSE for a set of predictions.

    Parameters
    ----------
    y_pred : array-like
        Values predicted by the model.
    y_true : array-like
        Observed target values, aligned element-wise with ``y_pred``.

    Returns
    -------
    None
        The three scores are written to stdout, one per line.
    """
    # scikit-learn metrics are declared as metric(y_true, y_pred). These three
    # are symmetric, so the printed values are unchanged, but following the
    # documented order keeps this helper correct if an asymmetric metric
    # is ever added.
    print('MAE', mean_absolute_error(y_true, y_pred))
    print('MSE', mean_squared_error(y_true, y_pred))
    print('RMSE', root_mean_squared_error(y_true, y_pred))

In [None]:
# Load the raw CSV; the column labelled 'Unnamed: 0' is used as the row index.
df = pd.read_csv(
    "US_Stock_Data.csv",
    index_col='Unnamed: 0',
)
# Preview the first ten rows.
df.head(10)

In [None]:
# Inspect the raw frame before any cleaning is applied.
df.info()

Aqui dá para perceber que o dataset precisa passar por um bom tratamento de dados: as datas não estão padronizadas, há valores nulos nos dados de volume de transação e há colunas com tipos errados.
Primeiramente, vou converter todos os valores de data para `datetime`, interpretando-os no formato dd/mm/yyyy.

In [5]:
# Parse every entry with its own pd.to_datetime call (day-first), so each
# value is interpreted independently of the others.
# NOTE(review): dayfirst=True reads ambiguous dates such as 01/02/2024 as
# 1 February — confirm this matches the source data.
df['Date'] = df['Date'].apply(pd.to_datetime, dayfirst=True)

Também é preciso converter os tipos de algumas colunas. Vou criar um dataframe auxiliar para armazenar apenas os preços.

In [6]:
# Auxiliary frame holding only the columns whose label contains 'Price'.
# .copy() makes it independent of `df`, so later column assignments on
# df_price never write back into the raw frame.
df_price = df[[name for name in df.columns if 'Price' in name]].copy()

In [None]:
# Display the price-only frame (values not yet converted to float).
df_price

Vou aplicar um tratamento a todas as colunas, removendo as vírgulas e convertendo as colunas para float64, a fim de conseguir plotar os gráficos.

In [8]:
# Strip the thousands separator from every price column and store the
# result as float64. Values are read from `df`, so re-running the cell
# always starts from the raw data.
for price_col in df_price.columns:
    as_text = df[price_col].astype(str)
    df_price[price_col] = as_text.str.replace(',', '', regex=False).astype('float64')

In [None]:
# Check the resulting dtype of every price column after the conversion.
df_price.dtypes

In [10]:
# Index both frames by the parsed dates. The right-hand side is evaluated
# in full before either name is rebound, so both calls see the same `df`.
df_price, df = df_price.set_index(df['Date']), df.set_index(df['Date'])

In [None]:
# Display the price frame, now indexed by date.
df_price

In [None]:
# One panel per price column on a 5 x 4 grid.
fig, axes = plt.subplots(nrows=5, ncols=4, figsize=(32, 40))
for position, column in enumerate(df_price.columns):
    # Fill the grid column by column: five panels down, then one step right.
    grid_col, grid_row = divmod(position, 5)
    df_price[column].plot(ax=axes[grid_row][grid_col], title=column)

plt.tight_layout()

In [None]:
# Draw every price series on its own 8 x 5 figure.
for column in df_price.columns:
    fig, ax = plt.subplots(figsize=(8, 5))
    df_price[column].plot(ax=ax, title=column)
    plt.show()

Aqui dá para fazer a análise de muitas empresas, mas eu vou escolher o ramo específico de criptomoedas (Bitcoin e Ethereum).

In [None]:
# Re-inspect the full frame, now indexed by date.
df.info()

In [15]:
# Restrict the analysis to the two cryptocurrency price series.
df_cripto = df_price.loc[:, ['Bitcoin_Price', 'Ethereum_Price']]

In [None]:
# Display the Bitcoin / Ethereum price frame.
df_cripto

In [None]:
# Side-by-side panels: Bitcoin on the left, Ethereum on the right.
fig, axes = plt.subplots(ncols=2, figsize=(12, 8))
btc_axis, eth_axis = axes
df_cripto['Bitcoin_Price'].plot(ax=btc_axis)
df_cripto['Ethereum_Price'].plot(ax=eth_axis)

In [18]:
# Add the regression targets. shift(1) moves each price one row down, so
# every row receives the price stored in the row directly above it and the
# first row is left without a target.
# NOTE(review): this is the *next* day's price only if the rows are ordered
# newest-first — confirm the ordering of the CSV.
df_cripto_predict = df_cripto.assign(
    Btc_fut=df_cripto['Bitcoin_Price'].shift(1),
    Eth_fut=df_cripto['Ethereum_Price'].shift(1),
)


In [None]:
# Display the frame with the shifted target columns added.
df_cripto_predict

In [20]:
# Remove the rows that have no target value (shift(1) leaves the first row
# without one). Dropping on missing targets instead of a hard-coded date
# label keeps the cell idempotent: the previous in-place
# drop('2024-02-02') raised KeyError when the cell was run a second time
# and silently depended on that exact date being the first row.
df_cripto_predict = df_cripto_predict.dropna(subset=['Btc_fut', 'Eth_fut'])

In [21]:
# Chronological hold-out: rows dated before the 95th percentile of the
# index are used for training, the remaining rows for testing.
threshold = np.quantile(df_cripto_predict.index, 0.95)
train = df_cripto_predict.loc[df_cripto_predict.index < threshold]
# `>=` rather than `>`: with two strict comparisons a row dated exactly at
# the threshold was discarded from both partitions.
test = df_cripto_predict.loc[df_cripto_predict.index >= threshold]
# Every row must land in exactly one of the two partitions.
assert len(train) + len(test) == len(df_cripto_predict)

In [22]:
# Features are selected with double brackets so each X is a one-column
# DataFrame (2-D), which is the feature-matrix shape the estimator API
# expects; targets stay as 1-D Series. The X frames keep the date index
# that the plotting cells read.
X_train_btc, y_train_btc = train[['Bitcoin_Price']], train['Btc_fut']
X_test_btc, y_test_btc = test[['Bitcoin_Price']], test['Btc_fut']
X_train_eth, y_train_eth = train[['Ethereum_Price']], train['Eth_fut']
X_test_eth, y_test_eth = test[['Ethereum_Price']], test['Eth_fut']


In [None]:
# Fit a default-parameter gradient-boosted regressor on the Bitcoin data.
XGB = XGBRegressor()
XGB.fit(X=X_train_btc, y=y_train_btc)

In [24]:
# Predict the Bitcoin target for the held-out rows.
y_pred_btc = XGB.predict(X=X_test_btc)

In [None]:
# Report the error metrics of the Bitcoin predictions on the test rows.
print_metrics(y_pred=y_pred_btc, y_true=y_test_btc)

In [26]:
# Date arrays used as x-axes: the test rows only, and the whole series.
datas_previsao = X_test_btc.index.to_numpy()
datas = df_cripto_predict.index.to_numpy()

In [None]:
# Whole Bitcoin price series with the test-period predictions overlaid.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(datas, df_cripto_predict['Bitcoin_Price'].values, label='Real')
ax.plot(datas_previsao, y_pred_btc, label='Predito', color='red')
ax.legend()

In [None]:
# Test period only: observed Bitcoin target against the model's prediction.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(datas_previsao, y_test_btc, label='Real')
ax.plot(datas_previsao, y_pred_btc, label='Predito', color='red')
ax.legend()

Agora vou fazer o mesmo para o Ethereum.

In [None]:
# Fit a fresh default-parameter regressor on the Ethereum data; this
# rebinds XGB, replacing the previously fitted model.
XGB = XGBRegressor()
XGB.fit(X=X_train_eth, y=y_train_eth)

In [30]:
# Predict the Ethereum target for the held-out rows.
y_pred_eth = XGB.predict(X=X_test_eth)

In [None]:
# Report the error metrics of the Ethereum predictions on the test rows.
print_metrics(y_pred=y_pred_eth, y_true=y_test_eth)

In [32]:
# Date arrays used as x-axes: the test rows only, and the whole series.
datas_previsao = X_test_eth.index.to_numpy()
datas = df_cripto_predict.index.to_numpy()

In [None]:
# Whole Ethereum price series with the test-period predictions overlaid.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(datas, df_cripto_predict['Ethereum_Price'].values, label='Real')
ax.plot(datas_previsao, y_pred_eth, label='Predito', color='red')
ax.legend()

In [None]:
# Test period only: observed Ethereum target against the model's prediction.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(datas_previsao, y_test_eth, label='Real')
ax.plot(datas_previsao, y_pred_eth, label='Predito', color='red')
ax.legend()