<a href="https://colab.research.google.com/github/Sakamotto/MPCA/blob/main/ANN/Exerc%C3%ADcio_Forcasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Baseline

A ideia aqui é utilizar a base de dados de preços de ações, já disponível na lib `yfinance`. Vamos tentar prever o futuro das ações da Apple, tendo como base os dados de 2014 até 2024.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.metrics import mean_absolute_error, mean_squared_error
from torch.utils.data import Dataset

# Download daily AAPL quotes for 2014-2024. `auto_adjust` is passed explicitly
# so the meaning of the "Close" column (split/dividend-adjusted) does not depend
# on the installed yfinance version's default, and so the call does not warn
# about the argument being omitted.
df = yf.download("AAPL", start="2014-01-01", end="2024-12-31", auto_adjust=True)

# Keep only the closing price, move the date index into a regular column, and
# rename the two remaining columns to `ds` (date) and `y` (price).
df = df[['Close']].reset_index()
df.columns = ['ds', 'y']

  df = yf.download("AAPL", start="2014-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed


In [None]:
df.head()

Unnamed: 0,ds,y
0,2014-01-02,17.192816
1,2014-01-03,16.815166
2,2014-01-06,16.906862
3,2014-01-07,16.785952
4,2014-01-08,16.892252


In [None]:
# Chronological hold-out: rows dated before `split_date` are used for training,
# rows on or after it for testing.
split_date = '2024-01-01'
train_df = df.loc[df['ds'] < split_date]
test_df = df.loc[df['ds'] >= split_date]

# Normalisation statistics are computed from the training rows only, so no
# information from the test period leaks into the scaling.
mean, std = train_df['y'].mean(), train_df['y'].std()

# z-score the whole series with the training statistics.
df['y_norm'] = df['y'].sub(mean).div(std)

class TimeSeriesDataset(Dataset):
    """Sliding-window samples over a series for one-step-ahead forecasting.

    Sample ``i`` is the pair ``(series[i:i + window_size], series[i + window_size])``:
    a window of consecutive values and the value that immediately follows it.
    A series of length ``n`` therefore yields ``max(n - window_size, 0)`` samples.

    Args:
        series: Array-like of values (pandas Series, NumPy array, list, ...).
            Any pandas index is ignored; only the positional order matters.
        window_size: Number of past values in each input window (>= 1).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.

    Attributes:
        X: Array of input windows, one row per sample.
        y: Array of targets aligned with ``X``.
    """

    def __init__(self, series, window_size=10):
        if window_size < 1:
            raise ValueError(
                f"window_size must be a positive integer, got {window_size}"
            )
        self.window_size = window_size
        # np.asarray accepts anything array-like, not only objects with `.values`.
        self.series = np.asarray(series)

        # A series shorter than the window produces an empty (but well-shaped) dataset.
        n_windows = max(len(self.series) - window_size, 0)

        # Broadcasting a column of start positions against a row of offsets gives
        # the index of every element of every window; fancy indexing then builds
        # all windows at once (and returns a fresh, writable array).
        starts = np.arange(n_windows)[:, None]
        offsets = np.arange(window_size)[None, :]
        self.X = self.series[starts + offsets]
        # Copy so the targets do not alias the caller's data.
        self.y = self.series[window_size:window_size + n_windows].copy()

    def __len__(self):
        """Return the number of (window, target) samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(window, target)`` pair."""
        return self.X[idx], self.y[idx]

window_size = 10

# Select the normalised values on each side of the chronological split.
train_series = df.loc[df['ds'] < split_date, 'y_norm']
test_series = df.loc[df['ds'] >= split_date, 'y_norm']

# Each split is windowed independently, so the first `window_size` values of a
# split only ever appear as context, never as targets.
train_dataset = TimeSeriesDataset(series=train_series, window_size=window_size)
test_dataset = TimeSeriesDataset(series=test_series, window_size=window_size)

# naive baseline
def evaluate_naive_method(dataset, scale=None, offset=None):
    """Score the naive "repeat the last observed value" forecast.

    For every ``(window, target)`` pair in ``dataset`` the prediction is the last
    element of the window. Predictions and targets are mapped back to the
    original units with ``value * scale + offset`` before the errors are taken.

    Args:
        dataset: Iterable of ``(window, target)`` pairs in normalised units.
        scale: Standard deviation used for normalisation. Defaults to the
            notebook-level ``std``.
        offset: Mean used for normalisation. Defaults to the notebook-level
            ``mean``.

    Returns:
        Tuple ``(mae, rmse)`` of floats in the original units.

    Raises:
        ValueError: If ``dataset`` yields no samples.
    """
    # Fall back to the notebook globals so existing single-argument calls keep working.
    scale = std if scale is None else scale
    offset = mean if offset is None else offset

    # Naive forecast: the last value of the window; both sides are denormalised
    # before the error is taken.
    errors = np.array(
        [
            (window[-1] * scale + offset) - (target * scale + offset)
            for window, target in dataset
        ],
        dtype=float,
    )

    if errors.size == 0:
        raise ValueError(
            "evaluate_naive_method received an empty dataset; "
            "is the series shorter than the window size?"
        )

    mae = float(np.mean(np.abs(errors)))
    rmse = float(np.sqrt(np.mean(errors ** 2)))
    return mae, rmse

# 6. Evaluate the naive baseline on each split.
(mae_train, rmse_train), (mae_test, rmse_test) = (
    evaluate_naive_method(split) for split in (train_dataset, test_dataset)
)

print(f"Treino - MAE: {mae_train:.2f} | RMSE: {rmse_train:.2f}")
print(f"Teste  - MAE: {mae_test:.2f} | RMSE: {rmse_test:.2f}")


  df = yf.download("AAPL", start="2014-01-01", end="2024-12-31")
[*********************100%***********************]  1 of 1 completed

Treino - MAE: 1.00 | RMSE: 1.70
Teste  - MAE: 2.14 | RMSE: 2.90





In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

batch_size = 32

# Training windows are served in shuffled mini-batches; test windows are served
# one at a time in their original order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=1)

class LSTMModel(nn.Module):
    """Stacked LSTM with a linear head that predicts one value per input window.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Predict the next value for each window in the batch.

        Args:
            x: Tensor of shape ``(batch, seq_len)`` for a univariate series, or
                ``(batch, seq_len, input_size)`` when features are already present.

        Returns:
            Tensor of shape ``(batch,)``.
        """
        # Univariate windows arrive without a feature axis; add one for the LSTM.
        if x.dim() == 2:
            x = x.unsqueeze(-1)  # (batch, seq_len, 1)
        out, _ = self.lstm(x)    # (batch, seq_len, hidden)
        out = out[:, -1, :]      # output at the last time step
        out = self.linear(out)   # (batch, 1)
        # Squeeze only the trailing axis: a bare squeeze() would also drop the
        # batch axis when batch == 1 and return a 0-d tensor whose shape no
        # longer matches a (1,) target.
        return out.squeeze(-1)

# Seed PyTorch's global RNG before the model is created so that weight
# initialisation and DataLoader shuffling -- and therefore the reported metrics --
# are repeatable across runs (GPU kernels may still introduce small differences).
torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LSTMModel().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Training starts here: minimise the loss on the normalised series for a fixed
# number of epochs.
epochs = 50
model.train()
for epoch in range(epochs):
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        # Cast to float32 and move to the model's device in a single call.
        X_batch = X_batch.to(device=device, dtype=torch.float32)
        y_batch = y_batch.to(device=device, dtype=torch.float32)

        optimizer.zero_grad()
        loss = criterion(model(X_batch), y_batch)
        loss.backward()
        optimizer.step()

        # `loss` is a batch mean; weighting by the batch size lets the epoch
        # figure printed below be a per-sample average.
        epoch_loss += loss.item() * len(X_batch)

    print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss/len(train_loader.dataset):.4f}")

# Evaluate the trained model on the held-out loader; errors are reported in the
# original price units rather than in normalised units.
model.eval()
mae_total = 0   # running sum of absolute errors
rmse_total = 0  # running sum of squared errors (the root is taken at the end)
n_samples = 0

with torch.no_grad():
    for X, y in test_loader:
        X = X.to(torch.float32).to(device)
        y = y.to(torch.float32).to(device)
        y_pred = model(X)

        # Flatten to 1-D float64 NumPy arrays so the arithmetic below works for
        # any batch size (and for 0-d model outputs), instead of relying on
        # `.item()`, which only accepts single-element tensors.
        pred_norm = y_pred.cpu().numpy().astype(np.float64).reshape(-1)
        true_norm = y.cpu().numpy().astype(np.float64).reshape(-1)

        # Undo the z-score normalisation.
        y_pred_denorm = pred_norm * std + mean
        y_true_denorm = true_norm * std + mean

        errors = y_pred_denorm - y_true_denorm
        mae_total += float(np.abs(errors).sum())
        rmse_total += float((errors ** 2).sum())
        n_samples += errors.size

if n_samples == 0:
    raise ValueError(
        "test_loader produced no samples; is the test series shorter than window_size?"
    )

mae = mae_total / n_samples
rmse = (rmse_total / n_samples) ** 0.5

print(f"\nLSTM Forecast - MAE: {mae:.2f} | RMSE: {rmse:.2f}")

Epoch 1/50 - Loss: 0.2340
Epoch 2/50 - Loss: 0.0074
Epoch 3/50 - Loss: 0.0037
Epoch 4/50 - Loss: 0.0035
Epoch 5/50 - Loss: 0.0036
Epoch 6/50 - Loss: 0.0032
Epoch 7/50 - Loss: 0.0031
Epoch 8/50 - Loss: 0.0031
Epoch 9/50 - Loss: 0.0032
Epoch 10/50 - Loss: 0.0029
Epoch 11/50 - Loss: 0.0027
Epoch 12/50 - Loss: 0.0028
Epoch 13/50 - Loss: 0.0029
Epoch 14/50 - Loss: 0.0025
Epoch 15/50 - Loss: 0.0024
Epoch 16/50 - Loss: 0.0021
Epoch 17/50 - Loss: 0.0026
Epoch 18/50 - Loss: 0.0020
Epoch 19/50 - Loss: 0.0020
Epoch 20/50 - Loss: 0.0020
Epoch 21/50 - Loss: 0.0019
Epoch 22/50 - Loss: 0.0020
Epoch 23/50 - Loss: 0.0017
Epoch 24/50 - Loss: 0.0018
Epoch 25/50 - Loss: 0.0018
Epoch 26/50 - Loss: 0.0016
Epoch 27/50 - Loss: 0.0017
Epoch 28/50 - Loss: 0.0016
Epoch 29/50 - Loss: 0.0016
Epoch 30/50 - Loss: 0.0016
Epoch 31/50 - Loss: 0.0015
Epoch 32/50 - Loss: 0.0015
Epoch 33/50 - Loss: 0.0016
Epoch 34/50 - Loss: 0.0014
Epoch 35/50 - Loss: 0.0013
Epoch 36/50 - Loss: 0.0012
Epoch 37/50 - Loss: 0.0014
Epoch 38/5