In [None]:
%pip install captum

In [None]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import ML.prediction_torch.models as models
import ML.prediction_torch.utils as utils

In [None]:
df = pd.read_csv('C:/Users/luis2/OneDrive/Documentos/challenge-ML/dados/resultado_unificado_normalized_clean.csv', sep=';')
df.info()

In [None]:
import warnings

warnings.filterwarnings("ignore")

# Definições
target = "val_volumeutilcon"
cat_col = "id_reservatorio"

train_list, test_list = [], []

# split por reservatório
for rid, group in df.groupby(cat_col):
    group = group.sort_values(["ano", "mes", "dia"])  # garante ordem temporal
    
    split_idx = int(len(group) * 0.7)
    train_part = group.iloc[:split_idx].copy()
    test_part = group.iloc[split_idx:].copy()
    
    # Expanding mean até t-1 no treino
    train_part["id_encoded"] = (
        train_part[target].expanding().mean().shift(1)
    )
    
    # Preenche primeiros valores sem histórico com média global do treino
    global_mean = train_part[target].mean()
    train_part["id_encoded"].fillna(global_mean, inplace=True)
    
    # Para o teste, calculamos encoding usando apenas histórico do treino
    # Concatenamos treino + teste, aplicamos expanding, mas só usamos valores válidos
    full_series = pd.concat([train_part, test_part])
    full_series["id_encoded"] = (
        full_series[target].expanding().mean().shift(1)
    )
    
    # Só aplica para o conjunto de teste
    test_part["id_encoded"] = full_series.loc[test_part.index, "id_encoded"]
    test_part["id_encoded"].fillna(global_mean, inplace=True)

    train_list.append(train_part)
    test_list.append(test_part)

# Junta todos os reservatórios
train_df = pd.concat(train_list)
test_df = pd.concat(test_list)

# Define features
features = [
    "id_encoded",
    "val_volmax",
    "ear_reservatorio_percentual_lag1",
    "ear_reservatorio_percentual_lag7",
    "ear_reservatorio_percentual_roll7",
    "dia",
    "mes",
    "ano"
]

X_train = train_df[features]
y_train = train_df[[target]]

X_test = test_df[features]
y_test = test_df[[target]]

In [None]:
X_train = X_train.values.astype(np.float32)
y_train = y_train.values.astype(np.float32)
X_test = X_test.values.astype(np.float32)
y_test = y_test.values.astype(np.float32)

In [None]:
from torch.utils.data import TensorDataset, DataLoader

X_train_seq, y_train_seq = utils.create_sequences_multi_step(X_train, y_train, context_len=45, horizon=7)

train_ds = TensorDataset(torch.tensor(X_train_seq), torch.tensor(y_train_seq))

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)

In [None]:
from captum.attr import IntegratedGradients

model = models.LSTM_model(input_size=X_train.shape[1], hidden_size=50, num_layers=2, output_size=7)
ig = IntegratedGradients(model)

In [None]:
from torch.optim import Adam

# IMPORTANTE: Fazer alteração para mudar a data para definir escopo da predição

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 10
optimizer = Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

for epoch in range(epochs):
    model.train()
    total_loss = 0

    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        y_batch = y_batch.squeeze()

        valid_mask = ~torch.isnan(X_batch).any(dim=(1,2))
        X_batch = X_batch[valid_mask]
        y_batch = y_batch[valid_mask]

        optimizer.zero_grad()
        outputs = model(X_batch)

        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}")

In [49]:
reservatorio_id = 'JEIRAP'
df_res = test_df[test_df['id_reservatorio'] == reservatorio_id].sort_values(["ano", "mes", "dia"])

context_len = 45
X_context = df_res[features].values[-context_len:]  # shape: (45, num_features)
X_context_tensor = torch.tensor(X_context, dtype=torch.float32).unsqueeze(0).to(device)
print(X_context_tensor)


tensor([[[3.7812e-01, 1.0841e-01, 4.1385e-01, 4.1666e-01, 4.9870e-01,
          5.0000e+00, 8.0000e+00, 2.0250e+03],
         [3.7813e-01, 1.0841e-01, 4.1326e-01, 4.1622e-01, 4.9809e-01,
          6.0000e+00, 8.0000e+00, 2.0250e+03],
         [3.7813e-01, 1.0841e-01, 4.1273e-01, 4.1578e-01, 4.9747e-01,
          7.0000e+00, 8.0000e+00, 2.0250e+03],
         [3.7814e-01, 1.0841e-01, 4.1214e-01, 4.1525e-01, 4.9683e-01,
          8.0000e+00, 8.0000e+00, 2.0250e+03],
         [3.7815e-01, 1.0841e-01, 4.1155e-01, 4.1475e-01, 4.9617e-01,
          9.0000e+00, 8.0000e+00, 2.0250e+03],
         [3.7815e-01, 1.0841e-01, 4.1092e-01, 4.1444e-01, 4.9551e-01,
          1.0000e+01, 8.0000e+00, 2.0250e+03],
         [3.7816e-01, 1.0841e-01, 4.1064e-01, 4.1385e-01, 4.9486e-01,
          1.1000e+01, 8.0000e+00, 2.0250e+03],
         [3.7816e-01, 1.0841e-01, 4.1005e-01, 4.1326e-01, 4.9421e-01,
          1.2000e+01, 8.0000e+00, 2.0250e+03],
         [3.7817e-01, 1.0841e-01, 4.0949e-01, 4.1273e-01, 4.9355

In [50]:
model.eval()
with torch.no_grad():
    y_pred = model(X_context_tensor)
    
print("Predição:", y_pred.cpu().numpy())  # shape: (1, horizon=7)

Predição: [[0.39413032 0.39512312 0.39406517 0.39507312 0.39449072 0.3945567
  0.3945737 ]]


In [None]:
# target = 0 -> primeiro passo da previsão
attr, delta = ig.attribute(
    X_context_tensor,
    torch.zeros_like(X_context_tensor),
    target=0,
    return_convergence_delta=True
)


In [None]:
import matplotlib.pyplot as plt

# soma importância sobre features para ver relevância por timestep
attr_timestep = attr.sum(dim=2).squeeze().cpu().detach().numpy()

plt.plot(range(attr_timestep.shape[0]), attr_timestep)
plt.xlabel("Timestep (dias atrás)")
plt.ylabel("Importância")
plt.title("Integrated Gradients para o primeiro dia previsto")
plt.show()
