# Primeiro teste de previsão com PyTorch (SSL)

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

df = pd.read_csv('C:\\Users\\natan\\OneDrive\\Documentos\\dev\\pfc\\databases\\dataset2001_2024.csv')

# Supondo que os dados estejam carregados em um DataFrame `df`
df = df.replace(-999.00, np.nan).dropna()
X = df.drop(columns=['T2M']).values
y = df['T2M'].values

# Normalizar os dados
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Dividir os dados em treino e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Converter para tensores PyTorch
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Modelo de Self-Supervised Learning com Masked Modeling
class SelfSupervisedModel(nn.Module):
    def __init__(self, input_dim):
        super(SelfSupervisedModel, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, input_dim)
        )
        
    def forward(self, x):
        encoded = self.encoder(x)
        reconstructed = self.decoder(encoded)
        return reconstructed, encoded

# Instanciar o modelo
input_dim = X_train.shape[1]
model = SelfSupervisedModel(input_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Treinamento Self-Supervisionado (Masked Modeling)
def train_self_supervised(model, X, epochs=100, mask_ratio=0.3):
    for epoch in range(epochs):
        model.train()
        
        # Mascara aleatoriamente alguns valores de entrada
        mask = torch.rand(X.shape) < mask_ratio
        X_masked = X.clone()
        X_masked[mask] = 0  # Define os valores mascarados como 0
        
        # Forward
        reconstructed, _ = model(X_masked)
        loss = criterion(reconstructed[mask], X[mask])  # Apenas os valores mascarados

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

train_self_supervised(model, X_train)

# Congelar o encoder e usar para regressão
class RegressionModel(nn.Module):
    def __init__(self, encoder, encoded_dim):
        super(RegressionModel, self).__init__()
        self.encoder = encoder
        self.regressor = nn.Sequential(
            nn.Linear(encoded_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
        )
        
    def forward(self, x):
        with torch.no_grad():
            encoded = self.encoder(x)
        output = self.regressor(encoded)
        return output

# Criar o modelo de regressão
encoded_dim = 32
regression_model = RegressionModel(model.encoder, encoded_dim)
regression_criterion = nn.MSELoss()
regression_optimizer = optim.Adam(regression_model.parameters(), lr=0.001)

# Treinamento supervisionado para previsão de temperatura
def train_regression(model, X, y, epochs=100):
    for epoch in range(epochs):
        model.train()
        
        # Forward
        output = model(X).squeeze()
        loss = regression_criterion(output, y)

        # Backward
        regression_optimizer.zero_grad()
        loss.backward()
        regression_optimizer.step()
        
        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Regression Loss: {loss.item():.4f}')

train_regression(regression_model, X_train, y_train)

# Avaliação no conjunto de teste
regression_model.eval()
with torch.no_grad():
    test_preds = regression_model(X_test).squeeze()
    test_rmse = np.sqrt(mean_squared_error(y_test, test_preds))
    print(f'Test RMSE: {test_rmse:.4f}')


Epoch [10/100], Loss: 0.9883
Epoch [20/100], Loss: 0.9570
Epoch [30/100], Loss: 0.9022
Epoch [40/100], Loss: 0.8188
Epoch [50/100], Loss: 0.7340
Epoch [60/100], Loss: 0.6672
Epoch [70/100], Loss: 0.6227
Epoch [80/100], Loss: 0.5907
Epoch [90/100], Loss: 0.5737
Epoch [100/100], Loss: 0.5626
Epoch [10/100], Regression Loss: 531.1185
Epoch [20/100], Regression Loss: 507.6984
Epoch [30/100], Regression Loss: 477.7468
Epoch [40/100], Regression Loss: 443.6201
Epoch [50/100], Regression Loss: 406.3732
Epoch [60/100], Regression Loss: 366.9147
Epoch [70/100], Regression Loss: 326.1841
Epoch [80/100], Regression Loss: 285.2582
Epoch [90/100], Regression Loss: 245.3082
Epoch [100/100], Regression Loss: 207.5103
Test RMSE: 14.2893
