In [1]:
import os
import dill
import torch
from torch_geometric.data import Data
import re


def orden_numerico(archivo):
    numero = re.search(r'\d+', archivo)
    if numero:
        return int(numero.group())
    return 0

def cargar_snapshots_en_orden(ruta_guardado):
    archivos = os.listdir(ruta_guardado)
    archivos_snapshots = sorted([archivo for archivo in archivos if archivo.endswith('.pkl')], key=orden_numerico)
    
    snapshots_cargados = []
    for i, archivo in enumerate(archivos_snapshots):  
        ruta_completa = os.path.join(ruta_guardado, archivo)
        with open(ruta_completa, 'rb') as f:
            snapshot_dict = dill.load(f)
        
        snapshot = Data(
            x=torch.tensor(snapshot_dict['x'], dtype=torch.float),
            y=torch.tensor(snapshot_dict['y'], dtype=torch.float),
        )
        snapshots_cargados.append(snapshot)
        
        print(f"Snapshot cargado: {archivo}")
    
    return snapshots_cargados

ruta_guardado = "Dades/Snapshots"
snapshots_cargados = cargar_snapshots_en_orden(ruta_guardado)

print(f"Número de snapshots cargados: {len(snapshots_cargados)}")
print(snapshots_cargados[0])


for i, snapshot in enumerate(snapshots_cargados):
    nan_indices = torch.isnan(snapshot.x)
    if nan_indices.any():
        print(f"Valores nan encontrados en x del snapshot {i}")
        print("Valores de x con NaN:", snapshot.x[nan_indices])
snapshots_reestructurados = snapshots_cargados
del snapshots_cargados

Snapshot cargado: snapshot_0.pkl
Snapshot cargado: snapshot_1.pkl
Snapshot cargado: snapshot_2.pkl
Snapshot cargado: snapshot_3.pkl
Snapshot cargado: snapshot_4.pkl
Snapshot cargado: snapshot_5.pkl
Snapshot cargado: snapshot_6.pkl
Snapshot cargado: snapshot_7.pkl
Snapshot cargado: snapshot_8.pkl
Snapshot cargado: snapshot_9.pkl
Snapshot cargado: snapshot_10.pkl
Snapshot cargado: snapshot_11.pkl
Snapshot cargado: snapshot_12.pkl
Snapshot cargado: snapshot_13.pkl
Snapshot cargado: snapshot_14.pkl
Snapshot cargado: snapshot_15.pkl
Snapshot cargado: snapshot_16.pkl
Snapshot cargado: snapshot_17.pkl
Snapshot cargado: snapshot_18.pkl
Snapshot cargado: snapshot_19.pkl
Snapshot cargado: snapshot_20.pkl
Snapshot cargado: snapshot_21.pkl
Snapshot cargado: snapshot_22.pkl
Snapshot cargado: snapshot_23.pkl
Snapshot cargado: snapshot_24.pkl
Snapshot cargado: snapshot_25.pkl
Snapshot cargado: snapshot_26.pkl
Snapshot cargado: snapshot_27.pkl
Snapshot cargado: snapshot_28.pkl
Snapshot cargado: snapsh

In [2]:
snapshots_reestructurados[0]

Data(x=[512, 270], y=[512, 48])

In [3]:
import torch
from torch_geometric.data import Data
     
from torch_geometric.loader import DataLoader
total_datos = len(snapshots_reestructurados)
train_size = int(0.7 * total_datos)
val_size = int(0.15 * total_datos)
test_size = total_datos - train_size - val_size

train_dataset = snapshots_reestructurados[:train_size]
val_dataset = snapshots_reestructurados[train_size:train_size + val_size]
test_dataset = snapshots_reestructurados[train_size + val_size:]

caracteristicas_concatenadas = torch.cat([data.x for data in train_dataset], dim=0).to(torch.float64)
mean = caracteristicas_concatenadas.mean(dim=0)
std = caracteristicas_concatenadas.std(dim=0)

umbral_std = 1e-6
std[std < umbral_std] = 1

def normalize_dataset(dataset):
    for data in dataset:
        data.x = ((data.x.to(torch.float64) - mean) / std).to(torch.float32)  
    return dataset

train_dataset = normalize_dataset(train_dataset)
val_dataset = normalize_dataset(val_dataset)
test_dataset = normalize_dataset(test_dataset)

print(len(train_dataset))
print(len(val_dataset))
print(len(test_dataset))


batch_size = 1 

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

for batch in train_loader:
    print("Tamaño del lote (número de grafos):", batch.num_graphs)
    print("Características del primer grafo del lote:", batch[0].x.shape)
    print("Etiquetas del primer grafo del lote:", batch[0].y.shape)
    break

15329
3285
3286
Tamaño del lote (número de grafos): 1
Características del primer grafo del lote: torch.Size([512, 270])
Etiquetas del primer grafo del lote: torch.Size([512, 48])


In [None]:
"""import torch
from torch import nn

class Conv2DLSTMModel(nn.Module):
    def __init__(self, num_features, num_classes):
        super(Conv2DLSTMModel, self).__init__()
        self.conv = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3, 3), stride=1, padding=(1, 1))
        self.lstm = nn.LSTM(input_size=32, hidden_size=64, batch_first=True) 
        self.linear = nn.Linear(64, num_classes)

    def forward(self, x):
        x = x.view(-1, 1, 1, 270)
        x = self.conv(x)
        x = torch.relu(x)
        x = x.view(x.size(0), -1, 32)  
        x, (hn, cn) = self.lstm(x)
        x = x[:, -1, :]
        x = self.linear(x)
        return x
      


NUM_FEATURES = 270
NUM_CLASSES = 48

model = Conv2DLSTMModel(num_features=NUM_FEATURES, num_classes=NUM_CLASSES)"""

In [4]:
import torch.nn as nn

class Conv3DLSTMModel(nn.Module):
    def __init__(self, num_stations, num_time_steps, num_features_per_time_step, num_classes, dropout_rate=0.5):
        super(Conv3DLSTMModel, self).__init__()
        self.num_stations = num_stations
        self.num_time_steps = num_time_steps
        self.num_features_per_time_step = num_features_per_time_step
        self.num_classes = num_classes
        
        self.conv3d = nn.Conv3d(in_channels=1, out_channels=num_features_per_time_step, kernel_size=(3, 3, 3), stride=1, padding=(1, 1, 1))
        self.bn = nn.BatchNorm3d(num_features_per_time_step)
        self.relu = nn.ReLU(inplace=True)
        self.dropout_conv = nn.Dropout3d(p=dropout_rate) 
        lstm_input_size = num_features_per_time_step * num_features_per_time_step
        self.hidden_size = 512 
        hidden_dim = 8  
        
        self.lstm = nn.LSTM(input_size=lstm_input_size, hidden_size=hidden_dim, batch_first=False)
        self.dropout_lstm = nn.Dropout(p=dropout_rate) 
        
        self.linear = nn.Linear(hidden_dim * num_time_steps, self.num_classes) 

    def forward(self, x):
        x = x.view(-1, 1, self.num_time_steps, self.num_stations, self.num_features_per_time_step)
        x = self.conv3d(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.dropout_conv(x) 
        x = x.permute(2, 3, 0, 1, 4)
        x = x.reshape(self.num_time_steps, self.num_stations, -1)  
        x, (hn, cn) = self.lstm(x)
        x = self.dropout_lstm(x)  
        x = x.permute(1, 0, 2)
        x = x.reshape(self.num_stations, -1)
        x = self.linear(x)
        return x


NUM_STATIONS = 512 
NUM_TIME_STEPS = 6  
NUM_FEATURES_PER_TIME_STEP = 45  
NUM_CLASSES = 48 

model = Conv3DLSTMModel(num_stations=NUM_STATIONS, num_time_steps=NUM_TIME_STEPS, num_features_per_time_step=NUM_FEATURES_PER_TIME_STEP, num_classes=NUM_CLASSES)


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
from torch.optim.lr_scheduler import ReduceLROnPlateau

criterion = nn.HuberLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5, verbose=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model.to(device)

def train(model, train_loader):
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        
        optimizer.zero_grad()
        out = model(data.x)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

def validate(model, val_loader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for data in val_loader:
            data = data.to(device)
            
            out = model(data.x)
            loss = criterion(out, data.y)
            total_loss += loss.item()
    return total_loss / len(val_loader)

num_epochs = 150
best_val_loss = float('inf')
best_model = None

for epoch in range(num_epochs):
    train_loss = train(model, train_loader)
    val_loss = validate(model, val_loader)
    print(f'Epoch: {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
    
    scheduler.step(val_loss)
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = model.state_dict()
        torch.save(model.state_dict(), 'best_model.pth')

model.load_state_dict(torch.load('best_model.pth'))




Epoch: 1, Train Loss: 5.3820, Validation Loss: 7.1769
Epoch: 2, Train Loss: 4.7784, Validation Loss: 7.0175
Epoch: 3, Train Loss: 4.6793, Validation Loss: 6.9832
Epoch: 4, Train Loss: 4.6427, Validation Loss: 6.9712
Epoch: 5, Train Loss: 4.6185, Validation Loss: 6.9638
Epoch: 6, Train Loss: 4.6099, Validation Loss: 6.9555
Epoch: 7, Train Loss: 4.5926, Validation Loss: 6.9640
Epoch: 8, Train Loss: 4.5862, Validation Loss: 6.9557
Epoch: 9, Train Loss: 4.5768, Validation Loss: 6.9592
Epoch: 10, Train Loss: 4.5732, Validation Loss: 6.9503
Epoch: 11, Train Loss: 4.5692, Validation Loss: 6.9441
Epoch: 12, Train Loss: 4.5660, Validation Loss: 6.9390
Epoch: 13, Train Loss: 4.5638, Validation Loss: 6.9426
Epoch: 14, Train Loss: 4.5595, Validation Loss: 6.9383
Epoch: 15, Train Loss: 4.5577, Validation Loss: 6.9322
Epoch: 16, Train Loss: 4.5551, Validation Loss: 6.9206
Epoch: 17, Train Loss: 4.5523, Validation Loss: 6.9298
Epoch: 18, Train Loss: 4.5541, Validation Loss: 6.9543
Epoch: 19, Train Lo

<All keys matched successfully>

#### 2

In [6]:
model.load_state_dict(torch.load('best_model.pth'))
import numpy as np
import torch

def calculate_metrics(outputs, targets):
    mse = criterion(outputs, targets)
    rmse = torch.sqrt(mse)
    mae = torch.mean(torch.abs(outputs - targets))

    outputs_np = outputs.detach().cpu().numpy()
    targets_np = targets.detach().cpu().numpy()

    if np.std(outputs_np) > 0 and np.std(targets_np) > 0:
        correlation = np.corrcoef(outputs_np.ravel(), targets_np.ravel())[0, 1]
    else:
        correlation = np.nan
    return mse.item(), rmse.item(), mae.item(), correlation

nombres_compuestos = ["C6H6", "CO", "NO2", "NOx", "O3", "PM10", "PM25", "SO2"]


def test(model, test_loader, num_compounds, num_steps):
    model.eval()
    device = next(model.parameters()).device  
    metrics_per_compound_step = [[[] for _ in range(num_steps)] for _ in range(num_compounds)]

    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            
            out = model(data.x)
            out = out.view(-1, num_steps, num_compounds).to(device)
            y = data.y.view(-1, num_steps, num_compounds).to(device)
            for i in range(num_compounds):
                for j in range(num_steps):
                    mse, rmse, mae, corr = calculate_metrics(out[:, j, i], y[:, j, i])
                    metrics_per_compound_step[i][j].append((mse, rmse, mae, corr))

    averaged_metrics = [[np.mean(metrics, axis=0) for metrics in compound_metrics] for compound_metrics in metrics_per_compound_step]
    return averaged_metrics

num_compounds = 8  
num_steps = 6      
averaged_metrics = test(model, test_loader, num_compounds, num_steps)

for i, compound_metrics in enumerate(averaged_metrics):
    compound_name = nombres_compuestos[i]
    for j, step_metrics in enumerate(compound_metrics):
        mse, rmse, mae, corr = step_metrics
        print(f"Compuesto {compound_name}, Paso {j+1} - MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, Correlación: {corr:.4f}")

Compuesto C6H6, Paso 1 - MSE: 0.0679, RMSE: 0.2535, MAE: 0.2675, Correlación: 0.2118
Compuesto C6H6, Paso 2 - MSE: 0.0676, RMSE: 0.2529, MAE: 0.2665, Correlación: 0.2204
Compuesto C6H6, Paso 3 - MSE: 0.0673, RMSE: 0.2522, MAE: 0.2654, Correlación: 0.2267
Compuesto C6H6, Paso 4 - MSE: 0.0673, RMSE: 0.2521, MAE: 0.2651, Correlación: 0.2287
Compuesto C6H6, Paso 5 - MSE: 0.0676, RMSE: 0.2527, MAE: 0.2660, Correlación: 0.2259
Compuesto C6H6, Paso 6 - MSE: 0.0680, RMSE: 0.2537, MAE: 0.2675, Correlación: 0.2190
Compuesto CO, Paso 1 - MSE: 0.0150, RMSE: 0.1205, MAE: 0.1198, Correlación: 0.1088
Compuesto CO, Paso 2 - MSE: 0.0151, RMSE: 0.1206, MAE: 0.1199, Correlación: 0.1105
Compuesto CO, Paso 3 - MSE: 0.0151, RMSE: 0.1206, MAE: 0.1200, Correlación: 0.1121
Compuesto CO, Paso 4 - MSE: 0.0151, RMSE: 0.1207, MAE: 0.1200, Correlación: 0.1128
Compuesto CO, Paso 5 - MSE: 0.0151, RMSE: 0.1207, MAE: 0.1201, Correlación: 0.1122
Compuesto CO, Paso 6 - MSE: 0.0151, RMSE: 0.1207, MAE: 0.1201, Correlación:

#### 1

In [None]:
model.load_state_dict(torch.load('best_model.pth'))
import numpy as np
import torch

def calculate_metrics(outputs, targets):
    mse = criterion(outputs, targets)
    rmse = torch.sqrt(mse)
    mae = torch.mean(torch.abs(outputs - targets))

    outputs_np = outputs.detach().cpu().numpy()
    targets_np = targets.detach().cpu().numpy()

    if np.std(outputs_np) > 0 and np.std(targets_np) > 0:
        correlation = np.corrcoef(outputs_np.ravel(), targets_np.ravel())[0, 1]
    else:
        correlation = np.nan
    return mse.item(), rmse.item(), mae.item(), correlation

nombres_compuestos = ["C6H6", "CO", "NO2", "NOx", "O3", "PM10", "PM25", "SO2"]


def test(model, test_loader, num_compounds, num_steps):
    model.eval()
    device = next(model.parameters()).device  
    metrics_per_compound_step = [[[] for _ in range(num_steps)] for _ in range(num_compounds)]

    with torch.no_grad():
        for data in test_loader:
            data = data.to(device)
            
            out = model(data.x)
            out = out.view(-1, num_steps, num_compounds).to(device)
            y = data.y.view(-1, num_steps, num_compounds).to(device)
            for i in range(num_compounds):
                for j in range(num_steps):
                    mse, rmse, mae, corr = calculate_metrics(out[:, j, i], y[:, j, i])
                    metrics_per_compound_step[i][j].append((mse, rmse, mae, corr))

    averaged_metrics = [[np.mean(metrics, axis=0) for metrics in compound_metrics] for compound_metrics in metrics_per_compound_step]
    return averaged_metrics

num_compounds = 8  
num_steps = 6      
averaged_metrics = test(model, test_loader, num_compounds, num_steps)

for i, compound_metrics in enumerate(averaged_metrics):
    compound_name = nombres_compuestos[i]
    for j, step_metrics in enumerate(compound_metrics):
        mse, rmse, mae, corr = step_metrics
        print(f"Compuesto {compound_name}, Paso {j+1} - MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, Correlación: {corr:.4f}")

Compuesto C6H6, Paso 1 - MSE: 0.0721, RMSE: 0.2641, MAE: 0.2774, Correlación: 0.1228
Compuesto C6H6, Paso 2 - MSE: 0.0697, RMSE: 0.2603, MAE: 0.2754, Correlación: 0.1212
Compuesto C6H6, Paso 3 - MSE: 0.0717, RMSE: 0.2635, MAE: 0.2774, Correlación: 0.1147
Compuesto C6H6, Paso 4 - MSE: 0.0703, RMSE: 0.2612, MAE: 0.2769, Correlación: 0.1113
Compuesto C6H6, Paso 5 - MSE: 0.0705, RMSE: 0.2616, MAE: 0.2771, Correlación: 0.1058
Compuesto C6H6, Paso 6 - MSE: 0.0714, RMSE: 0.2630, MAE: 0.2779, Correlación: 0.1029
Compuesto CO, Paso 1 - MSE: 0.0142, RMSE: 0.1180, MAE: 0.1163, Correlación: 0.1830
Compuesto CO, Paso 2 - MSE: 0.0143, RMSE: 0.1186, MAE: 0.1197, Correlación: 0.0924
Compuesto CO, Paso 3 - MSE: 0.0139, RMSE: 0.1170, MAE: 0.1176, Correlación: 0.1714
Compuesto CO, Paso 4 - MSE: 0.0144, RMSE: 0.1194, MAE: 0.1230, Correlación: 0.1584
Compuesto CO, Paso 5 - MSE: 0.0140, RMSE: 0.1174, MAE: 0.1183, Correlación: 0.1609
Compuesto CO, Paso 6 - MSE: 0.0144, RMSE: 0.1187, MAE: 0.1176, Correlación: