# install

In [None]:
!pip install torch-scatter torch-sparse torch-cluster torch-geometric -f https://data.pyg.org/whl/torch-2.0.0+cpu.html

Looking in links: https://data.pyg.org/whl/torch-2.0.0+cpu.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_scatter-2.1.2%2Bpt20cpu-cp310-cp310-linux_x86_64.whl (494 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.1/494.1 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_sparse-0.6.18%2Bpt20cpu-cp310-cp310-linux_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-cluster
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_cluster-1.6.3%2Bpt20cpu-cp310-cp310-linux_x86_64.whl (751 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m751.3/751.3 kB[0m [31m22.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K   

In [None]:
!pip install torch-geometric



# Encoder-decoder GNN: regression task

In [None]:
!rm -rf /content/drive/*

In [None]:
from google.colab import drive

# Mount Google Drive to a different directory
drive.mount('/content/drive_mount')

Mounted at /content/drive_mount


In [None]:
import torch
import pandas as pd
import numpy as np
from torch_geometric.data import Data
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch.nn as nn
from torch_geometric.nn import SAGEConv
import torch.optim as optim

# Load METR-LA dataset
def load_metr_la_dataset(adj_path, time_series_path):
    """Load the METR-LA adjacency data and the sensor time series.

    Supported pickle layouts:
      * a 3-item list/tuple ``[sensor_ids, sensor_id_to_index, adj_matrix]``
        whose first item holds string IDs and whose last item is 2-D;
      * any other list/tuple, read as ``[matrix_rows, node_mapping, ...]``;
      * a dict with ``"adjacency_matrix"`` and ``"node_mapping"`` keys.

    Args:
        adj_path: Path to the pickled adjacency data.
        time_series_path: Path to the HDF5 file; the frame is read from
            the key ``"df"``.

    Returns:
        Tuple ``(adj_matrix, node_mapping, time_series_data)``.

    Raises:
        ValueError: If the pickle is neither a list/tuple nor a dict, or
            the adjacency matrix is neither 1-D nor 2-D.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load
    # adjacency files that come from a trusted source.
    adj_data = pd.read_pickle(adj_path)
    if isinstance(adj_data, (list, tuple)):
        # Without this check the string IDs in slot 0 would be split into
        # single characters and parsed as "matrix rows" of digits.
        ids_first = (
            len(adj_data) == 3
            and isinstance(adj_data[0], (list, tuple, np.ndarray))
            and len(adj_data[0]) > 0
            and all(isinstance(item, (str, bytes)) for item in adj_data[0])
            and np.ndim(adj_data[2]) == 2
        )
        if ids_first:
            adj_matrix = np.asarray(adj_data[2], dtype=np.float32)
        else:
            adj_matrix = np.array(
                [list(map(float, row)) for row in adj_data[0]], dtype=np.float32
            )
        node_mapping = adj_data[1]
    elif isinstance(adj_data, dict):
        adj_matrix = adj_data["adjacency_matrix"]
        node_mapping = adj_data["node_mapping"]
    else:
        raise ValueError("Unexpected structure in adj_METR-LA.pkl")

    if len(adj_matrix.shape) == 1:
        # A 1-D vector is expanded into a diagonal matrix.
        adj_matrix = np.diag(adj_matrix)
    elif len(adj_matrix.shape) != 2:
        raise ValueError(f"Cannot convert adjacency matrix to 2D. Current shape: {adj_matrix.shape}")

    time_series_data = pd.read_hdf(time_series_path, "df")  # shape: (timesteps, nodes)
    return adj_matrix, node_mapping, time_series_data

# Generate static node features from time-series
def create_node_features(time_series_data, aggregation="mean"):
    """Collapse each node's time series into a single static feature.

    Args:
        time_series_data: DataFrame whose columns are aggregated along
            axis 0.
        aggregation: Only ``"mean"`` is supported.

    Returns:
        Float tensor of shape ``(num_columns, 1)`` holding the column means.

    Raises:
        ValueError: If ``aggregation`` is anything other than ``"mean"``.
    """
    if aggregation != "mean":
        raise ValueError("Unsupported aggregation method.")

    column_means = time_series_data.mean(axis=0).values
    # Add a trailing axis so every node carries a 1-element feature vector.
    feature_column = np.array(column_means, dtype=np.float32)[:, np.newaxis]
    return torch.tensor(feature_column, dtype=torch.float)

# Convert adjacency matrix to edge list for PyTorch Geometric
def adjacency_to_edge_list(adj_matrix):
    """Return the indices of all non-zero adjacency entries as an edge list.

    Args:
        adj_matrix: Dense adjacency matrix (array-like or DataFrame).

    Returns:
        Long tensor with one row per matrix axis and one column per
        non-zero entry; for a 2-D input that is ``(2, num_edges)``.
    """
    dense = adj_matrix.values if isinstance(adj_matrix, pd.DataFrame) else adj_matrix
    # np.nonzero yields one index array per axis; stack them row-wise.
    coordinates = np.stack(np.nonzero(dense), axis=0)
    return torch.tensor(coordinates, dtype=torch.long)

# Prepare PyTorch Geometric Data object
def prepare_graph_data(adj_matrix, node_features, node_labels, train_mask, test_mask):
    """Bundle features, edges, regression targets and masks into a ``Data``.

    Args:
        adj_matrix: Adjacency matrix passed to ``adjacency_to_edge_list``.
        node_features: Stored as ``x``.
        node_labels: Converted to a float tensor and stored as ``y``.
        train_mask: Stored unchanged as ``train_mask``.
        test_mask: Stored unchanged as ``test_mask``.

    Returns:
        The assembled ``Data`` object.
    """
    graph_fields = {
        "x": node_features,
        "edge_index": adjacency_to_edge_list(adj_matrix),
        "y": torch.tensor(node_labels, dtype=torch.float),
        "train_mask": train_mask,
        "test_mask": test_mask,
    }
    return Data(**graph_fields)

# Define GNNEncoder
class GNNEncoder(torch.nn.Module):
    """Encoder that stacks two ``SAGEConv`` layers with a ReLU in between."""

    def __init__(self, input_dim, hidden_dim, out_dim):
        """Create the layers: input_dim -> hidden_dim -> out_dim."""
        super().__init__()
        self.conv1 = SAGEConv(input_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, out_dim)

    def forward(self, x, edge_index):
        """Return node embeddings; no activation follows the second layer."""
        hidden = torch.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

# Define NodeRegressor
class NodeRegressor(nn.Module):
    """Linear head that maps each node embedding to one scalar."""

    def __init__(self, input_dim):
        """Create the single ``input_dim -> 1`` linear layer."""
        super().__init__()
        self.fc = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Apply the linear layer to ``x``."""
        prediction = self.fc(x)
        return prediction

# Define overall Model
class Model(nn.Module):
    """Chain an encoder and a regressor into one module."""

    def __init__(self, encoder, regressor):
        """Store the two sub-modules as ``encoder`` and ``regressor``."""
        super().__init__()
        self.encoder = encoder
        self.regressor = regressor

    def forward(self, data):
        """Encode ``data.x`` over ``data.edge_index`` and regress the result."""
        embeddings = self.encoder(data.x, data.edge_index)
        return self.regressor(embeddings)

# Training function
def train_model(model, data, optimizer, criterion, epochs=100, device="cpu"):
    """Run full-batch training and print the training loss every epoch.

    Args:
        model: Module called as ``model(data)``.
        data: Graph object exposing ``to``, ``y`` and ``train_mask``.
        optimizer: Optimizer bound to the model parameters.
        criterion: Loss taking ``(predictions, targets)``.
        epochs: Number of optimisation steps.
        device: Device the model and data are moved to.
    """
    model.to(device)
    data = data.to(device)
    model.train()
    for step in range(1, epochs + 1):
        optimizer.zero_grad()
        predictions = model(data)
        mask = data.train_mask
        # Targets get a trailing axis before being compared with the output.
        targets = data.y[mask].unsqueeze(-1)
        loss = criterion(predictions[mask], targets)
        loss.backward()
        optimizer.step()
        print(f"Epoch {step}/{epochs}, Loss: {loss.item():.4f}")

# Evaluation function with accuracy metrics added
def evaluate_model(model, data, device="cpu", accuracy_threshold=0.05):
    """Print regression metrics for the train and test node subsets.

    Reports MSE, MAE, MAPE, R² and a threshold "accuracy" (the share of
    nodes whose relative error is below ``accuracy_threshold``).

    Args:
        model: Module called as ``model(data)``.
        data: Graph object exposing ``to``, ``y``, ``train_mask`` and
            ``test_mask``.
        device: Device the model and data are moved to for inference.
        accuracy_threshold: Relative-error bound for a prediction to
            count as correct.
    """
    model.to(device)
    model.eval()
    data = data.to(device)
    with torch.no_grad():
        out = model(data)
    # squeeze(-1) drops only the trailing axis, so a single-node graph
    # still yields a 1-D vector that can be mask-indexed.
    pred = out.squeeze(-1).cpu().numpy()
    true = data.y.cpu().numpy()
    # The masks were moved to `device` with the data; NumPy cannot index
    # with CUDA tensors, so bring them back to host memory first.
    train_mask = data.train_mask.cpu().numpy()
    test_mask = data.test_mask.cpu().numpy()

    def relative_error(y_true, y_pred):
        # Zero targets make the relative error undefined; leave them out
        # instead of letting inf/nan poison the percentage metrics.
        valid = y_true != 0
        return np.abs((y_true[valid] - y_pred[valid]) / y_true[valid])

    # Calculate MAPE (Mean Absolute Percentage Error)
    def mape(y_true, y_pred):
        errors = relative_error(y_true, y_pred)
        return np.mean(errors) * 100 if errors.size else float("nan")

    # Calculate accuracy (based on a threshold for "correct" predictions)
    def accuracy(y_true, y_pred, threshold=accuracy_threshold):
        errors = relative_error(y_true, y_pred)
        return np.mean(errors < threshold) * 100 if errors.size else float("nan")

    report_lines = []
    for split_name, mask in (("Train", train_mask), ("Test", test_mask)):
        y_true, y_pred = true[mask], pred[mask]
        mse = mean_squared_error(y_true, y_pred)
        mae = mean_absolute_error(y_true, y_pred)
        mape_value = mape(y_true, y_pred)
        r2 = r2_score(y_true, y_pred)
        acc = accuracy(y_true, y_pred)
        report_lines.append(
            f"{split_name} MSE: {mse:.4f}, MAE: {mae:.4f}, MAPE: {mape_value:.4f}, "
            f"R²: {r2:.4f}, Accuracy: {acc:.4f}%"
        )

    # Print metrics
    for line in report_lines:
        print(line)

# Paths to the METR-LA files on the mounted Google Drive
adj_path = "/content/drive_mount/MyDrive/archive (29)/adj_METR-LA.pkl"
time_series_path = "/content/drive_mount/MyDrive/archive (29)/METR-LA.h5"

# Load adjacency matrix, node mapping and the (timesteps, nodes) readings
adj_matrix, node_mapping, time_series_data = load_metr_la_dataset(adj_path, time_series_path)

# Node features: per-node mean of the time series, shape (num_nodes, 1)
node_features = create_node_features(time_series_data, aggregation="mean")

# Regression targets: per-node standard deviation of the time series
node_labels = time_series_data.std(axis=0).values

# Random 80/20 node split expressed as boolean masks.
# NOTE(review): no seed is set in this cell, so the split (and the model
# initialisation below) presumably differs between runs.
num_nodes = node_features.shape[0]
train_ratio = 0.8
train_size = int(train_ratio * num_nodes)
perm = torch.randperm(num_nodes)
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[perm[:train_size]] = True
test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[perm[train_size:]] = True

# Assemble the PyTorch Geometric graph object
graph_data = prepare_graph_data(adj_matrix, node_features, node_labels, train_mask, test_mask)

# Model dimensions: one input feature per node -> 64 -> 32 -> 1
input_dim = node_features.size(1)
hidden_dim = 64
out_dim = 32

encoder = GNNEncoder(input_dim, hidden_dim, out_dim)
regressor = NodeRegressor(out_dim)
model = Model(encoder, regressor)

# Adam with L2 weight decay; mean squared error as the training loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.MSELoss()

# Use the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train for 100 full-batch epochs, then print train/test metrics
train_model(model, graph_data, optimizer, criterion, epochs=100, device=device)
evaluate_model(model, graph_data, device=device)


Epoch 1/100, Loss: 373.2015
Epoch 2/100, Loss: 125.8073
Epoch 3/100, Loss: 236.5612
Epoch 4/100, Loss: 65.2744
Epoch 5/100, Loss: 16.7371
Epoch 6/100, Loss: 83.4525
Epoch 7/100, Loss: 111.6744
Epoch 8/100, Loss: 76.3283
Epoch 9/100, Loss: 30.1711
Epoch 10/100, Loss: 12.3061
Epoch 11/100, Loss: 24.9665
Epoch 12/100, Loss: 44.8249
Epoch 13/100, Loss: 49.6002
Epoch 14/100, Loss: 37.5844
Epoch 15/100, Loss: 21.6281
Epoch 16/100, Loss: 13.0486
Epoch 17/100, Loss: 14.2195
Epoch 18/100, Loss: 20.7556
Epoch 19/100, Loss: 26.9118
Epoch 20/100, Loss: 29.1794
Epoch 21/100, Loss: 26.9555
Epoch 22/100, Loss: 21.8270
Epoch 23/100, Loss: 16.3475
Epoch 24/100, Loss: 12.8693
Epoch 25/100, Loss: 12.5793
Epoch 26/100, Loss: 14.8445
Epoch 27/100, Loss: 17.5955
Epoch 28/100, Loss: 18.8710
Epoch 29/100, Loss: 17.9922
Epoch 30/100, Loss: 15.6990
Epoch 31/100, Loss: 13.4456
Epoch 32/100, Loss: 12.3494
Epoch 33/100, Loss: 12.6246
Epoch 34/100, Loss: 13.7163
Epoch 35/100, Loss: 14.7945
Epoch 36/100, Loss: 15.22

# Encoder-decoder GNN: classification task

In [None]:
import torch
import pandas as pd
import numpy as np
from torch_geometric.data import Data
from sklearn.metrics import accuracy_score
import torch.nn as nn
from torch_geometric.nn import SAGEConv
import torch.optim as optim

# Load METR-LA dataset
def load_metr_la_dataset(adj_path, time_series_path):
    """Load the METR-LA adjacency data and the sensor time series.

    Supported pickle layouts:
      * a 3-item list/tuple ``[sensor_ids, sensor_id_to_index, adj_matrix]``
        whose first item holds string IDs and whose last item is 2-D;
      * any other list/tuple, read as ``[matrix_rows, node_mapping, ...]``;
      * a dict with ``"adjacency_matrix"`` and ``"node_mapping"`` keys.

    Args:
        adj_path: Path to the pickled adjacency data.
        time_series_path: Path to the HDF5 file; the frame is read from
            the key ``"df"``.

    Returns:
        Tuple ``(adj_matrix, node_mapping, time_series_data)``.

    Raises:
        ValueError: If the pickle is neither a list/tuple nor a dict, or
            the adjacency matrix is neither 1-D nor 2-D.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load
    # adjacency files that come from a trusted source.
    adj_data = pd.read_pickle(adj_path)
    if isinstance(adj_data, (list, tuple)):
        # Without this check the string IDs in slot 0 would be split into
        # single characters and parsed as "matrix rows" of digits.
        ids_first = (
            len(adj_data) == 3
            and isinstance(adj_data[0], (list, tuple, np.ndarray))
            and len(adj_data[0]) > 0
            and all(isinstance(item, (str, bytes)) for item in adj_data[0])
            and np.ndim(adj_data[2]) == 2
        )
        if ids_first:
            adj_matrix = np.asarray(adj_data[2], dtype=np.float32)
        else:
            adj_matrix = np.array(
                [list(map(float, row)) for row in adj_data[0]], dtype=np.float32
            )
        node_mapping = adj_data[1]
    elif isinstance(adj_data, dict):
        adj_matrix = adj_data["adjacency_matrix"]
        node_mapping = adj_data["node_mapping"]
    else:
        raise ValueError("Unexpected structure in adj_METR-LA.pkl")

    if len(adj_matrix.shape) == 1:
        # A 1-D vector is expanded into a diagonal matrix.
        adj_matrix = np.diag(adj_matrix)
    elif len(adj_matrix.shape) != 2:
        raise ValueError(f"Cannot convert adjacency matrix to 2D. Current shape: {adj_matrix.shape}")

    time_series_data = pd.read_hdf(time_series_path, "df")  # shape: (timesteps, nodes)
    return adj_matrix, node_mapping, time_series_data

# Generate static node features from time-series
def create_node_features(time_series_data, aggregation="mean"):
    """Summarise every column of the time series as one static feature.

    Args:
        time_series_data: DataFrame whose columns are aggregated along
            axis 0.
        aggregation: Only ``"mean"`` is supported.

    Returns:
        Float tensor of shape ``(num_columns, 1)`` holding the column means.

    Raises:
        ValueError: If ``aggregation`` is anything other than ``"mean"``.
    """
    if aggregation != "mean":
        raise ValueError("Unsupported aggregation method.")

    averages = np.array(time_series_data.mean(axis=0).values, dtype=np.float32)
    # Reshape the flat vector into a single-feature column.
    as_column = averages[:, np.newaxis]
    return torch.tensor(as_column, dtype=torch.float)

# Convert adjacency matrix to edge list for PyTorch Geometric
def adjacency_to_edge_list(adj_matrix):
    """Convert a dense adjacency matrix into an index tensor of its edges.

    Args:
        adj_matrix: Dense adjacency matrix (array-like or DataFrame).

    Returns:
        Long tensor with one row per matrix axis and one column per
        non-zero entry; for a 2-D input that is ``(2, num_edges)``.
    """
    if isinstance(adj_matrix, pd.DataFrame):
        adj_matrix = adj_matrix.values

    # Every non-zero cell counts as an edge; its value is not kept.
    nonzero_axes = np.nonzero(adj_matrix)
    return torch.tensor(np.stack(nonzero_axes, axis=0), dtype=torch.long)

# Prepare PyTorch Geometric Data object
def prepare_graph_data(adj_matrix, node_features, node_labels, train_mask, test_mask):
    """Bundle features, edges, class labels and masks into a ``Data`` object.

    Args:
        adj_matrix: Adjacency matrix passed to ``adjacency_to_edge_list``.
        node_features: Stored as ``x``.
        node_labels: Converted to a long tensor and stored as ``y``.
        train_mask: Stored unchanged as ``train_mask``.
        test_mask: Stored unchanged as ``test_mask``.

    Returns:
        The assembled ``Data`` object.
    """
    edges = adjacency_to_edge_list(adj_matrix)
    # Integer dtype: the labels are used as class indices.
    class_ids = torch.tensor(node_labels, dtype=torch.long)
    return Data(
        x=node_features,
        edge_index=edges,
        y=class_ids,
        train_mask=train_mask,
        test_mask=test_mask,
    )

# Define GNNEncoder
class GNNEncoder(torch.nn.Module):
    """Encoder that stacks two ``SAGEConv`` layers with a ReLU in between."""

    def __init__(self, input_dim, hidden_dim, out_dim):
        """Create the layers: input_dim -> hidden_dim -> out_dim."""
        super().__init__()
        self.conv1 = SAGEConv(input_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, out_dim)

    def forward(self, x, edge_index):
        """Return node embeddings; no activation follows the second layer."""
        activated = torch.relu(self.conv1(x, edge_index))
        embeddings = self.conv2(activated, edge_index)
        return embeddings

# Define NodeClassifier
class NodeClassifier(nn.Module):
    """Linear head that maps each node embedding to per-class scores."""

    def __init__(self, input_dim, num_classes):
        """Create the single ``input_dim -> num_classes`` linear layer."""
        super().__init__()
        self.fc = nn.Linear(in_features=input_dim, out_features=num_classes)

    def forward(self, x):
        """Return the linear layer's output; no softmax is applied here."""
        logits = self.fc(x)
        return logits

# Define overall Model
class Model(nn.Module):
    """Chain an encoder and a classifier into one module."""

    def __init__(self, encoder, classifier):
        """Store the two sub-modules as ``encoder`` and ``classifier``."""
        super().__init__()
        self.encoder = encoder
        self.classifier = classifier

    def forward(self, data):
        """Encode ``data.x`` over ``data.edge_index`` and classify the result."""
        embeddings = self.encoder(data.x, data.edge_index)
        return self.classifier(embeddings)

# Training function
def train_model(model, data, optimizer, criterion, epochs=100, device="cpu"):
    """Run full-batch training and print the training loss every epoch.

    Args:
        model: Module called as ``model(data)``.
        data: Graph object exposing ``to``, ``y`` and ``train_mask``.
        optimizer: Optimizer bound to the model parameters.
        criterion: Loss taking ``(outputs, labels)``.
        epochs: Number of optimisation steps.
        device: Device the model and data are moved to.
    """
    model.to(device)
    data = data.to(device)
    model.train()
    for step in range(1, epochs + 1):
        optimizer.zero_grad()
        scores = model(data)
        mask = data.train_mask
        # Only training nodes contribute to the loss.
        loss = criterion(scores[mask], data.y[mask])
        loss.backward()
        optimizer.step()
        print(f"Epoch {step}/{epochs}, Loss: {loss.item():.4f}")

# Evaluation function
def evaluate_model(model, data, device="cpu"):
    """Print classification accuracy on the train and test node subsets.

    Args:
        model: Module called as ``model(data)``; its output is reduced
            with ``argmax`` over dimension 1.
        data: Graph object exposing ``to``, ``y``, ``train_mask`` and
            ``test_mask``.
        device: Device the model and data are moved to for inference.
    """
    model.to(device)
    model.eval()
    data = data.to(device)
    with torch.no_grad():
        out = model(data)
    pred = out.argmax(dim=1).cpu().numpy()
    true = data.y.cpu().numpy()
    # The masks were moved to `device` with the data; bring them back to
    # the host so they can index the host-side arrays (a CUDA mask cannot).
    train_mask = data.train_mask.cpu().numpy()
    test_mask = data.test_mask.cpu().numpy()

    acc_train = accuracy_score(true[train_mask], pred[train_mask])
    acc_test = accuracy_score(true[test_mask], pred[test_mask])

    print(f"Train Accuracy: {acc_train:.4f}")
    print(f"Test Accuracy: {acc_test:.4f}")

# Paths to the METR-LA files on the mounted Google Drive
adj_path = "/content/drive_mount/MyDrive/archive (29)/adj_METR-LA.pkl"
time_series_path = "/content/drive_mount/MyDrive/archive (29)/METR-LA.h5"

# Load adjacency matrix, node mapping and the (timesteps, nodes) readings
adj_matrix, node_mapping, time_series_data = load_metr_la_dataset(adj_path, time_series_path)

# Node features: per-node mean of the time series, shape (num_nodes, 1)
node_features = create_node_features(time_series_data, aggregation="mean")

# Continuous per-node score: standard deviation of the time series
node_labels = time_series_data.std(axis=0).values

# Bin the scores into 3 quantile-based classes; labels=False makes
# pd.qcut return integer bin indices instead of interval labels
num_classes = 3
node_labels = pd.qcut(node_labels, q=num_classes, labels=False)



In [None]:
# Random 80/20 node split expressed as boolean masks.
# NOTE(review): no seed is set in this cell, so the split (and the model
# initialisation below) presumably differs between runs.
num_nodes = node_features.shape[0]
train_ratio = 0.8
train_size = int(train_ratio * num_nodes)
perm = torch.randperm(num_nodes)
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[perm[:train_size]] = True
test_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask[perm[train_size:]] = True

# Assemble the PyTorch Geometric graph object
graph_data = prepare_graph_data(adj_matrix, node_features, node_labels, train_mask, test_mask)

# Model dimensions: one input feature per node -> 64 -> 32 -> num_classes
input_dim = node_features.size(1)
hidden_dim = 64
out_dim = 32
# Recount the classes from the labels actually present
num_classes = len(np.unique(node_labels))

encoder = GNNEncoder(input_dim, hidden_dim, out_dim)
classifier = NodeClassifier(out_dim, num_classes)
model = Model(encoder, classifier)

# Adam with L2 weight decay; cross-entropy over the class scores
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

# Use the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train for 100 full-batch epochs, then print train/test accuracy
train_model(model, graph_data, optimizer, criterion, epochs=100, device=device)
evaluate_model(model, graph_data, device=device)

Epoch 1/100, Loss: 5.7422
Epoch 2/100, Loss: 16.7089
Epoch 3/100, Loss: 18.3224
Epoch 4/100, Loss: 15.6903
Epoch 5/100, Loss: 10.3265
Epoch 6/100, Loss: 7.8031
Epoch 7/100, Loss: 4.3610
Epoch 8/100, Loss: 2.9355
Epoch 9/100, Loss: 4.8440
Epoch 10/100, Loss: 3.4965
Epoch 11/100, Loss: 1.3875
Epoch 12/100, Loss: 2.2390
Epoch 13/100, Loss: 2.7147
Epoch 14/100, Loss: 3.1564
Epoch 15/100, Loss: 2.6710
Epoch 16/100, Loss: 2.5091
Epoch 17/100, Loss: 1.7229
Epoch 18/100, Loss: 1.3881
Epoch 19/100, Loss: 1.7822
Epoch 20/100, Loss: 2.0783
Epoch 21/100, Loss: 1.7735
Epoch 22/100, Loss: 1.3553
Epoch 23/100, Loss: 1.4350
Epoch 24/100, Loss: 1.3148
Epoch 25/100, Loss: 1.5452
Epoch 26/100, Loss: 1.5196
Epoch 27/100, Loss: 1.2639
Epoch 28/100, Loss: 1.3432
Epoch 29/100, Loss: 1.1697
Epoch 30/100, Loss: 1.2390
Epoch 31/100, Loss: 1.3610
Epoch 32/100, Loss: 1.2848
Epoch 33/100, Loss: 1.1567
Epoch 34/100, Loss: 1.2090
Epoch 35/100, Loss: 1.1816
Epoch 36/100, Loss: 1.1196
Epoch 37/100, Loss: 1.2004
Epoch 

# Time-series forecasting with an LSTM

In [None]:
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load METR-LA dataset
def load_metr_la_dataset(time_series_path):
    """Read the frame stored under key ``"df"`` in the given HDF5 file.

    Args:
        time_series_path: Path to the HDF5 file.

    Returns:
        The object returned by ``pd.read_hdf``; the original comment
        describes it as shaped (timesteps, nodes).
    """
    return pd.read_hdf(time_series_path, "df")

# Generate time series data
def create_time_series_data(time_series_data, seq_length):
    """Slice the series into sliding input windows and next-step targets.

    Window ``i`` covers rows ``i .. i + seq_length - 1`` and its target is
    row ``i + seq_length``.

    Args:
        time_series_data: DataFrame with one row per time step.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(data, labels)`` of arrays shaped
        ``(num_windows, seq_length, num_columns)`` and
        ``(num_windows, num_columns)``. Both are empty 1-D arrays when the
        frame has no more than ``seq_length`` rows.
    """
    # Convert once; indexing the DataFrame with .iloc on every iteration
    # rebuilds pandas objects for each window and each label row.
    values = time_series_data.values
    num_windows = len(values) - seq_length
    data = [values[i:i + seq_length] for i in range(num_windows)]
    labels = [values[i + seq_length] for i in range(num_windows)]
    return np.array(data), np.array(labels)

# Custom Dataset class
class TimeSeriesDataset(Dataset):
    """Dataset that pairs each input window with its target."""

    def __init__(self, data, labels):
        """Keep references to the window and target containers."""
        self.data, self.labels = data, labels

    def __len__(self):
        """Return the number of stored windows."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(window, target)`` at ``idx`` as float tensors."""
        window = torch.tensor(self.data[idx], dtype=torch.float)
        target = torch.tensor(self.labels[idx], dtype=torch.float)
        return window, target

# Define LSTM model
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer on the last time step."""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        """Build a batch-first LSTM and the linear output layer."""
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a batch-first sequence batch to one output row per sequence."""
        # Zero initial hidden and cell states, created on the input's device.
        state_shape = (self.num_layers, x.size(0), self.hidden_dim)
        h0 = torch.zeros(state_shape, device=x.device)
        c0 = torch.zeros(state_shape, device=x.device)

        sequence_out, _ = self.lstm(x, (h0, c0))

        # Only the output of the final time step feeds the linear layer.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

# Training function
def train_model(model, train_loader, optimizer, criterion, epochs=10, device="cpu"):
    """Train with mini-batches and print the mean batch loss of each epoch.

    Args:
        model: Module called on each input batch.
        train_loader: Iterable yielding ``(data, labels)`` tensor pairs.
        optimizer: Optimizer bound to the model parameters.
        criterion: Loss taking ``(outputs, labels)``.
        epochs: Number of passes over ``train_loader``.
        device: Device the model and every batch are moved to.
    """
    model.to(device)
    model.train()
    for epoch in range(epochs):
        loss_sum = 0.0
        num_batches = 0
        for data, labels in train_loader:
            data, labels = data.to(device), labels.to(device)
            optimizer.zero_grad()
            out = model(data)
            loss = criterion(out, labels)
            loss.backward()
            optimizer.step()
            loss_sum += loss.item()
            num_batches += 1
        # Report the mean over all batches: the last batch alone is a noisy
        # estimate, and it does not exist when the loader is empty.
        epoch_loss = loss_sum / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

# Evaluation function
def evaluate_model(model, test_loader, device="cpu"):
    """Run inference over ``test_loader`` and print MSE, MAE and R².

    Args:
        model: Module called on each input batch.
        test_loader: Iterable yielding ``(data, labels)`` tensor pairs.
        device: Device the model and every batch are moved to.
    """
    model.eval()
    model.to(device)
    predicted_chunks, target_chunks = [], []
    with torch.no_grad():
        for batch_inputs, batch_targets in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)
            batch_out = model(batch_inputs)
            # Collect results on the host as NumPy arrays.
            predicted_chunks.append(batch_out.cpu().numpy())
            target_chunks.append(batch_targets.cpu().numpy())

    # Join the per-batch pieces along the sample axis.
    predictions = np.concatenate(predicted_chunks, axis=0)
    true_labels = np.concatenate(target_chunks, axis=0)

    mse = mean_squared_error(true_labels, predictions)
    mae = mean_absolute_error(true_labels, predictions)
    r2 = r2_score(true_labels, predictions)

    print(f"Test MSE: {mse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}")

# Path to the METR-LA readings on the mounted Google Drive
time_series_path = "/content/drive_mount/MyDrive/archive (29)/METR-LA.h5"

# Load the (timesteps, nodes) readings
time_series_data = load_metr_la_dataset(time_series_path)

# Create time series data
seq_length = 12  # Sequence length (e.g., predicting the next value based on the last 12)

# Build sliding windows and their next-step targets
data, labels = create_time_series_data(time_series_data, seq_length)

# Chronological 80/20 split: the first 80% of windows train, the rest test
train_ratio = 0.8
train_size = int(len(data) * train_ratio)
train_data, test_data = data[:train_size], data[train_size:]
train_labels, test_labels = labels[:train_size], labels[train_size:]

# Wrap the arrays in datasets; only the training batches are shuffled
train_dataset = TimeSeriesDataset(train_data, train_labels)
test_dataset = TimeSeriesDataset(test_data, test_labels)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# The model reads all nodes at each step and predicts all nodes at once
input_dim = time_series_data.shape[1]  # Number of nodes
hidden_dim = 64
output_dim = time_series_data.shape[1]  # Number of nodes
num_layers = 2

model = LSTMModel(input_dim, hidden_dim, output_dim, num_layers)

# Adam with L2 weight decay; mean squared error as the training loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = nn.MSELoss()

# Use the GPU when one is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Train for 100 epochs, then print the test metrics
train_model(model, train_loader, optimizer, criterion, epochs=100, device=device)
evaluate_model(model, test_loader, device=device)


Epoch 1/100, Loss: 343.0768
Epoch 2/100, Loss: 160.7804
Epoch 3/100, Loss: 161.7779
Epoch 4/100, Loss: 169.6268
Epoch 5/100, Loss: 255.8895
Epoch 6/100, Loss: 205.2028
Epoch 7/100, Loss: 206.7097
Epoch 8/100, Loss: 186.6997
Epoch 9/100, Loss: 196.8815
Epoch 10/100, Loss: 132.5844
Epoch 11/100, Loss: 195.4702
Epoch 12/100, Loss: 201.1268
Epoch 13/100, Loss: 142.6893
Epoch 14/100, Loss: 244.1316
Epoch 15/100, Loss: 323.9764
Epoch 16/100, Loss: 170.1968
Epoch 17/100, Loss: 187.0440
Epoch 18/100, Loss: 175.3933
Epoch 19/100, Loss: 232.7141
Epoch 20/100, Loss: 203.3325
Epoch 21/100, Loss: 220.6606
Epoch 22/100, Loss: 182.3668
Epoch 23/100, Loss: 198.2322
Epoch 24/100, Loss: 185.6296
Epoch 25/100, Loss: 207.4905
Epoch 26/100, Loss: 170.5513
Epoch 27/100, Loss: 141.5580
Epoch 28/100, Loss: 197.2070
Epoch 29/100, Loss: 279.8860
Epoch 30/100, Loss: 144.4003
Epoch 31/100, Loss: 104.9193
Epoch 32/100, Loss: 378.7969
Epoch 33/100, Loss: 165.6007
Epoch 34/100, Loss: 295.5452
Epoch 35/100, Loss: 169

# xgboost

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
import torch

# Load METR-LA dataset
def load_metr_la_dataset(time_series_path):
    """Read the frame stored under key ``"df"`` and print its shape.

    Args:
        time_series_path: Path to the HDF5 file.

    Returns:
        The object returned by ``pd.read_hdf``; the original comment
        describes it as shaped (timesteps, nodes).
    """
    frame = pd.read_hdf(time_series_path, "df")
    print(f"Data loaded with shape: {frame.shape}")
    return frame

# Generate time series data
def create_time_series_data(time_series_data, seq_length):
    """Build flattened sliding windows and next-step targets.

    Window ``i`` covers rows ``i .. i + seq_length - 1``, flattened row by
    row into one feature vector; its target is row ``i + seq_length``.
    The shapes of both result arrays are printed.

    Args:
        time_series_data: DataFrame with one row per time step.
        seq_length: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(data, labels)`` of arrays shaped
        ``(num_windows, seq_length * num_columns)`` and
        ``(num_windows, num_columns)``. Both are empty 1-D arrays when the
        frame has no more than ``seq_length`` rows.
    """
    # Convert once; indexing the DataFrame with .iloc on every iteration
    # rebuilds pandas objects for each window and each label row.
    values = time_series_data.values
    num_windows = len(values) - seq_length
    # Materialise each array a single time and reuse it for printing and
    # returning, instead of converting the lists repeatedly.
    data = np.array([values[i:i + seq_length].flatten() for i in range(num_windows)])
    labels = np.array([values[i + seq_length] for i in range(num_windows)])
    print(f"Generated data shape: {data.shape}")
    print(f"Generated labels shape: {labels.shape}")
    return data, labels

# Path to the METR-LA readings on the mounted Google Drive
time_series_path = "/content/drive_mount/MyDrive/archive (29)/METR-LA.h5"

# Load the (timesteps, nodes) readings
time_series_data = load_metr_la_dataset(time_series_path)

# Create time series data
seq_length = 12  # Sequence length (e.g., predicting the next value based on the last 12)

# Build flattened sliding windows and their next-step targets
data, labels = create_time_series_data(time_series_data, seq_length)

# Chronological 80/20 split: the first 80% of windows train, the rest test
train_ratio = 0.8
train_size = int(len(data) * train_ratio)
train_data, test_data = data[:train_size], data[train_size:]
train_labels, test_labels = labels[:train_size], labels[train_size:]

# torch is used here only to detect whether a CUDA device is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# If GPU is available, use GPU for training in XGBoost
use_gpu = device.type == "cuda"

# Recent XGBoost releases deprecate tree_method='gpu_hist' and warn on every
# fit/predict; the replacement is tree_method='hist' plus device='cuda'.
# The CPU configuration is left exactly as before.
xgb_params = dict(objective='reg:squarederror', eval_metric='rmse',
                  max_depth=6, learning_rate=0.1, n_estimators=100)
if use_gpu:
    xgb_params.update(tree_method='hist', device='cuda')
else:
    xgb_params['tree_method'] = 'auto'
model = XGBRegressor(**xgb_params)

# Ensure data dimensions are correct
print(f"Training data shape: {train_data.shape}")
print(f"Training labels shape: {train_labels.shape}")
print(f"Testing data shape: {test_data.shape}")
print(f"Testing labels shape: {test_labels.shape}")

# Train the model (2-D labels: one output column per node)
print("Training the model...")
model.fit(train_data, train_labels)

# Make predictions.
# NOTE(review): with a CUDA booster and host NumPy input, XGBoost may still
# warn about a device mismatch at predict time and fall back to a slower path.
print("Making predictions...")
predictions = model.predict(test_data)

# Evaluate the model
mse = mean_squared_error(test_labels, predictions)
mae = mean_absolute_error(test_labels, predictions)
r2 = r2_score(test_labels, predictions)

# Define regression accuracy metric (using a tolerance of 0.1 as an example)
def regression_accuracy(y_true, y_pred, tolerance=0.1):
    """Return the percentage of predictions within ``tolerance`` of the target.

    Args:
        y_true: Array of target values.
        y_pred: Array of predictions, subtracted element-wise from
            ``y_true``.
        tolerance: Largest absolute error that still counts as a hit
            (inclusive).

    Returns:
        Share of hits over all elements, scaled to 0-100.
    """
    absolute_error = np.abs(y_true - y_pred)
    hit_rate = np.mean(absolute_error <= tolerance)
    return hit_rate * 100  # Convert to percentage

# Share of individual predictions whose absolute error is at most 0.1,
# counted over every element of the test label array
accuracy = regression_accuracy(test_labels, predictions, tolerance=0.1)

# Print the test-set metrics
print(f"Test MSE: {mse:.4f}")
print(f"Test MAE: {mae:.4f}")
print(f"Test R²: {r2:.4f}")
print(f"Test Regression Accuracy: {accuracy:.2f}%")


Data loaded with shape: (34272, 207)
Generated data shape: (34260, 2484)
Generated labels shape: (34260, 207)
Using device: cuda
Training data shape: (27408, 2484)
Training labels shape: (27408, 207)
Testing data shape: (6852, 2484)
Testing labels shape: (6852, 207)
Training the model...



    E.g. tree_method = "hist", device = "cuda"



Making predictions...



    E.g. tree_method = "hist", device = "cuda"

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




Test MSE: 46.9128
Test MAE: 3.1562
Test R²: 0.9003
Test Regression Accuracy: 3.41%
