# THIS IS A TEST FOR THE HGPSL THEN DISCARDED AS OUT OF SCOPE FOR THE PROJECT 

In [4]:
from ACAgraphML.Dataset import ZINC_Dataset
from ACAgraphML.HGPSL import HGPSLModel
from ACAgraphML.Transforms import OneHotEncodeFeat
import torch

In [5]:
import os
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.loader import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint
from typing import Literal

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
CHECKPOINT_PATH = './saved_models/HGPSL/'
AVAIL_GPUS = min(1, torch.cuda.device_count()) if torch.cuda.is_available() else 0
BATCH_SIZE = 256 if AVAIL_GPUS else 64

# Setting the seed
pl.seed_everything(42)

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Create Checkpoint path
os.makedirs(CHECKPOINT_PATH, exist_ok=True)

Seed set to 42


In [7]:
# Dataset configuration
NUM_NODE_FEATS = 28  # Number of node features in ZINC dataset
NUM_EDGE_FEATS = 4   # Number of edge features (bond types: 0, 1, 2, 3 where 0 is padding)
oneHotTransform = OneHotEncodeFeat(NUM_NODE_FEATS)

# Custom transform to ensure float type and process edge attributes
def ensure_float_transform(data):
    data = oneHotTransform(data)
    data.x = data.x.float()  # Ensure node features are float
    # Convert edge attributes to one-hot encoding for bond types
    data.edge_attr = torch.nn.functional.one_hot(data.edge_attr.long(), num_classes=NUM_EDGE_FEATS).float()
    return data

# Load datasets
train_dataset = ZINC_Dataset.SMALL_TRAIN.load(transform=ensure_float_transform)
val_dataset = ZINC_Dataset.SMALL_VAL.load(transform=ensure_float_transform)

# Create data loaders
graph_train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
graph_val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

print(f"Number of training graphs: {len(train_dataset)}")
print(f"Number of validation graphs: {len(val_dataset)}")
print(f"Node feature shape: {train_dataset[0].x.shape}")
print(f"Edge attribute shape after transform: {train_dataset[0].edge_attr.shape}")
print(f"Target shape: {train_dataset[0].y.shape}")

Number of training graphs: 10000
Number of validation graphs: 1000
Node feature shape: torch.Size([29, 28])
Edge attribute shape after transform: torch.Size([64, 4])
Target shape: torch.Size([1])


In [8]:
# Configuration class for HGPSL model (since it requires args object)
class HGPSLConfig:
    def __init__(self):
        self.num_features = NUM_NODE_FEATS
        self.nhid = 128  # Hidden dimension
        self.num_classes = 1  # Regression task
        self.pooling_ratio = 0.5  # Pooling ratio for hierarchical pooling
        self.dropout_ratio = 0.5  # Dropout ratio
        self.sample_neighbor = True  # Whether to sample neighbors
        self.sparse_attention = True  # Whether to use sparse attention
        self.structure_learning = True  # Whether to use structure learning
        self.lamb = 1.0  # Lambda parameter for structure learning

# Create configuration
hgpsl_config = HGPSLConfig()
print("HGPSL Configuration:")
for attr, value in vars(hgpsl_config).items():
    print(f"  {attr}: {value}")

HGPSL Configuration:
  num_features: 28
  nhid: 128
  num_classes: 1
  pooling_ratio: 0.5
  dropout_ratio: 0.5
  sample_neighbor: True
  sparse_attention: True
  structure_learning: True
  lamb: 1.0


In [9]:
class HGPSLLightningModule(pl.LightningModule):
    def __init__(self, args, loss: Literal['mse', 'mae'] = 'mae', lr: float = 1e-3, weight_decay: float = 0.0):
        super().__init__()
        # Saving hyperparameters
        self.save_hyperparameters()
        
        # Initialize HGPSL model
        self.model = HGPSLModel(args)
        
        # Loss function
        if loss == 'mae':
            self.loss_module = nn.L1Loss()
        elif loss == 'mse':
            self.loss_module = nn.MSELoss()
        else:
            raise ValueError(f"Unsupported loss type: {loss}")
        
        self.lr = lr
        self.weight_decay = weight_decay

    def forward(self, data, mode="train"):
        # HGPSL expects log_softmax output, but we need regression output
        # We'll need to modify the forward pass slightly
        x = self.model(data)
        
        # Since HGPSL outputs log_softmax for classification, 
        # we need to modify it for regression
        # For now, let's just remove the log_softmax and treat it as regression
        x = x.squeeze(dim=-1) if x.dim() > 1 else x
        
        if hasattr(data, 'y'):
            loss = self.loss_module(x, data.y)
            return x, loss
        else:
            return x

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        return optimizer

    def training_step(self, batch, batch_idx):
        loss = self.forward(batch, mode="train")[1]
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        y, loss = self.forward(batch, mode="val")
        self.log("val_loss", loss)
        mae = nn.L1Loss()(y, batch.y)
        self.log("val_mae", mae)
        return loss

    def test_step(self, batch, batch_idx):
        y, loss = self.forward(batch, mode="test")
        self.log("test_loss", loss, prog_bar=True)
        mae = nn.L1Loss()(y, batch.y)   
        self.log("test_mae", mae, prog_bar=True)
        return {"test_loss": loss, "test_mae": mae}

In [10]:
from ACAgraphML.HGPSL.layers import GCN, HGPSLPool
from torch_geometric.nn import GCNConv, global_mean_pool as gap, global_max_pool as gmp

class HGPSLRegressionModel(torch.nn.Module):
    """Modified HGPSL model for regression tasks"""
    def __init__(self, args):
        super(HGPSLRegressionModel, self).__init__()
        self.args = args
        self.num_features = args.num_features
        self.nhid = args.nhid
        self.num_classes = args.num_classes
        self.pooling_ratio = args.pooling_ratio
        self.dropout_ratio = args.dropout_ratio
        self.sample = args.sample_neighbor
        self.sparse = args.sparse_attention
        self.sl = args.structure_learning
        self.lamb = args.lamb

        self.conv1 = GCNConv(self.num_features, self.nhid)
        self.conv2 = GCN(self.nhid, self.nhid)
        self.conv3 = GCN(self.nhid, self.nhid)

        self.pool1 = HGPSLPool(self.nhid, self.pooling_ratio,
                               self.sample, self.sparse, self.sl, self.lamb)
        self.pool2 = HGPSLPool(self.nhid, self.pooling_ratio,
                               self.sample, self.sparse, self.sl, self.lamb)

        self.lin1 = torch.nn.Linear(self.nhid * 2, self.nhid)
        self.lin2 = torch.nn.Linear(self.nhid, self.nhid // 2)
        self.lin3 = torch.nn.Linear(self.nhid // 2, self.num_classes)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        edge_attr = None

        x = F.relu(self.conv1(x, edge_index, edge_attr))
        x, edge_index, edge_attr, batch = self.pool1(
            x, edge_index, edge_attr, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv2(x, edge_index, edge_attr))
        x, edge_index, edge_attr, batch = self.pool2(
            x, edge_index, edge_attr, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv3(x, edge_index, edge_attr))
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(x1) + F.relu(x2) + F.relu(x3)

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=self.dropout_ratio, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.dropout(x, p=self.dropout_ratio, training=self.training)
        # For regression, we don't need log_softmax, just linear output
        x = self.lin3(x)

        return x

In [11]:
class HGPSLRegressionLightningModule(pl.LightningModule):
    def __init__(self, args, loss: Literal['mse', 'mae'] = 'mae', lr: float = 1e-3, weight_decay: float = 0.0):
        super().__init__()
        # Saving hyperparameters
        self.save_hyperparameters()
        
        # Initialize HGPSL regression model
        self.model = HGPSLRegressionModel(args)
        
        # Loss function
        if loss == 'mae':
            self.loss_module = nn.L1Loss()
        elif loss == 'mse':
            self.loss_module = nn.MSELoss()
        else:
            raise ValueError(f"Unsupported loss type: {loss}")
        
        self.lr = lr
        self.weight_decay = weight_decay

    def forward(self, data, mode="train"):
        x = self.model(data)
        x = x.squeeze(dim=-1) if x.dim() > 1 else x
        
        if hasattr(data, 'y'):
            loss = self.loss_module(x, data.y)
            return x, loss
        else:
            return x

    def configure_optimizers(self):
        optimizer = optim.AdamW(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        return optimizer

    def training_step(self, batch, batch_idx):
        loss = self.forward(batch, mode="train")[1]
        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        y, loss = self.forward(batch, mode="val")
        self.log("val_loss", loss)
        mae = nn.L1Loss()(y, batch.y)
        self.log("val_mae", mae)
        return loss

    def test_step(self, batch, batch_idx):
        y, loss = self.forward(batch, mode="test")
        self.log("test_loss", loss, prog_bar=True)
        mae = nn.L1Loss()(y, batch.y)   
        self.log("test_mae", mae, prog_bar=True)
        return {"test_loss": loss, "test_mae": mae}

In [12]:
def train_hgpsl_model(config, max_epochs=100, loss='mae', lr=1e-3, weight_decay=0.0):
    """
    Train HGPSL model with PyTorch Lightning
    
    Args:
        config: HGPSLConfig object with model parameters
        max_epochs: Maximum number of epochs to train
        loss: Loss function ('mae' or 'mse')
        lr: Learning rate
        weight_decay: Weight decay for optimizer
    
    Returns:
        model: Trained model
        result: Dictionary with training results
    """
    pl.seed_everything(42)

    # Create a PyTorch Lightning trainer
    root_dir = os.path.join(CHECKPOINT_PATH,
                            f"HGPSL_nhid_{config.nhid}" +
                            f"_loss_{loss}" +
                            f"_pooling_{config.pooling_ratio}" +
                            f"_dropout_{config.dropout_ratio}" +
                            f"_lr_{lr}" +
                            f"_wd_{weight_decay}")
    
    os.makedirs(root_dir, exist_ok=True)
    
    trainer = pl.Trainer(
        default_root_dir=root_dir,
        callbacks=[ModelCheckpoint(save_weights_only=True, mode="min", monitor="val_loss")],
        accelerator="cpu" if AVAIL_GPUS == 0 else "gpu",
        devices=max(1, AVAIL_GPUS),
        max_epochs=max_epochs,
        enable_progress_bar=True,
    )
    trainer.logger._default_hp_metric = None

    pl.seed_everything(42)
    model = HGPSLRegressionLightningModule(
        args=config,
        loss=loss,
        lr=lr,
        weight_decay=weight_decay
    )
    
    # Train the model
    trainer.fit(model, graph_train_loader, graph_val_loader)
    
    # Load best model
    model = HGPSLRegressionLightningModule.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)

    # Test best model on training and validation sets
    train_result = trainer.test(model, dataloaders=graph_train_loader, verbose=False)
    val_result = trainer.test(model, dataloaders=graph_val_loader, verbose=False)
    
    result = {
        "train": train_result[0]["test_loss"], 
        "val": val_result[0]["test_loss"],
        "train_mae": train_result[0]["test_mae"],
        "val_mae": val_result[0]["test_mae"]
    }
    
    return model, result

In [15]:
# Test the model with basic configuration - Quick test first
print("Testing HGPSL model with minimal configuration...")

# Create a smaller config for testing
test_config = HGPSLConfig()
test_config.nhid = 32  # Smaller hidden dimension
test_config.pooling_ratio = 0.8  # Keep more nodes
test_config.dropout_ratio = 0.2  # Less dropout
test_config.structure_learning = False  # Disable structure learning for faster testing

print("Test Configuration:")
for attr, value in vars(test_config).items():
    print(f"  {attr}: {value}")

try:
    model, result = train_hgpsl_model(
        config=test_config,
        max_epochs=5,  # Very few epochs for quick test
        loss='mae',
        lr=1e-3,
        weight_decay=1e-4
    )

    print("\nQuick Test Results:")
    print(f"Train Loss: {result['train']:.4f}, Train MAE: {result['train_mae']:.4f}")
    print(f"Val Loss: {result['val']:.4f}, Val MAE: {result['val_mae']:.4f}")
except Exception as e:
    print(f"Error during quick test: {e}")
    import traceback
    traceback.print_exc()

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42

  | Name        | Type                 | Params | Mode 
-------------------------------------------------------------
0 | model       | HGPSLRegressionModel | 5.8 K  | train
1 | loss_module | L1Loss               | 0      | train
-------------------------------------------------------------
5.8 K     Trainable params
0         Non-trainable params
5.8 K     Total params
0.023     Total estimated model params size (MB)
20        Modules in train mode
0         Modules in eval mode
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42

  | Name        | Type                 | Params | Mode 
-------------------------------------------------------------
0 | model       | HGPSLRegressionModel | 5.8 K  | train
1 | loss_module | L1Loss               | 0      | train
----------------

Testing HGPSL model with minimal configuration...
Test Configuration:
  num_features: 28
  nhid: 32
  num_classes: 1
  pooling_ratio: 0.8
  dropout_ratio: 0.2
  sample_neighbor: True
  sparse_attention: True
  structure_learning: False
  lamb: 1.0


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:476: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:476: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\trainer\conne

Testing: |          | 0/? [00:00<?, ?it/s]

c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\utilities\data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1516. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\utilities\data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1463. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\utilities\data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1427. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\utilities\data.py:79: Trying to infer the `batch_size` from an ambiguous collection.

Testing: |          | 0/? [00:00<?, ?it/s]


Quick Test Results:
Train Loss: 0.8067, Train MAE: 0.8067
Val Loss: 0.7718, Val MAE: 0.7718


In [16]:
# Now let's train with a more reasonable configuration
print("\n" + "="*60)
print("TRAINING WITH OPTIMIZED CONFIGURATION")
print("="*60)

# Create optimized config for better performance
optimized_config = HGPSLConfig()
optimized_config.nhid = 64  # Reasonable hidden dimension
optimized_config.pooling_ratio = 0.6  # Moderate pooling
optimized_config.dropout_ratio = 0.3  # Moderate dropout
optimized_config.structure_learning = True  # Enable structure learning
optimized_config.sparse_attention = True  # Keep sparse attention
optimized_config.sample_neighbor = True  # Sample neighbors for efficiency

print("Optimized Configuration:")
for attr, value in vars(optimized_config).items():
    print(f"  {attr}: {value}")

print("\nStarting optimized training...")
optimized_model, optimized_result = train_hgpsl_model(
    config=optimized_config,
    max_epochs=25,  # Reasonable number of epochs
    loss='mae',
    lr=1e-3,
    weight_decay=1e-4
)

print("\nOptimized Training Results:")
print(f"Train Loss: {optimized_result['train']:.4f}, Train MAE: {optimized_result['train_mae']:.4f}")
print(f"Val Loss: {optimized_result['val']:.4f}, Val MAE: {optimized_result['val_mae']:.4f}")

# Store the best model for later use
best_model = optimized_model

Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42

  | Name        | Type                 | Params | Mode 
-------------------------------------------------------------
0 | model       | HGPSLRegressionModel | 20.8 K | train
1 | loss_module | L1Loss               | 0      | train
-------------------------------------------------------------
20.8 K    Trainable params
0         Non-trainable params
20.8 K    Total params
0.083     Total estimated model params size (MB)
20        Modules in train mode
0         Modules in eval mode

  | Name        | Type                 | Params | Mode 
-------------------------------------------------------------
0 | model       | HGPSLRegressionModel | 20.8 K | train
1 | loss_module | L1Loss               | 0      | train
----------------


TRAINING WITH OPTIMIZED CONFIGURATION
Optimized Configuration:
  num_features: 28
  nhid: 64
  num_classes: 1
  pooling_ratio: 0.6
  dropout_ratio: 0.3
  sample_neighbor: True
  sparse_attention: True
  structure_learning: True
  lamb: 1.0

Starting optimized training...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=25` reached.


Testing: |          | 0/? [00:00<?, ?it/s]

c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\utilities\data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1413. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\utilities\data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1403. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\utilities\data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 1424. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
c:\Polimi\Master\2Sem\ACA_GraphML_Project\.conda\Lib\site-packages\pytorch_lightning\utilities\data.py:79: Trying to infer the `batch_size` from an ambiguous collection.

Testing: |          | 0/? [00:00<?, ?it/s]


Optimized Training Results:
Train Loss: 0.7217, Train MAE: 0.7217
Val Loss: 0.7040, Val MAE: 0.7040


## HGPSL Model Overview

The **Hierarchical Graph Pooling with Structure Learning (HGPSL)** model is an advanced graph neural network that combines:

### Key Features:
1. **Hierarchical Pooling**: Progressively reduces graph size while preserving important information
2. **Structure Learning**: Can learn new graph connections beyond the original structure
3. **Information Score**: Uses node information scores to determine which nodes to keep during pooling
4. **Multi-level Representations**: Combines features from different levels of the hierarchy

### Performance Characteristics:
- **Computational Cost**: Higher than basic GNNs due to structure learning and hierarchical processing
- **Memory Usage**: Moderate, with efficient sparse operations
- **Best For**: Graph-level tasks where hierarchical structure matters (molecular property prediction, etc.)

### Configuration Tips:
- **`nhid`**: Hidden dimension (32-128 for ZINC dataset)
- **`pooling_ratio`**: Fraction of nodes to keep (0.5-0.8 recommended)
- **`structure_learning`**: Enable for better performance, disable for speed
- **`sample_neighbor`**: Keep enabled for large graphs
- **`sparse_attention`**: Keep enabled for efficiency

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

@torch.no_grad()
def compute_predictions(model, data_loader):
    """Compute predictions for a given data loader"""
    model.eval()
    predictions = []
    for batch in data_loader:
        preds = model(batch)
        predictions.append(preds)
    return torch.cat(predictions, dim=0)

@torch.no_grad()
def compute_embeddings(model, data_loader):
    """Compute embeddings before the final linear layers"""
    model.eval()
    embeddings = []
    for batch in data_loader:
        # Get intermediate representations before final linear layers
        x, edge_index, batch_idx = batch.x, batch.edge_index, batch.batch
        edge_attr = None

        # Forward through GNN layers
        x = F.relu(model.model.conv1(x, edge_index, edge_attr))
        x, edge_index, edge_attr, batch_idx = model.model.pool1(x, edge_index, edge_attr, batch_idx)
        x1 = torch.cat([gmp(x, batch_idx), gap(x, batch_idx)], dim=1)

        x = F.relu(model.model.conv2(x, edge_index, edge_attr))
        x, edge_index, edge_attr, batch_idx = model.model.pool2(x, edge_index, edge_attr, batch_idx)
        x2 = torch.cat([gmp(x, batch_idx), gap(x, batch_idx)], dim=1)

        x = F.relu(model.model.conv3(x, edge_index, edge_attr))
        x3 = torch.cat([gmp(x, batch_idx), gap(x, batch_idx)], dim=1)

        x = F.relu(x1) + F.relu(x2) + F.relu(x3)
        embeddings.append(x)
    return torch.cat(embeddings, dim=0)

def plot_predictions(model, train_loader, val_loader, train_dataset, val_dataset):
    """Plot predictions vs true values"""
    train_preds = compute_predictions(model, train_loader)
    val_preds = compute_predictions(model, val_loader)
    
    plt.figure(figsize=(12, 5))
    
    # Training predictions
    plt.subplot(1, 2, 1)
    plt.scatter(train_dataset.y.cpu().numpy(), train_preds.cpu().numpy(), alpha=0.5, s=2)
    plt.plot([train_dataset.y.min(), train_dataset.y.max()], 
             [train_dataset.y.min(), train_dataset.y.max()], 'r--', lw=2)
    plt.xlabel('True Values')
    plt.ylabel('Predictions')
    plt.title('Training Set')
    
    # Validation predictions
    plt.subplot(1, 2, 2)
    plt.scatter(val_dataset.y.cpu().numpy(), val_preds.cpu().numpy(), alpha=0.5, s=2)
    plt.plot([val_dataset.y.min(), val_dataset.y.max()], 
             [val_dataset.y.min(), val_dataset.y.max()], 'r--', lw=2)
    plt.xlabel('True Values')
    plt.ylabel('Predictions')
    plt.title('Validation Set')
    
    plt.tight_layout()
    plt.show()

# Use the best model if available, otherwise use the test model
model_to_plot = best_model if 'best_model' in locals() else model
print(f"Using model for plotting: {type(model_to_plot).__name__}")

In [None]:
# Evaluate and visualize the best model
print("Plotting predictions vs true values...")
model_to_use = best_model if 'best_model' in locals() else model
plot_predictions(model_to_use, graph_train_loader, graph_val_loader, train_dataset, val_dataset)

In [None]:
# Hyperparameter experimentation
print("\n" + "="*50)
print("HYPERPARAMETER EXPERIMENTATION")
print("="*50)

results_comparison = []

# Test different hidden dimensions
for nhid in [64, 128, 256]:
    config = HGPSLConfig()
    config.nhid = nhid
    print(f"\nTesting with hidden dimension: {nhid}")
    
    model_exp, result_exp = train_hgpsl_model(
        config=config,
        max_epochs=30,  # Shorter for experimentation
        loss='mae',
        lr=1e-3,
        weight_decay=1e-4
    )
    
    results_comparison.append({
        'nhid': nhid,
        'val_mae': result_exp['val_mae']
    })
    
    print(f"Val MAE: {result_exp['val_mae']:.4f}")

# Test different pooling ratios
for pooling_ratio in [0.3, 0.5, 0.7]:
    config = HGPSLConfig()
    config.pooling_ratio = pooling_ratio
    print(f"\nTesting with pooling ratio: {pooling_ratio}")
    
    model_exp, result_exp = train_hgpsl_model(
        config=config,
        max_epochs=30,
        loss='mae',
        lr=1e-3,
        weight_decay=1e-4
    )
    
    results_comparison.append({
        'pooling_ratio': pooling_ratio,
        'val_mae': result_exp['val_mae']
    })
    
    print(f"Val MAE: {result_exp['val_mae']:.4f}")

print("\n" + "="*50)
print("EXPERIMENTAL RESULTS SUMMARY")
print("="*50)
for result in results_comparison:
    print(result)

In [None]:
# Visualize embeddings
print("\n" + "="*50)
print("EMBEDDING VISUALIZATION")
print("="*50)

# Compute embeddings
print("Computing embeddings...")
train_embeddings = compute_embeddings(model, graph_train_loader)
val_embeddings = compute_embeddings(model, graph_val_loader)

print(f"Training embeddings shape: {train_embeddings.shape}")
print(f"Validation embeddings shape: {val_embeddings.shape}")

# t-SNE visualization
try:
    from sklearn.manifold import TSNE
    print("Performing t-SNE visualization...")
    tsne = TSNE(n_components=2, random_state=42, verbose=1)
    val_embeddings_2d = tsne.fit_transform(val_embeddings.cpu().numpy())
    
    plt.figure(figsize=(10, 8))
    plt.scatter(val_embeddings_2d[:, 0], val_embeddings_2d[:, 1], 
                c=val_dataset.y.cpu().numpy(), cmap='viridis', s=10)
    plt.colorbar(label='Target Value')
    plt.title('t-SNE Visualization of HGPSL Validation Embeddings')
    plt.xlabel('t-SNE Component 1')
    plt.ylabel('t-SNE Component 2')
    plt.show()
except ImportError:
    print("scikit-learn not available for t-SNE visualization")

# UMAP visualization
try:
    import umap
    print("Performing UMAP visualization...")
    umap_model = umap.UMAP(n_components=2, random_state=42, verbose=True)
    val_embeddings_2d_umap = umap_model.fit_transform(val_embeddings.cpu().numpy())
    
    plt.figure(figsize=(10, 8))
    plt.scatter(val_embeddings_2d_umap[:, 0], val_embeddings_2d_umap[:, 1], 
                c=val_dataset.y.cpu().numpy(), cmap='viridis', s=10)
    plt.colorbar(label='Target Value')
    plt.title('UMAP Visualization of HGPSL Validation Embeddings')
    plt.xlabel('UMAP Component 1')
    plt.ylabel('UMAP Component 2')
    plt.show()
except ImportError:
    print("UMAP not available for visualization")

In [None]:
# Compare with traditional ML models using HGPSL embeddings
print("\n" + "="*50)
print("COMPARISON WITH TRADITIONAL ML MODELS")
print("="*50)

# XGBoost on HGPSL embeddings
try:
    from xgboost import XGBRegressor
    from sklearn.metrics import mean_absolute_error
    
    print("Training XGBoost on HGPSL embeddings...")
    xgb_model = XGBRegressor(
        objective='reg:squarederror',
        n_estimators=1000,
        max_depth=10,
        learning_rate=0.1,
        random_state=42
    )
    
    xgb_model.fit(
        train_embeddings.cpu().numpy(),
        train_dataset.y.cpu().numpy(),
        eval_set=[(val_embeddings.cpu().numpy(), val_dataset.y.cpu().numpy())],
        verbose=False
    )
    
    # Make predictions
    xgb_val_predictions = xgb_model.predict(val_embeddings.cpu().numpy())
    xgb_train_predictions = xgb_model.predict(train_embeddings.cpu().numpy())
    
    # Calculate MAE
    xgb_val_mae = mean_absolute_error(val_dataset.y.cpu().numpy(), xgb_val_predictions)
    xgb_train_mae = mean_absolute_error(train_dataset.y.cpu().numpy(), xgb_train_predictions)
    
    print(f"XGBoost - Train MAE: {xgb_train_mae:.4f}, Val MAE: {xgb_val_mae:.4f}")
    
    # Plot XGBoost predictions
    plt.figure(figsize=(12, 4))
    
    plt.subplot(1, 2, 1)
    plt.scatter(train_dataset.y.cpu().numpy(), xgb_train_predictions, alpha=0.5, s=2, color='blue')
    plt.plot([train_dataset.y.min(), train_dataset.y.max()], 
             [train_dataset.y.min(), train_dataset.y.max()], 'r--', lw=2)
    plt.xlabel('True Values')
    plt.ylabel('XGBoost Predictions')
    plt.title('XGBoost on HGPSL Embeddings - Training')
    
    plt.subplot(1, 2, 2)
    plt.scatter(val_dataset.y.cpu().numpy(), xgb_val_predictions, alpha=0.5, s=2, color='orange')
    plt.plot([val_dataset.y.min(), val_dataset.y.max()], 
             [val_dataset.y.min(), val_dataset.y.max()], 'r--', lw=2)
    plt.xlabel('True Values')
    plt.ylabel('XGBoost Predictions')
    plt.title('XGBoost on HGPSL Embeddings - Validation')
    
    plt.tight_layout()
    plt.show()
    
except ImportError:
    print("XGBoost not available")

# Random Forest on HGPSL embeddings
try:
    from sklearn.ensemble import RandomForestRegressor
    
    print("Training Random Forest on HGPSL embeddings...")
    rf_model = RandomForestRegressor(
        n_estimators=1000,
        max_depth=10,
        random_state=42,
        n_jobs=-1
    )
    
    rf_model.fit(
        train_embeddings.cpu().numpy(),
        train_dataset.y.cpu().numpy()
    )
    
    # Make predictions
    rf_val_predictions = rf_model.predict(val_embeddings.cpu().numpy())
    rf_train_predictions = rf_model.predict(train_embeddings.cpu().numpy())
    
    # Calculate MAE
    rf_val_mae = mean_absolute_error(val_dataset.y.cpu().numpy(), rf_val_predictions)
    rf_train_mae = mean_absolute_error(train_dataset.y.cpu().numpy(), rf_train_predictions)
    
    print(f"Random Forest - Train MAE: {rf_train_mae:.4f}, Val MAE: {rf_val_mae:.4f}")
    
except ImportError:
    print("scikit-learn not available for Random Forest")

In [None]:
# Summary and Conclusions
print("\n" + "="*50)
print("SUMMARY AND CONCLUSIONS")
print("="*50)

print("""
## HGPSL Model Testing Summary

This notebook demonstrates a complete PyTorch Lightning pipeline for testing the HGPSL 
(Hierarchical Graph Pooling with Structure Learning) model on the ZINC molecular dataset.

### Key Components:

1. **PyTorch Lightning Module**: HGPSLRegressionLightningModule
   - Wraps the HGPSL model for regression tasks
   - Handles training and validation loops
   - Supports MAE and MSE loss functions
   - Includes proper optimizer configuration

2. **Modified HGPSL Model**: HGPSLRegressionModel
   - Adapted from the original classification model
   - Removed log_softmax for regression output
   - Maintains hierarchical pooling capabilities

3. **Training Pipeline**: train_hgpsl_model()
   - Automated training with checkpointing
   - Model selection based on validation loss
   - Comprehensive result reporting

4. **Evaluation and Visualization**:
   - Prediction vs true value plots for train/val sets
   - Embedding visualization with t-SNE/UMAP
   - Comparison with traditional ML models
   - Hyperparameter experimentation

### Model Architecture Features:
- Hierarchical graph pooling with attention
- Structure learning capabilities
- Multi-level graph representations
- Global pooling aggregation

### Usage:
```python
# Basic usage
config = HGPSLConfig()
model, results = train_hgpsl_model(config, max_epochs=100)

# With custom parameters
config.nhid = 256
config.pooling_ratio = 0.3
model, results = train_hgpsl_model(config, loss='mae', lr=1e-3)
```

This provides a comprehensive framework for testing and experimenting with HGPSL models
on graph regression tasks using training and validation data.
""")

print("Testing completed successfully!")