In [39]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GINConv, GATConv, global_mean_pool, EdgeConv, TransformerConv
from torch_geometric.loader import DataLoader
from torch.nn import Linear, Sequential, ReLU, BatchNorm1d
import numpy as np
import pandas as pd
from tqdm import tqdm
import time

## 1. Define Models

In [5]:
# --- 1. Define the GCN Model (with Batch Normalization and F.relu()) ---
class GCN(torch.nn.Module):
    """Three-stage GCN regressor with a per-graph mean-pool readout.

    Every graph convolution is followed by batch normalisation and ReLU.
    Dropout (p=0.5) is applied after the first two stages only. Node
    embeddings are then averaged per graph and passed through a final
    linear layer.

    Args:
        num_node_features: Width of the input node feature vectors.
        hidden_channels: Width of every hidden layer.
        output_dim: Number of values produced per graph.
    """

    def __init__(self, num_node_features, hidden_channels, output_dim=1):
        super().__init__()
        # Re-seeds the global torch RNG so parameter initialisation is repeatable.
        torch.manual_seed(12345)
        width = hidden_channels
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order in which random numbers are drawn.
        self.conv1 = GCNConv(num_node_features, width)
        self.bn1 = BatchNorm1d(width)
        self.conv2 = GCNConv(width, width)
        self.bn2 = BatchNorm1d(width)
        self.conv3 = GCNConv(width, width)
        self.bn3 = BatchNorm1d(width)
        self.lin = Linear(width, output_dim)

    def forward(self, data):
        """Return one row of predictions per graph contained in ``data``."""
        h = data.x
        edges = data.edge_index
        graph_ids = data.batch

        # Stages 1 and 2: convolution -> batch norm -> ReLU -> dropout.
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            h = F.relu(norm(conv(h, edges)))
            h = F.dropout(h, p=0.5, training=self.training)

        # Stage 3 feeds the readout directly, without dropout.
        h = F.relu(self.bn3(self.conv3(h, edges)))

        # Average node embeddings per graph, then regress.
        return self.lin(global_mean_pool(h, graph_ids))

In [6]:
# --- 2. Define the GIN Model (with Batch Normalization and F.relu()) ---
class GIN(torch.nn.Module):
    """Three-stage GIN regressor with a per-graph mean-pool readout.

    Each GINConv wraps a small MLP (Linear -> BatchNorm -> ReLU -> Linear ->
    ReLU). The output of every convolution is batch-normalised and passed
    through ReLU; dropout (p=0.5) follows the first two stages only. Node
    embeddings are averaged per graph and mapped to ``output_dim`` values.

    Args:
        num_node_features: Width of the input node feature vectors.
        hidden_channels: Width of every hidden layer.
        output_dim: Number of values produced per graph.
    """

    def __init__(self, num_node_features, hidden_channels, output_dim=1):
        super(GIN, self).__init__()
        # Re-seeds the global torch RNG so parameter initialisation is repeatable.
        torch.manual_seed(12345)
        # The MLPs must contain the ReLU *module*: Sequential only accepts
        # modules, and calling the functional F.relu() without an input raises
        # a TypeError as soon as the model is constructed.
        self.conv1 = GINConv(
            Sequential(Linear(num_node_features, hidden_channels), BatchNorm1d(hidden_channels), ReLU(),
                       Linear(hidden_channels, hidden_channels), ReLU())
        )
        self.bn1 = BatchNorm1d(hidden_channels)
        self.conv2 = GINConv(
            Sequential(Linear(hidden_channels, hidden_channels), BatchNorm1d(hidden_channels), ReLU(),
                       Linear(hidden_channels, hidden_channels), ReLU())
        )
        self.bn2 = BatchNorm1d(hidden_channels)
        self.conv3 = GINConv(
            Sequential(Linear(hidden_channels, hidden_channels), BatchNorm1d(hidden_channels), ReLU(),
                       Linear(hidden_channels, hidden_channels), ReLU())
        )
        self.bn3 = BatchNorm1d(hidden_channels)
        self.lin = Linear(hidden_channels, output_dim)

    def forward(self, data):
        """Return one row of predictions per graph contained in ``data``."""
        x, edge_index, batch = data.x, data.edge_index, data.batch

        # Stage 1: convolution -> batch norm -> ReLU -> dropout.
        x = self.conv1(x, edge_index)
        x = self.bn1(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        # Stage 2: same pattern.
        x = self.conv2(x, edge_index)
        x = self.bn2(x)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)

        # Stage 3: no dropout before the readout.
        x = self.conv3(x, edge_index)
        x = self.bn3(x)
        x = F.relu(x)

        # Average node embeddings per graph, then regress.
        x = global_mean_pool(x, batch)
        x = self.lin(x)
        return x

In [7]:
# --- 3. Define the GAT Model (with Batch Normalization and F.relu()) ---
class GAT(torch.nn.Module):
    """Three-stage graph-attention regressor with a per-graph mean-pool readout.

    The first two attention layers use ``heads`` heads and are followed by
    batch-norm layers of width ``hidden_channels * heads``; the third uses a
    single head and width ``hidden_channels``. Each stage applies batch norm
    and ReLU, with dropout (p=0.5) after the first two stages only.

    Args:
        num_node_features: Width of the input node feature vectors.
        hidden_channels: Per-head width of the hidden layers.
        heads: Number of attention heads in the first two layers.
        output_dim: Number of values produced per graph.
    """

    def __init__(self, num_node_features, hidden_channels, heads=8, output_dim=1):
        super().__init__()
        # Re-seeds the global torch RNG so parameter initialisation is repeatable.
        torch.manual_seed(12345)
        multi_head_width = hidden_channels * heads
        # Attribute names and creation order are part of the saved-model layout.
        self.conv1 = GATConv(num_node_features, hidden_channels, heads=heads)
        self.bn1 = BatchNorm1d(multi_head_width)
        self.conv2 = GATConv(multi_head_width, hidden_channels, heads=heads)
        self.bn2 = BatchNorm1d(multi_head_width)
        self.conv3 = GATConv(multi_head_width, hidden_channels, heads=1)
        self.bn3 = BatchNorm1d(hidden_channels)
        self.lin = Linear(hidden_channels, output_dim)

    def forward(self, data):
        """Return one row of predictions per graph contained in ``data``."""
        features, edges, graph_ids = data.x, data.edge_index, data.batch

        # Stage 1 (multi-head) with dropout.
        features = F.relu(self.bn1(self.conv1(features, edges)))
        features = F.dropout(features, p=0.5, training=self.training)

        # Stage 2 (multi-head) with dropout.
        features = F.relu(self.bn2(self.conv2(features, edges)))
        features = F.dropout(features, p=0.5, training=self.training)

        # Stage 3 (single head), no dropout before pooling.
        features = F.relu(self.bn3(self.conv3(features, edges)))

        pooled = global_mean_pool(features, graph_ids)
        return self.lin(pooled)


In [28]:
# --- 4. Define the EdgeConv Model (Corrected Linear Input Dimension) ---
class EdgeConvNet(torch.nn.Module):
    """Three-stage EdgeConv regressor with a per-graph mean-pool readout.

    Every EdgeConv wraps a Linear -> BatchNorm -> ReLU block whose Linear
    input is twice the incoming node-feature width. Each stage is followed
    by batch norm and ReLU, with dropout (p=0.5) after the first two stages.

    Args:
        num_node_features: Width of the input node feature vectors.
        hidden_channels: Width of every hidden layer.
        output_dim: Number of values produced per graph.
    """

    def __init__(self, num_node_features, hidden_channels, output_dim=1):
        super().__init__()
        # Re-seeds the global torch RNG so parameter initialisation is repeatable.
        torch.manual_seed(12345)

        def edge_mlp(in_features):
            # The Linear consumes 2 * in_features values per edge message.
            return Sequential(
                Linear(2 * in_features, hidden_channels),
                BatchNorm1d(hidden_channels),
                ReLU(),
            )

        # Layers are created in the same order as they are applied.
        self.conv1 = EdgeConv(edge_mlp(num_node_features))
        self.bn1 = BatchNorm1d(hidden_channels)
        self.conv2 = EdgeConv(edge_mlp(hidden_channels))
        self.bn2 = BatchNorm1d(hidden_channels)
        self.conv3 = EdgeConv(edge_mlp(hidden_channels))
        self.bn3 = BatchNorm1d(hidden_channels)
        self.lin = Linear(hidden_channels, output_dim)

    def forward(self, data):
        """Return one row of predictions per graph contained in ``data``."""
        h = data.x
        edges = data.edge_index
        graph_ids = data.batch

        stages = (
            (self.conv1, self.bn1, True),
            (self.conv2, self.bn2, True),
            (self.conv3, self.bn3, False),  # last stage: no dropout before pooling
        )
        for conv, norm, use_dropout in stages:
            h = F.relu(norm(conv(h, edges)))
            if use_dropout:
                h = F.dropout(h, p=0.5, training=self.training)

        return self.lin(global_mean_pool(h, graph_ids))

In [35]:
# --- 4. Define the Graph Transformer Model (using TransformerConv) ---
class GraphTransformerNet(torch.nn.Module):
    """Three-stage TransformerConv regressor that also consumes edge features.

    The first two layers use ``heads`` attention heads and are followed by
    batch-norm layers of width ``hidden_channels * heads``; the third uses one
    head. Every layer receives ``edge_attr``. Each stage applies batch norm
    and ReLU, with dropout (p=0.5) after the first two stages, and the node
    embeddings are mean-pooled per graph before the final linear layer.

    Args:
        num_node_features: Width of the input node feature vectors.
        num_edge_features: Width of the edge feature vectors (``edge_dim``).
        hidden_channels: Per-head width of the hidden layers.
        heads: Number of attention heads in the first two layers.
        output_dim: Number of values produced per graph.
    """

    def __init__(self, num_node_features, num_edge_features, hidden_channels, heads=4, output_dim=1):
        super().__init__()
        # Re-seeds the global torch RNG so parameter initialisation is repeatable.
        torch.manual_seed(12345)
        wide = hidden_channels * heads  # feature width after a multi-head layer

        self.conv1 = TransformerConv(
            in_channels=num_node_features,
            out_channels=hidden_channels,
            heads=heads,
            edge_dim=num_edge_features,
        )
        self.bn1 = BatchNorm1d(wide)
        self.conv2 = TransformerConv(
            in_channels=wide,
            out_channels=hidden_channels,
            heads=heads,
            edge_dim=num_edge_features,
        )
        self.bn2 = BatchNorm1d(wide)
        # The final attention layer uses a single head.
        self.conv3 = TransformerConv(
            in_channels=wide,
            out_channels=hidden_channels,
            heads=1,
            edge_dim=num_edge_features,
        )
        self.bn3 = BatchNorm1d(hidden_channels)
        self.lin = Linear(hidden_channels, output_dim)

    def forward(self, data):
        """Return one row of predictions per graph contained in ``data``."""
        h = data.x
        edges = data.edge_index
        graph_ids = data.batch
        edge_feats = data.edge_attr

        # Stages 1 and 2: attention -> batch norm -> ReLU -> dropout.
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            h = F.relu(norm(conv(h, edges, edge_feats)))
            h = F.dropout(h, p=0.5, training=self.training)

        # Stage 3: no dropout before the readout.
        h = F.relu(self.bn3(self.conv3(h, edges, edge_feats)))

        # Graph-level mean pooling followed by the regression head.
        return self.lin(global_mean_pool(h, graph_ids))

## 2. Data Loading and Preparation 

In [9]:
import sys
# Install PyTorch Geometric (package `pyg` from the `pyg` channel) with mamba.
# Only runs on Linux and macOS; every other platform skips the install.
# NOTE(review): the notebook's first cell already imports `torch_geometric`,
# so on a fresh environment that import runs before this install — confirm
# the intended cell order.
if sys.platform.startswith(("linux", "darwin")):
    !mamba install -q -y -c pyg pyg

In [10]:
from torch_geometric.datasets import QM9
from pathlib import Path
# Local data directory, relative to the notebook's working directory.
HERE = Path("./data")
# DATA is the path handed to the dataset as its root folder.
DATA = HERE
# Build the QM9 dataset object rooted at DATA.
# NOTE(review): presumably downloads and processes the files on first use — confirm.
qm9 = QM9(root=DATA)

In [11]:
qm9[0]

Data(x=[5, 11], edge_index=[2, 8], edge_attr=[8, 4], y=[1, 19], pos=[5, 3], z=[5], smiles='[H]C([H])([H])[H]', name='gdb_1', idx=[1])

In [21]:
# --- 5. Data Loading and Preparation (expects the QM9 dataset loaded as `qm9`) ---
# This cell overwrites the stored targets in place, so run it once on a
# freshly loaded dataset.

# Keep only the first target column (dipole moment) as the regression target.
y_target = pd.DataFrame(qm9.data.y.numpy())
qm9.data.y = torch.Tensor(y_target[0])

qm9 = qm9.shuffle()  # Shuffle the dataset

# 80 % train / 10 % test / 10 % validation, expressed as cumulative end indices.
data_size = len(qm9)
train_index = int(data_size * 0.8)
test_index = train_index + int(data_size * 0.1)
val_index = test_index + int(data_size * 0.1)

# --- Data Normalization ---
# `shuffle()` only permutes the dataset's index view; `qm9.data.y` is still in
# storage order. Slicing it with `[0:train_index]` would therefore take the
# first 80 % of the stored molecules rather than the training split. Look up
# the storage positions of the training graphs so the statistics come from the
# training set only.
train_positions = torch.as_tensor(list(qm9.indices()[:train_index]), dtype=torch.long)
train_targets = qm9.data.y[train_positions]
train_mean = train_targets.mean()
train_std = train_targets.std()

# Standardise every target with the training statistics (keep `train_mean` and
# `train_std` to convert predictions back to physical units).
qm9.data.y = (qm9.data.y - train_mean) / train_std

train_loader = DataLoader(qm9[0:train_index], batch_size=64, shuffle=True)
# Evaluation loaders keep a fixed order; shuffling is only needed for training.
test_loader = DataLoader(qm9[train_index:test_index], batch_size=64, shuffle=False)
val_loader = DataLoader(qm9[test_index:val_index], batch_size=64, shuffle=False)



## 3. Define Training Function

In [13]:
# --- 6. Training and Evaluation Functions (Same as before - no changes needed) ---
def train(model, optimizer, loader, loss_fn):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(batch)``; it is switched to training mode.
        optimizer: Optimiser that updates ``model``'s parameters.
        loader: Sized iterable of batches exposing a ``y`` target attribute.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.

    Returns:
        The sum of the per-batch losses divided by ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for batch in loader:
        optimizer.zero_grad()
        prediction = model(batch)
        # Targets are reshaped to a column so they line up with the model output.
        target = batch.y.unsqueeze(1)
        batch_loss = loss_fn(prediction, target)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

@torch.no_grad()
def evaluate(model, loader, loss_fn):
    """Return the mean batch loss of ``model`` over ``loader`` without training.

    The model is switched to evaluation mode and gradient tracking is
    disabled for the whole call.

    Args:
        model: Module called as ``model(batch)``.
        loader: Sized iterable of batches exposing a ``y`` target attribute.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.

    Returns:
        The sum of the per-batch losses divided by ``len(loader)``.
    """
    model.eval()
    # Targets are reshaped to a column so they line up with the model output.
    batch_losses = [
        loss_fn(model(batch), batch.y.unsqueeze(1)).item()
        for batch in loader
    ]
    return sum(batch_losses) / len(loader)

## 4. Training

In [38]:
# --- 7. Run Training and Evaluation for each model (Split into Cells) ---

# Registry mapping a display name to its model class.
# EdgeConvNet is intentionally left out of this registry.
model_types = {"GCN": GCN, "GIN": GIN, "GAT": GAT, "GraphTransformer": GraphTransformerNet} # Removed EdgeConv from model_types
# Starts empty; the GCN training cell stores its outcome under results["GCN"].
results = {}

In [25]:
# --- 7.1. Training GCN Model ---
# Trains a GCN for `epochs` epochs, records the train/validation loss per
# epoch, evaluates once on the test loader, stores the outcome in `results`
# and saves the learned weights to disk.
model_name = "GCN"
model_class = GCN

print(f"\n--- Training {model_name} Model ---")

hidden_channels = 64
model = model_class(num_node_features=qm9.num_node_features, hidden_channels=hidden_channels)

# Adam optimiser with mean-squared-error loss.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_function = torch.nn.MSELoss()
epochs = 100
# Per-epoch loss histories for this model.
train_losses_gcn = []
val_losses_gcn = []

# NOTE(review): `start_time` is never read in this cell — confirm whether a
# total-duration report was intended.
start_time = time.time() # Start time for training

for epoch in tqdm(range(1, epochs + 1), desc=f"Training {model_name}"):
    epoch_start_time = time.time() # Epoch start time
    train_loss = train(model, optimizer, train_loader, loss_function)
    val_loss = evaluate(model, val_loader, loss_function)
    train_losses_gcn.append(train_loss)
    val_losses_gcn.append(val_loss)
    # The measured time covers both the training pass and the validation pass.
    epoch_time = time.time() - epoch_start_time # Epoch training time

    # Report progress every tenth epoch only.
    if epoch % 10 == 0:
        print(f'Epoch: {epoch:03d}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Epoch Time: {epoch_time:.2f}s') # Print epoch time

# Final evaluation on the held-out test loader.
test_loss_gcn = evaluate(model, test_loader, loss_function)
results["GCN"] = {"test_loss": test_loss_gcn, "train_losses": train_losses_gcn, "val_losses": val_losses_gcn}
print(f'\n{model_name} Test Loss: {test_loss_gcn:.4f}')

# Persist only the parameters (state_dict), written to the working directory.
torch.save(model.state_dict(), f"{model_name}_model.pt") # Save model
print(f"Saved {model_name} model to: {model_name}_model.pt") # Confirmation message

Training 1:   0%|          | 0/5 [00:00<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (2308x22 and 128x64)

In [37]:
# --- 7.6. Training Graph Transformer Model ---
# Trains GraphTransformerNet for `epochs` epochs, records the train/validation
# loss per epoch and evaluates once on the test loader.
model_name = "GraphTransformer" # New model name
model_class = GraphTransformerNet # Use the GraphTransformerNet class definition

print(f"\n--- Training {model_name} Model ---")

hidden_channels = 64 # Common hidden channel size for all models
# Initialize GraphTransformerNet, passing num_edge_features
model = model_class(num_node_features=qm9.num_node_features, num_edge_features=qm9.num_edge_features, hidden_channels=hidden_channels, heads=4) # GraphTransformer Model, heads=4

# Adam optimiser with mean-squared-error loss.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_function = torch.nn.MSELoss()
# NOTE(review): only a single epoch is run here, unlike the 100 epochs used in
# the GCN cell — confirm whether this is a smoke-test setting.
epochs = 1
train_losses_graph_transformer = [] # Specific loss lists for GraphTransformer
val_losses_graph_transformer = []

for epoch in tqdm(range(1, epochs + 1), desc=f"Training {model_name}"):
    train_loss = train(model, optimizer, train_loader, loss_function)
    val_loss = evaluate(model, val_loader, loss_function)
    train_losses_graph_transformer.append(train_loss)
    val_losses_graph_transformer.append(val_loss)
    # With epochs = 1 this condition is never true, so nothing is printed per epoch.
    if epoch % 10 == 0: # Print less frequently
        print(f'Epoch: {epoch:03d}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

# Final evaluation on the held-out test loader.
test_loss_graph_transformer = evaluate(model, test_loader, loss_function)
# NOTE(review): storing into `results` is disabled below, so this model's
# outcome is not recorded alongside the others — confirm this is intended.
#results["GraphTransformer"] = {"test_loss": test_loss_graph_transformer, "train_losses": train_losses_graph_transformer, "val_losses": val_losses_graph_transformer} # Store results for GraphTransformer
print(f'\n{model_name} Test Loss: {test_loss_graph_transformer:.4f}')



--- Training GraphTransformer Model ---


Training GraphTransformer: 100%|██████████| 1/1 [01:39<00:00, 99.73s/it]



GraphTransformer Test Loss: 0.4860


In [None]:


# --- 7. Run Training and Evaluation for each model (including EdgeConvNet) ---

# Name -> class for every model trained by this cell.
model_types = {"GCN": GCN, "GIN": GIN, "GAT": GAT, "EdgeConv": EdgeConvNet}
# Per-model test loss and loss curves, filled in below.
results = {}

# Constructor arguments beyond the shared ones; only GAT takes an extra one.
extra_model_kwargs = {"GAT": {"heads": 8}}

for model_name, model_class in model_types.items():
    print(f"\n--- Training {model_name} Model ---")

    # Same hidden width for every model so the comparison stays like for like.
    hidden_channels = 64
    model = model_class(
        num_node_features=qm9.num_node_features,
        hidden_channels=hidden_channels,
        **extra_model_kwargs.get(model_name, {}),
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_function = torch.nn.MSELoss()
    epochs = 100
    train_losses, val_losses = [], []

    progress = tqdm(range(1, epochs + 1), desc=f"Training {model_name}")
    for epoch in progress:
        train_loss = train(model, optimizer, train_loader, loss_function)
        val_loss = evaluate(model, val_loader, loss_function)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        # Only every tenth epoch is reported.
        if epoch % 10 != 0:
            continue
        print(f'Epoch: {epoch:03d}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

    # Final evaluation on the held-out test loader.
    test_loss = evaluate(model, test_loader, loss_function)
    results[model_name] = dict(test_loss=test_loss, train_losses=train_losses, val_losses=val_losses)
    print(f'\n{model_name} Test Loss: {test_loss:.4f}')


# --- 8. Compare and Plot Results ---
import matplotlib.pyplot as plt

# One validation-loss curve per entry in `results`.
fig, ax = plt.subplots(figsize=(12, 8))
for model_name, result in results.items():
    val_curve = result["val_losses"]
    # The x-axis comes from the curve's own length rather than from the
    # `epochs` variable left over from the training loop, so curves recorded
    # with different epoch counts still plot instead of raising a shape error.
    ax.plot(range(1, len(val_curve) + 1), val_curve, label=f'{model_name} Validation Loss')

ax.set_xlabel('Epoch')
ax.set_ylabel('Validation Loss (MSE)')
ax.set_title('Validation Loss Comparison for GCN, GIN, GAT, and EdgeConv (with Batch Norm and F.relu)')
ax.legend()
ax.grid(True)
plt.show()


# Text summary of the final test loss for every model.
for model_name, result in results.items():
    print(f"{model_name} Test Loss: {result['test_loss']:.4f}")