# **Graph Learning Project - ZINC exp 1**

By Shahar Cohen 205669260 & Alexander petrunin 205782568

# Installation

In [1]:
!pip install -q torch-geometric

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m61.4/63.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━[0m [32m0.7/1.1 MB[0m [31m19.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[?25h

# SETUP

In [35]:
import torch
import torch.nn as nn
from torch_geometric.nn import GPSConv, GatedGraphConv, TransformerConv, GINEConv
from torch_geometric.data import Data
import torch.nn.functional as F
from torch_geometric.nn import global_mean_pool, global_add_pool

from torch_geometric.transforms import AddLaplacianEigenvectorPE
import torch_geometric

import torch.optim as optim

from torch_geometric.datasets import ZINC
from torch_geometric.loader import DataLoader

import torch.optim as optim
from torch_geometric.data import DataLoader
from sklearn.metrics import mean_squared_error

from torch_geometric.transforms import AddRandomWalkPE

import torch.nn.functional as F

# MODEL:

In [4]:
class MLPBlock(nn.Module):
    def __init__(self, in_channels, hidden_channels):
        super(MLPBlock, self).__init__()
        self.fc1 = nn.Linear(in_channels, hidden_channels)
        self.fc2 = nn.Linear(hidden_channels, hidden_channels)

    def forward(self, x):
        x = x.float()  # Ensure the input is float before passing it to the linear layer
        x = F.relu(self.fc1(x))  # Apply ReLU activation after the first linear layer
        x = self.fc2(x)  # The second layer keeps the number of features as hidden_channels
        return x


In [32]:
class GraphGPSModel(nn.Module):

    def __init__(self, input_dim, hidden_dim, output_dim, pe_in_dim, pe_out_dim, num_layers):
        super(GraphGPSModel, self).__init__()

        # MLP layers
        self.mlp1 = MLPBlock(input_dim + pe_out_dim, hidden_dim)

        # Create MLP layers for GINEConv GPSConv layers
        self.mlps = nn.ModuleList([
            nn.Sequential(
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim)
            )
            for _ in range(num_layers)
        ])

        # Create GPSConv layers
        self.gps_layers = nn.ModuleList([
            GPSConv(
                hidden_dim,
                conv=GINEConv(self.mlps[i], eps=0.0, train_eps=False, edge_dim=3),
                heads=4,
                attn_kwargs={'dropout': 0.5}
            )
            for i in range(num_layers)
        ])

        # Final fully connected layer
        self.fc = nn.Linear(hidden_dim, output_dim)

        # PE layers
        self.bn_pe = nn.BatchNorm1d(pe_in_dim)
        self.fc_pe = nn.Linear(pe_in_dim, pe_out_dim)



    def forward(self, data):

        x, edge_index, batch, pe, edge_attr = (
            data.x,
            data.edge_index,
            data.batch,
            data.random_walk_pe,
            data.edge_attr
        )

        # Transform edge attributes
        dummy_tensor = torch.zeros(edge_attr.size(0), 3, device=edge_attr.device)  # Change size to 3 for the new dummy
        dummy_tensor[edge_attr == 1, 0] = 1  # Keep the condition for edge_attr == 1
        dummy_tensor[edge_attr == 2, 1] = 1  # Keep the condition for edge_attr == 2
        dummy_tensor[edge_attr == 3, 2] = 1  # New condition for edge_attr == 3
        edge_attr = dummy_tensor


        # Process positional encodings (PE)
        pe = self.bn_pe(pe)
        pe = self.fc_pe(pe)

        # Concatenate PE to node features
        x = torch.cat([x, pe], dim=1)

        # Initial MLP processing
        x = self.mlp1(x)

        # Sequentially apply GPSConv layers
        for gps_layer in self.gps_layers:
            x = gps_layer(x, edge_index, batch=batch, edge_attr=edge_attr)

        # Global pooling to aggregate node features into graph features
        x = global_add_pool(x, batch)

        # Final classification layer
        x = self.fc(x)
        return x


# Load ZINC and add PE:

In [8]:
# Load the ZINC dataset
transform = AddRandomWalkPE(walk_length=20)

# Load the ZINC dataset with predefined splits
train_dataset = ZINC(root='./data', subset=True, split='train', transform=transform)
val_dataset = ZINC(root='./data', subset=True, split='val', transform=transform)
test_dataset = ZINC(root='./data', subset=True, split='test', transform=transform)

# Create DataLoaders for batching
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

Downloading https://www.dropbox.com/s/feo9qle74kg48gy/molecules.zip?dl=1
Extracting data/molecules.zip
Downloading https://raw.githubusercontent.com/graphdeeplearning/benchmarking-gnns/master/data/molecules/train.index
Downloading https://raw.githubusercontent.com/graphdeeplearning/benchmarking-gnns/master/data/molecules/val.index
Downloading https://raw.githubusercontent.com/graphdeeplearning/benchmarking-gnns/master/data/molecules/test.index
Processing...
Processing train dataset: 100%|██████████| 10000/10000 [00:00<00:00, 12798.26it/s]
Processing val dataset: 100%|██████████| 1000/1000 [00:00<00:00, 3800.26it/s]
Processing test dataset: 100%|██████████| 1000/1000 [00:00<00:00, 8769.89it/s]
Done!


Some tests that can be deleted before submission:

In [17]:

# Example of a data sample from the training set
data = train_dataset[10]
print(f'Training Node feature shape: {data.x.shape}, Edge index shape: {data.edge_index.shape}')

# Example of a data sample from the testing set
test_data = test_dataset[0]
print(f'Testing Node feature shape: {test_data.x.shape}, Edge index shape: {test_data.edge_index.shape}')

Training Node feature shape: torch.Size([23, 1]), Edge index shape: torch.Size([2, 48])
Testing Node feature shape: torch.Size([16, 1]), Edge index shape: torch.Size([2, 34])


In [37]:
data

Data(x=[18, 1], edge_index=[2, 38], edge_attr=[38], y=[1], random_walk_pe=[18, 20])

In [20]:
from collections import Counter

# Initialize a Counter to aggregate edge attributes across the entire dataset
total_edge_attr_counts = Counter()

# Iterate over the training dataset
for data in train_dataset:
    # Convert edge_attr tensor to a list and update the Counter
    total_edge_attr_counts.update(data.edge_attr.tolist())

# Print the aggregated counts
print(total_edge_attr_counts)

Counter({1: 370120, 2: 127096, 3: 1342})


# Training

In [33]:
# Training loop
def train():
    model.train()
    total_loss = 0
    for data in train_loader:
        optimizer.zero_grad()

        # Move data to the same device as the model
        data = data.to(device)

        # Forward pass
        output = model(data)

        # Get the target values (penalized logP)
        y = data.y.view(-1, 1).to(device)  # Ensure target is on the same device as the model

        # Compute the loss
        loss = criterion(output, y)
        loss.backward()

        # Optimization step
        optimizer.step()

        total_loss += loss.item()


    return total_loss / len(train_loader)


# Define a function to evaluate the model on a given dataset
def evaluate(loader):
    model.eval()
    total_loss = 0
    with torch.no_grad():  # Disable gradient computation for evaluation
        for data in loader:
            data = data.to(device)
            output = model(data)
            y = data.y.view(-1, 1).to(device)
            loss = criterion(output, y)
            total_loss += loss.item()
    return total_loss / len(loader)


In [38]:
num_layers = 10
input_dim = train_dataset.num_features
hidden_dim = 64
output_dim = 1
pe_in_dim = 20
pe_out_dim = 28

weight_decay = 1e-5
lr = 0.001
epochs_num = 250

In [39]:
# Define the model

model = GraphGPSModel(input_dim=input_dim, hidden_dim=hidden_dim,  output_dim=output_dim, pe_in_dim=pe_in_dim, pe_out_dim=pe_out_dim, num_layers=num_layers)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Loss function
criterion = nn.MSELoss()

# Initialize variables to track the best model
best_val_loss = float('inf')
best_model = None

# Training the model for epochs_num:
for epoch in range(epochs_num):
    # Train the model for one epoch
    train_loss = train()

    # Evaluate the model on the validation set
    val_loss = evaluate(val_loader)

    print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

    # Check if this is the best validation loss we've seen
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # Save a copy of the best model
        best_model = model.state_dict()  # No need for deepcopy
        print(f'New best model saved at epoch {epoch+1} with Validation Loss: {val_loss:.4f}')

# After training, you can save the best model to disk
torch.save(best_model, 'best_model_ZINC.pth')
print("Best model saved to 'best_model_ZINC.pth'.")

Epoch 1, Train Loss: 3.6688, Validation Loss: 1.7366
New best model saved at epoch 1 with Validation Loss: 1.7366
Epoch 2, Train Loss: 1.4720, Validation Loss: 1.5853
New best model saved at epoch 2 with Validation Loss: 1.5853
Epoch 3, Train Loss: 1.2599, Validation Loss: 1.9845
Epoch 4, Train Loss: 1.1281, Validation Loss: 1.1673
New best model saved at epoch 4 with Validation Loss: 1.1673
Epoch 5, Train Loss: 0.9987, Validation Loss: 1.1917
Epoch 6, Train Loss: 0.9766, Validation Loss: 0.9305
New best model saved at epoch 6 with Validation Loss: 0.9305
Epoch 7, Train Loss: 0.9103, Validation Loss: 1.0626
Epoch 8, Train Loss: 0.9221, Validation Loss: 1.1310
Epoch 9, Train Loss: 0.8731, Validation Loss: 1.1618
Epoch 10, Train Loss: 0.9117, Validation Loss: 0.8711
New best model saved at epoch 10 with Validation Loss: 0.8711
Epoch 11, Train Loss: 0.7969, Validation Loss: 0.7376
New best model saved at epoch 11 with Validation Loss: 0.7376
Epoch 12, Train Loss: 0.8817, Validation Loss: 

#Test Score:

In [40]:
def test_score():
    model.eval()  # Set the model to evaluation mode
    total_mae = 0.0
    num_batches = 0

    with torch.no_grad():  # Disable gradient computation during evaluation
        for batch in test_loader:  # Assuming you have a DataLoader for your test set
            # Move batch data to the same device as the model
            data = batch.to(device)

            # Forward pass (prediction)
            output = model(data)

            # Ensure target is the correct shape
            target = data.y.view(-1, 1).to(device)  # Match output shape: [batch_size, 1]

            # Compute Mean Absolute Error (MAE)
            mae_loss = F.l1_loss(output, target)

            total_mae += mae_loss.item()
            num_batches += 1

    # Return average MAE over all batches in the test set
    avg_mae = total_mae / num_batches
    return avg_mae


# Load the best model's state dictionary
model.load_state_dict(best_model)
model.to(device)  # Ensure the model is on the correct device (GPU or CPU)

# Now you can evaluate the model on the test set
test_mae = test_score()
print(f"Test MAE: {test_mae:.4f}")



Test MAE: 0.2661
