<a href="https://colab.research.google.com/github/Ankita5051/Deep-learning-projects/blob/main/Predicting_Molecular_Properties.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install torch_geometric



In [4]:
!pip install gensim



In [5]:
import torch
import torch_geometric
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import random_split
from torch_geometric.datasets import QM9
from torch_geometric.data import DataLoader
from torch_geometric.nn import MessagePassing,global_mean_pool
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.utils import add_self_loops, degree
from torch_geometric.nn import global_mean_pool
from sklearn.metrics import mean_squared_error
from torch_geometric.utils import softmax
from math import sqrt
import torch.optim as optim
from torch_geometric.loader import DataLoader
import networkx as nx

QM9 Dataset:

The QM9 dataset is a widely used benchmark dataset in the field of graph neural networks (GNNs) and molecular property prediction. It contains about 134,000 small organic molecules with up to 9 heavy atoms (C, O, N, F). Each molecule is represented as a graph, where atoms are nodes and bonds are edges.

Key features of QM9:
- Number of graphs: ~134,000
- Node features: Atom properties (e.g., atomic number, charge)
- Edge features: Bond properties (e.g., bond type)
- Graph labels: Various molecular properties (e.g., energy, dipole moment)
	Available target properties:
0: mu, 1: alpha, 2: homo, 3: lumo, 4: gap, 5: r2, 6: zpve, 7: U0, 8: U, 9: H, 10: G, 11:Cv, 12: omega1

Link: https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.datasets.QM9.html

The dataset is used for regression tasks, predicting molecular properties from graph structures.

**Use Case:**
We are going to use 1000 graphs for training, 100 graphs for validation and 100 graphs for test.
We will use ‘0: mu (dipole moment)’, first property, as a label for the regression task.
You will get the Data-Loaded in the code notebook.
This is the Regression task so you have to take one label for every graph.


In [6]:
# Load the QM9 dataset
dataset = QM9(root='data/QM9')
print(dataset.num_node_features)
# Access a single graph in the dataset
data = dataset[0]

# Print general information about the dataset
print(f"Number of graphs in the dataset: {len(dataset)}")
print(f"Number of features per node: {data.x.shape[1]}")
print(f"Number of edge features: {data.edge_attr.shape[1]}")
print(f"Number of nodes in the first graph: {data.num_nodes}")
print(f"Number of edges in the first graph: {data.num_edges}")
print(f"Number of edge features in the first graph: {data.edge_attr.size()}")
print("graph")
print(data)
# Investigate the node features
print("\nNode features:")
print(data.x)

# Investigate the edge features
print("\nEdge features:")
print(data.edge_attr)

# Investigate the adjacency list (edges)
print("\nEdges (connectivity):")
print(data.edge_index)

# Investigate the target properties (e.g., energy, dipole moment)
print("\nTarget properties:")
print(data.y)

11
Number of graphs in the dataset: 130831
Number of features per node: 11
Number of edge features: 4
Number of nodes in the first graph: 5
Number of edges in the first graph: 8
Number of edge features in the first graph: torch.Size([8, 4])
graph
Data(x=[5, 11], edge_index=[2, 8], edge_attr=[8, 4], y=[1, 19], pos=[5, 3], idx=[1], name='gdb_1', z=[5])

Node features:
tensor([[0., 1., 0., 0., 0., 6., 0., 0., 0., 0., 4.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]])

Edge features:
tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.]])

Edges (connectivity):
tensor([[0, 0, 0, 0, 1, 2, 3, 4],
        [1, 2, 3, 4, 0, 0, 0, 0]])

Target properties:
tensor([[    0.0000,   

# Preparing dataset for for the further process:

In [7]:
for data in dataset:
    data.y = data.y[:, 0:1]  # Only dipole moment (mu)

# Normalize the target
y_all = torch.cat([data.y for data in dataset])
mean_y, std_y = y_all.mean(), y_all.std()
for data in dataset:
    data.y = (data.y - mean_y) / std_y

# Train/val/test split
train_dataset = dataset[:1000]
val_dataset = dataset[1000:1100]
test_dataset = dataset[1100:1200]
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


# Use the library implementation of following shallow embedding methods to generate the
 node embeddings and then compute the graph features by averaging all the node features.
 * DeepWalk (embedding_dimensions= 64, walk_length=10, num_walks=50)
 * Node2Vec(embedding_dimensions= 64, walk_length=10, num_walks=50, p=1,
 q=0.5)
 Now, implement a custom Deep Neural Network for the regression task. [Every graph
 has one embedding and corresponding label to be predicted]
# Report the following:
 * RootMeanSquare Error (RMSE) Metric for each of the methods in the test set.

In [8]:
import numpy as np
from gensim.models import Word2Vec

class CustomDeepWalk:
    def __init__(self, graph, dimensions=64, walk_length=10, num_walks=50, seed=42):
        self.graph = graph
        self.dimensions = dimensions
        self.walk_length = walk_length
        self.num_walks = num_walks
        self.walks = []
        self.seed = seed
        np.random.seed(self.seed)

    def simulate_random_walks(self):
        nodes = list(self.graph.nodes())
        for _ in range(self.num_walks):
            np.random.shuffle(nodes)  # Shuffle node order for diverse walks
            for node in nodes:
                self.walks.append(self._single_walk(node))

    def _single_walk(self, start_node):
        walk = [start_node]
        while len(walk) < self.walk_length:
            current_node = walk[-1]
            neighbors = list(self.graph.neighbors(current_node))
            if len(neighbors) == 0:
                break  # Prevent walk getting stuck
            walk.append(np.random.choice(neighbors))
        return walk

    def train_model(self):
        self.simulate_random_walks()
        # Use sg=1 for Skip-Gram, workers=4 for speed, and epochs>1 for stability
        self.model = Word2Vec(
            sentences=self.walks,
            vector_size=self.dimensions,
            window=5,
            min_count=1,
            workers=4,
            sg=1,
            epochs=10,
            seed=self.seed
        )

    def extract_embeddings(self):
        # Normalize embeddings for stability
        embeddings = {}
        for node in self.graph.nodes():
            vec = self.model.wv[node]
            norm_vec = vec / (np.linalg.norm(vec) + 1e-10)  # Avoid divide by zero
            embeddings[node] = norm_vec
        return embeddings


In [9]:
import numpy as np
from gensim.models import Word2Vec

class CustomNode2Vec:
    def __init__(self, graph, dimensions=64, walk_length=10, num_walks=50, p=1.0, q=1.0, seed=42):
        self.graph = graph
        self.dimensions = dimensions
        self.walk_length = walk_length
        self.num_walks = num_walks
        self.p = p
        self.q = q
        self.walks = []
        self.seed = seed
        np.random.seed(self.seed)

    def simulate_random_walks(self):
        nodes = list(self.graph.nodes())
        for _ in range(self.num_walks):
            np.random.shuffle(nodes)  # Shuffle nodes for walk diversity
            for node in nodes:
                self.walks.append(self.node2vec_walk(walk_length=self.walk_length, start_node=node))

    def node2vec_walk(self, walk_length, start_node):
        walk = [start_node]
        while len(walk) < walk_length:
            current = walk[-1]
            neighbors = list(self.graph.neighbors(current))
            if len(neighbors) == 0:
                break
            if len(walk) == 1:
                walk.append(np.random.choice(neighbors))
            else:
                prev = walk[-2]
                probs = []
                for neighbor in neighbors:
                    if neighbor == prev:
                        prob = 1.0 / self.p
                    elif self.graph.has_edge(neighbor, prev):
                        prob = 1.0
                    else:
                        prob = 1.0 / self.q
                    probs.append(prob)
                probs = np.array(probs)
                probs /= probs.sum()
                walk.append(np.random.choice(neighbors, p=probs))
        return walk

    def train_model(self):
        self.simulate_random_walks()
        self.model = Word2Vec(
            sentences=self.walks,
            vector_size=self.dimensions,
            window=5,
            min_count=1,
            workers=4,
            sg=1,
            epochs=10,
            seed=self.seed
        )

    def extract_embeddings(self):
        embeddings = {}
        for node in self.graph.nodes():
            vec = self.model.wv[node]
            norm_vec = vec / (np.linalg.norm(vec) + 1e-10)
            embeddings[node] = norm_vec
        return embeddings


In [10]:
def get_deepwalk_embeddings(data, dimensions=64, walk_length=10, num_walks=50, seed=42):
    """
    Generate node embeddings for a single graph using improved DeepWalk.
    """
    # Create the networkx graph
    G = nx.Graph()
    edge_index = data.edge_index.cpu().numpy()
    for i in range(edge_index.shape[1]):
        G.add_edge(edge_index[0, i], edge_index[1, i])

    # If the graph is disconnected, keep only the largest connected component for embedding
    if not nx.is_connected(G):
        largest_cc = max(nx.connected_components(G), key=len)
        G = G.subgraph(largest_cc).copy()

    deepwalk = CustomDeepWalk(
        G,
        dimensions=dimensions,
        walk_length=walk_length,
        num_walks=num_walks,
        seed=seed
    )
    deepwalk.train_model()
    return deepwalk.extract_embeddings()

def get_node2vec_embeddings(data, dimensions=64, walk_length=10, num_walks=50, p=1, q=1, seed=42):
    """
    Generate node embeddings for a single graph using improved Node2Vec.
    """
    G = nx.Graph()
    edge_index = data.edge_index.cpu().numpy()
    for i in range(edge_index.shape[1]):
        G.add_edge(edge_index[0, i], edge_index[1, i])

    # Handle disconnected components
    if not nx.is_connected(G):
        largest_cc = max(nx.connected_components(G), key=len)
        G = G.subgraph(largest_cc).copy()

    node2vec = CustomNode2Vec(
        G,
        dimensions=dimensions,
        walk_length=walk_length,
        num_walks=num_walks,
        p=p,
        q=q,
        seed=seed
    )
    node2vec.train_model()
    return node2vec.extract_embeddings()


In [11]:
import torch
import numpy as np

def calculate_graph_features(node_embeddings, dataset, embedding_dim=64):
    """
    Computes graph-level features by averaging node embeddings for each graph in the dataset.

    Args:
        node_embeddings (dict): Mapping from node identifiers to their embeddings (numpy arrays or tensors).
        dataset (torch_geometric.data.Dataset): Dataset containing graphs.
        embedding_dim (int): Dimension of the node embeddings.

    Returns:
        torch.Tensor: Tensor of shape (num_graphs, embedding_dim) containing graph-level features.
    """
    graph_features = []
    for i in range(len(dataset)):
        edge_index = dataset[i].edge_index.cpu().numpy()
        unique_nodes = np.unique(edge_index)

        node_features = []
        for node in unique_nodes:
            if node in node_embeddings:
                emb = node_embeddings[node]
                if isinstance(emb, torch.Tensor):
                    node_features.append(emb)
                else:
                    # Convert numpy array to torch tensor
                    node_features.append(torch.tensor(emb, dtype=torch.float32))
        if node_features:
            graph_feature = torch.stack(node_features).mean(dim=0)
        else:
            graph_feature = torch.zeros((embedding_dim,), dtype=torch.float32)
        graph_features.append(graph_feature)

    return torch.stack(graph_features)
# For each graph in the dataset, build local node embeddings and mean-pool them for a graph feature
def get_graph_feature_with_embedding(dataset, embedder_fn, **kwargs):
    features = []
    for i in range(len(dataset)):
        node_emb = embedder_fn(dataset[i], **kwargs)
        edge_index = dataset[i].edge_index.cpu().numpy()
        unique_nodes = np.unique(edge_index)
        node_vectors = []
        for node in unique_nodes:
            if node in node_emb:
                node_vectors.append(torch.tensor(node_emb[node], dtype=torch.float32))
        if node_vectors:
            graph_feature = torch.stack(node_vectors).mean(0)
        else:
            graph_feature = torch.zeros(kwargs.get('dimensions', 64), dtype=torch.float32)
        features.append(graph_feature)
    return torch.stack(features)


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from tqdm import tqdm

class SimpleRegressionNN(nn.Module):
    def __init__(self, input_dim, output_dim=19):
        super(SimpleRegressionNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)  # Predict 19 outputs by default

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

def train_neural_model(model, train_loader, num_epochs=100, learning_rate=0.001, patience=50, device=None):
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    best_loss = float('inf')
    best_model_wts = None
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
        for features, targets in loop:
            features, targets = features.to(device), targets.to(device)
            optimizer.zero_grad()
            predictions = model(features)
            loss = loss_fn(predictions, targets)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * features.size(0)
            loop.set_postfix(loss=loss.item())

        epoch_loss = running_loss / len(train_loader.dataset)
        if(epoch +1 )%10==0:
          print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.6f}")

        # Early stopping
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            best_model_wts = model.state_dict()
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping triggered after {epoch+1} epochs.")
                break

    if best_model_wts is not None:
        model.load_state_dict(best_model_wts)
    return model

def compute_rmse(model, test_loader, device=None):
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()
    loss_fn = nn.MSELoss(reduction='sum')
    total_loss = 0.0
    total_samples = 0
    with torch.no_grad():
        for features, targets in test_loader:
            features, targets = features.to(device), targets.to(device)
            predictions = model(features)
            loss = loss_fn(predictions, targets)
            total_loss += loss.item()
            total_samples += features.size(0) * targets.size(1)
    rmse = np.sqrt(total_loss / total_samples)
    return rmse


In [41]:
from torch.utils.data import DataLoader, TensorDataset

def create_dataloader(features, targets, batch_size=32, shuffle=True, num_workers=4, pin_memory=True):
    """
    Wraps features and targets in TensorDataset and returns a DataLoader with recommended settings.
    """
    # Convert to tensors if not already
    if not torch.is_tensor(features):
        features = torch.tensor(features, dtype=torch.float32)
    if not torch.is_tensor(targets):
       # targets = torch.tensor(targets, dtype=torch.float32)
        targets = torch.stack([data.y[0].unsqueeze(0) for data in dataset[:1200]]).float()
    dataset = TensorDataset(features, targets)
    return DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle,
        num_workers=num_workers, pin_memory=pin_memory
    )


In [14]:
all_node_embeddings_n2v = {}
all_node_embeddings_dw = {}
for i, graph in enumerate(dataset[:1200]):
    emb = get_node2vec_embeddings(graph, dimensions=64, walk_length=10, num_walks=50, p=1, q=0.5, seed=42)
    emb_dw = get_deepwalk_embeddings(graph, dimensions=64, walk_length=10, num_walks=50, seed=42)
    all_node_embeddings_n2v.update({(i, node): vec for node, vec in emb.items()})
    all_node_embeddings_dw.update({(i, node): vec for node, vec in emb_dw.items()})




Epoch 10/200, Loss: nan




Epoch 20/200, Loss: nan




Epoch 30/200, Loss: nan




Epoch 40/200, Loss: nan




Epoch 50/200, Loss: nan
Early stopping triggered after 50 epochs.




Epoch 10/200, Loss: nan




Epoch 20/200, Loss: nan




Epoch 30/200, Loss: nan




Epoch 40/200, Loss: nan


                                                                       

Epoch 50/200, Loss: nan
Early stopping triggered after 50 epochs.




DeepWalk Model RMSE: nan
Node2Vec Model RMSE: nan


In [25]:
# Generate Node2Vec embeddings and graph features
graph_features_n2v = calculate_graph_features(all_node_embeddings_n2v, dataset, embedding_dim=64)

# Generate deepwalk embeddings and graph features
graph_features_dw = calculate_graph_features(all_node_embeddings_dw, dataset, embedding_dim=64)
eps = 1e-8  # small number to prevent division by zero
graph_features_n2v = torch.stack([f / (f.norm() + eps) for f in graph_features_n2v])
graph_features_dw = torch.stack([f / (f.norm() + eps) for f in graph_features_dw])


# Extract and prepare normalized target values (dipole moment scaled)
targets = torch.stack([data.y.squeeze() for data in dataset[:1200]]).float()  # Assuming y normalized already
targets_mean = targets.mean()
targets_std = targets.std() + 1e-8  # prevent division by zero
targets = (targets - targets_mean) / targets_std
# Create dataloaders for training, validation and testing sets
train_loader_n2v = create_dataloader(graph_features_n2v[:1000], targets[:1000], batch_size=32)
val_loader_n2v = create_dataloader(graph_features_n2v[1000:1100], targets[1000:1100], batch_size=32, shuffle=False)
test_loader_n2v = create_dataloader(graph_features_n2v[1100:1200], targets[1100:1200], batch_size=32, shuffle=False)

train_loader_dw = create_dataloader(graph_features_dw[:1000], targets[:1000], batch_size=32)
val_loader_dw = create_dataloader(graph_features_dw[1000:1100], targets[1000:1100], batch_size=32, shuffle=False)
test_loader_dw = create_dataloader(graph_features_dw[1100:1200], targets[1100:1200], batch_size=32, shuffle=False)

# Initialize and train the regression models using improved training function and early stopping
regression_model_dw = SimpleRegressionNN(input_dim=64)
regression_model_dw = train_neural_model(regression_model_dw, train_loader_dw, num_epochs=200, learning_rate=0.002, patience=50)

regression_model_n2v = SimpleRegressionNN(input_dim=64)
regression_model_n2v = train_neural_model(regression_model_n2v, train_loader_n2v, num_epochs=200, learning_rate=0.002, patience=50)

# Evaluate the trained models on test sets
rmse_dw = compute_rmse(regression_model_dw, test_loader_dw)
rmse_n2v = compute_rmse(regression_model_n2v, test_loader_n2v)

print(f"DeepWalk Model RMSE: {rmse_dw:.4f}")
print(f"Node2Vec Model RMSE: {rmse_n2v:.4f}")



Epoch 10/200, Loss: 0.736347




Epoch 20/200, Loss: 0.736717




Epoch 30/200, Loss: 0.736248




Epoch 40/200, Loss: 0.736705




Epoch 50/200, Loss: 0.736699




Epoch 60/200, Loss: 0.736345




Epoch 70/200, Loss: 0.736447




Epoch 80/200, Loss: 0.736187




Epoch 90/200, Loss: 0.736515




Epoch 100/200, Loss: 0.736218




Epoch 110/200, Loss: 0.736717




Epoch 120/200, Loss: 0.736408




Epoch 130/200, Loss: 0.736401




Epoch 140/200, Loss: 0.737180




Epoch 150/200, Loss: 0.736237




Epoch 160/200, Loss: 0.736423




Early stopping triggered after 164 epochs.




Epoch 10/200, Loss: 0.740454




Epoch 20/200, Loss: 0.736365




Epoch 30/200, Loss: 0.736300




Epoch 40/200, Loss: 0.736662




Epoch 50/200, Loss: 0.736376




Epoch 60/200, Loss: 0.736724




Epoch 70/200, Loss: 0.736273




Epoch 80/200, Loss: 0.736719




Epoch 90/200, Loss: 0.736412




Epoch 100/200, Loss: 0.736552




Epoch 110/200, Loss: 0.736518




Epoch 120/200, Loss: 0.736454




Epoch 130/200, Loss: 0.736460




Epoch 140/200, Loss: 0.736197




Epoch 150/200, Loss: 0.737023




Epoch 160/200, Loss: 0.736249




Epoch 170/200, Loss: 0.736250




Epoch 180/200, Loss: 0.736300




Epoch 190/200, Loss: 0.736348




Epoch 200/200, Loss: 0.736111
DeepWalk Model RMSE: 0.1427
Node2Vec Model RMSE: 0.1295


# Part 2
 * Implement a Graph Convolutional Network (GCN) using the original node
 features.
 * Youcantry out various aggregators like sum, mean etc to get graph
 features at the end.
 * Showtheeffect of GCN layers into the learning [use upto 4 GCN layers].
 * Perform Regression on the test set and report RMSE

In [None]:
class CustomGCNLayer(MessagePassing):
    def __init__(self, input_dim, output_dim):
        super(CustomGCNLayer, self).__init__(aggr='add')
        self.linear_transform = nn.Linear(input_dim, output_dim)

    def forward(self, node_features, edge_indices):

        edge_indices, _ = add_self_loops(edge_indices, num_nodes=node_features.size(0))
        # Compute degree and normalization factor
        row, col = edge_indices
        degree_vector = degree(col, node_features.size(0), dtype=node_features.dtype)
        degree_inv_sqrt = degree_vector.pow(-0.5)
        norm = degree_inv_sqrt[row] * degree_inv_sqrt[col]

        # Propagate messages between nodes (i.e., perform graph convolution)
        return self.propagate(edge_indices, x=node_features, norm=norm)

    def message(self, x_j, norm):
        # Scale neighbor features by normalization factor
        return norm.view(-1, 1) * x_j

    def update(self, aggregated_messages):
        # Apply the learned linear transformation to the aggregated features
        return self.linear_transform(aggregated_messages)


class CustomGCN(nn.Module):
    def __init__(self, node_input_dim, hidden_dim, output_dim):
        super(CustomGCN, self).__init__()
        self.gcn_layer1 = CustomGCNLayer(node_input_dim, hidden_dim)
        self.gcn_layer2 = CustomGCNLayer(hidden_dim, hidden_dim)
        self.gcn_layer3 = CustomGCNLayer(hidden_dim, hidden_dim)
        self.gcn_layer4 = CustomGCNLayer(hidden_dim, hidden_dim)
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, data):
        # Extract node features, edge indices, and batch information from the input graph
        node_features, edge_indices, batch = data.x, data.edge_index, data.batch

        # Apply the first GCN layer followed by ReLU activation
        node_features = F.relu(self.gcn_layer1(node_features, edge_indices))


        node_features = F.relu(self.gcn_layer2(node_features, edge_indices))


        node_features = F.relu(self.gcn_layer3(node_features, edge_indices))

        # Apply the fourth GCN layer (no activation here)
        node_features = self.gcn_layer4(node_features, edge_indices)


        graph_features = global_mean_pool(node_features, batch)


        graph_output = self.output_layer(graph_features)

        return graph_output


In [None]:
# Function to train the model on the training data
def train_model(model, data_loader, optimizer, loss_function):
    model.train()
    total_loss = 0
    for batch in data_loader:
        optimizer.zero_grad()  # Clear gradients
        predictions = model(batch)  # Forward pass
        loss = loss_function(predictions, batch.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()  # Accumulate the loss
    return total_loss / len(data_loader)


def evaluate_model(model, data_loader, loss_function):
    model.eval()  # Set the model to evaluation mode
    total_loss = 0
    actual_values = []
    predicted_values = []

    with torch.no_grad():
        for batch in data_loader:
            predictions = model(batch)
            loss = loss_function(predictions, batch.y)
            total_loss += loss.item()

            # Collect predictions and true values for RMSE calculation
            predicted_values.append(predictions.cpu().numpy())
            actual_values.append(batch.y.cpu().numpy())  # Move true values to CPU and store

    # Flatten lists and compute RMSE
    predicted_values = np.concatenate(predicted_values, axis=0)
    actual_values = np.concatenate(actual_values, axis=0)  # Combine true values
    rmse_value = mean_squared_error(actual_values, predicted_values, squared=False)

    return total_loss / len(data_loader), rmse_value


In [None]:
# Initialize the GCN model, optimizer, and loss function
model = CustomGCN(node_input_dim=dataset.num_node_features, hidden_dim=64, output_dim=19)  # Adjusted to 19 output dimensions
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_function = nn.MSELoss()


epochs = 100
for epoch in range(epochs):
    train_loss = train_model(model, train_loader, optimizer, loss_function)
    val_loss, val_rmse = evaluate_model(model, val_loader, loss_function)


    print(f'Epoch {epoch + 1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}, Validation RMSE: {val_rmse:.4f}')

# Test the model on the test set and report the RMSE
test_loss, test_rmse = evaluate_model(model, test_loader, loss_function)
print(f'Test Loss: {test_loss:.4f}, Test RMSE: {test_rmse:.4f}')


Epoch 1, Training Loss: 32296331.9000, Validation Loss: 9597495.5000, Validation RMSE: 1531.4095
Epoch 2, Training Loss: 21948447.0031, Validation Loss: 1999896.5625, Validation RMSE: 768.3661
Epoch 3, Training Loss: 20631206.0453, Validation Loss: 845039.7500, Validation RMSE: 479.4162
Epoch 4, Training Loss: 20513828.9094, Validation Loss: 405293.4375, Validation RMSE: 336.6413
Epoch 5, Training Loss: 20490119.8977, Validation Loss: 562641.4375, Validation RMSE: 401.1791
Epoch 6, Training Loss: 20497012.1465, Validation Loss: 486030.3594, Validation RMSE: 364.7804
Epoch 7, Training Loss: 20488393.4898, Validation Loss: 559117.2969, Validation RMSE: 381.3387
Epoch 8, Training Loss: 20473330.7367, Validation Loss: 513033.6250, Validation RMSE: 383.0806
Epoch 9, Training Loss: 20493252.5781, Validation Loss: 419295.1719, Validation RMSE: 341.3011
Epoch 10, Training Loss: 20482585.5328, Validation Loss: 528169.5156, Validation RMSE: 371.7399
Epoch 11, Training Loss: 20472571.3422, Valida

#Part-3:
* Implement an attention-based GNN, incorporating the concepts of the
 Edge-Weighted Graph Attention Network (EGATConv)
* Youcantry out various aggregators like ‘sum’, ‘mean’ etc to get graph
 features at the end.
* Showtheeffect of EGATConv layers into the learning [use upto 4 layers].
* Perform Regression on the test set and report RMSE


In [None]:
# Custom Edge-Guided Graph Attention Layer
class CustomEGATConv(MessagePassing):
    def __init__(self, input_dim, output_dim, edge_dim, num_heads=1):
        super(CustomEGATConv, self).__init__(aggr='add', node_dim=0)  # Using "add" aggregation
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_heads = num_heads
        self.edge_dim = edge_dim


        self.node_lin = nn.Linear(input_dim, num_heads * output_dim, bias=False)


        self.attn_layer = nn.Linear(2 * output_dim + edge_dim, 1)

        self.initialize_parameters()

    def initialize_parameters(self):
        # Initialize weights with Xavier uniform distribution
        nn.init.xavier_uniform_(self.node_lin.weight)
        nn.init.xavier_uniform_(self.attn_layer.weight)

    def forward(self, node_features, edge_index, edge_features):
        # Apply linear transformation on node features
        node_features = self.node_lin(node_features)
        node_features = node_features.view(-1, self.num_heads, self.output_dim)

        # Perform message passing with attention
        return self.propagate(edge_index, x=node_features, edge_attr=edge_features)

    def message(self, x_i, x_j, edge_attr, index, ptr, size_i):
        # Reshape edge attributes for concatenation
        edge_attr = edge_attr.view(-1, 1, edge_attr.size(-1))

        # Reshape node features and concatenate with edge features
        x_i = x_i.view(-1, self.num_heads, self.output_dim)
        x_j = x_j.view(-1, self.num_heads, self.output_dim)
        combined_features = torch.cat([x_i, x_j, edge_attr.repeat(1, self.num_heads, 1)], dim=-1)

        # Apply attention mechanism
        attention_scores = F.leaky_relu(self.attn_layer(combined_features).squeeze(-1), negative_slope=0.2)

        attention_weights = softmax(attention_scores, index)

        # Weight the neighboring node features by the attention coefficients
        return attention_weights.unsqueeze(-1) * x_j

    def update(self, aggregated_messages):
        # Return the final aggregated output by averaging across heads
        return aggregated_messages.mean(dim=1)

# Attention-based Graph Neural Network (EGAT) model
class CustomEGATModel(nn.Module):
    def __init__(self, input_dim, edge_dim, hidden_dim, output_dim):
        super(CustomEGATModel, self).__init__()

        # Define multiple EGAT layers
        self.egat_conv1 = CustomEGATConv(input_dim, hidden_dim, edge_dim, num_heads=1)
        self.egat_conv2 = CustomEGATConv(hidden_dim, hidden_dim, edge_dim, num_heads=1)
        self.egat_conv3 = CustomEGATConv(hidden_dim, hidden_dim, edge_dim, num_heads=1)
        self.egat_conv4 = CustomEGATConv(hidden_dim, hidden_dim, edge_dim, num_heads=1)

        # Final linear layer for regression
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, data):
        # Extract relevant data attributes (node features, edges, and edge features)
        node_features, edge_index, edge_features, batch = data.x, data.edge_index, data.edge_attr, data.batch


        node_features = F.relu(self.egat_conv1(node_features, edge_index, edge_features))

        node_features = F.relu(self.egat_conv2(node_features, edge_index, edge_features))

        node_features = F.relu(self.egat_conv3(node_features, edge_index, edge_features))

        node_features = self.egat_conv4(node_features, edge_index, edge_features)

        # Pool the node features into a graph-level representation
        graph_representation = global_mean_pool(node_features, batch)

        # Use the final linear layer for regression task
        return self.output_layer(graph_representation)


In [None]:
# Function to perform model training
def train_model(model, loader, optimizer, criterion):
    model.train()
    total_train_loss = 0
    for batch in loader:
        optimizer.zero_grad()  # Reset gradients
        predictions = model(batch)  # Forward pass
        loss = criterion(predictions, batch.y)  # Compute loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update parameters
        total_train_loss += loss.item()  # Accumulate loss
    average_train_loss = total_train_loss / len(loader)  # Average loss per batch
    return average_train_loss

# Function to evaluate model performance
def evaluate_model(model, loader, criterion):
    model.eval()
    total_val_loss = 0
    all_true_values = []
    all_predictions = []

    with torch.no_grad():
        for batch in loader:
            predictions = model(batch)  # Forward pass
            loss = criterion(predictions, batch.y)  # Compute loss
            total_val_loss += loss.item()  # Accumulate loss

            # Collect predictions and true values
            all_predictions.append(predictions.cpu().numpy())
            all_true_values.append(batch.y.cpu().numpy())

    # Concatenate and compute RMSE
    predictions = np.concatenate(all_predictions, axis=0)
    true_values = np.concatenate(all_true_values, axis=0)
    rmse_score = mean_squared_error(true_values, predictions, squared=False)
    average_val_loss = total_val_loss / len(loader)  # Average loss per batch

    return average_val_loss, rmse_score

# Set up the model, optimizer, and loss function
model = EGATModel(num_node_features=dataset.num_node_features,
                  num_edge_features=dataset.num_edge_features,
                  hidden_channels=64,
                  num_classes=19)  # Number of output classes set to 19

optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam optimizer
loss_function = nn.MSELoss()  # Loss function for regression

# Training and evaluation loop
epochs = 20
for epoch in range(epochs):
    training_loss = train_model(model, train_loader, optimizer, loss_function)
    validation_loss, validation_rmse = evaluate_model(model, val_loader, loss_function)
    print(f'Epoch {epoch + 1}: Training Loss: {training_loss:.4f}, Validation Loss: {validation_loss:.4f}, Validation RMSE: {validation_rmse:.4f}')

# Evaluate model on the test set
test_loss, test_rmse = evaluate_model(model, test_loader, loss_function)
print(f'Test Loss: {test_loss:.4f}, Test RMSE: {test_rmse:.4f}')


Epoch 1: Training Loss: 30752615.8938, Validation Loss: 1056368.5312, Validation RMSE: 605.3462
Epoch 2: Training Loss: 21317321.8086, Validation Loss: 280282.3672, Validation RMSE: 308.0165
Epoch 3: Training Loss: 20614601.8250, Validation Loss: 565800.4375, Validation RMSE: 396.3531
Epoch 4: Training Loss: 20478109.0000, Validation Loss: 641567.4844, Validation RMSE: 391.8039
Epoch 5: Training Loss: 20487089.6812, Validation Loss: 479095.2891, Validation RMSE: 353.4192
Epoch 6: Training Loss: 20489859.2598, Validation Loss: 633021.0469, Validation RMSE: 412.3197
Epoch 7: Training Loss: 20476869.2109, Validation Loss: 624366.4531, Validation RMSE: 409.4086
Epoch 8: Training Loss: 20459560.7820, Validation Loss: 464726.2109, Validation RMSE: 336.2867
Epoch 9: Training Loss: 20478980.6766, Validation Loss: 521926.1953, Validation RMSE: 353.6171
Epoch 10: Training Loss: 20533319.3031, Validation Loss: 522738.7734, Validation RMSE: 378.8055
Epoch 11: Training Loss: 20484002.2113, Validati