# Simple GNN with WandB


<h2>Preparation</h2>


Import the libraries and set the random seed.


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import  global_mean_pool
#from graph_builder import GraphBuilder  # <-- External builder
import pandas as pd
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_add_pool, GraphNorm
from sklearn.model_selection import train_test_split
import ast
from torch_geometric.utils import degree
from types import SimpleNamespace

In [2]:
import wandb

In [3]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mdian-gabriele[0m ([33mdian-gabriele-desydeutsches-elektronen-synchrotron[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

<!--Empty Space for separating topics-->


## Define the model 

In [4]:
# Impport module from different folder. Set the path to the directory ../GNN_FIrst_Try
import sys
import os
sys.path.insert(0, os.path.abspath('../GNN_First_Try'))

In [5]:
from GraphBuilder_single_edge import GraphBuilder
from save_model_results import save_model_architecture, append_evaluation_results, evaluate_model

In [6]:
# Defining a simple GNN model. 
class SimpleGNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels,dropout):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.norm1 = GraphNorm(hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.norm2 = GraphNorm(hidden_channels,hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.norm3 = GraphNorm(hidden_channels)
        self.lin = torch.nn.Linear(hidden_channels, 2)
        self.dropout = torch.nn.Dropout(dropout)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        batch = data.batch # For multiple graphs in a batch
        x = F.relu(self.norm1(self.conv1(x, edge_index)))
        x = self.dropout(x)
        x = F.relu(self.norm2(self.conv2(x, edge_index)))
        x = self.dropout(x)
        x = F.relu(self.norm3(self.conv3(x, edge_index)))
        x = self.dropout(x)
        x = global_mean_pool(x, batch)
        return self.lin(x)

## Pass the configuration of the model to wandb

Save the configuration of the hyperparameters

In [87]:
config =  SimpleNamespace(
    lr= 1e-4,
    epochs= 70,
    weight_decay= 5e-4,
    hidden_channels= 128,
    dropout= 0.2,
    in_channels= 4,
    batch_size=20)

In [88]:
# Create the model object.
model = SimpleGNN(in_channels=config.in_channels, hidden_channels=config.hidden_channels,dropout=config.dropout)
# Create the optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = config.lr, weight_decay=config.weight_decay)

### Data preparation

In [89]:
loop = 7

First we read the edges and coefficients of the csv files and save them in lists.

In [90]:
# Create the edge and y lists from the csv files\
edges=[]
y=[]
for i in range(loop,loop+1):
    filename = f'../Graph_Edge_Data/den_graph_data_{loop}.csv'
    df = pd.read_csv(filename)
    edges += df['EDGES'].tolist()
    y += df['COEFFICIENTS'].tolist()
edges = [ast.literal_eval(e) for e in edges]    

In [91]:
# Import the function to add eigenvector features
from torch_geometric.transforms import AddLaplacianEigenvectorPE
eigen_vec= AddLaplacianEigenvectorPE(k=3,attr_name=None)

We need to now translate the edges into dataset forms for training and testing.

In [92]:
# Define the data object through GraphBuilder, then add the eigenvector features
data=[GraphBuilder(solid_edges=x,coeff=y0).build() for x,y0 in zip(edges,y)]
data = [eigen_vec(d) for d in data]

In [93]:
# Split train and test data
train_data, test_data = train_test_split(data, test_size=0.2, random_state=43)

In [94]:
# Load the data into DataLoader
train_loader = DataLoader(train_data, batch_size=config.batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=config.batch_size, shuffle=False)

### Model training

We are interested in graph classification of 0 and 1. We add two graph convolutional layers, making sure that the message passing is extended to two neighbours, and then add graph pooling to average over the whole graph.

In [95]:
def train_model(model, train_loader, test_loader, optimizer, criterion, device, n_epochs =10):

    accuracy_list = []
    loss_list = []
    patience_counter = 0
    patience = 3
    model.to(device)

    wandb.init(
        project="simple_GNN_7_loop",
        config=config
        )
    
    example_ct = 0 # Counter for the number of examples processed

    for epoch in range(n_epochs):
        model.train()
        total_loss = 0

        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()

            out = model(batch)             # out = model(batch) handles batch.x, batch.edge_index, etc.
            loss = criterion(out, batch.coeff) # Use batch.y (or batch.coeff if that's what your dataset uses)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        loss_list.append(total_loss)



        example_ct += len(batch)
        metrics = {
            "train/train_loss": total_loss,
            "train/epoch": epoch + 1,
            "train/example_ct": example_ct
            }
        wandb.log(metrics)
        
        # Validation
        model.eval()
        correct = 0
        total = 0
        val_loss= 0

        with torch.no_grad():
            for batch in test_loader:
                batch = batch.to(device)
                out = model(batch)
                _, predicted = torch.max(out, 1)
                correct += (predicted == batch.coeff).sum().item()
                total += batch.num_graphs  # graph-level classification
                val_loss += criterion(out, batch.coeff) 

        accuracy = correct / total
        accuracy_list.append(accuracy)

        print(f"Epoch {epoch+1}: Loss={total_loss:.2f}, Accuracy={accuracy:.2f}")
    
        # Compute train and validation metrics
        val_metrics = {
            "val/val_loss": val_loss,
            "val/val_accuracy": accuracy
        }
        wandb.log(val_metrics)

        if epoch>50: 
            if loss_list[epoch-10]-loss_list[epoch] < 0.1:
                patience_counter += 1
                if patience_counter >= patience:
                    print("Early stopping") 
                    break   
        

In [96]:
train_model(model,train_loader,test_loader,optimizer,device='cpu',criterion=criterion, n_epochs=config.epochs)

0,1
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
train/example_ct,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇██
train/train_loss,█▇▆▆▆▄▅▄▄▄▄▄▄▄▃▃▂▃▄▃▂▃▂▃▂▂▂▃▂▂▁▂▃▂▂▂▂▂▂▂
val/val_accuracy,▆▃▁▁▁▁▁▃▃▁▁▁▃▃▃▃▃▃▃▃▃▃▃▆▆▆▆█████████████
val/val_loss,█▇▆▆▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train/epoch,55.0
train/example_ct,605.0
train/train_loss,3.87979
val/val_accuracy,0.75758
val/val_loss,0.98641


Epoch 1: Loss=4.96, Accuracy=0.61
Epoch 2: Loss=4.80, Accuracy=0.67
Epoch 3: Loss=4.59, Accuracy=0.67
Epoch 4: Loss=4.48, Accuracy=0.67
Epoch 5: Loss=4.40, Accuracy=0.67
Epoch 6: Loss=4.30, Accuracy=0.67
Epoch 7: Loss=4.26, Accuracy=0.67
Epoch 8: Loss=4.18, Accuracy=0.67
Epoch 9: Loss=4.10, Accuracy=0.67
Epoch 10: Loss=4.04, Accuracy=0.67
Epoch 11: Loss=4.11, Accuracy=0.64
Epoch 12: Loss=4.05, Accuracy=0.67
Epoch 13: Loss=3.90, Accuracy=0.70
Epoch 14: Loss=4.08, Accuracy=0.76
Epoch 15: Loss=3.80, Accuracy=0.82
Epoch 16: Loss=4.23, Accuracy=0.79
Epoch 17: Loss=3.95, Accuracy=0.79
Epoch 18: Loss=3.89, Accuracy=0.82
Epoch 19: Loss=3.86, Accuracy=0.82
Epoch 20: Loss=3.84, Accuracy=0.79
Epoch 21: Loss=3.78, Accuracy=0.79
Epoch 22: Loss=3.74, Accuracy=0.79
Epoch 23: Loss=3.86, Accuracy=0.79
Epoch 24: Loss=3.77, Accuracy=0.79
Epoch 25: Loss=3.75, Accuracy=0.82
Epoch 26: Loss=3.66, Accuracy=0.82
Epoch 27: Loss=3.66, Accuracy=0.82
Epoch 28: Loss=3.69, Accuracy=0.82
Epoch 29: Loss=3.52, Accuracy

### Model evaluation

We evaluate the model on test set in terms of Accuracy, Precision, Recall and F1 score. Then we save the results in a txt file. Then we also print the same metrics for the training set.