In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import  global_mean_pool
#from graph_builder import GraphBuilder  # <-- External builder
import pandas as pd
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_add_pool, GraphNorm
from sklearn.model_selection import train_test_split
import ast
from torch_geometric.utils import degree

In [2]:
from GraphBuilder_single_edge import GraphBuilder
from save_model_results import save_model_architecture, append_evaluation_results, evaluate_model

## Model architecture

In [3]:
# Defining a simple GNN model. 
class SimpleGNN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels,dropout):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.norm1 = GraphNorm(hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.norm2 = GraphNorm(hidden_channels,hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.norm3 = GraphNorm(hidden_channels)
        self.lin = torch.nn.Linear(hidden_channels, 2)
        self.dropout = torch.nn.Dropout(dropout)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        batch = data.batch # For multiple graphs in a batch
        x = F.relu(self.norm1(self.conv1(x, edge_index)))
        x = self.dropout(x)
        x = F.relu(self.norm2(self.conv2(x, edge_index)))
        x = self.dropout(x)
        x = F.relu(self.norm3(self.conv3(x, edge_index)))
        x = self.dropout(x)
        x = global_mean_pool(x, batch)
        return self.lin(x)

In [4]:
# Create the model object.
model = SimpleGNN(in_channels=4, hidden_channels=32,dropout=0.2)

In [5]:
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=5e-4)

In [6]:
# Generate text file with model architecture
model_results_path = save_model_architecture(model)

## 7-10 loop 

### Data preparation

In [55]:
loop=10

In [None]:
# Reset Model 
model = SimpleGNN(in_channels=4, hidden_channels=32,dropout=0.2)
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate, weight_decay=5e-4)

First we read the edges and coefficients of the csv files and save them in lists.

In [57]:
# Create the edge and y lists from the csv files\
edges=[]
y=[]
for i in range(loop,loop+1):
    filename = f'../Graph_Edge_Data/den_graph_data_{loop}.csv'
    df = pd.read_csv(filename)
    edges += df['EDGES'].tolist()
    y += df['COEFFICIENTS'].tolist()
edges = [ast.literal_eval(e) for e in edges]    

In [58]:
# Import the function to add eigenvector features
from torch_geometric.transforms import AddLaplacianEigenvectorPE
eigen_vec= AddLaplacianEigenvectorPE(k=3,attr_name=None)

We need to now translate the edges into dataset forms for training and testing.

In [59]:
# Define the data object through GraphBuilder, then add the eigenvector features
data=[GraphBuilder(solid_edges=x,coeff=y0).build() for x,y0 in zip(edges,y)]
data = [eigen_vec(d) for d in data]

In [60]:
# Split train and test data
train_data, test_data = train_test_split(data, test_size=0.2, random_state=43)

In [61]:
# Load the data into DataLoader
train_loader = DataLoader(train_data, batch_size=20, shuffle=True)
test_loader = DataLoader(test_data, batch_size=20, shuffle=False)

### Model training

We are interested in graph classification of 0 and 1. We add two graph convolutional layers, making sure that the message passing is extended to two neighbours, and then add graph pooling to average over the whole graph.

In [62]:
def train_model(model, train_loader, test_loader, optimizer, criterion, device, n_epochs=70):
    accuracy_list = []
    loss_list = []
    patience_counter = 0
    patience = 3
    model.to(device)

    for epoch in range(n_epochs):
        model.train()
        total_loss = 0

        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()

            out = model(batch)             # out = model(batch) handles batch.x, batch.edge_index, etc.
            loss = criterion(out, batch.coeff) # Use batch.y (or batch.coeff if that's what your dataset uses)

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        loss_list.append(total_loss)

        # Validation
        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for batch in test_loader:
                batch = batch.to(device)
                out = model(batch)
                _, predicted = torch.max(out, 1)
                correct += (predicted == batch.coeff).sum().item()
                total += batch.num_graphs  # graph-level classification

        accuracy = correct / total
        accuracy_list.append(accuracy)

        print(f"Epoch {epoch+1}: Loss={total_loss:.2f}, Accuracy={accuracy:.2f}")

        if epoch>50: 
            if loss_list[epoch-10]-loss_list[epoch] < 0.1:
                patience_counter += 1
                if patience_counter >= patience:
                    print("Early stopping") 
                    break   

In [63]:
train_model(model,train_loader,test_loader,optimizer,device='cpu',criterion=criterion)

Epoch 1: Loss=2909.39, Accuracy=0.79
Epoch 2: Loss=2732.63, Accuracy=0.79
Epoch 3: Loss=2697.15, Accuracy=0.80
Epoch 4: Loss=2677.59, Accuracy=0.79
Epoch 5: Loss=2672.20, Accuracy=0.80
Epoch 6: Loss=2663.90, Accuracy=0.80
Epoch 7: Loss=2667.04, Accuracy=0.79
Epoch 8: Loss=2665.50, Accuracy=0.80
Epoch 9: Loss=2652.61, Accuracy=0.80
Epoch 10: Loss=2653.56, Accuracy=0.80
Epoch 11: Loss=2655.35, Accuracy=0.80
Epoch 12: Loss=2649.25, Accuracy=0.80
Epoch 13: Loss=2643.79, Accuracy=0.80
Epoch 14: Loss=2645.77, Accuracy=0.80
Epoch 15: Loss=2648.18, Accuracy=0.78
Epoch 16: Loss=2651.22, Accuracy=0.80
Epoch 17: Loss=2641.99, Accuracy=0.80
Epoch 18: Loss=2635.20, Accuracy=0.80
Epoch 19: Loss=2641.45, Accuracy=0.80
Epoch 20: Loss=2636.14, Accuracy=0.80
Epoch 21: Loss=2642.03, Accuracy=0.79
Epoch 22: Loss=2639.17, Accuracy=0.80
Epoch 23: Loss=2643.03, Accuracy=0.80
Epoch 24: Loss=2641.12, Accuracy=0.80
Epoch 25: Loss=2641.20, Accuracy=0.80
Epoch 26: Loss=2642.85, Accuracy=0.80
Epoch 27: Loss=2645.2

### Model evaluation

We evaluate the model on test set in terms of Accuracy, Precision, Recall and F1 score. Then we save the results in a txt file. Then we also print the same metrics for the training set.

In [64]:
evaluation =evaluate_model(model, test_loader, device='cpu')

In [65]:
append_evaluation_results(model_results_path, evaluation, loop = loop )

In [66]:
#Save results on training set
evaluation =evaluate_model(model, train_loader, device='cpu', set_type = "train")
append_evaluation_results(model_results_path, evaluation, loop = loop)

In [67]:
evaluation_train =evaluate_model(model, train_loader, device='cpu')
print(evaluation_train)

{'Set Type': 'Test', 'Accuracy': 0.8030277077674733, 'Precision': array([0.87729353, 0.68086896]), 'Recall': array([0.81889966, 0.77134198]), 'F1 Score': array([0.84709145, 0.72328723]), 'Confusion Matrix': array([[66891, 14793],
       [ 9356, 31561]])}
