In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from torch_geometric.loader import DataLoader
from sklearn.model_selection import train_test_split
import ast
from torch_geometric.utils import degree

In [12]:

from torch_geometric.nn import (
    GCNConv, GATConv, GINConv, GraphConv, EdgeConv,
    GraphNorm, BatchNorm, LayerNorm,
    global_mean_pool, global_max_pool, global_add_pool,
    JumpingKnowledge, AttentionalAggregation,
    TransformerConv, GATv2Conv, DeepSetsAggregation
) 
from torch_geometric.nn.pool import TopKPooling, SAGPooling

In [3]:
from GraphBuilder_with_features import GraphBuilder
from save_model_results import save_model_architecture, append_evaluation_results, evaluate_model

## Claude Models

In [49]:
class PlanarGNN(torch.nn.Module):
    """Multi-branch GNN for graph-level classification.

    The network runs three parallel first-layer operators (GCN, GAT, GIN),
    fuses them, then applies a GATv2 layer (optionally using edge features),
    a TransformerConv layer and an EdgeConv layer.  The four intermediate
    node representations are merged with an LSTM-based JumpingKnowledge
    module, pooled with four readouts (mean, max, sum, attention) and
    combined with a small vector of hand-crafted per-graph statistics
    before classification.

    Args:
        in_channels: Number of input node features.
        hidden_channels: Width of every hidden node representation.
        num_classes: Number of output logits per graph.
        dropout: Dropout probability used after each layer and in the MLPs.
        edge_dim: Edge-feature dimensionality expected by the GATv2 layer.
    """

    # Width of the vector returned by ``extract_graph_features``:
    # 4 computed statistics followed by 6 zero placeholders.
    NUM_GRAPH_FEATURES = 10

    def __init__(self, in_channels, hidden_channels, num_classes=2, dropout=0.5, edge_dim=4):
        super().__init__()

        self.dropout = dropout
        self.hidden_channels = hidden_channels

        # First layer: three different convolution operators applied in
        # parallel to the raw node features.
        self.conv1_gcn = GCNConv(in_channels, hidden_channels)
        self.conv1_gat = GATConv(in_channels, hidden_channels, heads=4, concat=False)
        self.conv1_gin = GINConv(
            nn.Sequential(
                nn.Linear(in_channels, hidden_channels),
                nn.ReLU(),
                nn.Linear(hidden_channels, hidden_channels)
            )
        )

        # Fusion layer mapping the three concatenated branches back to
        # ``hidden_channels``.
        self.fusion1 = nn.Linear(hidden_channels * 3, hidden_channels)
        self.norm1 = GraphNorm(hidden_channels)

        # Second layer with attention (consumes edge features when present)
        self.conv2 = GATv2Conv(hidden_channels, hidden_channels, heads=8, concat=False, edge_dim=edge_dim)
        self.norm2 = GraphNorm(hidden_channels)

        # Transformer-style attention layer
        self.transformer = TransformerConv(hidden_channels, hidden_channels, heads=4, concat=False)
        self.norm3 = GraphNorm(hidden_channels)

        # EdgeConv: its MLP receives 2 * hidden_channels features per edge
        self.edge_conv = EdgeConv(
            nn.Sequential(
                nn.Linear(hidden_channels * 2, hidden_channels),
                nn.ReLU(),
                nn.Linear(hidden_channels, hidden_channels)
            )
        )
        self.norm4 = GraphNorm(hidden_channels)

        # Jumping Knowledge over the four intermediate representations
        self.jump = JumpingKnowledge(mode='lstm', channels=hidden_channels, num_layers=4)

        # Hierarchical pooling layers.
        # NOTE(review): these are instantiated but never called in
        # ``forward``; kept so the module's attribute set is unchanged.
        self.pool1 = TopKPooling(hidden_channels, ratio=0.8)
        self.pool2 = SAGPooling(hidden_channels, ratio=0.5)

        # Attentional aggregation readout
        self.att_pool = AttentionalAggregation(
            gate_nn=nn.Sequential(
                nn.Linear(hidden_channels, hidden_channels),
                nn.ReLU(),
                nn.Linear(hidden_channels, 1)
            )
        )

        # MLP applied to the concatenation of the four readouts
        self.readout_mlp = nn.Sequential(
            nn.Linear(hidden_channels * 4, hidden_channels * 2),  # 4 different pooling methods
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels * 2, hidden_channels),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

        # Final classifier (plain MLP; there is no residual connection)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_channels, hidden_channels // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels // 2, num_classes)
        )

        # Embedding of the hand-crafted graph-level features
        self.graph_features_mlp = nn.Sequential(
            nn.Linear(self.NUM_GRAPH_FEATURES, hidden_channels // 4),
            nn.ReLU()
        )

        # Final fusion of learned and hand-crafted graph representations
        self.final_fusion = nn.Linear(hidden_channels + hidden_channels // 4, hidden_channels)

    @staticmethod
    def _batch_vector(data):
        """Return ``data.batch``, or an all-zero vector when it is missing.

        A missing batch vector is treated as "all nodes belong to graph 0",
        which lets a single un-batched graph be passed to the model.
        """
        batch = getattr(data, 'batch', None)
        if batch is None:
            batch = torch.zeros(data.x.size(0), dtype=torch.long, device=data.x.device)
        return batch

    def extract_graph_features(self, data):
        """Compute simple per-graph statistics for every graph in the batch.

        Columns of the result are: node count, edge count (directed entries
        of ``edge_index`` whose source lies in the graph, floor-divided by
        two), average degree, density, followed by zero placeholders up to
        ``NUM_GRAPH_FEATURES`` columns.

        Args:
            data: Object exposing ``x``, ``edge_index`` and optionally
                ``batch``.

        Returns:
            A float32 tensor of shape ``(num_graphs, NUM_GRAPH_FEATURES)``
            on the same device as ``data.x``.
        """
        batch = self._batch_vector(data)
        device = data.x.device

        if batch.numel() == 0:
            return torch.zeros(0, self.NUM_GRAPH_FEATURES, dtype=torch.float, device=device)

        num_graphs = int(batch.max().item()) + 1

        # Count nodes and directed edge entries per graph in one pass each
        # instead of building a boolean mask per graph.
        node_count = torch.bincount(batch, minlength=num_graphs)
        directed_count = torch.bincount(batch[data.edge_index[0]], minlength=num_graphs)
        edge_count = torch.div(directed_count, 2, rounding_mode='floor')

        nodes = node_count.to(torch.float)
        edges = edge_count.to(torch.float)
        zeros = torch.zeros_like(nodes)

        # The clamps only protect the discarded branch of ``torch.where``
        # from dividing by zero.
        avg_degree = torch.where(nodes > 0, 2 * edges / nodes.clamp(min=1), zeros)
        pair_count = nodes * (nodes - 1)
        density = torch.where(nodes > 1, 2 * edges / pair_count.clamp(min=1), zeros)

        features = torch.zeros(num_graphs, self.NUM_GRAPH_FEATURES, dtype=torch.float, device=device)
        features[:, 0] = nodes
        features[:, 1] = edges
        features[:, 2] = avg_degree
        features[:, 3] = density
        # Remaining columns stay zero: placeholders for additional features.
        return features

    def forward(self, data):
        """Return class logits of shape ``(num_graphs, num_classes)``.

        Args:
            data: Batch exposing ``x``, ``edge_index``, and optionally
                ``batch`` and ``edge_attr``.
        """
        x, edge_index = data.x, data.edge_index
        batch = self._batch_vector(data)
        edge_attr = getattr(data, 'edge_attr', None)

        # Store intermediate representations for jumping knowledge
        xs = []

        # Parallel first-layer branches
        x1_gcn = F.relu(self.conv1_gcn(x, edge_index))
        x1_gat = F.relu(self.conv1_gat(x, edge_index))
        x1_gin = F.relu(self.conv1_gin(x, edge_index))

        # Fuse the three branches.  GraphNorm is given ``batch`` so that
        # statistics are computed per graph rather than over the mini-batch.
        x1 = torch.cat([x1_gcn, x1_gat, x1_gin], dim=1)
        x1 = self.fusion1(x1)
        x1 = self.norm1(x1, batch)
        x1 = F.dropout(x1, self.dropout, training=self.training)
        xs.append(x1)

        # Attention layer with edge features
        x2 = F.relu(self.conv2(x1, edge_index, edge_attr))
        x2 = self.norm2(x2, batch)
        x2 = F.dropout(x2, self.dropout, training=self.training)
        xs.append(x2)

        # Transformer-style attention layer
        x3 = F.relu(self.transformer(x2, edge_index))
        x3 = self.norm3(x3, batch)
        x3 = F.dropout(x3, self.dropout, training=self.training)
        xs.append(x3)

        # Edge convolution
        x4 = F.relu(self.edge_conv(x3, edge_index))
        x4 = self.norm4(x4, batch)
        x4 = F.dropout(x4, self.dropout, training=self.training)
        xs.append(x4)

        # Jumping knowledge aggregation
        x = self.jump(xs)

        # Multiple pooling strategies
        pool1 = global_mean_pool(x, batch)
        pool2 = global_max_pool(x, batch)
        pool3 = global_add_pool(x, batch)
        pool4 = self.att_pool(x, batch)

        # Combine different pooling methods
        graph_repr = torch.cat([pool1, pool2, pool3, pool4], dim=1)
        graph_repr = self.readout_mlp(graph_repr)

        # Add graph-level features
        graph_feats = self.extract_graph_features(data)
        graph_feats = self.graph_features_mlp(graph_feats)

        # Final fusion
        combined = torch.cat([graph_repr, graph_feats], dim=1)
        combined = self.final_fusion(combined)

        # Classification
        return self.classifier(combined)

Alternative: Simpler but effective architecture focusing on planar properties

In [51]:
class PlanarGNN_Simple(torch.nn.Module):
    """Five-layer GIN graph classifier with concatenating jumping knowledge.

    Each GIN layer is followed by batch normalisation, ReLU and dropout.
    The outputs of all layers are concatenated, projected back to
    ``hidden_channels`` by a node-wise MLP, pooled per graph with mean, max
    and sum readouts, and classified by an MLP head.

    Args:
        in_channels: Number of input node features.
        hidden_channels: Width of the hidden node representations.
        num_classes: Number of output logits per graph.
        dropout: Dropout probability.
    """

    def __init__(self, in_channels, hidden_channels, num_classes=2, dropout=0.3):
        super().__init__()

        self.dropout = dropout
        depth = 5

        # GIN layers; only the first one consumes the raw input width.
        gin_layers = []
        for layer_idx in range(depth):
            input_width = hidden_channels if layer_idx != 0 else in_channels
            gin_mlp = nn.Sequential(
                nn.Linear(input_width, hidden_channels),
                nn.BatchNorm1d(hidden_channels),
                nn.ReLU(),
                nn.Linear(hidden_channels, hidden_channels),
                nn.BatchNorm1d(hidden_channels),
                nn.ReLU(),
            )
            gin_layers.append(GINConv(gin_mlp))
        self.convs = nn.ModuleList(gin_layers)

        # One BatchNorm1d per GIN layer (used here rather than GraphNorm).
        self.batch_norms = nn.ModuleList()
        for _ in range(depth):
            self.batch_norms.append(nn.BatchNorm1d(hidden_channels))

        # Concatenating jumping knowledge: output width is depth * hidden.
        self.jump = JumpingKnowledge(mode='cat')

        # Node-wise projection of the concatenated layer outputs.
        self.pool = nn.Sequential(
            nn.Linear(hidden_channels * 5, hidden_channels * 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels * 2, hidden_channels),
        )

        # Classification head; its input is the three concatenated readouts.
        self.classifier = nn.Sequential(
            nn.Linear(hidden_channels * 3, hidden_channels),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels, hidden_channels // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels // 2, num_classes),
        )

    def forward(self, data):
        """Return per-graph class logits for the batch ``data``."""
        node_feats = data.x
        edge_index = data.edge_index
        batch = data.batch

        layer_outputs = []
        for gin, norm in zip(self.convs, self.batch_norms):
            node_feats = F.relu(norm(gin(node_feats, edge_index)))
            node_feats = F.dropout(node_feats, self.dropout, training=self.training)
            layer_outputs.append(node_feats)

        # Merge all layer outputs, then project node-wise.
        node_feats = self.pool(self.jump(layer_outputs))

        # Mean, max and sum readouts, concatenated in that order.
        readouts = [
            readout(node_feats, batch)
            for readout in (global_mean_pool, global_max_pool, global_add_pool)
        ]
        return self.classifier(torch.cat(readouts, dim=1))

## Model architecture

In [23]:
# Defining a simple GNN model. 
class SimpleGNN(torch.nn.Module):
    """Three-layer GCN graph classifier with GraphNorm and mean pooling.

    Args:
        in_channels: Number of input node features.
        hidden_channels: Width of the hidden node representations.
        dropout: Dropout probability applied after every layer.
    """

    def __init__(self, in_channels, hidden_channels,dropout):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.norm1 = GraphNorm(hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        # GraphNorm's second positional argument is ``eps``; passing
        # ``hidden_channels`` there set eps to the layer width.
        self.norm2 = GraphNorm(hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.norm3 = GraphNorm(hidden_channels)
        self.lin = torch.nn.Linear(hidden_channels, 2)
        self.dropout = torch.nn.Dropout(dropout)

    def forward(self, data):
        """Return two-class logits, one row per graph in ``data``."""
        x, edge_index = data.x, data.edge_index
        batch = data.batch # For multiple graphs in a batch
        # GraphNorm receives ``batch`` so each graph is normalised with its
        # own statistics instead of those of the whole mini-batch.
        x = F.relu(self.norm1(self.conv1(x, edge_index), batch))
        x = self.dropout(x)
        x = F.relu(self.norm2(self.conv2(x, edge_index), batch))
        x = self.dropout(x)
        x = F.relu(self.norm3(self.conv3(x, edge_index), batch))
        x = self.dropout(x)
        x = global_mean_pool(x, batch)
        return self.lin(x)

In [None]:
# Create the model object.
# model = SimpleGNN(in_channels=26, hidden_channels=32,dropout=0.2)

In [54]:
# in_channels must equal the node-feature width of the graphs fed to the model.
model = PlanarGNN_Simple(
    in_channels=26,
    hidden_channels=32,
    num_classes=2,
    dropout=0.3,
)

In [56]:
# Loss for the classification logits, and Adam with L2 weight decay.
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=5e-4,
)

In [57]:
# Write the model architecture to a text file; the returned path is reused
# below when evaluation results are appended.
model_results_path = save_model_architecture(model)
model_results_path
# To reuse an existing results file instead, set the path by hand:
# model_results_path = "results/model_3.txt"

'results/model_16.txt'

## 7-10 loop 

### Data preparation

In [58]:
# Loop order of the dataset; selects which den_graph_data_<loop>.csv is read.
loop = 8

In [59]:
# Reset model parameters.
# ``model.children()`` only yields the top-level containers (ModuleList /
# Sequential), which do not define ``reset_parameters``, so the layers inside
# them were never re-initialised.  ``model.modules()`` walks every submodule.
# NOTE: this does not clear optimizer state (e.g. Adam moment estimates);
# re-create the optimizer as well when restarting training from scratch.
for module in model.modules():
    reset = getattr(module, 'reset_parameters', None)
    if callable(reset):
        reset()

First, we read the edges and coefficients from the CSV files and store them in lists.

In [60]:
# Create the edge and y lists from the csv files.
edges = []
y = []
for i in range(loop, loop + 1):
    # Use the loop variable (not ``loop``) so that widening the range reads
    # one file per loop order instead of the same file repeatedly.
    filename = f'../Graph_Edge_Data/den_graph_data_{i}.csv'
    df = pd.read_csv(filename)
    # The EDGES column stores each edge list as a Python-literal string.
    edges += [ast.literal_eval(e) for e in df['EDGES'].tolist()]
    y += df['COEFFICIENTS'].tolist()

Next, we convert each edge list into a graph data object that can be used for training and testing.

In [61]:
# Build one graph data object per (edge list, coefficient) pair via GraphBuilder.
data = [
    GraphBuilder(solid_edges=edge_list, coeff=coefficient).build()
    for edge_list, coefficient in zip(edges, y)
]

In [62]:
# Sanity check: shape of the node-feature matrix of one sample graph.
# Its second dimension has to match the model's ``in_channels``.
data[2].x.shape

torch.Size([12, 26])

In [63]:
# Hold out 20% of the graphs for testing; the fixed seed keeps the split stable.
train_data, test_data = train_test_split(
    data,
    test_size=0.2,
    random_state=43,
)

In [64]:
# Wrap both splits in mini-batch loaders; only the training set is shuffled.
train_loader = DataLoader(train_data, shuffle=True, batch_size=20)
test_loader = DataLoader(test_data, shuffle=False, batch_size=20)

### Model training

We are interested in binary graph classification (labels 0 and 1). The model stacks graph convolutional layers so that message passing reaches beyond immediate neighbours, and then applies graph-level pooling to summarise each graph as a whole.

In [65]:
def train_model(model, train_loader, test_loader, optimizer, criterion, device, n_epochs=70,
                patience=3, min_delta=0.1, lookback=10, warmup_epochs=50):
    """Train ``model`` and report accuracy on ``test_loader`` after each epoch.

    Early stopping: once ``epoch > warmup_epochs``, the summed training loss
    of the current epoch is compared with the one ``lookback`` epochs
    earlier.  If it dropped by less than ``min_delta`` the epoch counts as
    stalled; ``patience`` consecutive stalled epochs end training.

    Args:
        model: Module called as ``model(batch)`` and returning class logits.
        train_loader: Iterable of training batches exposing ``.to(device)``
            and ``.y``.
        test_loader: Iterable of evaluation batches additionally exposing
            ``.num_graphs``.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss called as ``criterion(logits, batch.y)``.
        device: Device the model and batches are moved to.
        n_epochs: Maximum number of epochs.
        patience: Consecutive stalled epochs tolerated before stopping.
        min_delta: Minimum loss decrease that counts as an improvement.
        lookback: Distance, in epochs, of the loss used for comparison.
        warmup_epochs: Early stopping is only checked after this epoch index.

    Returns:
        dict with keys ``"loss"`` (summed batch loss per epoch) and
        ``"accuracy"`` (evaluation accuracy per epoch; NaN if the evaluation
        loader yields no graphs).

    Raises:
        ValueError: If ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError("lookback must be at least 1")

    accuracy_list = []
    loss_list = []
    patience_counter = 0
    model.to(device)

    for epoch in range(n_epochs):
        model.train()
        total_loss = 0.0

        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()

            out = model(batch)              # the model reads batch.x, batch.edge_index, etc.
            loss = criterion(out, batch.y)  # batch.y holds the graph-level labels

            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        loss_list.append(total_loss)

        # Evaluation on the held-out loader
        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for batch in test_loader:
                batch = batch.to(device)
                out = model(batch)
                _, predicted = torch.max(out, 1)
                correct += (predicted == batch.y).sum().item()
                total += batch.num_graphs  # graph-level classification

        # Accuracy is undefined for an empty loader; avoid dividing by zero.
        accuracy = correct / total if total > 0 else float("nan")
        accuracy_list.append(accuracy)

        print(f"Epoch {epoch+1}: Loss={total_loss:.2f}, Accuracy={accuracy:.2f}")

        # ``epoch >= lookback`` keeps the index below from wrapping around.
        if epoch > warmup_epochs and epoch >= lookback:
            improvement = loss_list[epoch - lookback] - loss_list[epoch]
            if improvement < min_delta:
                patience_counter += 1
                if patience_counter >= patience:
                    print("Early stopping")
                    break
            else:
                # Patience counts consecutive stalled epochs only.
                patience_counter = 0

    return {"loss": loss_list, "accuracy": accuracy_list}

In [67]:
# Train on CPU with the default number of epochs.
train_model(
    model,
    train_loader,
    test_loader,
    optimizer,
    criterion=criterion,
    device='cpu',
)

Epoch 1: Loss=24.23, Accuracy=0.67
Epoch 2: Loss=23.50, Accuracy=0.73
Epoch 3: Loss=23.41, Accuracy=0.62
Epoch 4: Loss=24.10, Accuracy=0.68
Epoch 5: Loss=23.31, Accuracy=0.60
Epoch 6: Loss=23.57, Accuracy=0.73
Epoch 7: Loss=23.17, Accuracy=0.62
Epoch 8: Loss=22.69, Accuracy=0.61
Epoch 9: Loss=23.24, Accuracy=0.66
Epoch 10: Loss=23.37, Accuracy=0.75
Epoch 11: Loss=22.64, Accuracy=0.74
Epoch 12: Loss=23.89, Accuracy=0.65
Epoch 13: Loss=22.85, Accuracy=0.76
Epoch 14: Loss=22.32, Accuracy=0.66
Epoch 15: Loss=22.34, Accuracy=0.55
Epoch 16: Loss=24.03, Accuracy=0.71
Epoch 17: Loss=23.26, Accuracy=0.66
Epoch 18: Loss=22.49, Accuracy=0.54
Epoch 19: Loss=24.23, Accuracy=0.80
Epoch 20: Loss=21.89, Accuracy=0.68
Epoch 21: Loss=23.89, Accuracy=0.76
Epoch 22: Loss=21.36, Accuracy=0.70
Epoch 23: Loss=22.69, Accuracy=0.72
Epoch 24: Loss=22.49, Accuracy=0.72
Epoch 25: Loss=21.77, Accuracy=0.70
Epoch 26: Loss=22.27, Accuracy=0.77
Epoch 27: Loss=20.75, Accuracy=0.77
Epoch 28: Loss=22.60, Accuracy=0.79
E

### Model evaluation

We evaluate the model on the test set in terms of accuracy, precision, recall and F1 score, and save the results to a text file. We then report the same metrics for the training set.

In [None]:
# Evaluate on the held-out test set.
evaluation = evaluate_model(model, test_loader, device='cpu')

In [None]:
# Append the test-set evaluation to the results file of this model.
append_evaluation_results(model_results_path, evaluation, loop=loop)

In [None]:
# Repeat the evaluation on the training set and append it to the same file.
evaluation = evaluate_model(model, train_loader, device='cpu', set_type="train")
append_evaluation_results(model_results_path, evaluation, loop=loop)