In [25]:
import torch
import torch.nn.functional as F
import pandas as pd
from torch.nn import Linear, ReLU, Sequential
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GINConv, global_add_pool
from sklearn.model_selection import train_test_split
import ast

In [None]:

# ----------------------------
#  Step 1: Load and parse CSVs
# ----------------------------

In [26]:
def parse_edge_line(line):
    # Split by comma only *outside* curly braces
    edges_str = line.strip().split('","')  # Handles entries like "{1, 2}","{1, 3}",...
    cleaned = [edge.replace('{', '(').replace('}', ')').replace('"', '') for edge in edges_str]
    return [tuple(ast.literal_eval(edge)) for edge in cleaned]

# Load the file manually
edges_per_graph = []
with open("/home/rigers/Documents/GitHub/ML-correlator/Rigers/GNN/data_8_Loop/edges8Loop.csv", 'r') as file:
    for line in file:
        edges = parse_edge_line(line)
        edges_per_graph.append(edges)

labels = []
with open("/home/rigers/Documents/GitHub/ML-correlator/Rigers/GNN/data_8_Loop/coeffs8Loop.csv", 'r') as f:
    for line in f:
        labels.append(int(line.strip()))

In [None]:
# ----------------------------
#  Step 2: Convert to PyG Data objects
# ----------------------------

In [27]:
graph_list = []
for edges, label in zip(edges_per_graph, labels):
    edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
    num_nodes = edge_index.max().item() + 1
    x = torch.eye(num_nodes)  # Identity features
    y = torch.tensor([label], dtype=torch.long)
    graph_list.append(Data(x=x, edge_index=edge_index, y=y))

In [None]:
# ----------------------------
# Step 3: Train/test split
# ----------------------------

In [28]:

train_data, test_data = train_test_split(graph_list, test_size=0.2, random_state=42)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

In [None]:
# ----------------------------
# Step 4: Define GIN model
# ----------------------------

In [29]:
class GIN(torch.nn.Module):
    def __init__(self, num_node_features, hidden_dim, num_classes):
        super().__init__()
        nn1 = Sequential(Linear(num_node_features, hidden_dim), ReLU(), Linear(hidden_dim, hidden_dim))
        self.conv1 = GINConv(nn1)

        nn2 = Sequential(Linear(hidden_dim, hidden_dim), ReLU(), Linear(hidden_dim, hidden_dim))
        self.conv2 = GINConv(nn2)

        self.linear = Linear(hidden_dim, num_classes)

    def forward(self, x, edge_index, batch):
        x = self.conv1(x, edge_index)
        x = self.conv2(x, edge_index)
        x = global_add_pool(x, batch)
        return self.linear(x)

In [None]:
# ----------------------------
#  Step 5: Train & Evaluate
# ----------------------------

In [30]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GIN(num_node_features=graph_list[0].x.size(1), hidden_dim=64, num_classes=len(set(labels))).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

In [31]:
# --- Training loop
for epoch in range(1, 200):
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.batch)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"[Epoch {epoch:03d}] Loss: {total_loss:.4f}")

[Epoch 001] Loss: 34.6001
[Epoch 002] Loss: 31.7933
[Epoch 003] Loss: 31.9796
[Epoch 004] Loss: 31.6843
[Epoch 005] Loss: 31.8002
[Epoch 006] Loss: 31.8384
[Epoch 007] Loss: 32.0771
[Epoch 008] Loss: 32.1821
[Epoch 009] Loss: 32.0976
[Epoch 010] Loss: 31.8674
[Epoch 011] Loss: 31.4624
[Epoch 012] Loss: 31.5245
[Epoch 013] Loss: 31.6921
[Epoch 014] Loss: 31.5753
[Epoch 015] Loss: 31.2448
[Epoch 016] Loss: 31.5451
[Epoch 017] Loss: 31.8943
[Epoch 018] Loss: 31.9082
[Epoch 019] Loss: 31.7340
[Epoch 020] Loss: 31.8192
[Epoch 021] Loss: 31.3835
[Epoch 022] Loss: 31.8150
[Epoch 023] Loss: 31.5621
[Epoch 024] Loss: 31.9390
[Epoch 025] Loss: 31.5413
[Epoch 026] Loss: 31.6681
[Epoch 027] Loss: 31.4917
[Epoch 028] Loss: 31.7837
[Epoch 029] Loss: 31.8234
[Epoch 030] Loss: 31.5585
[Epoch 031] Loss: 31.7601
[Epoch 032] Loss: 31.8431
[Epoch 033] Loss: 31.6665
[Epoch 034] Loss: 31.6741
[Epoch 035] Loss: 31.6706
[Epoch 036] Loss: 31.8154
[Epoch 037] Loss: 31.6420
[Epoch 038] Loss: 31.5322
[Epoch 039] 

In [32]:
# --- Evaluation loop
model.eval()
correct = 0
total = 0
for data in test_loader:
    data = data.to(device)
    out = model(data.x, data.edge_index, data.batch)
    pred = out.argmax(dim=1)
    correct += (pred == data.y).sum().item()
    total += data.num_graphs

accuracy = correct / total
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.8147
