In [None]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install torch_geometric
!pip install -U scikit-learn

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
from sklearn.model_selection import train_test_split
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Define your Graph Neural Network model
class GCNClassifier(nn.Module):
    """Two-layer GCN that emits log-probabilities over ``output_dim`` classes.

    The network applies ``GCNConv -> ReLU -> dropout -> GCNConv``. What happens
    next depends on the input:

    * If ``data`` has no ``batch`` vector (or it is ``None``), the per-node
      scores are returned unchanged in shape: one row per node.
    * If ``data`` carries a ``batch`` vector (as mini-batches produced by a
      PyTorch Geometric ``DataLoader`` do), node scores are averaged per graph
      so that one row per graph is returned. This is what lets the output be
      compared against one label per graph.

    Args:
        input_dim: Number of features per node.
        hidden_dim: Width of the hidden GCN layer.
        output_dim: Number of classes.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    @staticmethod
    def _mean_pool(node_scores, batch, num_graphs=None):
        """Average ``node_scores`` over the nodes belonging to each graph.

        ``batch[i]`` is the index of the graph that node ``i`` belongs to.
        Implemented with plain torch ops so no extra import is needed.
        """
        if num_graphs is None:
            num_graphs = int(batch.max()) + 1 if batch.numel() else 0
        sums = node_scores.new_zeros((num_graphs, node_scores.size(1)))
        sums = sums.index_add(0, batch, node_scores)
        # clamp avoids 0/0 for a graph that contributes no nodes.
        counts = torch.bincount(batch, minlength=num_graphs).clamp(min=1)
        return sums / counts.to(node_scores.dtype).unsqueeze(1)

    def forward(self, data):
        """Return log-softmax class scores for ``data``.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``;
                optionally ``batch`` (node -> graph index) and ``num_graphs``.

        Returns:
            Tensor of log-probabilities with one row per graph when ``batch``
            is present, otherwise one row per node.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        batch = getattr(data, "batch", None)
        if batch is not None:
            # Graph-level task: collapse node scores to one row per graph.
            x = self._mean_pool(x, batch, getattr(data, "num_graphs", None))
        return F.log_softmax(x, dim=1)

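# Each graph is described by an edge list and carries a single class label.
# PyTorch Geometric expects `edge_index` as a [2, num_edges] long tensor
# (row 0 = source nodes, row 1 = target nodes) and node features `x` as a
# [num_nodes, num_features] float tensor.
# Example: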
# edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]], dtype=torch.long)
# x = torch.tensor([[0, 1], [1, 0], [0, 1]], dtype=torch.float)  # Node features

# Define your dataset
class GraphDataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs ``graphs[i]`` with ``labels[i]``.

    Both collections are stored by reference; nothing is copied or validated.
    """

    def __init__(self, graphs, labels):
        self.graphs, self.labels = graphs, labels

    def __len__(self):
        # The dataset size is defined by the graph collection alone.
        n_graphs = len(self.graphs)
        return n_graphs

    def __getitem__(self, idx):
        # Look both collections up with the same index and hand back a pair.
        graph = self.graphs[idx]
        label = self.labels[idx]
        return graph, label

HOME = os.getcwd()
DATA_PATH = os.path.join(HOME, "drive", "MyDrive", "GNN_data")
SEED = 42  # fixed so the (tiny) train/val/test split is reproducible


def _load_edge_index(file_path):
    """Parse one comma-separated file of integers into a [2, num_edges] tensor.

    NOTE(review): the file is assumed to be a flattened edge list, i.e.
    ``src0,dst0,src1,dst1,...`` -- confirm against whatever writes GNN_data.
    Edges are kept exactly as listed (no reverse edges are added).

    Raises:
        ValueError: if the file is empty, holds an odd number of integers,
            or contains a negative node id.
    """
    with open(file_path, 'r') as f:
        tokens = [tok.strip() for tok in f.read().split(',')]
    values = [int(tok) for tok in tokens if tok]  # tolerate a trailing comma/newline
    if not values:
        raise ValueError(f"{file_path}: no edges found")
    if len(values) % 2 != 0:
        raise ValueError(f"{file_path}: expected an even number of node ids, got {len(values)}")
    if min(values) < 0:
        raise ValueError(f"{file_path}: node ids must be non-negative")
    pairs = torch.tensor(values, dtype=torch.long).reshape(len(values) // 2, 2)
    return pairs.t().contiguous()


def _graph_from_edges(edge_index):
    """Wrap an edge index in a ``Data`` object with a 1-D structural node feature.

    The files carry no node attributes, so each node's feature is the number of
    times it appears as an edge endpoint. This keeps ``input_dim`` at 1.
    """
    num_nodes = int(edge_index.max()) + 1
    degree = torch.bincount(edge_index.flatten(), minlength=num_nodes)
    return Data(x=degree.to(torch.float).unsqueeze(1), edge_index=edge_index)


# Every graph keeps its own size: padding with zeros would invent (0, 0) edges,
# and PyTorch Geometric batches variable-sized graphs without padding.
# sorted() makes the file order (and thus the label alignment) deterministic.
graphs = []
for current in sorted(os.listdir(DATA_PATH)):
    file_path = os.path.join(DATA_PATH, current)
    if not os.path.isfile(file_path):
        continue
    graphs.append(_graph_from_edges(_load_edge_index(file_path)))

# Currently doing manual labelling: one class id per file, in sorted filename order.
labels = [1, 1, 1]
if len(labels) != len(graphs):
    raise ValueError(f"got {len(graphs)} graphs but {len(labels)} labels")

print(graphs)

# Split your data into train/validation/test sets
train_graphs, test_graphs, train_labels, test_labels = train_test_split(
    graphs, labels, test_size=0.2, random_state=SEED)
train_graphs, val_graphs, train_labels, val_labels = train_test_split(
    train_graphs, train_labels, test_size=0.2, random_state=SEED)

# Create DataLoader for each set
train_dataset = GraphDataset(train_graphs, train_labels)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_dataset = GraphDataset(val_graphs, val_labels)
val_loader = DataLoader(val_dataset, batch_size=64)
test_dataset = GraphDataset(test_graphs, test_labels)
test_loader = DataLoader(test_dataset, batch_size=64)


# Initialize your model, loss function, and optimizer
input_dim = graphs[0].x.shape[1]  # number of features per node
hidden_dim = 64
# NLLLoss needs every class id to be a valid column index, so size the output
# by the largest id rather than by the number of distinct ids seen.
output_dim = max(labels) + 1
model = GCNClassifier(input_dim, hidden_dim, output_dim)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop
def train(model, loader, optimizer, criterion):
    """Run one optimisation pass over every ``(inputs, targets)`` batch in ``loader``.

    Puts ``model`` in training mode, then for each batch clears the gradients,
    computes ``criterion(model(inputs), targets)``, backpropagates and takes
    one optimizer step. Returns nothing.
    """
    model.train()
    for batch_inputs, batch_targets in loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_targets)
        batch_loss.backward()
        optimizer.step()

# Evaluation loop
def evaluate(model, loader, criterion):
    """Return the classification accuracy of ``model`` over ``loader``.

    Args:
        model: Module whose output has one row of class scores per label.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Unused; accepted so the signature mirrors ``train``.

    Returns:
        Fraction of correctly predicted labels as a float, or ``0.0`` when the
        loader yields no samples (instead of raising ``ZeroDivisionError``).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, labels in loader:
            predicted = model(data).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        # An empty split can happen with very small datasets.
        return 0.0
    return correct / total

# Fit on the training split, reporting validation accuracy after each epoch.
num_epochs = 10
for epoch in range(num_epochs):
    train(model=model, loader=train_loader, optimizer=optimizer, criterion=criterion)
    val_accuracy = evaluate(model=model, loader=val_loader, criterion=criterion)
    print(f"Epoch {epoch+1}, Validation Accuracy: {val_accuracy}")

# Score the fitted model once on the held-out test split.
test_accuracy = evaluate(model=model, loader=test_loader, criterion=criterion)
print(f"Test Accuracy: {test_accuracy}")


[[0, 1, 0, 2, 1, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 2, 1, 3, 3, 4, 0, 5, 0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11, 9, 4, 10, 4, 9, 3, 11, 5, 0, 0, 0, 0], [0, 1, 1, 2, 1, 3, 0, 4, 4, 5, 4, 6, 4, 7, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 14, 6, 15, 7]]


IndexError: tuple index out of range