In [3]:
import pandas as pd
import torch
import numpy as np
import networkx as nx
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler
#from torch_geometric.nn import GCNConv
from torch_geometric.nn import SAGEConv
from sklearn.model_selection import train_test_split
from sklearn.neighbors import kneighbors_graph
import torch.nn.functional as F
from torch.nn import Linear

# Load dataset
df = pd.read_csv("diabetes.csv")  # Ensure you have the dataset

# Extract features and target
X = df.drop(columns=["Outcome"]).values  # Features
y = df["Outcome"].values  # Labels

# Normalize features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Convert to tensors
X = torch.tensor(X, dtype=torch.float)
y = torch.tensor(y, dtype=torch.long)

# Create a similarity graph using k-nearest neighbors
A = kneighbors_graph(X, n_neighbors=10, mode='connectivity')  # Create adjacency matrix
G = nx.from_scipy_sparse_array(A)  # Convert to NetworkX graph

# Extract edge index
edge_index = torch.tensor(list(G.edges), dtype=torch.long).t().contiguous()

# Create PyTorch Geometric data object
data = Data(x=X, edge_index=edge_index, y=y)

class GNN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GNN, self).__init__()
        self.conv1 = SAGEConv(input_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim)
        self.conv3 = SAGEConv(hidden_dim, output_dim)
        self.fc = Linear(output_dim, 2)  # Binary classification

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.3, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = self.conv3(x, edge_index)
        x = F.relu(x)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)


# Initialize model with increased hidden_dim and extra layers
model = GNN(input_dim=X.shape[1], hidden_dim=32, output_dim=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()
    return loss.item()

# Training loop
for epoch in range(300):  # Increased to 300 epochs
    loss = train()
    if epoch % 20 == 0:
        print(f'Epoch {epoch}, Loss: {loss:.4f}')

def test():
    model.eval()
    with torch.no_grad():
        out = model(data)
        pred = out.argmax(dim=1)
        acc = (pred == data.y).sum().item() / len(data.y)
        print(f'Accuracy: {acc:.4f}')

test()


Epoch 0, Loss: 0.6651
Epoch 20, Loss: 0.4671
Epoch 40, Loss: 0.4379
Epoch 60, Loss: 0.4132
Epoch 80, Loss: 0.3981
Epoch 100, Loss: 0.3651
Epoch 120, Loss: 0.3411
Epoch 140, Loss: 0.3184
Epoch 160, Loss: 0.3056
Epoch 180, Loss: 0.2791
Epoch 200, Loss: 0.2574
Epoch 220, Loss: 0.2348
Epoch 240, Loss: 0.2246
Epoch 260, Loss: 0.1966
Epoch 280, Loss: 0.1818
Accuracy: 0.9714


SyntaxError: invalid syntax (3184595879.py, line 1)