Import and Data Preparation

In [1]:
import torch
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from improved_gcn import prepare_data, ImprovedGCN, train_model

# Build the dataset once; the training cell below reads this `data` object.
data = prepare_data()

# Summarise the graph size and the train/test split (one line each).
print(
    f"Data: {data.num_nodes} nodes, {data.num_node_features} features",
    f"Train: {data.train_mask.sum()}, Test: {data.test_mask.sum()}",
    sep="\n",
)

  from .autonotebook import tqdm as notebook_tqdm


Data: 6 nodes, 7 features
Train: 2, Test: 4


Model Definition

In [2]:

class SimpleGCN(torch.nn.Module):
    """Two-layer GCN baseline used as the reference point for ImprovedGCN.

    Args:
        num_features: Input dimension passed to the first ``GCNConv``.
        num_classes: Output dimension of the second ``GCNConv``.
        hidden_dim: Width of the layer between the two convolutions.
            Defaults to 4, the value that used to be hard-coded, so existing
            two-argument calls behave exactly as before.
    """

    def __init__(self, num_features, num_classes, hidden_dim=4):
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, num_classes)

    def forward(self, data):
        """Run both graph convolutions and return log-probabilities.

        Args:
            data: Object exposing ``x`` (node features) and ``edge_index``;
                both are forwarded unchanged to each ``GCNConv`` layer.

        Returns:
            ``log_softmax`` over dimension 1 of the second layer's output
            (presumably one row per node and one column per class), which is
            the input format expected by ``F.nll_loss``.
        """
        x, edge_index = data.x, data.edge_index
        # ReLU only between the layers; the final layer stays linear so that
        # log_softmax sees raw scores.
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

Training and Comparison

In [3]:
# Original simple model
print("=== Original Simple GCN ===")
# Seed the global RNG so weight initialisation, and therefore the reported
# accuracy, is identical on every Restart & Run All.
torch.manual_seed(42)
model_simple = SimpleGCN(data.num_node_features, 2)
optimizer = torch.optim.Adam(model_simple.parameters(), lr=0.01)

# Full-batch training: one optimizer step per epoch, with the loss computed on
# the training nodes only. The mode never changes inside the loop, so it is
# set once up front.
model_simple.train()
for epoch in range(100):
    optimizer.zero_grad()
    out = model_simple(data)
    # SimpleGCN returns log-probabilities, hence nll_loss.
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

# Evaluation needs no gradients; skipping autograd bookkeeping matches the
# evaluation style used in the realistic-data test further down.
model_simple.eval()
with torch.no_grad():
    pred_simple = model_simple(data).argmax(dim=1)
    acc_simple = (pred_simple[data.test_mask] == data.y[data.test_mask]).float().mean()
print(f'Simple GCN Accuracy: {acc_simple:.4f}')

# Improved model
print("\n=== Improved GCN ===")
# Re-seed so the improved model starts from the same RNG state as the baseline
# and the comparison does not depend on how many random numbers were consumed
# above.
torch.manual_seed(42)
model_improved = ImprovedGCN(num_features=data.num_node_features, num_classes=2)
model_improved = train_model(model_improved, data)

model_improved.eval()
with torch.no_grad():
    pred_improved = model_improved(data).argmax(dim=1)
    acc_improved = (pred_improved[data.test_mask] == data.y[data.test_mask]).float().mean()
print(f'Improved GCN Accuracy: {acc_improved:.4f}')

# Positive means the improved model beat the baseline on the test nodes.
print(f"\nImprovement: {acc_improved - acc_simple:.4f}")

=== Original Simple GCN ===
Simple GCN Accuracy: 1.0000

=== Improved GCN ===
Epoch 000, Loss: 0.6847, Test Acc: 0.5000
Epoch 050, Loss: 0.1133, Test Acc: 1.0000
Epoch 100, Loss: 0.0229, Test Acc: 1.0000
Epoch 150, Loss: 0.0116, Test Acc: 1.0000
Early stopping at epoch 153
Improved GCN Accuracy: 1.0000

Improvement: 0.0000


# NOTE: Accuracy is 100% because the dataset is very small. We can test on a harder, more realistic dataset, as in the cell below.

In [None]:
# realistic_test
import networkx as nx
import torch
from torch_geometric.data import Data
from improved_gcn import ImprovedGCN, train_model, enhance_features
import random

def create_realistic_data(seed=None, noise_std=0.1):
    """Create a more realistic social network with some overlap.

    The graph has 12 nodes: a "normal user" community (nodes 0-5), a "bot"
    community (nodes 6-11) and three cross-community edges. Node features are
    a noisy identity matrix that is then passed through ``enhance_features``.

    Args:
        seed: Optional integer seed for the feature noise. When given, the
            noise is drawn from a private ``torch.Generator`` so the result is
            reproducible and the global RNG state is left untouched. When
            ``None`` (default) the global RNG is used, as before.
        noise_std: Scale applied to the standard-normal feature noise.
            Defaults to 0.1, the previously hard-coded value.

    Returns:
        A ``torch_geometric.data.Data`` object with ``x``, ``edge_index``,
        ``y``, ``train_mask`` and ``test_mask`` set.
    """
    # Create communities with some connections between them
    edges = [
        # Normal user community (0-5)
        (0,1), (1,2), (2,0), (3,4), (4,5), (5,3), (1,3),

        # Bot community (6-11)
        (6,7), (7,8), (8,6), (9,10), (10,11), (11,9), (7,9),

        # Some cross-connections (bots connecting to normal users)
        (6,2),  # bot 6 follows normal user 2
        (10,4), # bot 10 follows normal user 4
        (8,1),  # bot 8 follows normal user 1
    ]

    G = nx.Graph()
    G.add_edges_from(edges)

    # Features: one-hot + random noise (more realistic)
    num_nodes = 12
    features = torch.eye(num_nodes, dtype=torch.float)
    # A dedicated generator keeps seeded runs reproducible without reseeding
    # the global RNG; generator=None falls back to the global RNG.
    generator = None if seed is None else torch.Generator().manual_seed(seed)
    noise = torch.randn(num_nodes, num_nodes, generator=generator) * noise_std
    features = features + noise

    # Labels: first 6 normal (0), last 6 bots (1)
    labels = torch.tensor([0,0,0,0,0,0, 1,1,1,1,1,1], dtype=torch.long)

    # Convert to PyG Data. An undirected nx.Graph reports each edge once, so
    # the flipped copy is appended to list every edge in both directions.
    edge_index = torch.tensor(list(G.edges())).t().contiguous()
    edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
    data = Data(x=features, edge_index=edge_index, y=labels)

    # Enhanced features (enhance_features comes from improved_gcn; what it
    # adds is not visible here -- confirm against that module).
    data.x = enhance_features(data)

    # Balanced train/test split
    train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    test_mask = torch.zeros(num_nodes, dtype=torch.bool)

    # Train on 4 from each class, test on 2 from each class
    train_indices = [0,1,2,3, 6,7,8,9]    # 4 normal + 4 bots
    test_indices = [4,5, 10,11]           # 2 normal + 2 bots

    train_mask[train_indices] = True
    test_mask[test_indices] = True

    data.train_mask = train_mask
    data.test_mask = test_mask

    return data

def test_with_noise():
    """Train ImprovedGCN on the realistic graph with two flipped labels.

    Builds the data with ``create_realistic_data``, flips the labels of
    nodes 2 and 7, trains an ``ImprovedGCN`` through ``train_model`` and
    prints the test accuracy plus a per-node comparison for the test nodes.
    Nothing is returned; all results go to stdout.
    """
    data = create_realistic_data()

    # Simulate real-world label noise. Nodes 2 and 7 are both in the training
    # split built by create_realistic_data, so the test labels stay clean.
    corrupted = data.y.clone()
    corrupted[2] = 1  # Normal user mislabeled as bot
    corrupted[7] = 0  # Bot mislabeled as normal
    data.y = corrupted

    # edge_index lists every undirected edge in both directions, so its
    # column count over n*(n-1) is the usual undirected density.
    directed_edge_count = len(data.edge_index[0])
    possible_pairs = data.num_nodes * (data.num_nodes - 1)
    density = directed_edge_count / possible_pairs

    print(f"Realistic data: {data.num_nodes} nodes")
    print(f"Train: {data.train_mask.sum()}, Test: {data.test_mask.sum()}")
    print(f"Graph density: {density:.3f}")

    # Test Improved GCN
    model = train_model(
        ImprovedGCN(num_features=data.num_node_features, num_classes=2, hidden_dim=16),
        data,
        epochs=300,
    )

    model.eval()
    with torch.no_grad():
        pred = model(data).argmax(dim=1)
        hits = pred[data.test_mask] == data.y[data.test_mask]
        acc = hits.float().mean()

    print(f'\nRealistic Test Accuracy: {acc:.4f}')
    print("This is more representative of real-world performance!")

    # Show predictions vs actual, test nodes only, in ascending node order.
    print("\nPredictions vs Actual:")
    test_nodes = [i for i in range(data.num_nodes) if data.test_mask[i]]
    for i in test_nodes:
        predicted, actual = pred[i], data.y[i]
        status = "✓" if predicted == actual else "✗"
        print(f"Node {i}: Predicted {predicted.item()}, Actual {actual.item()} {status}")

# Entry point: run the realistic-data experiment when this code is executed
# directly. The captured output below shows the guard also passes when the
# cell is run inside the notebook.
if __name__ == "__main__":
    print("=== Testing with Realistic Data ===")
    test_with_noise()

=== Testing with Realistic Data ===
Realistic data: 12 nodes
Train: 8, Test: 4
Graph density: 0.258
Epoch 000, Loss: 0.7361, Test Acc: 0.0000
Epoch 050, Loss: 0.6244, Test Acc: 1.0000
Epoch 100, Loss: 0.5659, Test Acc: 0.7500
Epoch 150, Loss: 0.5300, Test Acc: 1.0000
Early stopping at epoch 175

Realistic Test Accuracy: 0.7500
This is more representative of real-world performance!

Predictions vs Actual:
Node 4: Predicted 0, Actual 0 ✓
Node 5: Predicted 0, Actual 0 ✓
Node 10: Predicted 0, Actual 1 ✗
Node 11: Predicted 1, Actual 1 ✓
