<a href="https://colab.research.google.com/github/KushagraD006/SFEGNN-vs-GNC/blob/main/GCN_vs_SFEGNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install PyTorch and the PyTorch Geometric stack into the notebook runtime.
!pip install torch torchvision torchaudio
# NOTE(review): the wheel index below is pinned to torch-2.0.0+cpu while torch above is
# installed without a version pin — confirm the installed torch matches these wheels.
!pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+cpu.html
!pip install torch-geometric


Looking in links: https://data.pyg.org/whl/torch-2.0.0+cpu.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_scatter-2.1.2%2Bpt20cpu-cp311-cp311-linux_x86_64.whl (494 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m494.0/494.0 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcpu/torch_sparse-0.6.18%2Bpt20cpu-cp311-cp311-linux_x86_64.whl (1.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-scatter, torch-sparse
Successfully installed torch-scatter-2.1.2+pt20cpu torch-sparse-0.6.18+pt20cpu
Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py

In [None]:
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler
from torch_geometric.data import Data

# Load your dataset
df = pd.read_csv("/content/transaction_dataset.csv")  # Upload your file to Colab

# Clean and select features: drop identifier columns, the target itself and the
# two free-text token-type columns (only those that are actually present).
drop_cols = ['Unnamed: 0', 'Index', 'Address', 'FLAG',
             'ERC20 most sent token type', 'ERC20_most_rec_token_type']
features = df.drop(columns=[col for col in drop_cols if col in df.columns])

# Keep only numeric columns
features = features.select_dtypes(include=['int64', 'float64'])

# Impute missing values BEFORE scaling. StandardScaler ignores NaNs when fitting
# and passes them through unchanged on transform, so a single missing cell would
# end up in X and turn the training loss into NaN.
features = features.replace([float('inf'), float('-inf')], float('nan'))
# Column median first; the trailing fillna(0.0) covers columns that are entirely NaN.
features = features.fillna(features.median()).fillna(0.0)

labels = df['FLAG']

# Normalize features
scaler = StandardScaler()
X = torch.tensor(scaler.fit_transform(features), dtype=torch.float)
y = torch.tensor(labels.values, dtype=torch.long)
assert torch.isfinite(X).all(), "feature matrix still contains NaN/inf after imputation"

# Create simple synthetic edge connections (for demo, until real tx edges used):
# node i is linked to node i + 1, and every link is added in both directions.
# Built with arange so that a frame with fewer than two rows yields an empty
# (2, 0) edge_index instead of failing.
num_chain_edges = max(len(df) - 1, 0)
chain_src = torch.arange(num_chain_edges, dtype=torch.long)
edge_index = torch.stack([chain_src, chain_src + 1], dim=0)
edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1).contiguous()

# Create graph data object
data = Data(x=X, edge_index=edge_index, y=y)


In [None]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GATConv
from torch_geometric.loader import DataLoader

# Train & test helper
def train(model, data, optimizer, criterion):
    """Perform a single optimisation step on ``data`` and return the loss.

    Args:
        model: module called as ``model(data)`` to produce predictions.
        data: object passed to the model; its ``y`` attribute holds the targets.
        optimizer: optimiser whose gradients are cleared before, and which is
            stepped after, the backward pass.
        criterion: callable ``criterion(predictions, targets)`` returning a
            scalar tensor.

    Returns:
        The loss of this step as a Python number (``loss.item()``).
    """
    model.train()
    optimizer.zero_grad()
    step_loss = criterion(model(data), data.y)
    step_loss.backward()
    optimizer.step()
    return step_loss.item()

def test(model, data):
    model.eval()
    out = model(data)
    pred = out.argmax(dim=1)
    acc = (pred == data.y).sum().item() / data.num_nodes
    return acc


In [None]:
import torch.nn as nn
class GCN(nn.Module):
    """Two stacked ``GCNConv`` layers with a ReLU in between.

    Args:
        in_channels: input size of the first convolution.
        hidden_channels: output size of the first / input size of the second
            convolution.
        out_channels: output size of the second convolution.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        """Apply both convolutions to ``data.x`` over ``data.edge_index``.

        No activation is applied to the output of the second convolution.
        """
        hidden = F.relu(self.conv1(data.x, data.edge_index))
        return self.conv2(hidden, data.edge_index)


In [None]:
class GAT(nn.Module):
    """Two stacked ``GATConv`` layers with a ReLU in between.

    The first layer uses ``heads`` attention heads; the second layer takes
    ``hidden_channels * heads`` inputs and uses a single head.

    Args:
        in_channels: input size of the first attention layer.
        hidden_channels: per-head output size of the first attention layer.
        out_channels: output size of the second attention layer.
        heads: number of attention heads in the first layer (default 2).
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=2):
        super().__init__()
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads)
        self.conv2 = GATConv(hidden_channels * heads, out_channels, heads=1)

    def forward(self, data):
        """Apply both attention layers to ``data.x`` over ``data.edge_index``.

        No activation is applied to the output of the second layer.
        """
        hidden = F.relu(self.conv1(data.x, data.edge_index))
        return self.conv2(hidden, data.edge_index)


In [None]:
# Example: Train GCN
# Full-graph training for 200 epochs; every 20th epoch the accuracy returned by
# test() on the same `data` object is printed next to the latest training loss.
model = GCN(in_channels=data.num_features, hidden_channels=32, out_channels=2)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(1, 201):
    loss = train(model, data, optimizer, criterion)
    if epoch % 20 != 0:
        continue  # only report on every 20th epoch
    acc = test(model, data)
    print(f'Epoch {epoch}, Loss: {loss:.4f}, Accuracy: {acc:.4f}')


Epoch 20, Loss: nan, Accuracy: 0.7786
Epoch 40, Loss: nan, Accuracy: 0.7786
Epoch 60, Loss: nan, Accuracy: 0.7786
Epoch 80, Loss: nan, Accuracy: 0.7786
Epoch 100, Loss: nan, Accuracy: 0.7786
Epoch 120, Loss: nan, Accuracy: 0.7786


In [None]:
from torch_geometric.nn import SGConv

class SGC(torch.nn.Module):
    """Thin wrapper around a single ``SGConv`` layer (``K=2``, ``cached=True``).

    Args:
        in_channels: input size of the convolution.
        out_channels: output size of the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = SGConv(in_channels, out_channels, K=2, cached=True)

    def forward(self, data):
        """Return ``self.conv`` applied to ``data.x`` over ``data.edge_index``."""
        return self.conv(data.x, data.edge_index)


In [None]:
from torch_geometric.nn import GCN2Conv

class GCNII(torch.nn.Module):
    """Linear encoder, a stack of ``GCN2Conv`` layers, and a linear decoder.

    Args:
        in_channels: input size of the first linear layer.
        hidden_channels: width used by every ``GCN2Conv`` layer.
        out_channels: output size of the final linear layer.
        num_layers: number of ``GCN2Conv`` layers (default 4).
        alpha: ``alpha`` argument forwarded to every ``GCN2Conv``.
        theta: ``theta`` argument forwarded to every ``GCN2Conv``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=4, alpha=0.1, theta=0.5):
        super().__init__()
        self.lins = torch.nn.ModuleList()
        self.lins.append(torch.nn.Linear(in_channels, hidden_channels))

        # Must be an *instance*: assigning the ModuleList class itself makes the
        # append() below fail and leaves the convolutions unregistered.
        self.convs = torch.nn.ModuleList()
        for layer_idx in range(1, num_layers + 1):
            # `layer` is 1-based, matching the original `_ + 1`.
            self.convs.append(GCN2Conv(hidden_channels, alpha=alpha, theta=theta, layer=layer_idx))

        self.lins.append(torch.nn.Linear(hidden_channels, out_channels))
        self.relu = torch.nn.ReLU()

    def forward(self, data):
        """Encode ``data.x``, run all convolutions, then decode.

        The encoder output ``x_0`` is passed to every ``GCN2Conv`` as its second
        argument; a ReLU follows each convolution and no activation follows the
        final linear layer.
        """
        x, edge_index = data.x, data.edge_index
        x_0 = self.lins[0](x)
        x = x_0
        for conv in self.convs:
            x = self.relu(conv(x, x_0, edge_index))
        x = self.lins[-1](x)
        return x


In [None]:
class H2GCN(torch.nn.Module):
    """Simplified two-hop model: embed, propagate twice, concatenate, classify.

    Compared with the earlier version of this class:
      * the two propagation layers are created once in ``__init__`` so they are
        registered parameters that the optimiser trains (previously new,
        randomly initialised layers were built on every forward pass);
      * they are referenced through the already imported ``GCNConv`` name rather
        than the unbound ``torch_geometric`` package name;
      * propagation runs on the ``lin1`` embedding, so the concatenated 1-hop
        and 2-hop features have ``2 * hidden_channels`` columns, which is what
        ``lin2`` expects.

    Args:
        in_channels: input size of the embedding layer ``lin1``.
        hidden_channels: width of the embedding and of both propagation layers.
        out_channels: output size of the final linear layer ``lin2``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.lin1 = torch.nn.Linear(in_channels, hidden_channels)
        self.conv1 = GCNConv(hidden_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels * 2, out_channels)

    def forward(self, data):
        """Return ``lin2`` applied to the concatenated 1-hop and 2-hop features."""
        x, edge_index = data.x, data.edge_index
        embedded = self.lin1(x)
        # 1-hop
        x_1 = self.conv1(embedded, edge_index)
        # 2-hop (propagate the 1-hop result once more)
        x_2 = self.conv2(x_1, edge_index)

        x_cat = torch.cat([x_1, x_2], dim=1)
        x_out = self.lin2(x_cat)
        return x_out


In [None]:
class GIoGNN(torch.nn.Module):
    """Concatenates a per-node ``GCNConv`` branch with a graph-wide mean branch.

    Args:
        in_channels: input size of both branches.
        hidden_channels: output size of each branch.
        out_channels: output size of the final linear layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv = GCNConv(in_channels, hidden_channels)
        self.global_pool = torch.nn.Linear(in_channels, hidden_channels)
        self.lin = torch.nn.Linear(hidden_channels * 2, out_channels)

    def forward(self, data):
        """Return ``lin`` applied to ``[local, global]`` features of every node."""
        node_x = data.x
        # Local branch: graph convolution followed by ReLU.
        local = torch.relu(self.conv(node_x, data.edge_index))
        # Global branch: mean over dim 0 of x, projected, then copied to every row.
        mean_x = node_x.mean(dim=0, keepdim=True)
        global_feat = self.global_pool(mean_x).repeat(node_x.size(0), 1)
        return self.lin(torch.cat([local, global_feat], dim=1))


In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Make sure data.x is a NumPy array with no missing values
features_np = data.x.cpu().numpy()

# Optional sanity check
features_np = np.nan_to_num(features_np)  # Replace NaN with 0

# Compute cosine similarity matrix
similarity_matrix = cosine_similarity(features_np)  # Shape: (num_nodes, num_nodes)


# Threshold to build edges (e.g., similarity > 0.7)
threshold = 0.7

# Vectorised replacement for the nested Python loop over all (i, j) pairs with
# i < j: keep only the strict upper triangle of the boolean "similar enough"
# mask. np.nonzero returns indices in row-major order, i.e. the same (i, j)
# order the nested loop visited them in.
upper_mask = np.triu(similarity_matrix > threshold, k=1)
src, dst = np.nonzero(upper_mask)
pair_sims = similarity_matrix[src, dst]

# Emit every undirected pair as two directed edges, interleaved as
# [i, j], [j, i], ... with the similarity repeated for both directions.
edges = np.stack(
    [np.stack([src, dst], axis=1), np.stack([dst, src], axis=1)],
    axis=1,
).reshape(-1, 2)
weights = np.repeat(pair_sims, 2)

# Convert to tensors. Transposing the (E, 2) array keeps the shape (2, E) even
# when no pair exceeds the threshold (E == 0).
edge_index = torch.tensor(edges.T, dtype=torch.long).contiguous()
edge_weight = torch.tensor(weights, dtype=torch.float)


In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class SFEGNN(nn.Module):
    """Two ``GCNConv`` layers that receive explicit per-edge weights.

    Args:
        in_channels: input size of the first convolution.
        hidden_channels: output size of the first / input size of the second
            convolution.
        out_channels: output size of the second convolution.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_weight):
        """Run both convolutions with ``edge_weight``; ReLU only after the first."""
        hidden = F.relu(self.conv1(x, edge_index, edge_weight))
        return self.conv2(hidden, edge_index, edge_weight)




In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class SFEGNN(nn.Module):
    """Two ``GCNConv`` layers that receive explicit per-edge weights.

    NOTE(review): this cell re-declares the ``SFEGNN`` class from the preceding
    cell with the same layers and forward pass; one of the two definitions can
    be removed.

    Args:
        in_channels: input size of the first convolution.
        hidden_channels: output size of the first / input size of the second
            convolution.
        out_channels: output size of the second convolution.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_weight):
        """Run both convolutions with ``edge_weight``; ReLU only after the first."""
        activated = self.conv1(x, edge_index, edge_weight)
        activated = F.relu(activated)
        return self.conv2(activated, edge_index, edge_weight)


In [None]:
# Train SFEGNN on the similarity graph (edge_index / edge_weight built above).
model = SFEGNN(data.num_features, 32, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(1, 201):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, edge_index, edge_weight)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()

    if epoch % 20 == 0:
        # Re-run the forward pass in eval mode without autograd. Previously the
        # accuracy was computed from `out`, the training-mode logits produced
        # before this epoch's optimiser step, so model.eval() had no effect.
        model.eval()
        with torch.no_grad():
            eval_out = model(data.x, edge_index, edge_weight)
        pred = eval_out.argmax(dim=1)
        # Accuracy is measured on the same nodes the model is trained on.
        acc = (pred == data.y).sum().item() / data.num_nodes
        print(f'Epoch {epoch} - Loss: {loss.item():.4f}, Accuracy: {acc:.4f}')


Epoch 20 - Loss: nan, Accuracy: 0.7786
Epoch 40 - Loss: nan, Accuracy: 0.7786
Epoch 60 - Loss: nan, Accuracy: 0.7786
Epoch 80 - Loss: nan, Accuracy: 0.7786
Epoch 100 - Loss: nan, Accuracy: 0.7786
Epoch 120 - Loss: nan, Accuracy: 0.7786
Epoch 140 - Loss: nan, Accuracy: 0.7786
Epoch 160 - Loss: nan, Accuracy: 0.7786
Epoch 180 - Loss: nan, Accuracy: 0.7786
Epoch 200 - Loss: nan, Accuracy: 0.7786
