In [3]:
pip install torch torch_geometric torch_sparse torch_scatter torch_cluster torch_spline_conv


Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_sparse
  Downloading torch_sparse-0.6.18.tar.gz (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m210.0/210.0 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torch_scatter
  Downloading torch_scatter-2.1.2.tar.gz (108 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.0/108.0 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torch_cluster
  Downloading torch_cluster-1.6.3.tar.gz (54 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting to

In [8]:
import torch
from torch_geometric.datasets import Planetoid

# Load the Cora dataset
dataset = Planetoid(root='/tmp/Cora', name='Cora')
data = dataset[0]  # Data object that contains the graph and labels

print(f"Dataset: {dataset}")
print(f"Number of Classes: {dataset.num_classes}")
print(f"Number of Features per Node: {dataset.num_node_features}")


Dataset: Cora()
Number of Classes: 7
Number of Features per Node: 1433


In [9]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


In [10]:
from torch_geometric.nn import GATConv

class GAT(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GAT, self).__init__()
        self.gat1 = GATConv(in_channels, hidden_channels)
        self.gat2 = GATConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.gat1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.gat2(x, edge_index)
        return F.log_softmax(x, dim=1)


In [11]:
from torch_geometric.nn import SAGEConv

class GraphSAGE(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


In [12]:
from torch.optim import Adam
from sklearn.metrics import accuracy_score

def train(model, data, epochs=200):
    optimizer = Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    best_acc = 0

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()

        # Evaluate model
        model.eval()
        pred = out.argmax(dim=1)
        correct = pred[data.test_mask] == data.y[data.test_mask]
        acc = correct.sum().item() / data.test_mask.sum().item()

        if acc > best_acc:
            best_acc = acc

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {loss.item()}, Accuracy: {acc}")

    return best_acc  # Return the best accuracy achieved


In [14]:
# Dataset properties
in_channels = dataset.num_node_features
hidden_channels = 16
out_channels = dataset.num_classes

# Train each model and save accuracy
results = {}


In [15]:

# Train GCN
gcn_model = GCN(in_channels, hidden_channels, out_channels)
gcn_acc = train(gcn_model, data)
results["GCN"] = gcn_acc

Epoch 0, Loss: 1.9498487710952759, Accuracy: 0.1
Epoch 10, Loss: 0.9080252647399902, Accuracy: 0.517
Epoch 20, Loss: 0.3137349784374237, Accuracy: 0.692
Epoch 30, Loss: 0.15855495631694794, Accuracy: 0.721
Epoch 40, Loss: 0.0808139219880104, Accuracy: 0.746
Epoch 50, Loss: 0.07542916387319565, Accuracy: 0.751
Epoch 60, Loss: 0.08600606769323349, Accuracy: 0.737
Epoch 70, Loss: 0.0753401517868042, Accuracy: 0.728
Epoch 80, Loss: 0.04738631844520569, Accuracy: 0.748
Epoch 90, Loss: 0.04363935440778732, Accuracy: 0.737
Epoch 100, Loss: 0.04079645872116089, Accuracy: 0.734
Epoch 110, Loss: 0.03587867319583893, Accuracy: 0.777
Epoch 120, Loss: 0.043690189719200134, Accuracy: 0.744
Epoch 130, Loss: 0.03996475785970688, Accuracy: 0.749
Epoch 140, Loss: 0.03506223484873772, Accuracy: 0.758
Epoch 150, Loss: 0.026677921414375305, Accuracy: 0.754
Epoch 160, Loss: 0.03857060894370079, Accuracy: 0.769
Epoch 170, Loss: 0.033315323293209076, Accuracy: 0.77
Epoch 180, Loss: 0.0622716099023819, Accurac

In [16]:
# Train GAT
gat_model = GAT(in_channels, hidden_channels, out_channels)
gat_acc = train(gat_model, data)
results["GAT"] = gat_acc

Epoch 0, Loss: 1.9430389404296875, Accuracy: 0.112
Epoch 10, Loss: 0.5885289907455444, Accuracy: 0.656
Epoch 20, Loss: 0.19330593943595886, Accuracy: 0.648
Epoch 30, Loss: 0.06863647699356079, Accuracy: 0.672
Epoch 40, Loss: 0.06388278305530548, Accuracy: 0.666
Epoch 50, Loss: 0.03512073680758476, Accuracy: 0.686
Epoch 60, Loss: 0.06959237158298492, Accuracy: 0.677
Epoch 70, Loss: 0.044131457805633545, Accuracy: 0.667
Epoch 80, Loss: 0.04455618932843208, Accuracy: 0.668
Epoch 90, Loss: 0.057447127997875214, Accuracy: 0.677
Epoch 100, Loss: 0.025777781382203102, Accuracy: 0.692
Epoch 110, Loss: 0.02570956014096737, Accuracy: 0.695
Epoch 120, Loss: 0.018027668818831444, Accuracy: 0.694
Epoch 130, Loss: 0.024086039513349533, Accuracy: 0.692
Epoch 140, Loss: 0.06009970232844353, Accuracy: 0.699
Epoch 150, Loss: 0.015641387552022934, Accuracy: 0.683
Epoch 160, Loss: 0.036236722022295, Accuracy: 0.679
Epoch 170, Loss: 0.03493603318929672, Accuracy: 0.657
Epoch 180, Loss: 0.015252487733960152

In [17]:
# Train GraphSAGE
sage_model = GraphSAGE(in_channels, hidden_channels, out_channels)
sage_acc = train(sage_model, data)
results["GraphSAGE"] = sage_acc

Epoch 0, Loss: 1.9640132188796997, Accuracy: 0.271
Epoch 10, Loss: 0.39888569712638855, Accuracy: 0.556
Epoch 20, Loss: 0.09296319633722305, Accuracy: 0.655
Epoch 30, Loss: 0.021930448710918427, Accuracy: 0.641
Epoch 40, Loss: 0.012668009847402573, Accuracy: 0.664
Epoch 50, Loss: 0.02172553911805153, Accuracy: 0.687
Epoch 60, Loss: 0.009457425214350224, Accuracy: 0.677
Epoch 70, Loss: 0.013935382477939129, Accuracy: 0.65
Epoch 80, Loss: 0.018374335020780563, Accuracy: 0.68
Epoch 90, Loss: 0.016975831240415573, Accuracy: 0.695
Epoch 100, Loss: 0.01684812642633915, Accuracy: 0.699
Epoch 110, Loss: 0.011345162987709045, Accuracy: 0.703
Epoch 120, Loss: 0.025140266865491867, Accuracy: 0.709
Epoch 130, Loss: 0.012245284393429756, Accuracy: 0.713
Epoch 140, Loss: 0.022301368415355682, Accuracy: 0.746
Epoch 150, Loss: 0.012789209373295307, Accuracy: 0.732
Epoch 160, Loss: 0.018247660249471664, Accuracy: 0.725
Epoch 170, Loss: 0.006146749947220087, Accuracy: 0.743
Epoch 180, Loss: 0.0282290149

In [18]:
# Display stored results
print("Final Model Performance:", results)

Final Model Performance: {'GCN': 0.787, 'GAT': 0.725, 'GraphSAGE': 0.754}


In [19]:
import pandas as pd

# Convert results dictionary into a DataFrame
df = pd.DataFrame(list(results.items()), columns=['Model', 'Test Accuracy'])

# Display the table
print(df)


       Model  Test Accuracy
0        GCN          0.787
1        GAT          0.725
2  GraphSAGE          0.754


In [20]:
import plotly.express as px

# Create a bar chart
fig = px.bar(df, x='Model', y='Test Accuracy', title="Comparison of GNN Models on Cora Dataset",
             labels={'Test Accuracy': 'Accuracy'}, text=df['Test Accuracy'].round(4))

# Show the chart
fig.show()


In [21]:
import torch
from torch_geometric.datasets import Planetoid

# Load the Pubmed dataset
dataset = Planetoid(root='/tmp/Pubmed', name='Pubmed')
data = dataset[0]  # Graph data


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.pubmed.test.index
Processing...
Done!


In [22]:
# Dataset properties
in_channels = dataset.num_node_features
hidden_channels = 16
out_channels = dataset.num_classes

# Train each model and save accuracy
results_pubmed = {}

# Train GCN
gcn_model = GCN(in_channels, hidden_channels, out_channels)
gcn_acc = train(gcn_model, data)
results_pubmed["GCN"] = gcn_acc

# Train GAT
gat_model = GAT(in_channels, hidden_channels, out_channels)
gat_acc = train(gat_model, data)
results_pubmed["GAT"] = gat_acc

# Train GraphSAGE
sage_model = GraphSAGE(in_channels, hidden_channels, out_channels)
sage_acc = train(sage_model, data)
results_pubmed["GraphSAGE"] = sage_acc

# Display stored results
print("Final Model Performance on Pubmed:", results_pubmed)


Epoch 0, Loss: 1.098767638206482, Accuracy: 0.396
Epoch 10, Loss: 0.9510867595672607, Accuracy: 0.669
Epoch 20, Loss: 0.7078843712806702, Accuracy: 0.686
Epoch 30, Loss: 0.5176490545272827, Accuracy: 0.714
Epoch 40, Loss: 0.36463505029678345, Accuracy: 0.732
Epoch 50, Loss: 0.25304609537124634, Accuracy: 0.742
Epoch 60, Loss: 0.22433553636074066, Accuracy: 0.745
Epoch 70, Loss: 0.18621423840522766, Accuracy: 0.741
Epoch 80, Loss: 0.13399305939674377, Accuracy: 0.743
Epoch 90, Loss: 0.14476284384727478, Accuracy: 0.749
Epoch 100, Loss: 0.1340041160583496, Accuracy: 0.735
Epoch 110, Loss: 0.12096945196390152, Accuracy: 0.75
Epoch 120, Loss: 0.13850736618041992, Accuracy: 0.751
Epoch 130, Loss: 0.11761327087879181, Accuracy: 0.761
Epoch 140, Loss: 0.0789644792675972, Accuracy: 0.751
Epoch 150, Loss: 0.08250304311513901, Accuracy: 0.756
Epoch 160, Loss: 0.09119772911071777, Accuracy: 0.752
Epoch 170, Loss: 0.0871506854891777, Accuracy: 0.746
Epoch 180, Loss: 0.07095935195684433, Accuracy: 

In [23]:
import pandas as pd

# Convert results dictionary into a DataFrame
df_pubmed = pd.DataFrame(list(results_pubmed.items()), columns=['Model', 'Test Accuracy'])

# Display the table
print(df_pubmed)


       Model  Test Accuracy
0        GCN          0.773
1        GAT          0.749
2  GraphSAGE          0.763


In [24]:
import plotly.express as px

# Create a bar chart
fig = px.bar(df_pubmed, x='Model', y='Test Accuracy', title="Comparison of GNN Models on Pubmed Dataset",
             labels={'Test Accuracy': 'Accuracy'}, text=df_pubmed['Test Accuracy'].round(4))

# Show the chart
fig.show()


In [25]:
import torch
from torch_geometric.datasets import Planetoid

# Load the Citeseer dataset
dataset = Planetoid(root='/tmp/Citeseer', name='Citeseer')
data = dataset[0]  # Graph data


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


In [26]:
# Dataset properties
in_channels = dataset.num_node_features
hidden_channels = 16
out_channels = dataset.num_classes

# Train each model and save accuracy
results_citeseer = {}

# Train GCN
gcn_model = GCN(in_channels, hidden_channels, out_channels)
gcn_acc = train(gcn_model, data)
results_citeseer["GCN"] = gcn_acc

# Train GAT
gat_model = GAT(in_channels, hidden_channels, out_channels)
gat_acc = train(gat_model, data)
results_citeseer["GAT"] = gat_acc

# Train GraphSAGE
sage_model = GraphSAGE(in_channels, hidden_channels, out_channels)
sage_acc = train(sage_model, data)
results_citeseer["GraphSAGE"] = sage_acc

# Display stored results
print("Final Model Performance on Citeseer:", results_citeseer)


Epoch 0, Loss: 1.807287335395813, Accuracy: 0.176
Epoch 10, Loss: 0.37362268567085266, Accuracy: 0.587
Epoch 20, Loss: 0.12839671969413757, Accuracy: 0.596
Epoch 30, Loss: 0.0374593511223793, Accuracy: 0.595
Epoch 40, Loss: 0.03845708444714546, Accuracy: 0.582
Epoch 50, Loss: 0.04523908719420433, Accuracy: 0.608
Epoch 60, Loss: 0.03858126699924469, Accuracy: 0.598
Epoch 70, Loss: 0.02277093566954136, Accuracy: 0.585
Epoch 80, Loss: 0.04233996197581291, Accuracy: 0.611
Epoch 90, Loss: 0.046666182577610016, Accuracy: 0.624
Epoch 100, Loss: 0.03412645310163498, Accuracy: 0.622
Epoch 110, Loss: 0.0259031243622303, Accuracy: 0.633
Epoch 120, Loss: 0.040846388787031174, Accuracy: 0.626
Epoch 130, Loss: 0.02573266066610813, Accuracy: 0.621
Epoch 140, Loss: 0.04653751850128174, Accuracy: 0.625
Epoch 150, Loss: 0.041970979422330856, Accuracy: 0.616
Epoch 160, Loss: 0.035401903092861176, Accuracy: 0.605
Epoch 170, Loss: 0.027451304718852043, Accuracy: 0.641
Epoch 180, Loss: 0.026573218405246735,

In [27]:
import pandas as pd

# Convert results dictionary into a DataFrame
df_citeseer = pd.DataFrame(list(results_citeseer.items()), columns=['Model', 'Test Accuracy'])

# Display the table
print(df_citeseer)


       Model  Test Accuracy
0        GCN          0.649
1        GAT          0.616
2  GraphSAGE          0.598


In [28]:
import plotly.express as px

# Create a bar chart
fig = px.bar(df_citeseer, x='Model', y='Test Accuracy', title="Comparison of GNN Models on Citeseer Dataset",
             labels={'Test Accuracy': 'Accuracy'}, text=df_citeseer['Test Accuracy'].round(4))

# Show the chart
fig.show()


In [29]:
# Combined results from all datasets
combined_results = {
    "Dataset": ["Cora", "Cora", "Cora", "Pubmed", "Pubmed", "Pubmed", "Citeseer", "Citeseer", "Citeseer"],
    "Model": ["GCN", "GAT", "GraphSAGE", "GCN", "GAT", "GraphSAGE", "GCN", "GAT", "GraphSAGE"],
    "Test Accuracy": [results["GCN"], results["GAT"], results["GraphSAGE"],
                      results_pubmed["GCN"], results_pubmed["GAT"], results_pubmed["GraphSAGE"],
                      results_citeseer["GCN"], results_citeseer["GAT"], results_citeseer["GraphSAGE"]]
}


In [30]:
import pandas as pd

# Convert dictionary to DataFrame
df_combined = pd.DataFrame(combined_results)

# Display the table
print(df_combined)


    Dataset      Model  Test Accuracy
0      Cora        GCN          0.787
1      Cora        GAT          0.725
2      Cora  GraphSAGE          0.754
3    Pubmed        GCN          0.773
4    Pubmed        GAT          0.749
5    Pubmed  GraphSAGE          0.763
6  Citeseer        GCN          0.649
7  Citeseer        GAT          0.616
8  Citeseer  GraphSAGE          0.598


In [31]:
import plotly.express as px

# Create grouped bar chart
fig = px.bar(df_combined, x="Dataset", y="Test Accuracy", color="Model",
             title="Performance Comparison of GNN Models Across Datasets",
             barmode="group", text=df_combined["Test Accuracy"].round(4))

# Show the chart
fig.show()
