In [1]:
!pip install torch_geometric
!pip install torch-scatter torch-sparse torch-cluster -f https://data.pyg.org/whl/torch-2.0.0+cpu.html


Looking in links: https://data.pyg.org/whl/torch-2.0.0+cpu.html


In [2]:
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GATConv, MessagePassing
from torch_geometric.data import Data, DataLoader
from torch_geometric.utils import from_networkx
from nltk.corpus import wordnet as wn
import requests
import random
# Define MPNN Class
class MPNNLayer(MessagePassing):
    """Message-passing layer: linear messages, mean aggregation, ReLU update.

    Each message is ``lin(x_j)``; when edge features are supplied,
    ``edge_update(edge_attr)`` is added to it.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
        edge_dim: Size of each edge feature vector. When omitted it falls back
            to ``in_channels``, which is what the layer used implicitly before
            this parameter existed.
    """

    def __init__(self, in_channels, out_channels, edge_dim=None):
        super().__init__(aggr='mean')  # Mean aggregation
        if edge_dim is None:
            edge_dim = in_channels
        self.lin = nn.Linear(in_channels, out_channels)
        # Edge features have their own width, independent of the node features.
        self.edge_update = nn.Linear(edge_dim, out_channels)

    def forward(self, x, edge_index, edge_attr=None):
        """Propagate node features ``x`` along ``edge_index``.

        ``edge_attr`` is optional; when it is ``None`` the messages are built
        from node features only.
        """
        return self.propagate(edge_index, x=x, edge_attr=edge_attr)

    def message(self, x_j, edge_attr):
        """Build one message per edge from the neighbour features ``x_j``."""
        msg = self.lin(x_j)
        if edge_attr is not None:
            msg = msg + self.edge_update(edge_attr)
        return msg

    def update(self, aggr_out):
        """Apply ReLU to the aggregated messages."""
        return F.relu(aggr_out)


class MPNN(nn.Module):
    """Two stacked ``MPNNLayer`` blocks followed by a linear layer.

    Args:
        input_dim: Size of each input node feature vector.
        hidden_dim: Width of both message-passing layers.
        output_dim: Size of the per-node output vector.
        edge_dim: Size of each edge feature vector; defaults to ``input_dim``.
            Both layers receive the same ``edge_attr`` in ``forward``, so both
            edge projections are sized from this value. Previously the second
            layer's edge projection expected ``hidden_dim`` inputs and could
            not consume the same ``edge_attr`` as the first layer unless
            ``input_dim == hidden_dim``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, edge_dim=None):
        super().__init__()
        if edge_dim is None:
            edge_dim = input_dim
        self.mpnn1 = MPNNLayer(input_dim, hidden_dim, edge_dim=edge_dim)
        self.mpnn2 = MPNNLayer(hidden_dim, hidden_dim, edge_dim=edge_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index, edge_attr=None):
        """Return the output of ``fc`` applied after both message-passing layers."""
        x = self.mpnn1(x, edge_index, edge_attr)
        x = self.mpnn2(x, edge_index, edge_attr)
        return self.fc(x)

# Define GCN Class
class GCN(nn.Module):
    """Two ``GCNConv`` layers with ReLU activations and a linear output layer.

    Args:
        input_dim: Input feature size passed to the first convolution.
        hidden_dim: Output size of both convolutions.
        output_dim: Output size of the final linear layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        """Apply conv -> ReLU twice, then the linear layer."""
        hidden = x
        for conv in (self.conv1, self.conv2):
            hidden = F.relu(conv(hidden, edge_index))
        return self.fc(hidden)

# Define GAT Class
class GAT(nn.Module):
    """Two single-head ``GATConv`` layers with ReLU activations and a linear output layer.

    Args:
        input_dim: Input feature size passed to the first attention layer.
        hidden_dim: Output size of both attention layers.
        output_dim: Output size of the final linear layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GATConv(input_dim, hidden_dim, heads=1)
        self.conv2 = GATConv(hidden_dim, hidden_dim, heads=1)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        """Apply attention -> ReLU twice, then the linear layer."""
        hidden = x
        for attention in (self.conv1, self.conv2):
            hidden = F.relu(attention(hidden, edge_index))
        return self.fc(hidden)





In [3]:
!pip install nltk
import nltk
nltk.download('wordnet')




[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
# ---------------- WordNet API Integration ----------------
def build_wordnet_graph_api(root_synset, depth=2):
    """
    Build a graph using WordNet API starting from a root synset.

    The hierarchy is explored breadth-first, so every synset is expanded at its
    shortest distance from the root. (A depth-limited recursive walk with a
    shared visited set can reach a synset through a long path first and then
    refuse to expand it again when a shorter path is found, silently dropping
    part of the neighbourhood.)

    Args:
        root_synset (str): Starting synset (e.g., 'dog.n.01').
        depth (int): Depth of traversal in WordNet hierarchy. Synsets at
            distance 0..depth from the root have their hypernym/hyponym edges
            added, so the graph may contain nodes at distance depth + 1.
    Returns:
        G (nx.DiGraph): NetworkX directed graph whose nodes are synset names and
            whose edges carry a 'relation' attribute ('hypernym' or 'hyponym').
            The root is always present as a node, even when it has no relations
            or when depth is negative.
    """
    root = wn.synset(root_synset)

    G = nx.DiGraph()
    G.add_node(root.name())

    visited = {root}
    frontier = [root]

    # One iteration per level: level 0 is the root, level `depth` is the last
    # one whose relations are followed.
    for _ in range(depth + 1):
        next_frontier = []
        for synset in frontier:
            related = [('hypernym', other) for other in synset.hypernyms()]
            related += [('hyponym', other) for other in synset.hyponyms()]
            for relation, other in related:
                G.add_edge(synset.name(), other.name(), relation=relation)
                if other not in visited:
                    visited.add(other)
                    next_frontier.append(other)
        if not next_frontier:
            break
        frontier = next_frontier

    return G

def wordnet_to_torch_geometric(G, feature_dim=16, num_classes=2):
    """
    Convert WordNet NetworkX graph to PyTorch Geometric Data format.

    Node features and labels are random placeholders; they are drawn from
    torch's global RNG, so seed it beforehand for reproducible data.

    Args:
        G: NetworkX graph; nodes are re-indexed 0..N-1 in iteration order.
        feature_dim (int): Width of the random node feature matrix.
        num_classes (int): Labels are drawn uniformly from [0, num_classes).
    Returns:
        Data with x of shape (N, feature_dim), edge_index of shape (2, E) and
        y of shape (N,). A graph without edges yields an edge_index of shape
        (2, 0) rather than a malformed 1-D tensor.
    """
    index_of = {node: idx for idx, node in enumerate(G.nodes)}
    pairs = [(index_of[u], index_of[v]) for u, v in G.edges]

    if pairs:
        edge_index = torch.tensor(pairs, dtype=torch.long).t().contiguous()
    else:
        edge_index = torch.empty((2, 0), dtype=torch.long)

    num_nodes = G.number_of_nodes()
    x = torch.rand(num_nodes, feature_dim)  # Random node features
    y = torch.randint(0, num_classes, (num_nodes,))  # Random node labels for classification
    return Data(x=x, edge_index=edge_index, y=y)

# ---------------- ConceptNet API Integration ----------------
def build_conceptnet_graph_api(concept, max_edges=1000, timeout=30):
    """
    Build a graph using ConceptNet API.

    Args:
        concept (str): Concept to query from ConceptNet API (e.g., 'dog').
        max_edges (int): Maximum number of edges to fetch. It is forwarded to
            the API as the ``limit`` query parameter; without it the API
            returns only its default page of results, so larger values of
            max_edges could never be reached.
        timeout (float): Seconds to wait for the HTTP response, so a stalled
            connection cannot hang the notebook indefinitely.
    Returns:
        G (nx.DiGraph): NetworkX directed graph whose nodes are the edge
            endpoint labels and whose edges carry a 'relation' attribute.
    Raises:
        RuntimeError: If the API answers with a non-200 status code.
    """
    from urllib.parse import quote

    # Quote the concept so spaces or other reserved characters cannot break the URL.
    url = f"http://api.conceptnet.io/c/en/{quote(concept)}"
    response = requests.get(url, params={'limit': max(1, max_edges)}, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"Failed to fetch data from ConceptNet API: {response.status_code}")

    data = response.json()
    G = nx.DiGraph()
    # A response without an 'edges' key is treated as an empty result.
    for edge in data.get('edges', [])[:max_edges]:
        source = edge['start']['label']
        target = edge['end']['label']
        relation = edge['rel']['label']
        G.add_edge(source, target, relation=relation)
    return G

def conceptnet_to_torch_geometric(G, feature_dim=16, num_classes=2):
    """
    Convert ConceptNet NetworkX graph to PyTorch Geometric Data format.

    Node features and labels are random placeholders; they are drawn from
    torch's global RNG, so seed it beforehand for reproducible data.

    Args:
        G: NetworkX graph; nodes are re-indexed 0..N-1 in iteration order.
        feature_dim (int): Width of the random node feature matrix.
        num_classes (int): Labels are drawn uniformly from [0, num_classes).
    Returns:
        Data with x of shape (N, feature_dim), edge_index of shape (2, E) and
        y of shape (N,). A graph without edges yields an edge_index of shape
        (2, 0) rather than a malformed 1-D tensor.
    """
    index_of = {node: idx for idx, node in enumerate(G.nodes)}
    pairs = [(index_of[u], index_of[v]) for u, v in G.edges]

    if pairs:
        edge_index = torch.tensor(pairs, dtype=torch.long).t().contiguous()
    else:
        edge_index = torch.empty((2, 0), dtype=torch.long)

    num_nodes = G.number_of_nodes()
    x = torch.rand(num_nodes, feature_dim)  # Random node features
    y = torch.randint(0, num_classes, (num_nodes,))  # Random node labels for classification
    return Data(x=x, edge_index=edge_index, y=y)

# ---------------- Metrics Calculation ----------------
def calculate_mrr(predictions, labels):
    """
    Calculate Mean Reciprocal Rank (MRR).

    For every sample the classes are sorted by descending score, and the
    reciprocal of the (1-based) position of the true class is averaged.

    Args:
        predictions (torch.Tensor): Model predictions (logits or probabilities),
            one row of class scores per sample.
        labels (torch.Tensor): Ground truth labels (class indices), one per sample.
    Returns:
        float: The mean reciprocal rank, or 0.0 when there are no samples
            (instead of raising ZeroDivisionError).
    Raises:
        ValueError: If some label is not a valid class index for its row.
    """
    labels = torch.as_tensor(labels, device=predictions.device).reshape(-1)
    num_samples = labels.numel()
    if num_samples == 0:
        return 0.0

    # Sort all rows in one call instead of looping over samples in Python.
    order = predictions[:num_samples].argsort(dim=-1, descending=True)
    matches = order == labels.unsqueeze(-1)
    if not bool(matches.any(dim=-1).all()):
        raise ValueError("each label must be a valid class index for its prediction row")

    # Each row of `order` is a permutation, so exactly one entry matches;
    # argmax returns its position.
    ranks = matches.to(torch.int64).argmax(dim=-1) + 1
    return (1.0 / ranks.to(torch.float64)).mean().item()

def calculate_hits_k(predictions, labels, k=10):
    """
    Calculate Hits@K.

    A sample counts as a hit when its true class is among the k highest-scoring
    classes. Note that when k is at least the number of classes every valid
    label is a hit, so the metric is trivially 1.0.

    Args:
        predictions (torch.Tensor): Model predictions (logits or probabilities),
            one row of class scores per sample.
        labels (torch.Tensor): Ground truth labels (class indices), one per sample.
        k (int): Number of top predictions to consider.
    Returns:
        float: Fraction of samples that are hits, or 0.0 when there are no
            samples (instead of raising ZeroDivisionError).
    """
    labels = torch.as_tensor(labels, device=predictions.device).reshape(-1)
    num_samples = labels.numel()
    if num_samples == 0:
        return 0.0

    # Sort all rows in one call and keep the k best class indices per sample.
    top_k = predictions[:num_samples].argsort(dim=-1, descending=True)[:, :k]
    hits = (top_k == labels.unsqueeze(-1)).any(dim=-1).sum().item()
    return hits / num_samples

# ---------------- Training and Evaluation ----------------
def train(model, data, optimizer, criterion):
    """Run a single optimisation step on one batch and return its loss.

    Args:
        model: Module called as ``model(data.x, data.edge_index)``.
        data: Batch object exposing ``x``, ``edge_index`` and ``y``.
        optimizer: Optimizer whose gradients are reset before, and stepped
            after, the backward pass.
        criterion: Loss callable applied to ``(model output, data.y)``.
    Returns:
        float: The loss value of this step.
    """
    optimizer.zero_grad()
    model.train()

    logits = model(data.x, data.edge_index)
    step_loss = criterion(logits, data.y)

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

def evaluate(model, loader, criterion, k=10):
    """Evaluate a model over a loader and return batch-averaged metrics.

    Each batch contributes its own MRR, Hits@k and loss. These per-batch values
    are averaged over the number of batches actually seen. (Dividing by
    ``len(loader.dataset)`` is only correct when every batch holds exactly one
    dataset item.)

    Args:
        model: Module called as ``model(data.x, data.edge_index)``.
        loader: Iterable of batches exposing ``x``, ``edge_index`` and ``y``.
        criterion: Loss callable applied to ``(model output, data.y)``.
        k (int): Cut-off forwarded to ``calculate_hits_k``.
    Returns:
        tuple: ``(mrr, hits_at_k, loss)`` averaged over batches; all zeros when
            the loader yields no batches.
    """
    model.eval()
    mrr_total, hits_total, loss_total = 0.0, 0.0, 0.0
    num_batches = 0
    with torch.no_grad():
        for data in loader:
            out = model(data.x, data.edge_index)
            mrr_total += calculate_mrr(out, data.y)
            hits_total += calculate_hits_k(out, data.y, k=k)
            loss_total += criterion(out, data.y).item()
            num_batches += 1

    if num_batches == 0:
        return 0.0, 0.0, 0.0
    return mrr_total / num_batches, hits_total / num_batches, loss_total / num_batches

# ---------------- Main Experiment ----------------
if __name__ == "__main__":
    # Seed the RNGs so the random node features/labels and the weight
    # initialisation are reproducible across runs.
    SEED = 42
    random.seed(SEED)
    torch.manual_seed(SEED)

    # WordNet Graph
    root_synset = "dog.n.01"
    G_wordnet = build_wordnet_graph_api(root_synset, depth=2)
    data_wordnet = wordnet_to_torch_geometric(G_wordnet)

    # ConceptNet Graph
    concept = "dog"
    G_conceptnet = build_conceptnet_graph_api(concept, max_edges=1000)
    data_conceptnet = conceptnet_to_torch_geometric(G_conceptnet)

    # DataLoader: one graph per dataset, one graph per batch
    datasets = {'WordNet': [data_wordnet], 'ConceptNet': [data_conceptnet]}
    loaders = {name: DataLoader(graphs, batch_size=1, shuffle=False) for name, graphs in datasets.items()}

    # Model configuration
    input_dim = 16
    hidden_dim = 32
    output_dim = 2
    model_classes = {'GCN': GCN, 'GAT': GAT, 'MPNN': MPNN}

    # Training setup
    criterion = nn.CrossEntropyLoss()
    for dataset_name, loader in loaders.items():
        print(f"Dataset: {dataset_name}")
        for model_name, model_cls in model_classes.items():
            # Build a fresh model for every (dataset, architecture) pair.
            # Reusing one instance across datasets would start the second
            # dataset from weights already trained on the first.
            model = model_cls(input_dim, hidden_dim, output_dim)
            optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

            # Train model
            for epoch in range(10):
                for batch in loader:
                    loss = train(model, batch, optimizer, criterion)
                print(f"{model_name} Epoch {epoch+1}, Loss: {loss:.4f}")

            # Evaluate model
            # NOTE(review): evaluation runs on the same graph used for training,
            # and with output_dim = 2 the Hits@10 metric is always 1.0 — treat
            # these numbers as a smoke test, not as a benchmark.
            mrr, hits_10, avg_loss = evaluate(model, loader, criterion)
            print(f"{model_name} Results on {dataset_name}:")
            print(f"  MRR: {mrr:.4f}, Hits@10: {hits_10:.4f}, Avg Loss: {avg_loss:.4f}")




Dataset: WordNet
GCN Epoch 1, Loss: 0.6995
GCN Epoch 2, Loss: 0.6911
GCN Epoch 3, Loss: 0.6927
GCN Epoch 4, Loss: 0.6923
GCN Epoch 5, Loss: 0.6907
GCN Epoch 6, Loss: 0.6899
GCN Epoch 7, Loss: 0.6895
GCN Epoch 8, Loss: 0.6888
GCN Epoch 9, Loss: 0.6881
GCN Epoch 10, Loss: 0.6871
GCN Results on WordNet:
  MRR: 0.7837, Hits@10: 1.0000, Avg Loss: 0.6858
GAT Epoch 1, Loss: 0.6948
GAT Epoch 2, Loss: 0.6918
GAT Epoch 3, Loss: 0.6889
GAT Epoch 4, Loss: 0.6872
GAT Epoch 5, Loss: 0.6849
GAT Epoch 6, Loss: 0.6839
GAT Epoch 7, Loss: 0.6817
GAT Epoch 8, Loss: 0.6801
GAT Epoch 9, Loss: 0.6774
GAT Epoch 10, Loss: 0.6749
GAT Results on WordNet:
  MRR: 0.7877, Hits@10: 1.0000, Avg Loss: 0.6720
MPNN Epoch 1, Loss: 0.6969
MPNN Epoch 2, Loss: 0.6920
MPNN Epoch 3, Loss: 0.6912
MPNN Epoch 4, Loss: 0.6906
MPNN Epoch 5, Loss: 0.6893
MPNN Epoch 6, Loss: 0.6880
MPNN Epoch 7, Loss: 0.6866
MPNN Epoch 8, Loss: 0.6855
MPNN Epoch 9, Loss: 0.6840
MPNN Epoch 10, Loss: 0.6825
MPNN Results on WordNet:
  MRR: 0.7937, Hits