<a href="https://colab.research.google.com/github/Sansa03/GCNProject/blob/main/forTesting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install rdflib torch torch-geometric networkx


Collecting rdflib
  Downloading rdflib-7.1.4-py3-none-any.whl.metadata (11 kB)
Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4

In [None]:
import rdflib
import networkx as nx
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from sklearn.model_selection import train_test_split # Import train_test_split



# Load RDF graph from TTL file
def load_rdf_graph(ttl_file):
    g = rdflib.Graph()
    g.parse(ttl_file, format="turtle")

    nx_graph = nx.DiGraph()
    for subj, pred, obj in g:
        nx_graph.add_edge(str(subj), str(obj))

    return nx_graph

# Convert NetworkX graph to PyTorch Geometric format
def convert_to_pyg_graph(nx_graph):
    node_mapping = {node: i for i, node in enumerate(nx_graph.nodes())}

    edge_index = torch.tensor(
        [[node_mapping[u], node_mapping[v]] for u, v in nx_graph.edges()],
        dtype=torch.long
    ).t().contiguous()

    num_nodes = len(node_mapping)
    x = torch.eye(num_nodes)  # node features (identity matrix)

    return Data(x=x, edge_index=edge_index)

In [None]:
# Load TTL and convert
nx_graph = load_rdf_graph("/content/drive/MyDrive/agriculture_kg_fully_fixed.ttl")
pyg_graph = convert_to_pyg_graph(nx_graph)


class GCN(torch.nn.Module):
    def __init__(self, in_features, hidden_dim, out_features):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, out_features)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index)
        return x

# Define model
model = GCN(in_features=pyg_graph.x.shape[1], hidden_dim=16, out_features=2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()


In [None]:
for epoch in range(200):
    optimizer.zero_grad()
    out = model(pyg_graph.x, pyg_graph.edge_index)
    loss = criterion(out, torch.randn_like(out))
    loss.backward()
    optimizer.step()
    if epoch % 50 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item()}")


Epoch 0, Loss: 0.9684293270111084
Epoch 50, Loss: 0.9900079369544983
Epoch 100, Loss: 1.0262489318847656
Epoch 150, Loss: 1.0093971490859985


In [None]:
# Define GCN Model
class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)


data = pyg_graph

num_nodes = data.num_nodes
true_labels = torch.randint(0, 2, (num_nodes,))
model = GCN(in_channels=data.x.shape[1], hidden_channels=16, out_channels=2)

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)



In [None]:
# Split data into training and testing
num_nodes = data.num_nodes
indices = torch.arange(num_nodes)
train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=42)

train_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask = torch.zeros(num_nodes, dtype=torch.bool)
train_mask[train_indices] = True
test_mask[test_indices] = True
# Training Function
def train():

    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out, true_labels)  # Cross-entropy loss
    loss.backward()
    optimizer.step()
    return loss.item()

# Accuracy Calculation Function
def Test():
    model.eval()
    with torch.no_grad():
        out = model(data)
        pred = out.argmax(dim=1)
        correct = (pred == true_labels).sum().item()
        accuracy = correct / num_nodes  # Calculate accuracy
    return accuracy

# Training Loop
for epoch in range(100):
    loss = train()
    if epoch % 10 == 0:  # Print accuracy every 10 epochs
        acc = Test()
        print(f"Epoch {epoch+1}, Loss: {loss:.4f}, Accuracy: {acc:.4f}")

Epoch 1, Loss: 0.6926, Accuracy: 0.6272
Epoch 11, Loss: 0.5748, Accuracy: 0.7416
Epoch 21, Loss: 0.4520, Accuracy: 0.8422
Epoch 31, Loss: 0.3555, Accuracy: 0.9053
Epoch 41, Loss: 0.2837, Accuracy: 0.9310
Epoch 51, Loss: 0.2369, Accuracy: 0.9566
Epoch 61, Loss: 0.2081, Accuracy: 0.9684
Epoch 71, Loss: 0.1886, Accuracy: 0.9763
Epoch 81, Loss: 0.1738, Accuracy: 0.9763
Epoch 91, Loss: 0.1620, Accuracy: 0.9763


In [None]:
with torch.no_grad():
    gcn_embeddings = model(data)  # Get node embeddings after training
print("Trained Node Embeddings:", gcn_embeddings)

torch.save(model.state_dict(), "gcn_trained_model.pth")


Trained Node Embeddings: tensor([[-7.6718e-02, -2.6057e+00],
        [-1.7541e-03, -6.3467e+00],
        [-2.4991e+00, -8.5731e-02],
        ...,
        [-5.0697e+00, -6.3038e-03],
        [-4.7875e-04, -7.6446e+00],
        [-1.4306e-01, -2.0152e+00]])


In [None]:
# Testing Function
@torch.no_grad()
def test():
    out = model(data)
    pred = out.argmax(dim=1)
    correct = (pred[test_mask] == true_labels[test_mask]).sum().item()
    accuracy = correct / test_mask.sum().item()  # Compute accuracy
    return accuracy

# Compute test accuracy
training_accuracy = Test()
print(f"training Accuracy: {training_accuracy:.4f}")
testing_accuracy = test()
print(f"Testing Accuracy: {testing_accuracy * 100:.2f}%")


training Accuracy: 0.9822
Testing Accuracy: 99.02%
