### Generate Embeddings

In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid
import numpy as np

# Build the Planetoid loader for the Cora dataset, rooted at ../data
dataset = Planetoid(
    root='../data',
    name='Cora',
)
# The first entry of the dataset is the graph object used for the rest of the notebook
data = dataset[0]

# Define GCN model
class GCN(torch.nn.Module):
    """Two stacked GCNConv layers with a ReLU in between and a log-softmax on top."""

    def __init__(self, input_dim, hidden_dim, output_dim):
        """Create the two convolution layers.

        Args:
            input_dim: Feature size fed to the first GCNConv layer.
            hidden_dim: Feature size produced by the first layer and consumed by the second.
            output_dim: Feature size produced by the second layer.
        """
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        """Run both layers on `data` and return log-softmax scores.

        Args:
            data: Object exposing `x` and `edge_index` attributes; both are
                passed unchanged to each convolution layer.

        Returns:
            The second layer's output after `log_softmax` along dimension 1.
        """
        hidden = F.relu(self.conv1(data.x, data.edge_index))
        logits = self.conv2(hidden, data.edge_index)
        return F.log_softmax(logits, dim=1)

# Seed PyTorch's global RNG before any layer is built so that weight
# initialisation is repeatable across "Restart & Run All". GPU kernels may
# still introduce small run-to-run variation.
torch.manual_seed(42)

# Pick the compute device, build a fresh (untrained) model on it, move the
# graph to the same device, and create the optimizer over the model's parameters.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GCN(
    input_dim=dataset.num_node_features,
    hidden_dim=16,
    output_dim=dataset.num_classes,
).to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Training function
def train():
    """Run one optimisation step on the training nodes and return the loss.

    Uses the module-level `model`, `optimizer` and `data` objects: puts the
    model in training mode, clears old gradients, evaluates the model on
    `data`, computes the negative log-likelihood loss on the entries selected
    by `data.train_mask`, back-propagates and steps the optimizer.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    mask = data.train_mask
    log_probs = model(data)
    train_loss = F.nll_loss(log_probs[mask], data.y[mask])

    train_loss.backward()
    optimizer.step()
    return train_loss.item()

# Train model
NUM_EPOCHS = 200  # number of calls to train(); one optimisation step each
loss_history = []  # loss returned by every step, kept so convergence can be inspected
for epoch in range(NUM_EPOCHS):
    loss_history.append(train())

# Generate embeddings
# NOTE(review): model(data) returns the log-softmax output of the last layer, so
# the array stored under "embeddings" holds those scores rather than hidden-layer
# activations — confirm this is what downstream code is meant to consume.
model.eval()
with torch.no_grad():
    log_probs = model(data)

# Bring everything to host memory as NumPy arrays
embeddings = log_probs.cpu().numpy()
labels = data.y.cpu().numpy()
train_mask = data.train_mask.cpu().numpy()
test_mask = data.test_mask.cpu().numpy()

# Save embeddings and labels (together with both masks) in a single .npz archive
arrays = dict(embeddings=embeddings, labels=labels, train_mask=train_mask, test_mask=test_mask)
np.savez('embeddings.npz', **arrays)
print("Embeddings saved successfully.")


Embeddings saved successfully.


In [2]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load embeddings
# The archive is opened inside a `with` block and under its own name: np.load on
# an .npz file returns an NpzFile that keeps the file open until it is closed,
# and binding it to `data` would overwrite the graph object of the same name
# that train() and model(data) read in the previous cell. Indexing the archive
# reads each array into memory, so the arrays remain usable after the file closes.
with np.load('embeddings.npz') as npz:
    embeddings = npz['embeddings']
    labels = npz['labels']
    train_mask = npz['train_mask']
    test_mask = npz['test_mask']

# Slice features and targets once with the boolean masks
X_train, y_train = embeddings[train_mask], labels[train_mask]
X_test, y_test = embeddings[test_mask], labels[test_mask]

# Fit a 100-tree Random Forest with a fixed random_state on the training rows
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict the test rows, then score: plain accuracy plus weighted-average
# precision, recall and F1
rf_preds = rf_model.predict(X_test)
acc = accuracy_score(y_test, rf_preds)
prec, rec, f1 = (
    scorer(y_test, rf_preds, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

print(f'Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}, F1-Score: {f1:.4f}')

Accuracy: 0.7450, Precision: 0.7882, Recall: 0.7450, F1-Score: 0.7495
