# Baseline Methods

This notebook demonstrates baseline methods for friend recommendation:
1. Common Neighbors
2. Jaccard Coefficient
3. Adamic-Adar
4. Preferential Attachment
5. Node2Vec
6. Matrix Factorization


In [None]:
import sys
import os
sys.path.append(os.path.join(os.path.dirname(os.getcwd())))

import torch
import numpy as np
from src.data.heuristics import compute_heuristics, predict_links_heuristics
from src.baselines.node2vec import Node2VecBaseline
from src.baselines.matrix_factorization import MatrixFactorizationBaseline
from src.evaluation import compute_metrics, compute_ranking_metrics

# Load data
# Paths are relative to the current working directory.
# NOTE(review): recent PyTorch releases may default torch.load to
# weights_only=True, which can reject pickled non-tensor objects - confirm
# against the installed version.
GRAPH_PATH = "data/processed/facebook_combined.pt"
LINK_DATA_PATH = "data/processed/facebook_link_data.pt"

# `data` exposes num_nodes and edge_index; `link_data` is indexed later by
# the 'test_edges' and 'test_labels' keys.
data = torch.load(GRAPH_PATH)
link_data = torch.load(LINK_DATA_PATH)

# The column count of edge_index is halved for display - presumably each
# undirected edge is stored in both directions (TODO confirm).
print(f"Graph: {data.num_nodes} nodes, {data.edge_index.size(1) // 2} edges")


In [None]:
# Load or compute heuristics
# The heuristic scores are cached on disk. If the cache cannot be read for
# any reason (missing file, unreadable contents) they are recomputed from the
# graph and the cache is rewritten.
try:
    heuristics = torch.load("data/processed/facebook_heuristics.pt")
except Exception:
    # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit propagate rather than silently triggering a recompute.
    print("Computing heuristics...")
    heuristics = compute_heuristics(data)
    torch.save(heuristics, "data/processed/facebook_heuristics.pt")
else:
    print("Loaded precomputed heuristics")


In [None]:
# Evaluate heuristics on test set
test_edges = link_data['test_edges']
test_labels = link_data['test_labels'].numpy()

# Build the (src, dst) test pairs once: they are the same for every
# heuristic, so converting the tensor inside the per-method loop would repeat
# the work (and pay for a per-element .item() call) for each method.
# Row 0 of test_edges holds the sources and row 1 the destinations.
edge_pairs = list(zip(test_edges[0].tolist(), test_edges[1].tolist()))

results = {}
for method_name, scores_dict in heuristics.items():
    # Get scores for test edges; pairs without an entry are scored 0.0.
    test_scores = np.array([scores_dict.get(edge, 0.0) for edge in edge_pairs])

    # Normalize scores to a maximum of 1 (skipped when no score is positive,
    # which also avoids dividing by zero).
    if test_scores.max() > 0:
        test_scores = test_scores / test_scores.max()

    # Compute metrics
    metrics = compute_metrics(test_scores, test_labels)
    ranking_metrics = compute_ranking_metrics(test_scores, test_labels, k_values=[5, 10, 20])

    # Store classification and ranking metrics together under the method name.
    results[method_name] = {**metrics, **ranking_metrics}
    print(f"{method_name}: AUC={metrics['auc']:.4f}, AP={metrics['ap']:.4f}, P@10={ranking_metrics['precision@10']:.4f}")


In [None]:
# Train Node2Vec baseline
print("Training Node2Vec...")
node2vec = Node2VecBaseline(
    dimensions=64,
    walk_length=30,
    num_walks=200,
)
node2vec.train(data.edge_index, data.num_nodes)

# Evaluate Node2Vec
# Min-max rescale the raw predictions; the 1e-8 term keeps the denominator
# non-zero when every score is identical.
# NOTE(review): assumes predict() returns an array-like supporting
# .min()/.max() and elementwise arithmetic - confirm.
node2vec_scores = node2vec.predict(test_edges)
lowest_score = node2vec_scores.min()
score_span = node2vec_scores.max() - lowest_score
node2vec_scores = (node2vec_scores - lowest_score) / (score_span + 1e-8)

cutoffs = [5, 10, 20]
metrics_node2vec = compute_metrics(node2vec_scores, test_labels)
ranking_metrics_node2vec = compute_ranking_metrics(node2vec_scores, test_labels, k_values=cutoffs)

# Merge both metric mappings; ranking metrics win on any key collision.
node2vec_summary = dict(metrics_node2vec)
node2vec_summary.update(ranking_metrics_node2vec)
results['node2vec'] = node2vec_summary
print(f"Node2Vec: AUC={metrics_node2vec['auc']:.4f}, AP={metrics_node2vec['ap']:.4f}, P@10={ranking_metrics_node2vec['precision@10']:.4f}")
