In [1]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m57.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [2]:
# 2) Imports & load your CSV
import pandas as pd
import torch
from torch_geometric.data import HeteroData
from torch_geometric.nn import HeteroConv, SAGEConv
from sklearn.model_selection import train_test_split

# load interactions
df = pd.read_csv('user_post_interactions.csv')
# df columns: [user_id, post_id, viewed, liked]
df.head()


Unnamed: 0,user_id,post_id,viewed,liked
0,5,1346,1,0
1,5,1357,1,0
2,5,1365,1,0
3,5,1367,1,0
4,5,1358,1,0


In [3]:
# 3) Re‑index user_ids and post_ids to contiguous integers
user2idx = {u:i for i,u in enumerate(df['user_id'].unique())}
post2idx = {p:i for i,p in enumerate(df['post_id'].unique())}

df['u_idx'] = df['user_id'].map(user2idx)
df['p_idx'] = df['post_id'].map(post2idx)

num_users = len(user2idx)
num_posts = len(post2idx)
print(f"{num_users=}  {num_posts=}")


num_users=631  num_posts=1210


In [4]:
# 4) Build a HeteroData graph
data = HeteroData()
data['user'].num_nodes = num_users
data['post'].num_nodes = num_posts

# Edges: separate out 'viewed' and 'liked'
viewed_df = df[df['viewed'] == 1]
liked_df  = df[df['liked'] == 1]

viewed_edges = torch.tensor([viewed_df['u_idx'].values, viewed_df['p_idx'].values], dtype=torch.long)
liked_edges  = torch.tensor([liked_df['u_idx'].values, liked_df['p_idx'].values], dtype=torch.long)

# Add 'viewed' and 'liked' edges (both directions)
data['user', 'viewed', 'post'].edge_index = viewed_edges
data['post', 'rev_viewed', 'user'].edge_index = viewed_edges.flip(0)

data['user', 'liked', 'post'].edge_index = liked_edges
data['post', 'rev_liked', 'user'].edge_index = liked_edges.flip(0)

# Labels only on 'liked' edges
data['user', 'liked', 'post'].edge_label = torch.ones(liked_edges.size(1))  # all are positive


# 5) Split edges into train/test
liked_edges_np = liked_edges.t().numpy()
train_idx, test_idx = train_test_split(liked_edges_np, test_size=0.2, random_state=42)

def set_split(edges, name):
    ei = torch.tensor(edges.T, dtype=torch.long)
    data['user', 'liked', 'post'][f'{name}_edge_index'] = ei
    data['user', 'liked', 'post'][f'{name}_edge_label'] = torch.ones(ei.shape[1])  # positive

set_split(train_idx, 'train')
set_split(test_idx,  'test')

  viewed_edges = torch.tensor([viewed_df['u_idx'].values, viewed_df['p_idx'].values], dtype=torch.long)


In [5]:
# negative = viewed but not liked
viewed_set = set(map(tuple, viewed_df[['u_idx', 'p_idx']].values.tolist()))
liked_set  = set(map(tuple, liked_df[['u_idx', 'p_idx']].values.tolist()))
negative_edges = list(viewed_set - liked_set)

# sample same number as positive
import random
random.seed(42)
negative_edges = random.sample(negative_edges, len(liked_set))

# positive: label = 1
pos_edges = liked_edges_np.tolist()
pos_labels = [1] * len(pos_edges)

# negative: label = 0
neg_edges = negative_edges
neg_labels = [0] * len(neg_edges)

# combine and shuffle
all_edges = pos_edges + neg_edges
all_labels = pos_labels + neg_labels

from sklearn.utils import shuffle
all_edges, all_labels = shuffle(all_edges, all_labels, random_state=42)

# train/test split
train_e, test_e, train_y, test_y = train_test_split(all_edges, all_labels, test_size=0.2, random_state=42)

# convert to tensor format
def set_edge_data(edges, labels, split):
    ei = torch.tensor(edges, dtype=torch.long).T
    lab = torch.tensor(labels, dtype=torch.float)
    data['user', 'liked', 'post'][f'{split}_edge_index'] = ei
    data['user', 'liked', 'post'][f'{split}_edge_label'] = lab

set_edge_data(train_e, train_y, 'train')
set_edge_data(test_e,  test_y,  'test')


In [6]:
# 6) Define the GNN recommendation model
import torch.nn.functional as F

class GNNRecommender(torch.nn.Module):
    def __init__(self, hidden_dim):
        super().__init__()
        self.emb_user = torch.nn.Embedding(num_users, hidden_dim)
        self.emb_post = torch.nn.Embedding(num_posts, hidden_dim)

        self.conv1 = HeteroConv({
            ('user','viewed','post'): SAGEConv(hidden_dim, hidden_dim),
            ('post','rev_viewed','user'): SAGEConv(hidden_dim, hidden_dim),
            ('user','liked','post'): SAGEConv(hidden_dim, hidden_dim),
            ('post','rev_liked','user'): SAGEConv(hidden_dim, hidden_dim),
        }, aggr='sum')

        self.conv2 = HeteroConv({
            ('user','viewed','post'): SAGEConv(hidden_dim, hidden_dim),
            ('post','rev_viewed','user'): SAGEConv(hidden_dim, hidden_dim),
            ('user','liked','post'): SAGEConv(hidden_dim, hidden_dim),
            ('post','rev_liked','user'): SAGEConv(hidden_dim, hidden_dim),
        }, aggr='sum')

    def forward(self, data):
        x_dict = {
            'user': self.emb_user.weight,
            'post': self.emb_post.weight
        }
        x_dict = self.conv1(x_dict, data.edge_index_dict)
        x_dict = {k: F.relu(v) for k,v in x_dict.items()}
        x_dict = self.conv2(x_dict, data.edge_index_dict)
        return x_dict

    def decode(self, user_emb, post_emb, edge_index):
        u, p = edge_index
        return (user_emb[u] * post_emb[p]).sum(dim=1)


model = GNNRecommender(hidden_dim=128)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()


In [7]:
# 7) Training loop (full‐batch)
data = data.to('cpu')
for epoch in range(1, 51):
    model.train()
    optimizer.zero_grad()
    x_dict = model(data)

    ei = data['user','liked','post'].train_edge_index
    y = data['user','liked','post'].train_edge_label

    logits = model.decode(x_dict['user'], x_dict['post'], ei)
    loss = criterion(logits, y)
    loss.backward()
    optimizer.step()

    if epoch % 5 == 0:
        model.eval()
        with torch.no_grad():
            ei_test = data['user','liked','post'].test_edge_index
            y_test  = data['user','liked','post'].test_edge_label
            test_logits = model.decode(x_dict['user'], x_dict['post'], ei_test)
            test_loss = criterion(test_logits, y_test)
        print(f"Epoch {epoch:02d}, Train Loss: {loss:.4f}, Test Loss: {test_loss:.4f}")


Epoch 05, Train Loss: 6.6931, Test Loss: 7.5522
Epoch 10, Train Loss: 1.0452, Test Loss: 3.1402
Epoch 15, Train Loss: 0.4727, Test Loss: 3.6700
Epoch 20, Train Loss: 0.2510, Test Loss: 3.4082
Epoch 25, Train Loss: 0.1825, Test Loss: 3.2447
Epoch 30, Train Loss: 0.1503, Test Loss: 3.4447
Epoch 35, Train Loss: 0.1346, Test Loss: 3.6085
Epoch 40, Train Loss: 0.1182, Test Loss: 3.5630
Epoch 45, Train Loss: 0.1023, Test Loss: 3.5469
Epoch 50, Train Loss: 0.0910, Test Loss: 3.5717


In [8]:
# 8) Recommendation function: top‑k posts for a given user_id
import numpy as np

def recommend_posts(model, data, user_id, k=5):
    model.eval()
    u_idx = user2idx[user_id]

    # get updated embeddings
    x_dict = model(data)
    user_emb = x_dict['user'][u_idx].unsqueeze(0)  # shape: (1, dim)
    post_emb = x_dict['post']                      # shape: (num_posts, dim)

    # compute dot-product scores
    scores = (user_emb * post_emb).sum(dim=1).detach().cpu().numpy()

    # filter out posts the user has already interacted with
    seen = set(df[df['user_id'] == user_id]['p_idx'].tolist())
    candidates = [(i, s) for i, s in enumerate(scores) if i not in seen]
    top_k = sorted(candidates, key=lambda x: x[1], reverse=True)[:k]

    # convert internal indices back to original post_ids
    inv_post = {v: k for k, v in post2idx.items()}
    return [int(inv_post[i]) for i, _ in top_k]

# example: recommend for user_id = 5
print("Top recommendations for user 5 →", recommend_posts(model, data, user_id=5, k=5))


Top recommendations for user 5 → [77, 228, 378, 577, 80]


In [9]:
# save model weights and user/post embeddings (optional)
torch.save({
    'model_state_dict': model.state_dict(),
    'user2idx': user2idx,
    'post2idx': post2idx
}, 'multi_rel_gnn_model.pth')

torch.save(data, 'graph_data.pt')


In [10]:
# load mapping and model
checkpoint = torch.load('multi_rel_gnn_model.pth',weights_only=False)

# recreate model
model = GNNRecommender(hidden_dim=128)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# restore user and post ID mappings
user2idx = checkpoint['user2idx']
post2idx = checkpoint['post2idx']


In [11]:
def recommend_posts(model, data, user_id, k=5):
    model.eval()
    u_idx = user2idx.get(user_id)
    if u_idx is None:
        raise ValueError(f"User ID {user_id} not found.")

    with torch.no_grad():
        x_dict = model(data)
        user_emb = x_dict['user'][u_idx].unsqueeze(0)
        post_emb = x_dict['post']

        # dot product scores
        scores = (user_emb * post_emb).sum(dim=1).detach().cpu().numpy()

        # remove already seen posts
        seen = set(df[df['user_id'] == user_id]['p_idx'].tolist())
        candidates = [(i, s) for i, s in enumerate(scores) if i not in seen]
        top_k = sorted(candidates, key=lambda x: x[1], reverse=True)[:k]

        inv_post = {v: k for k, v in post2idx.items()}
        return [int(inv_post[i]) for i, _ in top_k]


In [12]:
top_posts = recommend_posts(model, data, user_id=5, k=20)
print("Recommended post_ids:", top_posts)


Recommended post_ids: [77, 228, 378, 577, 80, 604, 603, 432, 457, 424, 487, 417, 498, 456, 466, 428, 425, 1531, 296, 2809]
