In [None]:
import pandas as pd
# Load the tab-separated ratings and the first two pipe-separated columns
# (id, title) of the movie catalogue.
data = pd.read_csv('u.data', sep='\t', names=['user', 'item', 'rating', 'timestamp'])
movies = pd.read_csv('u.item', sep='|', encoding='latin-1', header=None, usecols=[0, 1], names=['item', 'title'])

# Only ratings of 4 or higher are kept; the rest of the notebook treats these
# rows as positive feedback.
data = data.loc[data['rating'].ge(4)]

# "Warm" users are those with more than zero remaining ratings.
# NOTE(review): value_counts() only lists users that still appear, so with a
# threshold of 0 this filter keeps everyone - raise it if a real cut is wanted.
user_counts = data['user'].value_counts()
warm_users = user_counts.loc[user_counts > 0].index
data = data.loc[data['user'].isin(warm_users)]

# Attach titles for a quick visual check of the filtered ratings.
merged = data.merge(movies, on='item')
print(merged.loc[:, ['user', 'item', 'title', 'rating']].head(10))

   user  item                                              title  rating
0   298   474  Dr. Strangelove or: How I Learned to Stop Worr...       4
1   253   465                            Jungle Book, The (1994)       5
2   286  1014      Romy and Michele's High School Reunion (1997)       5
3   200   222                    Star Trek: First Contact (1996)       5
4   122   387                       Age of Innocence, The (1993)       5
5   291  1042                                  Just Cause (1995)       4
6   119   392                     Man Without a Face, The (1993)       4
7   167   486                                     Sabrina (1954)       4
8   299   144                                    Die Hard (1988)       4
9   308     1                                   Toy Story (1995)       4


In [None]:
import numpy as np

# Distinct raw ids, in order of first appearance in `data`.
users = data['user'].unique()
items = data['item'].unique()

# Raw id -> contiguous 0-based index (the position of the id in the arrays above).
user_map = dict(zip(users, range(len(users))))
item_map = dict(zip(items, range(len(items))))

# Add the index columns next to the raw ids.
data = data.assign(
    user_id=data['user'].map(user_map),
    item_id=data['item'].map(item_map),
)

num_users, num_items = len(users), len(items)
print(f"Number of users: {num_users}, Number of items: {num_items}")

Number of users: 942, Number of items: 1447


In [None]:
# user index -> set of item indices that user has in `data`.
user_positive_items = (
    data.groupby('user_id')['item_id']
    .apply(set)
    .to_dict()
)
# Every user that appears in `data` must own at least one item.
assert not any(len(positives) == 0 for positives in user_positive_items.values())
print(f"Sample user 0 positive items: {user_positive_items[0]}")

Sample user 0 positive items: {0, 1, 8, 9, 13, 14, 15, 17, 28, 29, 31, 32, 37, 42, 47, 53, 55, 57, 59, 60, 61, 62, 68, 70, 71, 77, 597, 603, 91, 95, 101, 104, 110, 624, 114, 115, 120, 645, 137, 143, 145, 146, 661, 149, 151, 157, 677, 166, 167, 169, 172, 178, 183, 190, 191, 710, 202, 204, 207, 212, 215, 216, 218, 732, 222, 236, 754, 256, 257, 264, 275, 276, 321, 838, 340, 344, 348, 361, 367, 390, 392, 404, 409, 426, 430, 448, 465, 989, 495, 497, 500}


In [None]:
# Seed NumPy's global generator so the factor initialisation below is repeatable.
np.random.seed(42)

# Hyper-parameters read by the training cells.
latent_factors = 20    # width of each user / item factor vector
learning_rate = 0.01   # step size applied in bpr_update
reg = 0.01             # coefficient of the shrinkage term in bpr_update
epochs = 20            # number of passes made by the training loop

# Gaussian initialisation (mean 0, std 0.1): one row per user and one per item.
user_factors = np.random.normal(loc=0, scale=0.1, size=(num_users, latent_factors))
item_factors = np.random.normal(loc=0, scale=0.1, size=(num_items, latent_factors))
print(f"user_factors shape: {user_factors.shape}, item_factors shape: {item_factors.shape}")

user_factors shape: (942, 20), item_factors shape: (1447, 20)


In [None]:
import random

def sample_triplet(rng=None):
    """Draw one (user, positive item, negative item) training triplet.

    The user index is drawn uniformly from ``0 .. num_users - 1``, the positive
    item uniformly from that user's entry in ``user_positive_items``, and the
    negative item by rejection sampling over ``0 .. num_items - 1`` until an
    index outside the user's positive set is found.

    Args:
        rng: Optional source of randomness exposing ``randint`` and ``choice``
            (for example a ``random.Random`` instance). When omitted, the
            module-level ``random`` functions are used, so ``random.seed``
            controls the sequence.

    Returns:
        Tuple ``(u, i, j)`` of user index, positive item index and negative
        item index.

    Raises:
        ValueError: If the sampled user has at least ``num_items`` positives,
            in which case rejection sampling could never terminate.
    """
    if rng is None:
        rng = random

    u = rng.randint(0, num_users - 1)
    positives = user_positive_items[u]

    # Without this guard the rejection loop below would spin forever for a user
    # with no negatives (assumes positives are indices within 0 .. num_items - 1).
    if len(positives) >= num_items:
        raise ValueError(f"user {u} has no negative items to sample from")

    i = rng.choice(list(positives))
    j = rng.randint(0, num_items - 1)
    while j in positives:
        j = rng.randint(0, num_items - 1)
    return u, i, j

# Smoke test: draw a single triplet and show it. Note that u, i and j become
# notebook-level variables here.
u, i, j = sample_triplet()
print(f"Sampled triplet: user {u}, positive item {i}, negative item {j}")


Sampled triplet: user 209, positive item 440, negative item 111


In [None]:
def stable_sigmoid(x):
    """Return ``1 / (1 + exp(x))`` with ``x`` first clamped to ``[-20, 20]``.

    Despite the name, this is the logistic function evaluated at ``-x``: it is
    0.5 at 0, approaches 0 for large positive ``x`` and 1 for large negative
    ``x``. The clamp keeps ``np.exp`` from overflowing on extreme inputs.

    Args:
        x: Scalar or array-like accepted by ``np.clip``.

    Returns:
        NumPy scalar or array with the same shape as ``x``.
    """
    bounded = np.clip(x, -20, 20)
    denominator = 1.0 + np.exp(bounded)
    return 1.0 / denominator

# Sanity checks, one value per line: expect 0.5 for 0, a value close to 0 for
# 100 and a value close to 1 for -100.
print(*map(stable_sigmoid, (0, 100, -100)), sep='\n')

0.5
2.0611536181902037e-09
0.9999999979388463


In [None]:
def bpr_update(u, i, j):
    """Apply one in-place update to the factors for the triplet ``(u, i, j)``.

    The score margin between positive item ``i`` and negative item ``j`` for
    user ``u`` is passed through ``stable_sigmoid``; the result weights the
    step taken on the user row and on both item rows. Each row is also shrunk
    by a ``reg``-scaled copy of itself, and every step is scaled by
    ``learning_rate``.

    Args:
        u: Row index into ``user_factors``.
        i: Row index into ``item_factors`` for the positive item.
        j: Row index into ``item_factors`` for the negative item.

    Returns:
        None. ``user_factors`` and ``item_factors`` are modified in place.
    """
    user_row = user_factors[u]
    pos_row = item_factors[i]
    neg_row = item_factors[j]

    # Predicted preference margin of the positive over the negative item.
    margin = np.dot(user_row, pos_row) - np.dot(user_row, neg_row)
    weight = stable_sigmoid(margin)

    # Snapshot the user vector: the item steps must use its pre-update value.
    old_user = user_row.copy()

    user_factors[u] += learning_rate * (weight * (pos_row - neg_row) - reg * old_user)
    item_factors[i] += learning_rate * (weight * old_user - reg * pos_row)
    item_factors[j] += learning_rate * (-weight * old_user - reg * neg_row)


In [None]:
# sample_triplet() draws from the stdlib `random` module, while only NumPy's
# generator is seeded earlier in the notebook. Seed `random` here as well
# (same value as the NumPy seed) so a fresh Restart & Run All replays the same
# sequence of triplets.
random.seed(42)

# Each epoch performs len(data) updates, i.e. one sampled triplet per row of `data`.
for epoch in range(epochs):
    for _ in range(len(data)):
        u, i, j = sample_triplet()
        bpr_update(u, i, j)
    print(f"Epoch {epoch + 1}/{epochs} completed")


Epoch 1/20 completed
Epoch 2/20 completed
Epoch 3/20 completed
Epoch 4/20 completed
Epoch 5/20 completed
Epoch 6/20 completed
Epoch 7/20 completed
Epoch 8/20 completed
Epoch 9/20 completed
Epoch 10/20 completed
Epoch 11/20 completed
Epoch 12/20 completed
Epoch 13/20 completed
Epoch 14/20 completed
Epoch 15/20 completed
Epoch 16/20 completed
Epoch 17/20 completed
Epoch 18/20 completed
Epoch 19/20 completed
Epoch 20/20 completed


In [None]:
def recommend(user_raw_id, top_k=5):
    """Print the ``top_k`` best-scoring items the user has no positive for.

    Scores are the dot products of every row of ``item_factors`` with the
    user's row of ``user_factors``. Items in the user's
    ``user_positive_items`` set are skipped, the rest are ordered by
    descending score, and the raw ids of the first ``top_k`` are looked up in
    ``movies`` for their titles.

    Args:
        user_raw_id: Raw user id; must be a key of ``user_map``.
        top_k: Maximum number of recommendations to print.

    Returns:
        None. The result is printed as ``"<raw item id>. <title>"`` lines, or
        a "not found" message when the user id is unknown.
    """
    try:
        user_idx = user_map[user_raw_id]
    except KeyError:
        print(f"User {user_raw_id} not found.")
        return

    scores = item_factors @ user_factors[user_idx]
    seen = user_positive_items[user_idx]

    # (item index, score) pairs for unseen items, best score first.
    ranked = sorted(
        (pair for pair in enumerate(scores) if pair[0] not in seen),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_raw_ids = [items[item_idx] for item_idx, _ in ranked[:top_k]]

    # Title lookup comes back in catalogue order; re-sort it by the item's
    # position in the ranking.
    picks = movies.loc[movies['item'].isin(top_raw_ids), ['item', 'title']]
    picks = picks.assign(rank=picks['item'].map(top_raw_ids.index)).sort_values('rank')

    print(f"Top {top_k} recommendations for user {user_raw_id}:")
    for row in picks.itertuples():
        print(f"{row.item}. {row.title}")
# Example: print the top-5 recommendations for raw user id 1.
recommend(1, top_k=5)

Top 5 recommendations for user 1:
318. Schindler's List (1993)
357. One Flew Over the Cuckoo's Nest (1975)
237. Jerry Maguire (1996)
603. Rear Window (1954)
483. Casablanca (1942)
