In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from libreco.data import DatasetPure, split_by_ratio_chrono
from libreco.algorithms import UserCF, ItemCF

Instructions for updating:
non-resource variables are not supported in the long term


In [None]:
# Input CSV locations, relative to the notebook's working directory.
ITEM_DATA_PATH = './data/movies.csv'
USER_DATA_PATH = './data/users.csv'
RATINGS_DATA_PATH = './data/ratings.csv'

# Passed as `test_size` to the train/eval split further down.
TEST_SIZE = 0.2

In [None]:
# Read the ratings, users and movies CSVs (in that order) into DataFrames.
ratings_df, user_df, item_df = (
    pd.read_csv(csv_path)
    for csv_path in (RATINGS_DATA_PATH, USER_DATA_PATH, ITEM_DATA_PATH)
)

In [None]:
# Lookup table movie_id -> title, used to print readable movie names.
movie_id_name_map = {
    m_id: m_title
    for m_id, m_title in zip(item_df['movie_id'], item_df['title'])
}

In [4]:
def reset_state(name):
    """Reset the default TF1-compat graph and print a banner for a new run.

    Args:
        name: Label shown between the two "=" rules in the banner line.
    """
    tf.compat.v1.reset_default_graph()
    banner_rule = "=" * 30
    print("\n", banner_rule, name, banner_rule)

### Transform the original dataset into the format LibRecommender expects

In [None]:
# Column names expected by lib-rec: user / item / label / time.
libreco_columns = {'user_id': 'user', 'item_id': 'item', 'rating': 'label', 'timestamp': 'time'}
ratings_df.rename(columns=libreco_columns, inplace=True)

# Train/test split; TEST_SIZE is the held-out ratio.
raw_train, raw_eval = split_by_ratio_chrono(ratings_df, test_size=TEST_SIZE)

# DatasetPure since we're using a pure model.
# The raw split frames keep their own names so `train_data` / `eval_data`
# only ever refer to the built datasets.
train_data, train_data_info = DatasetPure.build_trainset(raw_train)
eval_data = DatasetPure.build_evalset(raw_eval)
print(train_data_info)

n_users: 943, n_items: 1613, data density: 5.2595 %


### User-based CF

In [13]:
# Train the user-based CF model.
# Reset TF state and print the section banner before anything else runs.
reset_state("user_cf")

# Evaluation metrics; this list is reused by the item-based CF cell below.
metrics = [
    "loss",
    "balanced_accuracy",
    "precision",
    "recall",
    "map",
    "ndcg",
]

# Model configuration: ranking task, cosine similarity, k_sim=20.
user_cf = UserCF(
    task="ranking", data_info=train_data_info,
    k_sim=20, sim_type="cosine",
    num_threads=1, min_common=1,
)

# Fit with negative sampling and evaluate on the held-out split.
user_cf.fit(
    train_data, neg_sampling=True, verbose=2,
    eval_data=eval_data, metrics=metrics,
)


Training start time: [35m2025-07-06 17:59:28[0m
Final block size and num: (943, 1)
sim_matrix elapsed: 0.026s
sim_matrix, shape: (943, 943), num_elements: 837062, density: 94.1313 %


top_k: 100%|██████████| 943/943 [00:00<00:00, 5063.74it/s]
eval_pointwise:  20%|██        | 1/5 [00:00<00:00,  5.90it/s]

[31mNo common interaction or similar neighbor for user 0 and item 1031, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 0 and item 2, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 0 and item 40, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 0 and item 1187, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 0 and item 1091, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 0 and item 1439, proceed with default prediction[0m


eval_pointwise: 100%|██████████| 5/5 [00:00<00:00,  6.12it/s]
eval_listwise: 100%|██████████| 189/189 [00:03<00:00, 57.84it/s]


	 eval log_loss: 3.9621
	 eval balanced_accuracy: 0.5035
	 eval precision@10: 0.1639
	 eval recall@10: 0.1164
	 eval map@10: 0.3111
	 eval ndcg@10: 0.4172


In [33]:
# Inference on a sample user with the user-based CF model.
user_id = 2
movie_id = 56
top_n = 7
separator = "-" * 50

# 1) Predicted score for a single (user, movie) pair.
print(f"user_id: {user_id}\n")
sample_title = movie_id_name_map[movie_id]
predicted_score = user_cf.predict(user=user_id, item=movie_id)
print(f"prediction for {sample_title}: ", predicted_score)
print(separator)

# 2) Top-N recommendations, mapped from movie ids to titles.
rec_ids = user_cf.recommend_user(user=user_id, n_rec=top_n)[user_id]
top_n_recs = [movie_id_name_map[m_id] for m_id in rec_ids]
print(f"\ntop-N recommendations: {user_id} : \n", top_n_recs)
print(separator)

# 3) The user's own ratings joined with movie titles, highest rating first.
user_ratings = (
    ratings_df[ratings_df['user'] == user_id]
    .merge(item_df, left_on='item', right_on='movie_id')[['title', 'label']]
    .sort_values(by='label', ascending=False)
)
print("\nTop Rated movies by user:")
print(user_ratings.head(10))

user_id: 2

prediction for Pulp Fiction (1994):  0.15837383
--------------------------------------------------

top-N recommendations: 2 : 
 ['Lone Star (1996)', 'Big Night (1996)', 'Star Wars (1977)', 'Boot, Das (1981)', 'Cold Comfort Farm (1995)', 'Evita (1996)', 'People vs. Larry Flynt, The (1996)']
--------------------------------------------------

Top Rated movies by user:
                            title  label
43  Wings of the Dove, The (1997)      5
55       Good Will Hunting (1997)      5
27                   Kolya (1996)      5
28                    Emma (1996)      5
51          Godfather, The (1972)      5
52          Secrets & Lies (1996)      5
1          Shall We Dance? (1996)      5
17                 Titanic (1997)      5
14      As Good As It Gets (1997)      5
50                   Fargo (1996)      5


### Item-based CF

In [34]:
# Train the item-based CF model.
# Reset TF state and print the section banner, exactly as the user-based CF
# training cell does, so this run's log is clearly separated in the output.
reset_state("item_cf")

# Same hyperparameters as the user-based model so the two runs are comparable.
item_cf = ItemCF(
    task="ranking",
    data_info=train_data_info,
    k_sim=20,
    sim_type="cosine",
    num_threads=1,
    min_common=1,
)

# NOTE: `metrics` is defined in the user-based CF training cell; run that cell first.
item_cf.fit(
    train_data,
    neg_sampling=True,
    verbose=2,
    eval_data=eval_data,
    metrics=metrics,
)

Training start time: [35m2025-07-06 18:36:00[0m
Final block size and num: (1613, 1)
sim_matrix elapsed: 0.074s
sim_matrix, shape: (1613, 1613), num_elements: 1575868, density: 60.5691 %


top_k: 100%|██████████| 1613/1613 [00:00<00:00, 5269.30it/s]
eval_pointwise:   0%|          | 0/5 [00:00<?, ?it/s]

[31mNo common interaction or similar neighbor for user 2 and item 320, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 2 and item 508, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 2 and item 327, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 2 and item 431, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 2 and item 326, proceed with default prediction[0m
[31mNo common interaction or similar neighbor for user 2 and item 340, proceed with default prediction[0m


eval_pointwise: 100%|██████████| 5/5 [00:01<00:00,  4.30it/s]
eval_listwise: 100%|██████████| 189/189 [00:02<00:00, 91.36it/s] 

	 eval log_loss: 2.3414
	 eval balanced_accuracy: 0.5069
	 eval precision@10: 0.1685
	 eval recall@10: 0.1215
	 eval map@10: 0.3274
	 eval ndcg@10: 0.4417





In [35]:
# Inference on a sample user with the item-based CF model.
user_id = 2
movie_id = 56
top_n = 7
separator = "-" * 50

# 1) Predicted score for a single (user, movie) pair.
print(f"user_id: {user_id}\n")
sample_title = movie_id_name_map[movie_id]
predicted_score = item_cf.predict(user=user_id, item=movie_id)
print(f"prediction for {sample_title}: ", predicted_score)
print(separator)

# 2) Top-N recommendations, mapped from movie ids to titles.
rec_ids = item_cf.recommend_user(user=user_id, n_rec=top_n)[user_id]
top_n_recs = [movie_id_name_map[m_id] for m_id in rec_ids]
print(f"\ntop-N recommendations: {user_id} : \n", top_n_recs)
print(separator)

# 3) The user's own ratings joined with movie titles, highest rating first.
user_ratings = (
    ratings_df[ratings_df['user'] == user_id]
    .merge(item_df, left_on='item', right_on='movie_id')[['title', 'label']]
    .sort_values(by='label', ascending=False)
)
print("\nTop Rated movies by user:")
print(user_ratings.head(10))

user_id: 2

prediction for Pulp Fiction (1994):  0.31714958
--------------------------------------------------

top-N recommendations: 2 : 
 ['Star Wars (1977)', 'Rock, The (1996)', 'Return of the Jedi (1983)', 'Independence Day (ID4) (1996)', 'Twelve Monkeys (1995)', 'Mission: Impossible (1996)', 'Ransom (1996)']
--------------------------------------------------

Top Rated movies by user:
                            title  label
43  Wings of the Dove, The (1997)      5
55       Good Will Hunting (1997)      5
27                   Kolya (1996)      5
28                    Emma (1996)      5
51          Godfather, The (1972)      5
52          Secrets & Lies (1996)      5
1          Shall We Dance? (1996)      5
17                 Titanic (1997)      5
14      As Good As It Gets (1997)      5
50                   Fargo (1996)      5
