In [30]:
import numpy as np
import pandas as pd

In [31]:
# Read the semicolon-separated ratings file, pinning compact dtypes per column.
ratings_dtypes = {'User-ID': np.int32, 'ISBN': str, 'Rating': np.int8}
ratings_df = pd.read_csv('../data/Ratings.csv', sep=';', dtype=ratings_dtypes)

# Rename the three columns positionally to the short names used from here on.
ratings_df.columns = ['user', 'item', 'label']
ratings_df.head()

Unnamed: 0,user,item,label
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [32]:
# Report the size of the data before any filtering.
print('Total ratings:', ratings_df.shape[0])

# Keep only rows with a label above 0, then drop rows with missing values.
# dropna() returns a new frame, so the result must be assigned back; without
# the assignment the rows are only hidden in the display and still reach the
# later cells.
ratings_df = ratings_df[ratings_df['label'] > 0].dropna()
ratings_df

Total ratings: 1149780


Unnamed: 0,user,item,label
1,276726,0155061224,5
3,276729,052165615X,3
4,276729,0521795028,6
6,276736,3257224281,8
7,276737,0600570967,6
...,...,...,...
1149773,276704,0806917695,5
1149775,276704,1563526298,9
1149777,276709,0515107662,10
1149778,276721,0590442449,10


In [33]:
from libreco.algorithms import ItemCF
from libreco.data import DatasetPure
from libreco.data import random_split

# Keep only the three columns that are passed to the split below.
ratings_df = ratings_df.loc[:, ["user", "item", "label"]]

# Hold out 20% of the rows for evaluation.
train_df, eval_df = random_split(ratings_df, test_size=0.2)

train_data, data_info = DatasetPure.build_trainset(train_df)
eval_data = DatasetPure.build_evalset(eval_df)

# Build and train the ItemCF model on the ranking task.
itemcf_params = {"k_sim": 10, "sim_type": "cosine", "min_common": 1}
model = ItemCF(task="ranking", data_info=data_info, **itemcf_params)

model.fit(train_data, neg_sampling=True)


Training start time: [35m2024-08-11 20:23:50[0m
Final block size and num: (1251, 127)
sim_matrix elapsed: 93.239s
sim_matrix, shape: (158852, 158852), num_elements: 96093388, density: 0.3808 %


top_k: 100%|██████████| 158852/158852 [00:24<00:00, 6539.79it/s] 


In [34]:
from libreco.evaluation import evaluate

# Metrics to compute on the held-out split.
ranking_metrics = ["precision", "recall", "ndcg"]
eval_result = evaluate(
    model,
    eval_data,
    neg_sampling=True,
    metrics=ranking_metrics,
)
print(f"Evaluation Results:\n{eval_result}")

eval_listwise:   1%|          | 156/16407 [00:01<01:25, 189.01it/s]

[31mno suitable recommendation for user 39925, return default recommendation[0m


eval_listwise:   2%|▏         | 259/16407 [00:01<00:50, 317.04it/s]

[31mno suitable recommendation for user 25412, return default recommendation[0m
[31mno suitable recommendation for user 44239, return default recommendation[0m
[31mno suitable recommendation for user 12084, return default recommendation[0m


eval_listwise:   2%|▏         | 376/16407 [00:01<00:36, 436.60it/s]

[31mno suitable recommendation for user 56690, return default recommendation[0m
[31mno suitable recommendation for user 17623, return default recommendation[0m


eval_listwise:   3%|▎         | 511/16407 [00:01<00:29, 531.08it/s]

[31mno suitable recommendation for user 53915, return default recommendation[0m
[31mno suitable recommendation for user 66434, return default recommendation[0m


eval_listwise:   5%|▍         | 804/16407 [00:02<00:22, 686.16it/s]

[31mno suitable recommendation for user 13863, return default recommendation[0m
[31mno suitable recommendation for user 1751, return default recommendation[0m


eval_listwise: 100%|██████████| 16407/16407 [00:08<00:00, 1962.18it/s]


Evaluation Results:
{'precision': 0.004382275857865545, 'recall': 0.014942970740036218, 'ndcg': 0.033249598472747145}


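**Removing rare books** — books with 20 or fewer ratings are dropped, then the model is retrained and re-evaluated on the remaining data.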

In [35]:
# A book is treated as rare when it has at most this many ratings.
RARE_BOOK_MAX_RATINGS = 20

# Count ratings per item. The counts column is named explicitly because the
# default name of the value_counts() result differs between pandas versions
# ("count" from 2.0 on, the source column name before), and the lookup below
# relies on "count".
rating_count = ratings_df["item"].value_counts().rename("count").to_frame()
rare_books = rating_count[rating_count["count"] <= RARE_BOOK_MAX_RATINGS].index

# Drop every rating that belongs to a rare book.
ratings_df = ratings_df[~ratings_df["item"].isin(rare_books)]

print('Total ratings (rare books excluded):', ratings_df.shape[0])

Total ratings (rare books excluded): 94503


In [36]:
# Re-split the current ratings_df, again holding out 20% for evaluation.
train_df, eval_df = random_split(ratings_df, test_size=0.2)

train_data, data_info = DatasetPure.build_trainset(train_df)
eval_data = DatasetPure.build_evalset(eval_df)

# Build and train ItemCF with the same hyperparameters as the earlier run.
itemcf_params = {"k_sim": 10, "sim_type": "cosine", "min_common": 1}
model = ItemCF(task="ranking", data_info=data_info, **itemcf_params)

model.fit(train_data, neg_sampling=True)

Training start time: [35m2024-08-11 20:25:57[0m
Final block size and num: (2034, 1)
sim_matrix elapsed: 0.055s
sim_matrix, shape: (2034, 2034), num_elements: 1217048, density: 29.4175 %


top_k: 100%|██████████| 2034/2034 [00:00<00:00, 8465.58it/s]


In [37]:
from libreco.evaluation import evaluate

# Metrics to compute on the held-out split of the current model.
ranking_metrics = ["precision", "recall", "ndcg"]
eval_result = evaluate(
    model,
    eval_data,
    neg_sampling=True,
    metrics=ranking_metrics,
)
print(f"Evaluation Results:\n{eval_result}")

eval_listwise: 100%|██████████| 1676/1676 [00:01<00:00, 972.62it/s] 


Evaluation Results:
{'precision': 0.02199015366253916, 'recall': 0.10206671918612688, 'ndcg': 0.11273428439190147}
