In [None]:
import cornac
from cornac.eval_methods import RatioSplit
from cornac.models import MF, PMF, BPR
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP
from cornac.data import Dataset

# load the built-in MovieLens 100K and split the data based on ratio
ml_100k = cornac.datasets.movielens.load_feedback()
rs = RatioSplit(data=ml_100k, test_size=0.2, rating_threshold=4.0, seed=123)

# initialize models, here we are comparing: Biased MF, PMF, and BPR
mf = MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=123)
pmf = PMF(k=10, max_iter=100, learning_rate=0.001, lambda_reg=0.001, seed=123)
bpr = BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=123)
models = [mf, pmf, bpr]

# define metrics to evaluate the models
metrics = [MAE(), RMSE(), Precision(k=10), Recall(k=10), NDCG(k=10), AUC(), MAP()]

# put it together in an experiment, voilà!
cornac.Experiment(eval_method=rs, models=models, metrics=metrics, user_based=True).run()

In [6]:
import pandas as pd
from cornac.data import Dataset

# Load your dataset
df = pd.read_csv('./hotels_users_ratings.xlsx - merge_hotel_user_data.csv', encoding='utf-8')

In [7]:
# Prepare the feedback data (user_id, item_id, rating)
feedback = [(row['UserID'], row['HotelID'], row['Rating']) for _, row in df.iterrows()]

In [8]:
feedback

[(7342, 2638, 9),
 (5732, 2638, 10),
 (9445, 2638, 10),
 (5508, 2638, 10),
 (7770, 2638, 10),
 (3988, 2638, 10),
 (1289, 1456, 2),
 (4001, 1456, 2),
 (2902, 1456, 1),
 (2037, 1456, 1),
 (884, 1456, 8),
 (997, 1456, 7),
 (507, 1456, 8),
 (7179, 1456, 9),
 (5204, 1456, 8),
 (9076, 1456, 10),
 (10353, 1456, 3),
 (7174, 1456, 1),
 (10839, 1456, 4),
 (8355, 1456, 5),
 (11021, 1456, 5),
 (5529, 1456, 6),
 (9359, 1456, 1),
 (840, 1456, 10),
 (3298, 1456, 10),
 (9470, 1456, 10),
 (3059, 1064, 5),
 (9470, 1064, 1),
 (273, 1064, 7),
 (6916, 1064, 7),
 (9463, 1064, 5),
 (10097, 1064, 7),
 (8092, 1064, 6),
 (7221, 1064, 3),
 (4506, 1064, 4),
 (9352, 1064, 5),
 (2882, 2225, 10),
 (10458, 2225, 9),
 (4246, 2225, 10),
 (9726, 2225, 9),
 (1951, 2225, 10),
 (11042, 2225, 10),
 (6654, 2225, 8),
 (9194, 2225, 10),
 (8993, 2225, 9),
 (7838, 2225, 10),
 (6420, 1353, 10),
 (4902, 1353, 10),
 (4927, 1353, 10),
 (4246, 1353, 10),
 (9613, 1353, 10),
 (431, 1353, 9),
 (4824, 1353, 10),
 (8096, 1353, 10),
 (6487

In [9]:
# Create a Cornac dataset
dataset = Dataset.from_uir(feedback)



In [13]:
eval_method = RatioSplit(data=feedback, test_size=0.2, rating_threshold=1.0)




In [17]:
import cornac
from cornac.eval_methods import RatioSplit
from cornac.models import MF, PMF, BPR
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP

# Initialize the model
mf = MF(k=10, max_iter=25, learning_rate=0.01, lambda_reg=0.02, use_bias=True, seed=123)

models = [mf]

# define metrics to evaluate the models
metrics = [MAE(), RMSE(), Precision(k=10), Recall(k=10), NDCG(k=10), AUC(), MAP()]

# put it together in an experiment, voilà!
cornac.Experiment(eval_method=eval_method, models=models, metrics=metrics, user_based=True).run()


TEST:
...
   |    MAE |   RMSE |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
-- + ------ + ------ + ------ + ------ + ------- + ------------ + --------- + --------- + --------
MF | 1.1980 | 1.3006 | 0.6858 | 0.0597 |  0.1045 |       0.0513 |    0.1795 |    0.1380 |  11.6353



In [19]:
mf.save(save_dir='save_dir', save_trainset=True)

'save_dir\\MF\\2024-12-14_01-45-30-064514.pkl'

In [20]:
import cornac
from cornac.eval_methods import RatioSplit
from cornac.models import MF, PMF, BPR
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, AUC, MAP

# load the built-in MovieLens 100K and split the data based on ratio
ml_100k = cornac.datasets.movielens.load_feedback()
rs = RatioSplit(data=ml_100k, test_size=0.2, rating_threshold=4.0, seed=123)

# initialize models, here we are comparing: Biased MF, PMF, and BPR

bpr = BPR(k=10, max_iter=200, learning_rate=0.001, lambda_reg=0.01, seed=123)
models = [bpr]

# define metrics to evaluate the models
metrics = [MAE(), RMSE(), Precision(k=10), Recall(k=10), NDCG(k=10), AUC(), MAP()]

# put it together in an experiment, voilà!
cornac.Experiment(eval_method=rs, models=models, metrics=metrics, user_based=True).run()

Data from http://files.grouplens.org/datasets/movielens/ml-100k/u.data
will be cached into C:\Users\GemiBook\.cornac\ml-100k/u.data


1.99MB [00:06, 285kB/s]                             


File cached!

TEST:
...
    |    MAE |   RMSE |    AUC |    MAP | NDCG@10 | Precision@10 | Recall@10 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------- + ------------ + --------- + --------- + --------
BPR | 2.0143 | 2.2267 | 0.8695 | 0.1042 |  0.1500 |       0.1110 |    0.1195 |    3.8492 |   3.4473



In [22]:
bpr.save("./venv/var/save_dir", save_trainset=True)

'./venv/var/save_dir\\BPR\\2024-12-14_02-01-14-735665.pkl'

In [2]:
import pandas as pd
import json

# Đọc file Excel
df = pd.read_csv('hotels_users_ratings.xlsx - merge_hotel_user_data.csv')
len(df)
# Chọn các cột cần thiết
# Giả sử bạn muốn lấy UserID và User từ dữ liệu
users_data = df[['UserID', 'User']].drop_duplicates()
len(users_data)

# Chuyển đổi dữ liệu thành danh sách các từ điển
users_list = users_data.to_dict(orient='records')

# Xuất danh sách thành file JSON
with open('users.json', 'w', encoding='utf-8') as json_file:
    json.dump(users_list, json_file, ensure_ascii=False, indent=4)

print("Đã tạo file users.json thành công!")


Đã tạo file users.json thành công!


In [3]:
len(users_data)

11149