In [5]:
import os
import sys
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.utils.timer import Timer
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_chrono_split
from recommenders.evaluation.python_evaluation import (
    map, ndcg_at_k, precision_at_k, recall_at_k
)
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.utils.notebook_utils import store_metadata

In [6]:
## top k items to recommend
TOP_K = 10
# Select MovieLens data size: 100k, 1m, 10m, or 20m
#MOVIELENS_DATA_SIZE = '100k'

# Model parameters
EPOCHS = 20
BATCH_SIZE = 256

SEED = DEFAULT_SEED # Set None for non-deterministic results

In [14]:

df = pd.read_csv("/Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/dataset/100k/ratings.csv", engine="python", header=0, 
                      names=["userID", "itemID", "rating", "timestamp"])
# Kiểm tra dữ liệu
print(df.head())

# Tạo tập huấn luyện và kiểm tra (80% huấn luyện, 20% kiểm tra)
train, test = python_chrono_split(df, 0.8)

# In kích thước của tập huấn luyện và kiểm tra
print(f"Training data size: {len(train)}")
print(f"Test data size: {len(test)}")

# Lưu dữ liệu train và test ra các tệp CSV
train.to_csv("/Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/test1/train.csv", index=False)
test.to_csv("/Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/test1/test.csv", index=False)

   userID  itemID  rating  timestamp
0       1       1     4.0  964982703
1       1       3     4.0  964981247
2       1       6     4.0  964982224
3       1      47     5.0  964983815
4       1      50     5.0  964982931
Training data size: 80672
Test data size: 20164


In [15]:
# Đường dẫn đến các tệp train và test đã tạo
train_file = "/Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/test1/train.csv"
test_file = "/Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/test1/test.csv"

# Tạo Dataset cho NCF
data = NCFDataset(train_file=train_file, test_file=test_file, seed=42)

# Lấy số lượng người dùng và phim từ dữ liệu
n_users = len(df['userID'].unique())  # Số lượng người dùng
n_items = len(df['itemID'].unique())  # Số lượng phim

print(f"Number of users: {n_users}")
print(f"Number of items: {n_items}")

INFO:recommenders.models.ncf.dataset:Indexing /Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/test1/train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing /Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/test1/test.csv ...
INFO:recommenders.models.ncf.dataset:Creating full leave-one-out test file /Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/test1/test_full.csv ...
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, **kwds)
  return bound(*args, *

Number of users: 610
Number of items: 9724


In [16]:
model = NCF(
    n_users=n_users, 
    n_items=n_items,
    model_type="NeuMF",
    n_factors=8,
    layer_sizes=[16,8,4],
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=1e-3,
    verbose=10,
    seed=SEED
)





In [17]:
gmf_dir = "/Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/notebooks/.pretrain/GMF"
mlp_dir = "/Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/notebooks/.pretrain/MLP"
neumf_dir = "/Users/chi.nguyenth/Documents/DoAn_63133022_NguyenThiHaChi/model/ncf_model"

# Tải mô hình đã huấn luyện
model.load(gmf_dir=gmf_dir, mlp_dir=mlp_dir, neumf_dir=neumf_dir)

In [46]:
print(dir(NCF))

['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_create_model', '_load_neumf', '_predict', 'fit', 'load', 'predict', 'save']


In [44]:
print(dir(data))



In [33]:
# Liệt kê tất cả các thuộc tính và phương thức của đối tượng Dataset
print(dir(data.train_datafile))


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_check_for_missing_fields', '_extract_row_data', '_init_data', 'batch_indices_range', 'binary', 'col_item', 'col_rating', 'col_test_batch', 'col_user', 'data_len', 'end_of_file', 'expected_fields', 'file', 'filename', 'id2item', 'id2user', 'item2id', 'items', 'line_num', 'load_data', 'next_row', 'reader', 'row', 'user2id', 'users']


In [47]:
print(dir(data.test_datafile))


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__enter__', '__eq__', '__exit__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_check_for_missing_fields', '_extract_row_data', '_init_data', 'batch_indices_range', 'binary', 'col_item', 'col_rating', 'col_test_batch', 'col_user', 'data_len', 'end_of_file', 'expected_fields', 'file', 'filename', 'id2item', 'id2user', 'item2id', 'items', 'line_num', 'load_data', 'next_row', 'reader', 'row', 'user2id', 'users']


In [48]:
print(data.train_datafile.user2id)  # Kiểm tra nếu user2id tồn tại
print(data.train_datafile.item2id)  # Kiểm tra nếu item2id tồn tại


OrderedDict([(1, 0), (2, 1), (3, 2), (4, 3), (5, 4), (6, 5), (7, 6), (8, 7), (9, 8), (10, 9), (11, 10), (12, 11), (13, 12), (14, 13), (15, 14), (16, 15), (17, 16), (18, 17), (19, 18), (20, 19), (21, 20), (22, 21), (23, 22), (24, 23), (25, 24), (26, 25), (27, 26), (28, 27), (29, 28), (30, 29), (31, 30), (32, 31), (33, 32), (34, 33), (35, 34), (36, 35), (37, 36), (38, 37), (39, 38), (40, 39), (41, 40), (42, 41), (43, 42), (44, 43), (45, 44), (46, 45), (47, 46), (48, 47), (49, 48), (50, 49), (51, 50), (52, 51), (53, 52), (54, 53), (55, 54), (56, 55), (57, 56), (58, 57), (59, 58), (60, 59), (61, 60), (62, 61), (63, 62), (64, 63), (65, 64), (66, 65), (67, 66), (68, 67), (69, 68), (70, 69), (71, 70), (72, 71), (73, 72), (74, 73), (75, 74), (76, 75), (77, 76), (78, 77), (79, 78), (80, 79), (81, 80), (82, 81), (83, 82), (84, 83), (85, 84), (86, 85), (87, 86), (88, 87), (89, 88), (90, 89), (91, 90), (92, 91), (93, 92), (94, 93), (95, 94), (96, 95), (97, 96), (98, 97), (99, 98), (100, 99), (101,

In [29]:
# Kiểm tra dữ liệu huấn luyện
print(data.train_datafile)


<recommenders.models.ncf.dataset.DataFile object at 0x308184dd0>
