In [1]:
import numpy as np
import pandas as pd
import torch

from models_ncf.data import SampleGenerator
from models_ncf.neumf import NeuMFEngine

In [2]:

neumf_config = {'alias': 'neumf_first_try',
                'num_epoch': 10,
                'batch_size': 1024,
                'optimizer': 'adam',
                'adam_lr': 1e-3,
                'num_users': 6040,
                'num_items': 3706,
                'latent_dim_mf': 8,
                'latent_dim_mlp': 8,
                'num_negative': 4,
                'layers': [16, 64, 32, 16, 8],  # layers[0] is the concat of latent user vector & latent item vector
                'l2_regularization': 0.0000001,
                'weight_init_gaussian': True,
                'use_cuda': True,
                'use_bachify_eval': True,
                'device_id': 0,
                'pretrain': False,
                'pretrain_mf': 'checkpoints/{}'.format('gmf_factor8neg4_Epoch100_precision0.6391_recall0.2852.model'),
                'pretrain_mlp': 'checkpoints/{}'.format('mlp_factor8neg4_Epoch100_precision0.5606_recall0.2463.model'),
                'model_dir': 'checkpoints/{}_Epoch{}_precision{:.4f}_recall{:.4f}.model'
                }

In [3]:
import torch

# Check if CUDA (GPU support) is available
if torch.cuda.is_available():
    # Get the number of available GPUs
    num_gpus = torch.cuda.device_count()
    print(f"Number of GPUs available: {num_gpus}")
    
    # Print the device ID and its name for each GPU
    for i in range(num_gpus):
        print(f"GPU {i} - Name: {torch.cuda.get_device_name(i)}")
else:
    print("CUDA is not available. Using CPU.")


Number of GPUs available: 1
GPU 0 - Name: NVIDIA GeForce RTX 3060 Laptop GPU


In [4]:

# Load Data

ml1m_dir = 'ml-1m/ratings.dat'
ml1m_rating = pd.read_csv(ml1m_dir, sep='::', header=None, names=['uid', 'mid', 'rating', 'timestamp'], engine='python')
# Reindex
user_id = ml1m_rating[['uid']].drop_duplicates().reindex()
user_id['userId'] = np.arange(len(user_id))
ml1m_rating = pd.merge(ml1m_rating, user_id, on=['uid'], how='left')
item_id = ml1m_rating[['mid']].drop_duplicates()
item_id['itemId'] = np.arange(len(item_id))
ml1m_rating = pd.merge(ml1m_rating, item_id, on=['mid'], how='left')
ml1m_rating = ml1m_rating[['userId', 'itemId', 'rating', 'timestamp']]
print('Range of userId is [{}, {}]'.format(ml1m_rating.userId.min(), ml1m_rating.userId.max()))
print('Range of itemId is [{}, {}]'.format(ml1m_rating.itemId.min(), ml1m_rating.itemId.max()))
# DataLoader for training 
sample_generator = SampleGenerator(ratings=ml1m_rating)
evaluate_data = sample_generator.evaluate_data

config = neumf_config
engine = NeuMFEngine(config)
for epoch in range(config['num_epoch']):
    print('Epoch {} starts !'.format(epoch))
    print('-' * 80)
    train_loader = sample_generator.instance_a_train_loader(config['num_negative'], config['batch_size'])
    engine.train_an_epoch(train_loader, epoch_id=epoch)
    precision, recall = engine.evaluate(evaluate_data, epoch_id=epoch)
    engine.save(config['alias'], epoch, precision, recall)

Range of userId is [0, 6039]
Range of itemId is [0, 3705]
Index(['userId', 'itemId', 'rating', 'real_score', 'negative_samples'], dtype='object')
Embedding(6040, 8)
Embedding(3706, 8)
Embedding(6040, 8)
Embedding(3706, 8)
Linear(in_features=16, out_features=64, bias=True)
Linear(in_features=64, out_features=32, bias=True)
Linear(in_features=32, out_features=16, bias=True)
Linear(in_features=16, out_features=8, bias=True)
Linear(in_features=16, out_features=1, bias=True)
NeuMF(
  (embedding_user_mlp): Embedding(6040, 8)
  (embedding_item_mlp): Embedding(3706, 8)
  (embedding_user_mf): Embedding(6040, 8)
  (embedding_item_mf): Embedding(3706, 8)
  (fc_layers): ModuleList(
    (0): Linear(in_features=16, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=32, bias=True)
    (2): Linear(in_features=32, out_features=16, bias=True)
    (3): Linear(in_features=16, out_features=8, bias=True)
  )
  (affine_output): Linear(in_features=16, out_features=1, bias=True)
  (logist

100%|██████████| 584/584 [00:00<00:00, 1286.39it/s]


Length of test_users: 6040
Length of test_items: 6040
Length of test_preds: 6040
[Evaluating Epoch 0] Precision = 0.2594, Recall = 1.0000
Epoch 1 starts !
--------------------------------------------------------------------------------
[Training Epoch 1] Batch 0, Loss 0.3224395513534546
[Training Epoch 1] Batch 1, Loss 0.3075719475746155
[Training Epoch 1] Batch 2, Loss 0.2836116552352905
[Training Epoch 1] Batch 3, Loss 0.2968325614929199
[Training Epoch 1] Batch 4, Loss 0.31246185302734375
[Training Epoch 1] Batch 5, Loss 0.3001883327960968
[Training Epoch 1] Batch 6, Loss 0.28948545455932617
[Training Epoch 1] Batch 7, Loss 0.3070160746574402
[Training Epoch 1] Batch 8, Loss 0.29410380125045776
[Training Epoch 1] Batch 9, Loss 0.3113848865032196
[Training Epoch 1] Batch 10, Loss 0.2875472605228424
[Training Epoch 1] Batch 11, Loss 0.287916898727417
[Training Epoch 1] Batch 12, Loss 0.29227519035339355
[Training Epoch 1] Batch 13, Loss 0.34022900462150574
[Training Epoch 1] Batch 14,

KeyboardInterrupt: 