In [1]:
import pandas as pd
import numpy as np
from models_ncf.neumf import NeuMFEngine
from models_ncf.data import SampleGenerator

In [2]:
# Hyper-parameters and run settings handed to NeuMFEngine in the training cell.
neumf_config = dict(
    # --- run identity / optimisation ---
    alias='neumf_first_try',
    num_epoch=20,
    batch_size=64,
    optimizer='adam',
    adam_lr=1e-3,
    # --- problem size ---
    # NOTE(review): 6040 / 3706 look like the MovieLens-1M user/item counts;
    # confirm they match the ratings file that is actually loaded.
    num_users=6040,
    num_items=3706,
    # --- architecture ---
    latent_dim_mf=8,
    latent_dim_mlp=8,
    num_negative=4,
    # layers[0] is the concat of latent user vector & latent item vector
    layers=[16, 64, 32, 16, 8],
    l2_regularization=1e-7,
    weight_init_gaussian=True,
    # --- runtime ---
    use_cuda=True,
    # Key spelling is kept verbatim: it is looked up by name at runtime.
    use_bachify_eval=True,
    device_id=0,
    # --- checkpoints ---
    pretrain=False,
    pretrain_mf='checkpoints/gmf_factor8neg4_Epoch100_precision0.6391_recall0.2852.model',
    pretrain_mlp='checkpoints/mlp_factor8neg4_Epoch100_precision0.5606_recall0.2463.model',
    # Template with four positional slots: two generic, then precision and recall.
    model_dir='checkpoints/{}_Epoch{}_precision{:.4f}_recall{:.4f}.model',
)

In [None]:
# Load Data
# The file is parsed without a header row and given four fixed column names.
# Because header=None, a header line in the file (if there is one) is read as
# an ordinary row; its non-numeric 'rating' is coerced to NaN and dropped below.
# NOTE(review): confirm the CSV really has a fourth (timestamp) column.
ml1m_dir = 'goodbooks-10k/ratings.csv'
ml1m_rating = pd.read_csv(
    ml1m_dir,
    sep=',',
    header=None,
    names=['uid', 'bookid', 'rating', 'timestamp'],
    engine='python',
)

# Make 'rating' numeric (unparseable values -> NaN), then discard those rows.
ml1m_rating = (
    ml1m_rating
    .assign(rating=pd.to_numeric(ml1m_rating['rating'], errors='coerce'))
    .dropna(subset=['rating'])
)

# Reindex: map raw user ids to dense integers 0..n_users-1, in order of first appearance.
user_id = ml1m_rating[['uid']].drop_duplicates()
user_id = user_id.assign(userId=np.arange(len(user_id))).astype({'userId': int})
ml1m_rating = ml1m_rating.merge(user_id, on=['uid'], how='left')

# Same dense remapping for the raw book ids.
item_id = ml1m_rating[['bookid']].drop_duplicates()
item_id = item_id.assign(itemId=np.arange(len(item_id))).astype({'itemId': int})
ml1m_rating = ml1m_rating.merge(item_id, on=['bookid'], how='left')

# Keep only the remapped ids plus rating and timestamp, in this column order.
ml1m_rating = ml1m_rating.loc[:, ['userId', 'itemId', 'rating', 'timestamp']]

# Size the model from the ratings that were actually loaded instead of trusting
# the hard-coded counts in neumf_config. userId / itemId are dense 0..n-1
# indices built during loading, so the number of distinct values is the size.
# NOTE(review): assumes NeuMFEngine sizes its user/item embedding tables from
# config['num_users'] / config['num_items'] — confirm in models_ncf.neumf.
num_users = int(ml1m_rating['userId'].nunique())
num_items = int(ml1m_rating['itemId'].nunique())

# DataLoader for training
sample_generator = SampleGenerator(ratings=ml1m_rating)
evaluate_data = sample_generator.evaluate_data

# Work on a copy so the template neumf_config defined earlier is not mutated.
config = dict(neumf_config, num_users=num_users, num_items=num_items)
engine = NeuMFEngine(config)
for epoch in range(config['num_epoch']):
    print('Epoch {} starts !'.format(epoch))
    print('-' * 80)
    # A fresh loader is requested every epoch, as in the original loop.
    train_loader = sample_generator.instance_a_train_loader(config['num_negative'], config['batch_size'])
    engine.train_an_epoch(train_loader, epoch_id=epoch)
    precision, recall = engine.evaluate(evaluate_data, epoch_id=epoch)
    # Persist a checkpoint tagged with this epoch's metrics.
    engine.save(config['alias'], epoch, precision, recall)
