In [2]:
import numpy as np
import cornac
from cornac.data import FeatureModality
from cornac.eval_methods import RatioSplit
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch
import pandas as pd

from cornac.models import FMRec, MF
from cornac.datasets.goodreads import prepare_data

  from .autonotebook import tqdm as notebook_tqdm


FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [3]:
# Input files: tab-separated ratings, per-item genre features, per-user aspect features.
path = '../cornac/datasets/good_reads/good_read_UIR_1000.csv'
fpath_genres = '../cornac/datasets/good_reads/goodreads_genres.csv'
fpath_aspects = '../cornac/datasets/good_reads/uid_aspect_features.txt'

# One seed for both splits so Restart & Run All reproduces the same partitions.
SEED = 21

df = pd.read_csv(path, sep='\t', header=0, names=['user_id', 'item_id', 'rating'])
genres = pd.read_csv(fpath_genres)
user_aspects = pd.read_csv(fpath_aspects, sep='\t', usecols=['user_id', 'feature'])

# Two-column arrays of (id, feature) rows built from the side-information files.
item_features = np.array(list(zip(genres['item_id'].to_numpy(), genres['feature'].to_numpy())))
user_features = np.array(list(zip(user_aspects['user_id'].to_numpy(), user_aspects['feature'].to_numpy())))

# Keep only ratings whose item appears in the genre file AND whose user
# appears in the aspect file.
has_item_features = df['item_id'].isin(genres['item_id'])
has_user_features = df['user_id'].isin(user_aspects['user_id'])
df = df[has_item_features & has_user_features]

data = df[['user_id', 'item_id', 'rating']].to_numpy()

rs = prepare_data(
    data_name="goodreads_limers",
    test_size=0.2,
    dense=True,
    item=True,
    user=True,
    sample_size=0.5,
    seed=SEED,
)

# The original call had no seed, so the train/val/test partition changed on
# every kernel restart; pinning it makes the reported metrics reproducible.
ratio_split = RatioSplit(
    data=data,
    test_size=0.2,
    val_size=0.2,
    seed=SEED,
    verbose=False,
    item_feature=FeatureModality(item_features),
    user_feature=FeatureModality(user_features),
)

# Rating metrics first (MAE, RMSE), then ranking metrics.
metrics = [
    cornac.metrics.MAE(),
    cornac.metrics.RMSE(),
    cornac.metrics.Precision(),
    cornac.metrics.Recall(),
    cornac.metrics.NDCG(),
    cornac.metrics.AUC(),
    cornac.metrics.MAP(),
]

In [3]:
# MF model with hand-picked hyperparameters (no search is run for it here).
mf = MF(
    k=10,
    max_iter=500,
    learning_rate=0.001,
    use_bias=True,
)

In [11]:
# Confirm the iteration budget the MF model was constructed with.
mf.max_iter

500

In [4]:
# Show the initial learning rate of a default-constructed FMRec.
fm = FMRec()
fm.initial_learning_rate

0.001

In [4]:
# Grid-search FMRec's initial learning rate, selecting by RMSE.
fm = FMRec()
learning_rates = [0.001, 0.005, 0.01, 0.05, 0.1]

gs_fm = GridSearch(
    model=fm,
    space=[Discrete("initial_learning_rate", learning_rates)],
    metric=cornac.metrics.RMSE(),
    # Use the same evaluation method as the Experiment that drives this search.
    # The original passed `rs` here while the Experiment ran on `ratio_split`,
    # so the search and the experiment were configured from different splits.
    # NOTE(review): if `rs` was the intended method, switch the Experiment
    # below to `rs` instead - the two must agree.
    eval_method=ratio_split,
)

cornac.Experiment(eval_method=ratio_split, models=[gs_fm], metrics=metrics).run()
print(gs_fm.best_params)

Evaluating: {'initial_learning_rate': 0.001}
Creating validation dataset of 0.01 of training for adaptive regularization
-- Epoch 1
Training MSE: 0.78498
-- Epoch 2
Training MSE: 0.59944
-- Epoch 3
Training MSE: 0.55108
-- Epoch 4
Training MSE: 0.51435
-- Epoch 5
Training MSE: 0.48460
-- Epoch 6
Training MSE: 0.46042
-- Epoch 7
Training MSE: 0.44012
-- Epoch 8
Training MSE: 0.42084
-- Epoch 9
Training MSE: 0.40229
-- Epoch 10
Training MSE: 0.38728
Evaluating: {'initial_learning_rate': 0.005}
Creating validation dataset of 0.01 of training for adaptive regularization
-- Epoch 1
Training MSE: 0.74620
-- Epoch 2
Training MSE: 0.57096
-- Epoch 3
Training MSE: 0.50973
-- Epoch 4
Training MSE: 0.46122
-- Epoch 5
Training MSE: 0.43162
-- Epoch 6
Training MSE: 0.39532
-- Epoch 7
Training MSE: 0.37062
-- Epoch 8
Training MSE: 0.34631
-- Epoch 9
Training MSE: 0.32528
-- Epoch 10
Training MSE: 0.30369
Evaluating: {'initial_learning_rate': 0.01}
Creating validation dataset of 0.01 of training for 

KeyboardInterrupt: 