In [1]:
import cornac
from cornac.data import Reader
from cornac.metrics import *
from cornac.models import *
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer

  from .autonotebook import tqdm as notebook_tqdm


FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
from cornac.datasets import movielens

In [3]:
# Load the MovieLens plot texts together with the ids of the movies that have one.
plots, movie_ids = movielens.load_plot()

# Keep only feedback on movies that have a plot (Reader is given item_set=movie_ids).
# NOTE(review): despite the name, `ml_1m` holds the "100K" variant requested
# below; the name is kept as-is because a later cell reads it again.
ml_1m = movielens.load_feedback(
    reader=Reader(item_set=movie_ids),
    variant="100K",
)

# Item text modality shared by the models evaluated on `ratio_split`:
# tab-separated tokens, English stop words, vocabulary capped at 8000 terms.
item_text_modality_mov = TextModality(
    ids=movie_ids,
    corpus=plots,
    max_vocab=8000,
    max_doc_freq=0.5,
    tokenizer=BaseTokenizer(sep="\t", stop_words="english"),
)

# 80/20 train/test split of the feedback, seeded so it can be reproduced.
ratio_split = RatioSplit(
    data=ml_1m,
    item_text=item_text_modality_mov,
    test_size=0.2,
    seed=123,
    exclude_unknowns=True,
    verbose=True,
)

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 943
Number of items = 1518
Number of ratings = 75846
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 941
Number of items = 1279
Number of ratings = 18925
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 943
Total items = 1518


In [4]:
# Text-aware recommender models to compare.
#
# NOTE(review): in the original one-line form the first five assignments ended
# with a trailing comma, which makes each name a 1-tuple wrapping the model
# rather than the model itself. The tuples are spelled out explicitly here
# because later cells pass these names directly as `models=...`. Only `ctr2`
# is a bare model instance, and its cell wraps it in a list (`models=[ctr2]`).
cdl2 = (
    CDL(
        k=50,
        autoencoder_structure=[200],
        max_iter=30,
        lambda_u=0.1,
        lambda_v=1,
        lambda_w=0.1,
        lambda_n=1000,
        seed=123,
    ),
)

cdr2 = (
    CDR(
        k=50,
        autoencoder_structure=[200],
        max_iter=100,
        batch_size=128,
        lambda_u=0.01,
        lambda_v=0.1,
        lambda_w=0.0001,
        lambda_n=5,
        learning_rate=0.001,
        vocab_size=8000,
        seed=123,
    ),
)

cvae2 = (
    CVAE(
        z_dim=50,
        vae_layers=[200, 100],
        act_fn="sigmoid",
        input_dim=8000,
        lr=0.001,
        batch_size=128,
        n_epochs=100,
        lambda_u=1e-4,
        lambda_v=0.001,
        lambda_r=10,
        lambda_w=1e-4,
        seed=123,
        verbose=True,
    ),
)

convmf2 = (
    ConvMF(n_epochs=5, verbose=True, seed=123),
)

# HFT is configured for a 5000-term vocabulary, unlike the 8000 used above.
hft2 = (
    HFT(
        k=10,
        max_iter=40,
        grad_iter=5,
        l2_reg=0.001,
        lambda_text=0.01,
        vocab_size=5000,
        seed=123,
    ),
)

# No trailing comma in the original: this one is the model itself, not a tuple.
ctr2 = CTR(k=50, max_iter=50, lambda_v=1)

In [5]:
# Metrics reported for every experiment below: three rating-error metrics
# followed by ranking metrics (the @k ones use a cutoff of 100).
metrics = [
    MAE(),
    RMSE(),
    MSE(),
    FMeasure(k=100),
    Precision(k=100),
    Recall(k=100),
    NDCG(k=100),
    NCRR(k=100),
    MRR(),
    AUC(),
    MAP(),
]

In [6]:
# Train and evaluate CDR on the shared split, reporting every metric in `metrics`.
cornac.Experiment(
    models=cdr2,
    metrics=metrics,
    eval_method=ratio_split,
).run()


[CDR] Training started!


100%|██████████| 100/100 [1:02:15<00:00, 37.36s/it, loss=0.216]



Learning completed

[CDR] Evaluation started!


Rating: 100%|██████████| 18925/18925 [00:00<00:00, 151246.53it/s]
Ranking: 100%|██████████| 941/941 [00:28<00:00, 32.79it/s]



TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CDR | 2.5596 | 7.8347 | 2.7508 | 0.9246 | 0.1129 | 0.1011 | 0.2499 |   0.1366 |   0.2657 |        0.0694 |     0.5263 | 3739.0460 |  29.0770



In [6]:
# Train and evaluate CDL on the shared split, reporting every metric in `metrics`.
cornac.Experiment(
    models=cdl2,
    metrics=metrics,
    eval_method=ratio_split,
).run()


[CDL] Training started!


100%|██████████| 30/30 [00:25<00:00,  1.16it/s, loss=278]


Learning completed!

[CDL] Evaluation started!


Rating: 100%|██████████| 18925/18925 [00:00<00:00, 134500.47it/s]
Ranking: 100%|██████████| 941/941 [00:31<00:00, 29.89it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CDL | 2.0310 | 5.2220 | 2.2230 | 0.6381 | 0.0163 | 0.0207 | 0.0481 |   0.0158 |   0.0297 |        0.0110 |     0.0570 |   29.7460 |  31.8660






In [8]:
# Train and evaluate ConvMF on the shared split, reporting every metric in `metrics`.
cornac.Experiment(
    models=convmf2,
    metrics=metrics,
    eval_method=ratio_split,
).run()


[ConvMF] Training started!
Epoch: 1/5


Optimizing CNN: 100%|██████████| 5/5 [00:20<00:00,  4.04s/it]


Loss: 76477613.93485 Elapsed: 1233.4026s Converge: 7647761393484657650470194882706226995015217393946396196864.000000 
Epoch: 2/5


Optimizing CNN: 100%|██████████| 5/5 [00:19<00:00,  3.96s/it]


Loss: 12291202.65535 Elapsed: 920.8792s Converge: 0.839284 
Epoch: 3/5


Optimizing CNN: 100%|██████████| 5/5 [00:12<00:00,  2.42s/it]


Loss: 8140442.92045 Elapsed: 509.1805s Converge: 0.337702 
Epoch: 4/5


Optimizing CNN: 100%|██████████| 5/5 [00:12<00:00,  2.47s/it]


Loss: 6313336.86836 Elapsed: 16.5689s Converge: 0.224448 
Epoch: 5/5


Optimizing CNN: 100%|██████████| 5/5 [00:12<00:00,  2.58s/it]


Loss: 5418359.53992 Elapsed: 32.0712s Converge: 0.141760 

[ConvMF] Evaluation started!


Rating: 100%|██████████| 18925/18925 [00:00<00:00, 170373.91it/s]
Ranking: 100%|██████████| 941/941 [00:04<00:00, 190.25it/s]


TEST:
...
       |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
------ + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
ConvMF | 0.7613 | 0.9197 | 0.9125 | 0.7216 | 0.0555 | 0.0426 | 0.1240 |   0.0572 |   0.1124 |        0.0359 |     0.2120 | 2713.3824 |   5.3125






In [9]:
# Train and evaluate CVAE on the shared split, reporting every metric in `metrics`.
cornac.Experiment(
    models=cvae2,
    metrics=metrics,
    eval_method=ratio_split,
).run()


[CVAE] Training started!


100%|██████████| 100/100 [00:30<00:00,  3.23it/s, cf_loss=0.653, vae_loss=0.11]



[CVAE] Evaluation started!


Rating: 100%|██████████| 18925/18925 [00:00<00:00, 155416.07it/s]
Ranking: 100%|██████████| 941/941 [00:05<00:00, 170.22it/s]


TEST:
...
     |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
---- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CVAE | 0.9359 | 1.3515 | 1.1124 | 0.8800 | 0.1005 | 0.0871 | 0.2407 |   0.1257 |   0.2301 |        0.0636 |     0.4413 |   31.5003 |   5.8135






In [11]:
# HFT is configured with vocab_size=5000, so it gets its own text modality
# whose vocabulary is capped at 5000 terms; the other settings mirror the
# 8000-term modality built earlier.
item_text_modality_hft = TextModality(
    ids=movie_ids,
    corpus=plots,
    max_vocab=5000,
    max_doc_freq=0.5,
    tokenizer=BaseTokenizer(sep="\t", stop_words="english"),
)

# Same feedback, split ratio and seed as the main split, but carrying the
# HFT-specific text modality.
ratio_split_hft = RatioSplit(
    data=ml_1m,
    item_text=item_text_modality_hft,
    test_size=0.2,
    seed=123,
    exclude_unknowns=True,
    verbose=True,
)

# Train and evaluate HFT on its dedicated split.
cornac.Experiment(
    models=hft2,
    metrics=metrics,
    eval_method=ratio_split_hft,
).run()

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 943
Number of items = 1518
Number of ratings = 75846
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 941
Number of items = 1279
Number of ratings = 18925
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 943
Total items = 1518

[HFT] Training started!


100%|██████████| 40/40 [01:04<00:00,  1.61s/it, loss=3.91e+4]


Learning completed!

[HFT] Evaluation started!


Rating: 100%|██████████| 18925/18925 [00:00<00:00, 81733.16it/s]
Ranking: 100%|██████████| 941/941 [00:43<00:00, 21.54it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
HFT | 0.8675 | 1.3161 | 1.0756 | 0.6333 | 0.0377 | 0.0280 | 0.0664 |   0.0287 |   0.0650 |        0.0252 |     0.1277 |   64.4745 |  44.1675






In [7]:
# Train and evaluate CTR on the shared split. `ctr2` is a bare model instance,
# so it is wrapped in a list for the `models` argument.
cornac.Experiment(
    models=[ctr2],
    metrics=metrics,
    eval_method=ratio_split,
).run()


[CTR] Training started!


100%|██████████| 50/50 [00:29<00:00,  1.72it/s, cf_loss=1.62e+4, lda_likelihood=-2.16e+5]


Learning completed!

[CTR] Evaluation started!


Rating: 100%|██████████| 18925/18925 [00:00<00:00, 152932.86it/s]
Ranking: 100%|██████████| 941/941 [00:01<00:00, 899.54it/s] 


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CTR | 1.4575 | 3.2520 | 1.7390 | 0.8564 | 0.0978 | 0.0861 | 0.3097 |   0.1529 |   0.2323 |        0.0618 |     0.4282 |   29.1476 |   1.3404




