In [1]:
import cornac
from cornac.data import Reader
from cornac.metrics import *
from cornac.models import *
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer

  from .autonotebook import tqdm as notebook_tqdm


FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
from cornac.datasets import amazon_clothing

In [3]:
docs, item_ids = amazon_clothing.load_text()
feedback = amazon_clothing.load_feedback(reader=Reader(item_set=item_ids))

item_text_modality = TextModality(
    corpus=docs,
    ids=item_ids,
    tokenizer=BaseTokenizer(stop_words="english"),
    max_vocab=8000,
    max_doc_freq=0.5,
)

ratio_split = RatioSplit(
    data=feedback,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality,
    verbose=True,
    seed=123,
    rating_threshold=0.5,
)

rating_threshold = 0.5
exclude_unknowns = True
---
Training data:
Number of users = 5081
Number of items = 3326
Number of ratings = 10951
Max rating = 5.0
Min rating = 1.0
Global mean = 4.3
---
Test data:
Number of users = 1868
Number of items = 1453
Number of ratings = 2200
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 5081
Total items = 3326


In [4]:
cdl2=CDL(k=50, autoencoder_structure=[200], max_iter=30, lambda_u=0.1, lambda_v=1, lambda_w=0.1, lambda_n=1000, seed=123),
cdr2=CDR(k=50, autoencoder_structure=[200], max_iter=100, batch_size=128, lambda_u=0.01, lambda_v=0.1, 
        lambda_w=0.0001, lambda_n=5, learning_rate=0.001, vocab_size=8000, seed=123),
cvae2=CVAE(z_dim=50, vae_layers=[200, 100], act_fn="sigmoid", input_dim=8000, lr=0.001, batch_size=128, n_epochs=100, 
        lambda_u=1e-4, lambda_v=0.001, lambda_r=10, lambda_w=1e-4, seed=123, verbose=True),
convmf2=ConvMF(n_epochs=5, verbose=True, seed=123),
hft2= HFT(k=10, max_iter=40, grad_iter=5, l2_reg=0.001, lambda_text=0.01, vocab_size=5000, seed=123,),
ctr2= CTR(k=50, max_iter=50, lambda_v=1)

In [5]:
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=100),Precision(k=100), 
           Recall(k=100), NDCG(k=100), NCRR(k=100),
           MRR(),AUC(), MAP()]

In [6]:
cornac.Experiment(eval_method=ratio_split, models=cdr2, metrics=metrics).run()


[CDR] Training started!


100%|██████████| 100/100 [05:12<00:00,  3.12s/it, loss=0.253]



Learning completed

[CDR] Evaluation started!


Rating: 100%|██████████| 2200/2200 [00:00<00:00, 157966.73it/s]
Ranking: 100%|██████████| 1868/1868 [00:24<00:00, 75.56it/s]


TEST:
...
    |    MAE |     MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------- + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CDR | 3.3612 | 12.3332 | 3.3723 | 0.6358 | 0.0051 | 0.0350 | 0.0384 |   0.0350 |   0.0735 |        0.0026 |     0.2216 |  315.6037 |  25.0023






In [6]:
cornac.Experiment(eval_method=ratio_split, models=cdl2, metrics=metrics).run()


[CDL] Training started!


100%|██████████| 30/30 [01:07<00:00,  2.27s/it, loss=86.3]


Learning completed!

[CDL] Evaluation started!


Rating: 100%|██████████| 2200/2200 [00:00<00:00, 80380.05it/s]
Ranking: 100%|██████████| 1868/1868 [01:05<00:00, 28.68it/s]


TEST:
...
    |    MAE |     MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------- + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CDL | 3.3398 | 12.1928 | 3.3509 | 0.5612 | 0.0028 | 0.0182 | 0.0200 |   0.0177 |   0.0382 |        0.0014 |     0.1203 |   71.7805 |  65.5388






In [8]:
cornac.Experiment(eval_method=ratio_split, models=convmf2, metrics=metrics).run()


[ConvMF] Training started!
Epoch: 1/5


Optimizing CNN: 100%|██████████| 5/5 [00:30<00:00,  6.18s/it]


Loss: 248967404.59891 Elapsed: 1381.4809s Converge: 24896740459890809484445391537260762315118518428617962160128.000000 
Epoch: 2/5


Optimizing CNN: 100%|██████████| 5/5 [00:29<00:00,  5.87s/it]


Loss: 25294451.26359 Elapsed: 396.4422s Converge: 0.898403 
Epoch: 3/5


Optimizing CNN: 100%|██████████| 5/5 [00:30<00:00,  6.11s/it]


Loss: 6994744.01622 Elapsed: 1037.9017s Converge: 0.723467 
Epoch: 4/5


Optimizing CNN: 100%|██████████| 5/5 [00:28<00:00,  5.75s/it]


Loss: 3423946.51071 Elapsed: 468.6821s Converge: 0.510497 
Epoch: 5/5


Optimizing CNN: 100%|██████████| 5/5 [00:28<00:00,  5.65s/it]


Loss: 2038480.93013 Elapsed: 377.1767s Converge: 0.404640 

[ConvMF] Evaluation started!


Rating: 100%|██████████| 2200/2200 [00:00<00:00, 174656.81it/s]
Ranking: 100%|██████████| 1868/1868 [00:14<00:00, 125.35it/s]


TEST:
...
       |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
------ + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
ConvMF | 1.6760 | 3.6114 | 1.6874 | 0.5990 | 0.0033 | 0.0192 | 0.0222 |   0.0187 |   0.0410 |        0.0016 |     0.1427 | 3663.8297 |  15.1968






In [9]:
cornac.Experiment(eval_method=ratio_split, models=cvae2, metrics=metrics).run()


[CVAE] Training started!


100%|██████████| 100/100 [01:47<00:00,  1.08s/it, cf_loss=0.123, vae_loss=0.11] 



[CVAE] Evaluation started!


Rating: 100%|██████████| 2200/2200 [00:00<00:00, 193269.71it/s]
Ranking: 100%|██████████| 1868/1868 [00:19<00:00, 94.57it/s] 


TEST:
...
     |    MAE |     MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
---- + ------ + ------- + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CVAE | 3.2287 | 11.5556 | 3.2404 | 0.5778 | 0.0039 | 0.0275 | 0.0297 |   0.0271 |   0.0562 |        0.0020 |     0.1670 |  108.2325 |  20.0459






In [13]:
item_text_modality_hft = TextModality(
    corpus=docs,
    ids=item_ids,
    tokenizer=BaseTokenizer(stop_words="english"),
    max_vocab=5000,
    max_doc_freq=0.5,
)

# Define an evaluation method to split feedback into train and test sets
ratio_split_hft = RatioSplit(
    data=feedback,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality_hft,
    verbose=True,
    seed=123,
)

cornac.Experiment(eval_method=ratio_split_hft, models=hft2, metrics=metrics).run()

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 5081
Number of items = 3326
Number of ratings = 10951
Max rating = 5.0
Min rating = 1.0
Global mean = 4.3
---
Test data:
Number of users = 1868
Number of items = 1453
Number of ratings = 2200
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 5081
Total items = 3326

[HFT] Training started!


100%|██████████| 40/40 [02:05<00:00,  3.13s/it, loss=9.63e+3]


Learning completed!

[HFT] Evaluation started!


Rating: 100%|██████████| 2200/2200 [00:00<00:00, 66798.43it/s]
Ranking: 100%|██████████| 1868/1868 [01:11<00:00, 26.08it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
HFT | 0.7933 | 1.3676 | 0.8112 | 0.5015 | 0.0006 | 0.0025 | 0.0031 |   0.0017 |   0.0056 |        0.0003 |     0.0248 |  125.7353 |  72.4770






In [12]:
cornac.Experiment(eval_method=ratio_split, models=[ctr2], metrics=metrics).run()


[CTR] Training started!


100%|██████████| 50/50 [06:51<00:00,  8.24s/it, cf_loss=2.41e+4, lda_likelihood=-1.64e+5]


Learning completed!

[CTR] Evaluation started!


Rating: 100%|██████████| 2200/2200 [00:00<00:00, 152386.65it/s]
Ranking: 100%|██████████| 1868/1868 [00:04<00:00, 402.21it/s]


TEST:
...
    |    MAE |     MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------- + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CTR | 3.0342 | 10.5714 | 3.0492 | 0.6643 | 0.0060 | 0.0517 | 0.0560 |   0.0519 |   0.0945 |        0.0031 |     0.2609 |  411.9780 |   4.9438




