In [1]:
import cornac
from cornac.data import Reader
from cornac.metrics import *
from cornac.models import *
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer

  from .autonotebook import tqdm as notebook_tqdm


FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
import numpy as np
import pandas as pd
import random

In [3]:
mind = pd.read_csv('mind_algorithm_test.csv')

mind_feedback = mind.loc[:, ['user_id','item_id','rating']]
# feedback = cornac.data.Dataset.from_uir(mind_feedback.itertuples(index=False))
feedback = mind_feedback.apply(lambda x: tuple(x), axis=1).values.tolist()
text = list(mind['text'])
item_ids = list(mind['item_id'])

item_text_modality = TextModality(
    corpus=text,
    ids=item_ids,
    tokenizer=BaseTokenizer(stop_words="english"),
    max_vocab=8000,
    max_doc_freq=0.5,
)

ratio_split = RatioSplit(
    data=feedback,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality,
    verbose=True,
    seed=123,
    rating_threshold=0.5,
)

rating_threshold = 0.5
exclude_unknowns = True
---
Training data:
Number of users = 181
Number of items = 3954
Number of ratings = 7923
Max rating = 1.0
Min rating = 0.0
Global mean = 0.7
---
Test data:
Number of users = 169
Number of items = 935
Number of ratings = 1364
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 181
Total items = 3954




In [4]:
cdl2=CDL(k=50, autoencoder_structure=[200], max_iter=30, lambda_u=0.1, lambda_v=1, lambda_w=0.1, lambda_n=1000, seed=123),
cdr2=CDR(k=50, autoencoder_structure=[200], max_iter=100, batch_size=128, lambda_u=0.01, lambda_v=0.1, 
        lambda_w=0.0001, lambda_n=5, learning_rate=0.001, vocab_size=8000, seed=123),
cvae2=CVAE(z_dim=50, vae_layers=[200, 100], act_fn="sigmoid", input_dim=8000, lr=0.001, batch_size=128, n_epochs=100, 
        lambda_u=1e-4, lambda_v=0.001, lambda_r=10, lambda_w=1e-4, seed=123, verbose=True),
convmf2=ConvMF(n_epochs=5, verbose=True, seed=123),
hft2= HFT(k=10, max_iter=40, grad_iter=5, l2_reg=0.001, lambda_text=0.01, vocab_size=5000, seed=123,),
ctr2= CTR(k=50, max_iter=50, lambda_v=1)

In [5]:
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=100),Precision(k=100), 
           Recall(k=100), NDCG(k=100), NCRR(k=100),
           MRR(),AUC(), MAP()]

In [6]:
cornac.Experiment(eval_method=ratio_split, models=cdl2, metrics=metrics).run()


[CDL] Training started!


100%|██████████| 30/30 [01:05<00:00,  2.17s/it, loss=22.2]


Learning completed!

[CDL] Evaluation started!


Rating: 100%|██████████| 1364/1364 [00:00<00:00, 73012.37it/s]
Ranking: 100%|██████████| 169/169 [00:03<00:00, 46.42it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CDL | 0.5090 | 0.4215 | 0.5988 | 0.7058 | 0.0126 | 0.0139 | 0.0481 |   0.0206 |   0.0516 |        0.0068 |     0.1520 |   70.8186 |   3.7192






In [6]:
cornac.Experiment(eval_method=ratio_split, models=cdr2, metrics=metrics).run()


[CDR] Training started!


  0%|          | 0/100 [00:00<?, ?it/s]

In [12]:
cornac.Experiment(eval_method=ratio_split, models=convmf2, metrics=metrics).run()


[ConvMF] Training started!
Epoch: 1/5


Optimizing CNN: 100%|██████████| 5/5 [00:29<00:00,  5.84s/it]


Loss: 388202488.99806 Elapsed: 31.6717s Converge: 38820248899805946442067103106938299449386138131819904630784.000000 
Epoch: 2/5


Optimizing CNN: 100%|██████████| 5/5 [00:28<00:00,  5.73s/it]


Loss: 16280436.57963 Elapsed: 31.1473s Converge: 0.958062 
Epoch: 3/5


Optimizing CNN: 100%|██████████| 5/5 [00:29<00:00,  5.98s/it]


Loss: 2597326.56804 Elapsed: 32.3029s Converge: 0.840463 
Epoch: 4/5


Optimizing CNN: 100%|██████████| 5/5 [00:29<00:00,  5.98s/it]


Loss: 608228.72330 Elapsed: 32.3779s Converge: 0.765825 
Epoch: 5/5


Optimizing CNN: 100%|██████████| 5/5 [00:30<00:00,  6.17s/it]


Loss: 343590.30987 Elapsed: 33.3908s Converge: 0.435097 

[ConvMF] Evaluation started!


Rating: 100%|██████████| 1364/1364 [00:00<00:00, 124817.95it/s]
Ranking: 100%|██████████| 169/169 [00:00<00:00, 453.06it/s]


TEST:
...
       |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
------ + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
ConvMF | 0.3375 | 0.1580 | 0.3663 | 0.6454 | 0.0058 | 0.0094 | 0.0234 |   0.0107 |   0.0244 |        0.0032 |     0.0614 |  162.9940 |   0.4162






In [11]:
cornac.Experiment(eval_method=ratio_split, models=cvae2, metrics=metrics).run()


[CVAE] Training started!


100%|██████████| 100/100 [01:06<00:00,  1.51it/s, cf_loss=0.000354, vae_loss=0.133]



[CVAE] Evaluation started!


Rating: 100%|██████████| 1364/1364 [00:00<00:00, 118516.54it/s]
Ranking: 100%|██████████| 169/169 [00:00<00:00, 806.69it/s]


TEST:
...
     |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
---- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CVAE | 0.5643 | 0.5192 | 0.6669 | 0.5874 | 0.0096 | 0.0085 | 0.0225 |   0.0101 |   0.0332 |        0.0052 |     0.0932 |   67.0465 |   0.2534






In [9]:
item_text_modality_hft = TextModality(
    corpus=text,
    ids=item_ids,
    tokenizer=BaseTokenizer(stop_words="english"),
    max_vocab=5000,
    max_doc_freq=0.5,
)

# Define an evaluation method to split feedback into train and test sets
ratio_split_hft = RatioSplit(
    data=feedback,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality_hft,
    verbose=True,
    seed=123,
)

cornac.Experiment(eval_method=ratio_split_hft, models=hft2, metrics=metrics).run()

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 181
Number of items = 3954
Number of ratings = 7923
Max rating = 1.0
Min rating = 0.0
Global mean = 0.7
---
Test data:
Number of users = 169
Number of items = 935
Number of ratings = 1364
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 181
Total items = 3954





[HFT] Training started!


100%|██████████| 40/40 [06:46<00:00, 10.17s/it, loss=5.27e+4]


Learning completed!

[HFT] Evaluation started!


Rating: 100%|██████████| 1364/1364 [00:00<00:00, 45070.20it/s]
Ranking: 100%|██████████| 169/169 [00:04<00:00, 41.04it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
HFT | 0.0551 | 0.0160 | 0.0830 | 0.6251 | 0.0017 | 0.0032 | 0.0075 |   0.0022 |   0.0044 |        0.0010 |     0.0080 |  410.0649 |   4.1844






In [14]:
cornac.Experiment(eval_method=ratio_split, models=[ctr2], metrics=metrics).run()


[CTR] Training started!


100%|██████████| 50/50 [00:57<00:00,  1.14s/it, cf_loss=180, lda_likelihood=-1.81e+6]


Learning completed!

[CTR] Evaluation started!


Rating: 100%|██████████| 1364/1364 [00:00<00:00, 124372.94it/s]
Ranking: 100%|██████████| 169/169 [00:00<00:00, 653.14it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC | F1@100 |    MAP |    MRR | NCRR@100 | NDCG@100 | Precision@100 | Recall@100 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + -------- + -------- + ------------- + ---------- + --------- + --------
CTR | 0.5469 | 0.4689 | 0.6360 | 0.6310 | 0.0097 | 0.0108 | 0.0388 |   0.0166 |   0.0370 |        0.0053 |     0.1022 |   57.0324 |   0.3014




