In [1]:
import numpy as np
import pandas as pd

import cornac
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer
from cornac.data import Reader
from cornac.metrics import MAE,RMSE,MSE,FMeasure,Precision,Recall,NDCG,NCRR,MRR,AUC,MAP

FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
docs_movielens, item_ids_movielens = cornac.datasets.movielens.load_plot()
feedback_movielens_100K = cornac.datasets.movielens.load_feedback(variant="100K",reader=Reader(item_set=item_ids_movielens))

movielens_item_text_modality = TextModality(
    corpus=docs_movielens,
    ids=item_ids_movielens,
    tokenizer=BaseTokenizer(sep="\t", stop_words="english"),
    max_vocab=8000,
    max_doc_freq=0.5,
)
# Define an evaluation method to split feedback into train and test sets
movieslens100K_ratio_split = RatioSplit(
    data=feedback_movielens_100K,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=movielens_item_text_modality,
    verbose=True,
    seed=123,
    rating_threshold=1,
)

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 943
Number of items = 1518
Number of ratings = 75846
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 941
Number of items = 1279
Number of ratings = 18925
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 943
Total items = 1518


### k=10

In [3]:
# Instantiate HFT model
hft_k10 = cornac.models.HFT(
    k=10,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_k10], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7453 | 0.9116 | 0.9548 | 0.6619 | 0.0478 | 0.0379 | 0.1399 |  0.0550 |  0.0699 |       0.0383 |    0.0986 |   12.8622 |   1.7481



In [82]:
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[cornac.models.HFT(
    k=10,
    max_iter=50,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/50 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.8056 | 1.1346 | 1.0652 | 0.6329 | 0.0295 | 0.0276 | 0.0643 |  0.0233 |  0.0383 |       0.0241 |    0.0605 |   62.5077 |   2.4544



In [4]:
data1 = {"model":"HFT",
        "data":"movielens-100K",
        "k":10,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.7450,"MSE":0.9100,"RMSE":0.9539,"AUC":0.6618,"F1":0.0476,"MAP":0.0379,"MRR":0.1403,"NCRR":0.0550,"NDCG":0.0696,"Precision":0.0381,"Recall":0.0978,
        "category diversity":0}

In [5]:
from cornac.utils import cache

# Download some information of MovieLens 100K dataset
user_df = pd.read_csv(
  cache("http://files.grouplens.org/datasets/movielens/ml-100k/u.user"),
  sep="|", names=["UserID", "Age", "Gender", "Occupation", "Zip Code"]
).set_index("UserID")

item_df = pd.read_csv(
  cache("http://files.grouplens.org/datasets/movielens/ml-100k/u.item"),
  sep="|", encoding="ISO-8859-1",
  names=["ItemID", "Title", "Release Date", "Video Release Date", "IMDb URL",
         "unknown", "Action", "Adventure", "Animation", "Children's", "Comedy",
         "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror",
         "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western"]
).set_index("ItemID").drop(columns=["Video Release Date", "IMDb URL", "unknown"])

In [6]:
import diversity_movielens
def compute_avg_diversity(model):
    for i in range(len(set(movieslens100K_ratio_split.test_set.user_ids))):  # get the average result for all users
        result = []
        r = diversity_movielens.MovielensRetrieval(model=model, item_df=item_df, UIDX=i, TOPK=50)
        d = diversity_movielens.Diversity()
        recommendation = d.compute_dict_distr(r.get_recy())
        history = d.compute_dict_distr(r.get_history())
        result.append(d.compute_kl_divergence(recommendation, history, alpha=0.001))
    return np.mean(result)

In [7]:
data1["category diversity"] = compute_avg_diversity(hft_k10)
data1["category diversity"]

0.19221158137022604

### k=50

In [8]:
# Instantiate HFT model
hft_k50 = cornac.models.HFT(
    k=50,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_k50], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7570 | 0.9398 | 0.9694 | 0.6559 | 0.0471 | 0.0368 | 0.1273 |  0.0496 |  0.0669 |       0.0374 |    0.0953 |   29.7297 |   1.6939



In [9]:
data2 = {"model":"HFT",
        "data":"movielens-100K",
        "k":50,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.7570,"MSE":0.9397,"RMSE":0.9694,"AUC":0.6559,"F1":0.0470,"MAP":0.0368,"MRR":0.1269,"NCRR":0.0494,"NDCG":0.0668,"Precision":0.0373,"Recall":0.0952,
        "category diversity":compute_avg_diversity(hft_k50)}

In [10]:
data2["category diversity"]

0.19959275849372568

### k=100

In [11]:
# Instantiate HFT model
hft_k100 = cornac.models.HFT(
    k=100,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_k100], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7634 | 0.9471 | 0.9732 | 0.6596 | 0.0417 | 0.0343 | 0.1045 |  0.0405 |  0.0586 |       0.0329 |    0.0863 |   37.9185 |   1.5414



In [12]:
data3 = {"model":"HFT",
        "data":"movielens-100K",
        "k":100,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.7634,"MSE":0.9471,"RMSE":0.9732,"AUC":0.6595,"F1":0.0417,"MAP":0.0343,"MRR":0.1049,"NCRR":0.0405,"NDCG":0.0586,"Precision":0.0329,"Recall":0.0863,
        "category diversity":compute_avg_diversity(hft_k100)}
data3["category diversity"]

0.16955840437898917

### k=200

In [13]:
# Instantiate HFT model
hft_k200 = cornac.models.HFT(
    k=200,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_k200], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7498 | 0.9126 | 0.9553 | 0.6565 | 0.0405 | 0.0331 | 0.0907 |  0.0349 |  0.0556 |       0.0312 |    0.0882 |   79.4958 |   2.1174



In [14]:
data4 = {"model":"HFT",
        "data":"movielens-100K",
        "k":200,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.7498,"MSE":0.9127,"RMSE":0.9553,"AUC":0.6565,"F1":0.0406,"MAP":0.0330,"MRR":0.0906,"NCRR":0.0349,"NDCG":0.0556,"Precision":0.0313,"Recall":0.0883,
        "category diversity":compute_avg_diversity(hft_k200)}
data4["category diversity"]

0.15096267166842275

### k=300

In [15]:
# Instantiate HFT model
hft_k300 = cornac.models.HFT(
    k=300,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_k300], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7539 | 0.9206 | 0.9595 | 0.6577 | 0.0364 | 0.0313 | 0.0848 |  0.0303 |  0.0492 |       0.0279 |    0.0796 |   96.8714 |   2.1165



In [16]:
data5 = {"model":"HFT",
        "data":"movielens-100K",
        "k":300,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.7539,"MSE":0.9206,"RMSE":0.9595,"AUC":0.6577,"F1":0.0366,"MAP":0.0313,"MRR":0.0848,"NCRR":0.0304,"NDCG":0.0493,"Precision":0.0281,"Recall":0.0797,
        "category diversity":compute_avg_diversity(hft_k300)}
data5["category diversity"]

0.1344581931463635

### k=400

In [17]:
# Instantiate HFT model
hft_k400 = cornac.models.HFT(
    k=400,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_k400], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7471 | 0.9015 | 0.9495 | 0.6598 | 0.0368 | 0.0315 | 0.0721 |  0.0276 |  0.0483 |       0.0283 |    0.0788 |  114.2492 |   1.5766



In [18]:
data6 = {"model":"HFT",
        "data":"movielens-100K",
        "k":400,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.7471,"MSE":0.9015,"RMSE":0.9495,"AUC":0.6598,"F1":0.0367,"MAP":0.0315,"MRR":0.0721,"NCRR":0.0276,"NDCG":0.0482,"Precision":0.0282,"Recall":0.0786,
        "category diversity":compute_avg_diversity(hft_k400)}
data6["category diversity"]

0.15969421280865836

#----------------------------------

### k=300,lambda_text=0.1
 lambda_text: float, default: 0.1
        Weight of corpus likelihood in objective function.

In [19]:
# Instantiate HFT model
hft_lamda_text01 = cornac.models.HFT(
    k=300,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.1,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_lamda_text01], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7545 | 0.9214 | 0.9599 | 0.6577 | 0.0367 | 0.0312 | 0.0824 |  0.0298 |  0.0494 |       0.0281 |    0.0803 |   97.3042 |   1.4463



In [20]:
data7 = {"model":"HFT",
        "data":"movielens-100K",
        "k":300,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.1,
        "l2_reg":0.001,
        "MAE":0.7544,"MSE":0.9213,"RMSE":0.9599,"AUC":0.6576,"F1":0.0364,"MAP":0.0311,"MRR":0.0825,"NCRR":0.0297,"NDCG":0.0489,"Precision":0.0279,"Recall":0.0793,
        "category diversity":compute_avg_diversity(hft_lamda_text01)}
data7["category diversity"]

0.1344581931463635

### k=300, lambda_text=0.001

In [21]:
# Instantiate HFT model
hft_lamda_text001 = cornac.models.HFT(
    k=300,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.001,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_lamda_text001], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7539 | 0.9206 | 0.9595 | 0.6577 | 0.0365 | 0.0313 | 0.0849 |  0.0304 |  0.0493 |       0.0280 |    0.0796 |  100.5494 |   1.5014



In [22]:
data8 = {"model":"HFT",
        "data":"movielens-100K",
        "k":300,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.001,
        "l2_reg":0.001,
        "MAE":0.7539,"MSE":0.9206,"RMSE":0.9595,"AUC":0.6577,"F1":0.0365,"MAP":0.0313,"MRR":0.0850,"NCRR":0.0304,"NDCG":0.0493,"Precision":0.0280,"Recall":0.0795,
        "category diversity":compute_avg_diversity(hft_lamda_text001)}
data8["category diversity"]

0.1344581931463635

### k=300, lamda_text=0.0001

In [23]:
# Instantiate HFT model
hft_lamda_text0001 = cornac.models.HFT(
    k=300,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.0001,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_lamda_text0001], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7539 | 0.9206 | 0.9595 | 0.6577 | 0.0365 | 0.0313 | 0.0851 |  0.0305 |  0.0493 |       0.0280 |    0.0796 |   96.1422 |   1.6178



In [24]:
data9 = {"model":"HFT",
        "data":"movielens-100K",
        "k":300,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.0001,
        "l2_reg":0.001,
        "MAE":0.7539,"MSE":0.9206,"RMSE":0.9595,"AUC":0.6577,"F1":0.0365,"MAP":0.0313,"MRR":0.0851,"NCRR":0.0304,"NDCG":0.0493,"Precision":0.0280,"Recall":0.0796,
        "category diversity":compute_avg_diversity(hft_lamda_text0001)}
data9["category diversity"]

0.1344581931463635

### k=200, lambda_text=0.001

In [25]:
# Instantiate HFT model
hft_lamda_text001_k200 = cornac.models.HFT(
    k=200,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.001,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_lamda_text001_k200], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7498 | 0.9127 | 0.9553 | 0.6565 | 0.0405 | 0.0331 | 0.0913 |  0.0351 |  0.0557 |       0.0312 |    0.0882 |   83.3049 |   1.7585



In [26]:
data10 = {"model":"HFT",
        "data":"movielens-100K",
        "k":200,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.001,
        "l2_reg":0.001,
        "MAE":0.7498,"MSE":0.9127,"RMSE":0.9553,"AUC":0.6565,"F1":0.0405,"MAP":0.0331,"MRR":0.0913,"NCRR":0.0351,"NDCG":0.0557,"Precision":0.0312,"Recall":0.0881,
        "category diversity":compute_avg_diversity(hft_lamda_text001_k200)}
data10["category diversity"]

0.15096267166842275

### k=300, lambda_text=0.001, l2_reg=0.01
l2_reg: float, default: 0.001
        Regularization for user item latent factors.

In [27]:
# Instantiate HFT model
hft_l2001_k300 = cornac.models.HFT(
    k=300,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.01,
    lambda_text=0.001,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=movieslens100K_ratio_split, models=[hft_l2001_k300], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/18925 [00:00<?, ?it/s]

Ranking:   0%|          | 0/941 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.7538 | 0.9204 | 0.9594 | 0.6577 | 0.0365 | 0.0313 | 0.0849 |  0.0304 |  0.0493 |       0.0280 |    0.0794 |  101.4304 |   1.8322



In [28]:
data11 = {"model": "HFT",
          "data": "movielens-100K",
          "k": 300,
          "max_iter": 10,
          "grad_iter": 5,
          "lambda_text": 0.001,
          "l2_reg": 0.01,
          "MAE": 0.7538, "MSE": 0.9204, "RMSE": 0.9594, "AUC": 0.6577, "F1": 0.0364, "MAP": 0.0313, "MRR": 0.0849,
          "NCRR": 0.0304, "NDCG": 0.0492, "Precision": 0.0279, "Recall": 0.0794,
          "category diversity": compute_avg_diversity(hft_l2001_k300)}
data11["category diversity"]

0.1344581931463635

In [29]:
data = [data1,data2,data3,data4,data5,data6,data7,data8,data9,data10,data11]
df = pd.DataFrame(columns = data1.keys())
i=0
for d in data:
    df = pd.concat([df, pd.DataFrame(d,columns = list(d.keys()),index=[i])], ignore_index=False)
    i += 1

In [30]:
df

Unnamed: 0,model,data,k,max_iter,grad_iter,lambda_text,l2_reg,MAE,MSE,RMSE,AUC,F1,MAP,MRR,NCRR,NDCG,Precision,Recall,category diversity
0,HFT,movielens-100K,10,10,5,0.01,0.001,0.745,0.91,0.9539,0.6618,0.0476,0.0379,0.1403,0.055,0.0696,0.0381,0.0978,0.192212
1,HFT,movielens-100K,50,10,5,0.01,0.001,0.757,0.9397,0.9694,0.6559,0.047,0.0368,0.1269,0.0494,0.0668,0.0373,0.0952,0.199593
2,HFT,movielens-100K,100,10,5,0.01,0.001,0.7634,0.9471,0.9732,0.6595,0.0417,0.0343,0.1049,0.0405,0.0586,0.0329,0.0863,0.169558
3,HFT,movielens-100K,200,10,5,0.01,0.001,0.7498,0.9127,0.9553,0.6565,0.0406,0.033,0.0906,0.0349,0.0556,0.0313,0.0883,0.150963
4,HFT,movielens-100K,300,10,5,0.01,0.001,0.7539,0.9206,0.9595,0.6577,0.0366,0.0313,0.0848,0.0304,0.0493,0.0281,0.0797,0.134458
5,HFT,movielens-100K,400,10,5,0.01,0.001,0.7471,0.9015,0.9495,0.6598,0.0367,0.0315,0.0721,0.0276,0.0482,0.0282,0.0786,0.159694
6,HFT,movielens-100K,300,10,5,0.1,0.001,0.7544,0.9213,0.9599,0.6576,0.0364,0.0311,0.0825,0.0297,0.0489,0.0279,0.0793,0.134458
7,HFT,movielens-100K,300,10,5,0.001,0.001,0.7539,0.9206,0.9595,0.6577,0.0365,0.0313,0.085,0.0304,0.0493,0.028,0.0795,0.134458
8,HFT,movielens-100K,300,10,5,0.0001,0.001,0.7539,0.9206,0.9595,0.6577,0.0365,0.0313,0.0851,0.0304,0.0493,0.028,0.0796,0.134458
9,HFT,movielens-100K,200,10,5,0.001,0.001,0.7498,0.9127,0.9553,0.6565,0.0405,0.0331,0.0913,0.0351,0.0557,0.0312,0.0881,0.150963


In [31]:
df.to_csv('/Users/pigr/Desktop/cornac/cornac/dataset/hft_movielens_100K.csv',header=-1,index=0)