In [1]:
import numpy as np
import pandas as pd
from scipy.stats import entropy
import matplotlib.pyplot as plt
import random

import cornac
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer
from cornac.data import Reader
from cornac.metrics import MAE,RMSE,MSE,FMeasure,Precision,Recall,NDCG,NCRR,MRR,AUC,MAP

FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [3]:
with open ('/Users/pigr/Desktop/cornac_dataset/mind_test_200K.csv') as mind_file:
    mind = pd.read_csv(mind_file)
mind['story'] = mind['story'].astype('int')

In [4]:
mind_ = mind[:10000]
len(mind_)

10000

In [5]:
mind_.head()

Unnamed: 0,user_id,item_id,rating,date,category,url,text,entities,sentiment,complexity,publication_date,entities_base,enriched_entities,story
0,U13740,N55189,1,2019-10-15,tv,https://assets.msn.com/labs/mind/AAIORni.html,"We’d like to solve the puzzle, Pat: Blair Davi...","[{'text': 'Cardiff', 'alternative': ['Cardiff'...",0.091856,62.17,1571098000000.0,"[{'text': 'Pat', 'start_char': 31, 'end_char':...","[{'text': 'Cardiff', 'alternative': ['Cardiff'...",766
1,U13740,N42782,1,2019-10-19,sports,https://assets.msn.com/labs/mind/AAJ1mE9.html,The Yankees forced a Game 6 on Friday as it to...,"[{'text': 'second', 'alternative': ['second'],...",0.028105,73.92,1571443000000.0,"[{'text': 'Yankees', 'start_char': 4, 'end_cha...","[{'text': 'second', 'alternative': ['second'],...",444
2,U13740,N34694,1,2019-10-19,tv,https://assets.msn.com/labs/mind/AAIZa64.html,A living legend. Rosie O’Donnell gave Us Weekl...,"[{'text': '90', 'alternative': ['90'], 'freque...",0.330846,81.63,1571443000000.0,"[{'text': 'Us Weekly', 'start_char': 38, 'end_...","[{'text': '90', 'alternative': ['90'], 'freque...",1331
3,U13740,N45794,1,2019-10-23,news,https://assets.msn.com/labs/mind/AAJcQKF.html,Four American Airlines flight attendants were ...,"[{'text': 'Four', 'alternative': ['Four'], 'fr...",0.044328,61.26,1571789000000.0,"[{'text': 'Four', 'start_char': 0, 'end_char':...","[{'text': 'Four', 'alternative': ['Four'], 'fr...",0
4,U13740,N18445,1,2019-10-27,sports,https://assets.msn.com/labs/mind/AAJpMXE.html,Michigan sent a funny tweet on Saturday night ...,"[{'text': 'two', 'alternative': ['two'], 'freq...",0.166667,90.29,1572134000000.0,"[{'text': 'Michigan', 'start_char': 0, 'end_ch...","[{'text': 'two', 'alternative': ['two'], 'freq...",0


In [6]:
# pre-process data to initialize ctr model
mind_feedback = mind_.loc[:, ['user_id','item_id','rating']]
# feedback = cornac.data.Dataset.from_uir(mind_feedback.itertuples(index=False))
feedback = mind_feedback.apply(lambda x: tuple(x), axis=1).values.tolist()
text = list(mind_['text'])
item_ids = list(mind_['item_id'])
# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
item_text_modality = TextModality(
    corpus=text,
    ids=item_ids,
    tokenizer=BaseTokenizer(sep=" ", stop_words="english"),
    max_vocab=8000,
    max_doc_freq=0.5,
)

# Define an evaluation method to split feedback into train and test sets
mind_ratio_split = RatioSplit(
    data=feedback,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality,
    verbose=True,
    seed=123,
    rating_threshold=0.5,
)

rating_threshold = 0.5
exclude_unknowns = True
---
Training data:
Number of users = 272
Number of items = 3942
Number of ratings = 7911
Max rating = 1.0
Min rating = 1.0
Global mean = 1.0
---
Test data:
Number of users = 234
Number of items = 915
Number of ratings = 1336
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 272
Total items = 3942




### k=10

In [7]:
# Instantiate HFT model
hft_k10 = cornac.models.HFT(
    k=10,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_k10], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.5058 | 0.0026 | 0.0054 | 0.0217 |  0.0073 |  0.0088 |       0.0015 |    0.0157 |   47.7344 |   0.4496



In [8]:
import diversity_mind
def compute_avg_diversity(model, data, feature):
    if (feature=="category"):
        for i in range(len(set(mind_ratio_split.test_set.user_ids))): # get the average result for all users from training data
            result = []
            r = diversity_mind.Retrieval(model=model,data = data,UIDX = i,TOPK = 50,feature = feature)
            d = diversity_mind.Diversity(r.get_history(), r.get_recy())
            result.append(d.compute())
    else:
        useridx = list(set([int(tuple[1]) for tuple in mind_ratio_split.test_set.uid_map]))
        result = []
        i = 0
        while i <= 200:
            # shuffle
            user_pair = random.sample(useridx, 2)
            r0 = diversity_mind.Retrieval(model=model, data=data, UIDX=user_pair[0],TOPK = 50, feature=feature)
            r1 = diversity_mind.Retrieval(model=model, data=data, UIDX=user_pair[1],TOPK = 50, feature=feature)
            d = diversity_mind.Diversity(r0.get_recy(), r1.get_recy())
            result.append(d.compute())
            i += 1
    return np.mean(result)

In [9]:
data1 = {"model":"HFT",
        "data":"mind",
        "k":10,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5047,"F1":0.0030,"MAP":0.0035,"MRR":0.0091,"NCRR":0.0026,"NDCG":0.0075,"Precision":0.0017,"Recall":0.0236,
        "category diversity":compute_avg_diversity(hft_k10, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_k10, mind_, "story")}
print(data1["category diversity"])
print(data1["story diversity"])

0.16396560474705105
0.9045115577052535


### k=50

In [10]:
# Instantiate HFT model
hft_k50 = cornac.models.HFT(
    k=50,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_k50], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.5085 | 0.0022 | 0.0032 | 0.0065 |  0.0017 |  0.0047 |       0.0014 |    0.0130 |   70.8497 |   0.5839



In [13]:
data2 = {"model":"HFT",
        "data":"mind",
        "k":50,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.4918,"F1":0.0008,"MAP":0.0026,"MRR":0.0059,"NCRR":0.0011,"NDCG":0.0021,"Precision":0.0005,"Recall":0.0063,
        "category diversity":compute_avg_diversity(hft_k50, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_k50, mind_, "story")}
print(data2["category diversity"])
print(data2["story diversity"])

0.19043215399053856
1.267216020198324


### k=100

In [12]:
# Instantiate HFT model
hft_k100 = cornac.models.HFT(
    k=100,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_k100], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.5153 | 0.0040 | 0.0051 | 0.0193 |  0.0062 |  0.0117 |       0.0024 |    0.0326 |  128.9839 |   0.4669



In [14]:
data3 = {"model":"HFT",
        "data":"mind",
        "k":100,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5207,"F1":0.0032,"MAP":0.0031,"MRR":0.0064,"NCRR":0.0015,"NDCG":0.0061,"Precision":0.0019,"Recall":0.0203,
        "category diversity":compute_avg_diversity(hft_k100, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_k100, mind_, "story")}
print(data3["category diversity"])
print(data3["story diversity"])

0.2994482672296015
1.238645938432165


### k=200

In [15]:
# Instantiate HFT model
hft_k200 = cornac.models.HFT(
    k=200,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_k200], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.5039 | 0.0023 | 0.0028 | 0.0064 |  0.0018 |  0.0047 |       0.0014 |    0.0117 |  169.6924 |   0.7212



In [16]:
data4 = {"model":"HFT",
        "data":"mind",
        "k":200,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5009,"F1":0.0016,"MAP":0.0028,"MRR":0.0074,"NCRR":0.0018,"NDCG":0.0032,"Precision":0.0010,"Recall":0.0060,
        "category diversity":compute_avg_diversity(hft_k200, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_k200, mind_, "story")}
print(data4["category diversity"])
print(data4["story diversity"])

0.17147534416801316
0.7212621544088905


### k=300

In [17]:
# Instantiate HFT model
hft_k300 = cornac.models.HFT(
    k=300,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_k300], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.5017 | 0.0029 | 0.0034 | 0.0064 |  0.0021 |  0.0065 |       0.0018 |    0.0191 |  188.7188 |   0.5018



In [18]:
data5 = {"model":"HFT",
        "data":"mind",
        "k":300,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5078,"F1":0.0007,"MAP":0.0023,"MRR":0.0036,"NCRR":0.0003,"NDCG":0.0010,"Precision":0.0004,"Recall":0.0022,
        "category diversity":compute_avg_diversity(hft_k300, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_k300, mind_, "story")}
print(data5["category diversity"])
print(data5["story diversity"])

0.6952346382870473
0.7134748246574826


### k=400

In [19]:
# Instantiate HFT model
hft_k400 = cornac.models.HFT(
    k=400,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_k400], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.4951 | 0.0019 | 0.0031 | 0.0063 |  0.0019 |  0.0049 |       0.0011 |    0.0133 |  238.4574 |   0.5260



In [20]:
data6 = {"model":"HFT",
        "data":"mind",
        "k":400,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.001,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5111,"F1":0.0011,"MAP":0.0027,"MRR":0.0036,"NCRR":0.0010,"NDCG":0.0033,"Precision":0.0006,"Recall":0.0104,
        "category diversity":compute_avg_diversity(hft_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_k400, mind_, "story")}
print(data6["category diversity"])
print(data6["story diversity"])

0.2829768282174946
0.833893953804446


### k=50, lambda_text=0.1

In [21]:
# Instantiate HFT model
hft_lambda_text01_k50 = cornac.models.HFT(
    k=50,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.1,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_lambda_text01_k50], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.4814 | 0.0028 | 0.0035 | 0.0121 |  0.0035 |  0.0060 |       0.0017 |    0.0115 |   78.0048 |   0.5500



In [22]:
data7 = {"model":"HFT",
        "data":"mind",
        "k":50,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.1,
        "l2_reg":0.001,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5048,"F1":0.0020,"MAP":0.0033,"MRR":0.0109,"NCRR":0.0029,"NDCG":0.0048,"Precision":0.0012,"Recall":0.0118,
        "category diversity":compute_avg_diversity(hft_lambda_text01_k50, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_lambda_text01_k50, mind_, "story")}
print(data7["category diversity"])
print(data7["story diversity"])

0.7117619035128504
1.2760288635067285


### k=10, lambda_text=0.1

In [23]:
# Instantiate HFT model
hft_lambda_text01_k10 = cornac.models.HFT(
    k=10,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.001,
    lambda_text=0.1,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_lambda_text01_k10], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.5136 | 0.0031 | 0.0037 | 0.0080 |  0.0024 |  0.0071 |       0.0019 |    0.0202 |   70.9974 |   0.4813



In [24]:
data8 = {"model":"HFT",
        "data":"mind",
        "k":10,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.1,
        "l2_reg":0.001,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.4974,"F1":0.0016,"MAP":0.0027,"MRR":0.0048,"NCRR":0.0008,"NDCG":0.0025,"Precision":0.0010,"Recall":0.0064,
        "category diversity":compute_avg_diversity(hft_lambda_text01_k10, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_lambda_text01_k10, mind_, "story")}
print(data8["category diversity"])
print(data8["story diversity"])

0.8952004524817146
1.1745088443207166


### k=50, lambda_text=0.01, l2_reg=0.01

In [25]:
# Instantiate HFT model
hft_l201_k50 = cornac.models.HFT(
    k=50,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.01,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_l201_k50], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.4971 | 0.0028 | 0.0071 | 0.0109 |  0.0061 |  0.0097 |       0.0017 |    0.0194 |   69.6633 |   0.5382



In [26]:
data9 = {"model":"HFT",
        "data":"mind",
        "k":50,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.01,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.4976,"F1":0.0026,"MAP":0.0055,"MRR":0.0116,"NCRR":0.0051,"NDCG":0.0076,"Precision":0.0016,"Recall":0.0136,
        "category diversity":compute_avg_diversity(hft_l201_k50, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_l201_k50, mind_, "story")}
print(data9["category diversity"])
print(data9["story diversity"])

0.443079039674977
1.077328362262056


### k=50, lambda_text=0.01, l2_reg=0.1

In [27]:
# Instantiate HFT model
hft_l21_k50 = cornac.models.HFT(
    k=50,
    max_iter=10,
    grad_iter=5,
    l2_reg=0.1,
    lambda_text=0.01,
    vocab_size=8000,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[hft_l21_k50], metrics=metrics, user_based=False
).run()


[HFT] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[HFT] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
HFT | 0.0000 | 0.0000 | 0.0000 | 0.4988 | 0.0024 | 0.0056 | 0.0091 |  0.0044 |  0.0094 |       0.0014 |    0.0259 |   80.9620 |   0.4694



In [28]:
data10 = {"model":"HFT",
        "data":"mind",
        "k":50,
        "max_iter":10,
        "grad_iter":5,
        "lambda_text":0.01,
        "l2_reg":0.1,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.4957,"F1":0.0014,"MAP":0.0025,"MRR":0.0051,"NCRR":0.0009,"NDCG":0.0025,"Precision":0.0009,"Recall":0.0060,
        "category diversity":compute_avg_diversity(hft_l21_k50, mind_, "category"),
         "story diversity":compute_avg_diversity(hft_l21_k50, mind_, "story")}
print(data10["category diversity"])
print(data10["story diversity"])

0.08683283281052173
1.0871930964606356


In [29]:
data = [data1,data2,data3,data4,data5,data6,data7,data8,data9,data10]
df = pd.DataFrame(columns = data1.keys())
i=0
for d in data:
    df = pd.concat([df, pd.DataFrame(d,columns = list(d.keys()),index=[i])], ignore_index=False)
    i += 1

In [30]:
df

Unnamed: 0,model,data,k,max_iter,grad_iter,lambda_text,l2_reg,MAE,MSE,RMSE,AUC,F1,MAP,MRR,NCRR,NDCG,Precision,Recall,category diversity,story diversity
0,HFT,mind,10,10,5,0.01,0.001,0.0,0.0,0.0,0.5047,0.003,0.0035,0.0091,0.0026,0.0075,0.0017,0.0236,0.163966,0.904512
1,HFT,mind,50,10,5,0.01,0.001,0.0,0.0,0.0,0.4918,0.0008,0.0026,0.0059,0.0011,0.0021,0.0005,0.0063,0.190432,1.267216
2,HFT,mind,100,10,5,0.01,0.001,0.0,0.0,0.0,0.5207,0.0032,0.0031,0.0064,0.0015,0.0061,0.0019,0.0203,0.299448,1.238646
3,HFT,mind,200,10,5,0.01,0.001,0.0,0.0,0.0,0.5009,0.0016,0.0028,0.0074,0.0018,0.0032,0.001,0.006,0.171475,0.721262
4,HFT,mind,300,10,5,0.01,0.001,0.0,0.0,0.0,0.5078,0.0007,0.0023,0.0036,0.0003,0.001,0.0004,0.0022,0.695235,0.713475
5,HFT,mind,400,10,5,0.01,0.001,0.0,0.0,0.0,0.5111,0.0011,0.0027,0.0036,0.001,0.0033,0.0006,0.0104,0.282977,0.833894
6,HFT,mind,50,10,5,0.1,0.001,0.0,0.0,0.0,0.5048,0.002,0.0033,0.0109,0.0029,0.0048,0.0012,0.0118,0.711762,1.276029
7,HFT,mind,10,10,5,0.1,0.001,0.0,0.0,0.0,0.4974,0.0016,0.0027,0.0048,0.0008,0.0025,0.001,0.0064,0.8952,1.174509
8,HFT,mind,50,10,5,0.01,0.01,0.0,0.0,0.0,0.4976,0.0026,0.0055,0.0116,0.0051,0.0076,0.0016,0.0136,0.443079,1.077328
9,HFT,mind,50,10,5,0.01,0.1,0.0,0.0,0.0,0.4957,0.0014,0.0025,0.0051,0.0009,0.0025,0.0009,0.006,0.086833,1.087193


In [31]:
df.to_csv('/Users/pigr/Desktop/cornac/cornac/dataset/hft_mind.csv',header=-1,index=0)