In [2]:
import numpy as np
import pandas as pd
# from scipy.stats import entropy
# import matplotlib.pyplot as plt
import random

import cornac
from cornac.eval_methods import RatioSplit
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer
from cornac.data import Reader
from cornac.metrics import MAE,RMSE,MSE,FMeasure,Precision,Recall,NDCG,NCRR,MRR,AUC,MAP

FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [4]:
with open ('/Users/pigr/Desktop/cornac_dataset/mind_test_200K.csv') as mind_file:
    mind = pd.read_csv(mind_file)
mind['story'] = mind['story'].astype('int')
mind_ = mind[:10000]
len(mind_)

10000

In [5]:
# pre-process data to initialize ctr model
mind_feedback = mind_.loc[:, ['user_id','item_id','rating']]
# feedback = cornac.data.Dataset.from_uir(mind_feedback.itertuples(index=False))
feedback = mind_feedback.apply(lambda x: tuple(x), axis=1).values.tolist()
text = list(mind_['text'])
item_ids = list(mind_['item_id'])
# Instantiate a TextModality, it makes it convenient to work with text auxiliary information
item_text_modality = TextModality(
    corpus=text,
    ids=item_ids,
    tokenizer=BaseTokenizer(sep=" ", stop_words="english"),
    max_vocab=8000,
    max_doc_freq=0.5,
)

# Define an evaluation method to split feedback into train and test sets
mind_ratio_split = RatioSplit(
    data=feedback,
    test_size=0.2,
    exclude_unknowns=True,
    item_text=item_text_modality,
    verbose=True,
    seed=123,
    rating_threshold=0.5,
)

rating_threshold = 0.5
exclude_unknowns = True
---
Training data:
Number of users = 272
Number of items = 3942
Number of ratings = 7911
Max rating = 1.0
Min rating = 1.0
Global mean = 1.0
---
Test data:
Number of users = 234
Number of items = 915
Number of ratings = 1336
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 272
Total items = 3942




### k=10

In [6]:
# Instantiate HFT model
ctr_k10 = cornac.models.CTR(
    k=10,
    lambda_u=0.01,
    lambda_v=0.01,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_k10], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.6023 | 0.0116 | 0.0121 | 0.0455 |  0.0180 |  0.0293 |       0.0069 |    0.0600 |    8.7085 |   0.5259



In [7]:
import diversity_mind
def compute_avg_diversity(model, data, feature):
    if (feature=="category"):
        for i in range(len(set(mind_ratio_split.test_set.user_ids))): # get the average result for all users from training data
            result = []
            r = diversity_mind.Retrieval(model=model,data = data,UIDX = i,TOPK = 50,feature = feature)
            d = diversity_mind.Diversity(r.get_history(), r.get_recy())
            result.append(d.compute())
    else:
        useridx = list(set([int(tuple[1]) for tuple in mind_ratio_split.test_set.uid_map]))
        result = []
        i = 0
        while i <= 200:
            # shuffle
            user_pair = random.sample(useridx, 2)
            r0 = diversity_mind.Retrieval(model=model, data=data, UIDX=user_pair[0],TOPK = 50, feature=feature)
            r1 = diversity_mind.Retrieval(model=model, data=data, UIDX=user_pair[1],TOPK = 50, feature=feature)
            d = diversity_mind.Diversity(r0.get_recy(), r1.get_recy())
            result.append(d.compute())
            i += 1
    return np.mean(result)

In [8]:
data1 = {"model":"CTR",
        "data":"mind",
        "k":10,
        "lambda_u":0.01,
        "lambda_v":0.01,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.6023,"F1":0.0116,"MAP":0.0121,"MRR":0.0455,"NCRR":0.0180,"NDCG":0.0293,"Precision":0.0069,"Recall":0.0600,
        "category diversity":compute_avg_diversity(ctr_k10, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_k10, mind_, "story")}
print(data1["category diversity"])
print(data1["story diversity"])

1.1132982862491942
1.4689592254207955


### k=50

In [9]:
# Instantiate HFT model
ctr_k50 = cornac.models.CTR(
    k=50,
    lambda_u=0.01,
    lambda_v=0.01,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_k50], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.5831 | 0.0088 | 0.0111 | 0.0330 |  0.0139 |  0.0249 |       0.0050 |    0.0567 |   14.4396 |   0.5941



In [10]:
data2 = {"model":"CTR",
        "data":"mind",
        "k":50,
        "lambda_u":0.01,
        "lambda_v":0.01,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5831,"F1":0.0088,"MAP":0.0111,"MRR":0.0330,"NCRR":0.0139,"NDCG":0.0249,"Precision":0.0050,"Recall":0.0567,
        "category diversity":compute_avg_diversity(ctr_k50, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_k50, mind_, "story")}
print(data2["category diversity"])
print(data2["story diversity"])

0.239911504754411
1.4662830734438659


### k=100

In [11]:
# Instantiate HFT model
ctr_k100 = cornac.models.CTR(
    k=100,
    lambda_u=0.01,
    lambda_v=0.01,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_k100], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.5372 | 0.0062 | 0.0079 | 0.0215 |  0.0087 |  0.0167 |       0.0036 |    0.0372 |   28.5283 |   0.4693



In [12]:
data3 = {"model":"CTR",
        "data":"mind",
        "k":100,
        "lambda_u":0.01,
        "lambda_v":0.01,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5372,"F1":0.0062,"MAP":0.0079,"MRR":0.0215,"NCRR":0.0087,"NDCG":0.0167,"Precision":0.0036,"Recall":0.0372,
        "category diversity":compute_avg_diversity(ctr_k100, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_k100, mind_, "story")}
print(data3["category diversity"])
print(data3["story diversity"])

0.22646657238332812
1.2274197037927126


### k=200

In [13]:
# Instantiate HFT model
ctr_k200 = cornac.models.CTR(
    k=200,
    lambda_u=0.01,
    lambda_v=0.01,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_k200], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.5449 | 0.0060 | 0.0089 | 0.0207 |  0.0089 |  0.0168 |       0.0035 |    0.0374 |   68.2161 |   0.6382



In [14]:
data4 = {"model":"CTR",
        "data":"mind",
        "k":200,
        "lambda_u":0.01,
        "lambda_v":0.01,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5449,"F1":0.0060,"MAP":0.0089,"MRR":0.0207,"NCRR":0.0089,"NDCG":0.0168,"Precision":0.0035,"Recall":0.0374,
        "category diversity":compute_avg_diversity(ctr_k200, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_k200, mind_, "story")}
print(data4["category diversity"])
print(data4["story diversity"])

0.2197598396070089
1.3624207902401426


### k=300

In [15]:
# Instantiate HFT model
ctr_k300 = cornac.models.CTR(
    k=300,
    lambda_u=0.01,
    lambda_v=0.01,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_k300], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.5936 | 0.0077 | 0.0112 | 0.0300 |  0.0131 |  0.0217 |       0.0045 |    0.0462 |   86.0962 |   0.5169



In [16]:
data5 = {"model":"CTR",
        "data":"mind",
        "k":300,
        "lambda_u":0.01,
        "lambda_v":0.01,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5936,"F1":0.0077,"MAP":0.0112,"MRR":0.0300,"NCRR":0.0131,"NDCG":0.0217,"Precision":0.0045,"Recall":0.0462,
        "category diversity":compute_avg_diversity(ctr_k300, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_k300, mind_, "story")}
print(data5["category diversity"])
print(data5["story diversity"])

0.2621068314886044
1.4222101643963663


### k=400

In [17]:
# Instantiate HFT model
ctr_k400 = cornac.models.CTR(
    k=400,
    lambda_u=0.01,
    lambda_v=0.01,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_k400], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.5883 | 0.0073 | 0.0112 | 0.0291 |  0.0130 |  0.0217 |       0.0043 |    0.0469 |  159.0961 |   0.5312



In [18]:
data6 = {"model":"CTR",
        "data":"mind",
        "k":400,
        "lambda_u":0.01,
        "lambda_v":0.01,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5883,"F1":0.0073,"MAP":0.0112,"MRR":0.0291,"NCRR":0.0130,"NDCG":0.0217,"Precision":0.0043,"Recall":0.0469,
        "category diversity":compute_avg_diversity(ctr_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_k400, mind_, "story")}
print(data6["category diversity"])
print(data6["story diversity"])

0.2581333917951102
1.2088983183944608


### k=500

In [19]:
# Instantiate HFT model
ctr_k500 = cornac.models.CTR(
    k=500,
    lambda_u=0.01,
    lambda_v=0.01,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_k500], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.5965 | 0.0075 | 0.0106 | 0.0287 |  0.0122 |  0.0211 |       0.0044 |    0.0468 |  297.7980 |   0.6581



In [20]:
data7 = {"model":"CTR",
        "data":"mind",
        "k":500,
        "lambda_u":0.01,
        "lambda_v":0.01,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5965,"F1":0.0075,"MAP":0.0106,"MRR":0.0287,"NCRR":0.0122,"NDCG":0.0211,"Precision":0.0044,"Recall":0.0468,
        "category diversity":compute_avg_diversity(ctr_k500, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_k500, mind_, "story")}
print(data7["category diversity"])
print(data7["story diversity"])

0.2602979292103075
1.36410335185139


### k=400, lambda_v=0.1

In [21]:
# Instantiate HFT model
ctr_v01_k400 = cornac.models.CTR(
    k=400,
    lambda_u=0.01,
    lambda_v=0.1,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_v01_k400], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.5984 | 0.0072 | 0.0109 | 0.0271 |  0.0118 |  0.0207 |       0.0042 |    0.0452 |  192.7012 |   0.8505



In [22]:
data8 = {"model":"CTR",
        "data":"mind",
        "k":400,
        "lambda_u":0.01,
        "lambda_v":0.1,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.5984,"F1":0.0072,"MAP":0.0109,"MRR":0.0271,"NCRR":0.0118,"NDCG":0.0207,"Precision":0.0042,"Recall":0.0452,
        "category diversity":compute_avg_diversity(ctr_v01_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_v01_k400, mind_, "story")}
print(data7["category diversity"])
print(data7["story diversity"])

0.2602979292103075
1.36410335185139


### k=400, lambda_v=1

In [23]:
# Instantiate HFT model
ctr_v1_k400 = cornac.models.CTR(
    k=400,
    lambda_u=0.01,
    lambda_v=1,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_v1_k400], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.6200 | 0.0076 | 0.0111 | 0.0261 |  0.0114 |  0.0210 |       0.0044 |    0.0471 |  161.8261 |   0.7221



In [24]:
data9 = {"model":"CTR",
        "data":"mind",
        "k":400,
        "lambda_u":0.01,
        "lambda_v":1,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.6200,"F1":0.0076,"MAP":0.0111,"MRR":0.0261,"NCRR":0.0114,"NDCG":0.0210,"Precision":0.0044,"Recall":0.0471,
        "category diversity":compute_avg_diversity(ctr_v1_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_v1_k400, mind_, "story")}
print(data9["category diversity"])
print(data9["story diversity"])

0.24794737816218898
1.5081829190932745


### k=400, lambda_v=10

In [25]:
# Instantiate HFT model
ctr_v10_k400 = cornac.models.CTR(
    k=400,
    lambda_u=0.01,
    lambda_v=10,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_v10_k400], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.6308 | 0.0075 | 0.0102 | 0.0246 |  0.0101 |  0.0205 |       0.0044 |    0.0488 |  166.4190 |   0.6877



In [26]:
data10 = {"model":"CTR",
        "data":"mind",
        "k":400,
        "lambda_u":0.01,
        "lambda_v":10,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.6308,"F1":0.0075,"MAP":0.0102,"MRR":0.0246,"NCRR":0.0101,"NDCG":0.0205,"Precision":0.0044,"Recall":0.0488,
        "category diversity":compute_avg_diversity(ctr_v10_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_v10_k400, mind_, "story")}
print(data10["category diversity"])
print(data10["story diversity"])

0.25429152535441835
1.5649867403745632


### k=400, lambda_v=100

In [27]:
# Instantiate HFT model
ctr_v100_k400 = cornac.models.CTR(
    k=400,
    lambda_u=0.01,
    lambda_v=100,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_v100_k400], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.6748 | 0.0108 | 0.0153 | 0.0432 |  0.0188 |  0.0341 |       0.0062 |    0.0865 |  150.0576 |   0.5578



In [28]:
data11 = {"model":"CTR",
        "data":"mind",
        "k":400,
        "lambda_u":0.01,
        "lambda_v":100,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.6748,"F1":0.0108,"MAP":0.0153,"MRR":0.0432,"NCRR":0.0188,"NDCG":0.0341,"Precision":0.0062,"Recall":0.0865,
        "category diversity":compute_avg_diversity(ctr_v100_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_v100_k400, mind_, "story")}
print(data11["category diversity"])
print(data11["story diversity"])

2.6298190706243045
1.5265278051802473


### k=400,lambda_v=0.1,lambda_u=0.1

In [29]:
# Instantiate HFT model
ctr_u01_v01_k400 = cornac.models.CTR(
    k=400,
    lambda_u=0.1,
    lambda_v=0.1,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_u01_v01_k400], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.6026 | 0.0070 | 0.0111 | 0.0289 |  0.0122 |  0.0206 |       0.0041 |    0.0447 |  165.9264 |   0.5777



In [30]:
data12 = {"model":"CTR",
        "data":"mind",
        "k":400,
        "lambda_u":0.1,
        "lambda_v":0.1,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.6026,"F1":0.0070,"MAP":0.0111,"MRR":0.0289,"NCRR":0.0122,"NDCG":0.0206,"Precision":0.0041,"Recall":0.0447,
        "category diversity":compute_avg_diversity(ctr_u01_v01_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_u01_v01_k400, mind_, "story")}
print(data12["category diversity"])
print(data12["story diversity"])

0.2499707066368308
1.4429969587201958



### k=400, lambda_v=0.1, lambda_u=1

In [31]:
# Instantiate HFT model
ctr_u1_v01_k400 = cornac.models.CTR(
    k=400,
    lambda_u=1,
    lambda_v=0.1,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_u1_v01_k400], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.6282 | 0.0081 | 0.0108 | 0.0234 |  0.0102 |  0.0215 |       0.0047 |    0.0516 |  161.6932 |   0.5923



In [32]:
data13 = {"model":"CTR",
        "data":"mind",
        "k":400,
        "lambda_u":1,
        "lambda_v":0.1,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.6282,"F1":0.0081,"MAP":0.0108,"MRR":0.0234,"NCRR":0.0102,"NDCG":0.0215,"Precision":0.0047,"Recall":0.0516,
        "category diversity":compute_avg_diversity(ctr_u1_v01_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_u1_v01_k400, mind_, "story")}
print(data13["category diversity"])
print(data13["story diversity"])

0.2390969237581832
1.3395862535621295


### k=400, lambda_v=0.1, lambda_u=10

In [33]:
# Instantiate HFT model
ctr_u10_v01_k400 = cornac.models.CTR(
    k=400,
    lambda_u=10,
    lambda_v=0.1,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_u10_v01_k400], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.6890 | 0.0106 | 0.0132 | 0.0341 |  0.0146 |  0.0298 |       0.0061 |    0.0765 |  188.8644 |   0.9451



In [34]:
data14 = {"model":"CTR",
        "data":"mind",
        "k":400,
        "lambda_u":10,
        "lambda_v":0.1,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.6890,"F1":0.0106,"MAP":0.0132,"MRR":0.0341,"NCRR":0.0146,"NDCG":0.0298,"Precision":0.0061,"Recall":0.0765,
        "category diversity":compute_avg_diversity(ctr_u10_v01_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_u10_v01_k400, mind_, "story")}
print(data14["category diversity"])
print(data14["story diversity"])

2.2994837219088935
1.309685001382068


### k=400, lambda_v=0.1, lambda_u=100

In [35]:
# Instantiate HFT model
ctr_u100_v01_k400 = cornac.models.CTR(
    k=400,
    lambda_u=100,
    lambda_v=0.1,
    eta=0.01,
    a=1,
    b=0.01,
    max_iter=10,
    trainable=True,
    verbose=True,
    init_params=None,
    seed=123,
)

# Instantiate MSE for evaluation
metrics = [MAE(), RMSE(), MSE(),FMeasure(k=50),Precision(k=50),
           Recall(k=50), NDCG(k=50), NCRR(k=50),
           MRR(),AUC(), MAP()]

# Put everything together into an experiment and run it
cornac.Experiment(
    eval_method=mind_ratio_split, models=[ctr_u100_v01_k400], metrics=metrics, user_based=False
).run()


[CTR] Training started!


  0%|          | 0/10 [00:00<?, ?it/s]

Learning completed!

[CTR] Evaluation started!


Rating:   0%|          | 0/1336 [00:00<?, ?it/s]

Ranking:   0%|          | 0/234 [00:00<?, ?it/s]


TEST:
...
    |    MAE |    MSE |   RMSE |    AUC |  F1@50 |    MAP |    MRR | NCRR@50 | NDCG@50 | Precision@50 | Recall@50 | Train (s) | Test (s)
--- + ------ + ------ + ------ + ------ + ------ + ------ + ------ + ------- + ------- + ------------ + --------- + --------- + --------
CTR | 0.0000 | 0.0000 | 0.0000 | 0.7570 | 0.0210 | 0.0206 | 0.0557 |  0.0263 |  0.0536 |       0.0123 |    0.1268 |  161.9034 |   0.5364



In [36]:
data15 = {"model":"CTR",
        "data":"mind",
        "k":400,
        "lambda_u":10,
        "lambda_v":0.1,
        "eta":0.01,
        "a":1,
        "b":0.01,
        "max_iter":10,
        "MAE":0.0000,"MSE":0.0000,"RMSE":0.0000,"AUC":0.7570,"F1":0.0210,"MAP":0.0206,"MRR":0.0557,"NCRR":0.0263,"NDCG":0.0536,"Precision":0.0123,"Recall":0.1268,
        "category diversity":compute_avg_diversity(ctr_u100_v01_k400, mind_, "category"),
         "story diversity":compute_avg_diversity(ctr_u100_v01_k400, mind_, "story")}
print(data15["category diversity"])
print(data15["story diversity"])

0.2692351375066459
0.9376505594864659


In [37]:
data = [data1,data2,data3,data4,data5,data6,data7,data8,data9,data10,data11,data12,data13,data14,data15]
df = pd.DataFrame(columns = data1.keys())
i=0
for d in data:
    df = pd.concat([df, pd.DataFrame(d,columns = list(d.keys()),index=[i])], ignore_index=False)
    i += 1

In [38]:
df

Unnamed: 0,model,data,k,lambda_u,lambda_v,eta,a,b,max_iter,MAE,...,AUC,F1,MAP,MRR,NCRR,NDCG,Precision,Recall,category diversity,story diversity
0,CTR,mind,10,0.01,0.01,0.01,1,0.01,10,0.0,...,0.6023,0.0116,0.0121,0.0455,0.018,0.0293,0.0069,0.06,1.113298,1.468959
1,CTR,mind,50,0.01,0.01,0.01,1,0.01,10,0.0,...,0.5831,0.0088,0.0111,0.033,0.0139,0.0249,0.005,0.0567,0.239912,1.466283
2,CTR,mind,100,0.01,0.01,0.01,1,0.01,10,0.0,...,0.5372,0.0062,0.0079,0.0215,0.0087,0.0167,0.0036,0.0372,0.226467,1.22742
3,CTR,mind,200,0.01,0.01,0.01,1,0.01,10,0.0,...,0.5449,0.006,0.0089,0.0207,0.0089,0.0168,0.0035,0.0374,0.21976,1.362421
4,CTR,mind,300,0.01,0.01,0.01,1,0.01,10,0.0,...,0.5936,0.0077,0.0112,0.03,0.0131,0.0217,0.0045,0.0462,0.262107,1.42221
5,CTR,mind,400,0.01,0.01,0.01,1,0.01,10,0.0,...,0.5883,0.0073,0.0112,0.0291,0.013,0.0217,0.0043,0.0469,0.258133,1.208898
6,CTR,mind,500,0.01,0.01,0.01,1,0.01,10,0.0,...,0.5965,0.0075,0.0106,0.0287,0.0122,0.0211,0.0044,0.0468,0.260298,1.364103
7,CTR,mind,400,0.01,0.1,0.01,1,0.01,10,0.0,...,0.5984,0.0072,0.0109,0.0271,0.0118,0.0207,0.0042,0.0452,0.253321,1.471805
8,CTR,mind,400,0.01,1.0,0.01,1,0.01,10,0.0,...,0.62,0.0076,0.0111,0.0261,0.0114,0.021,0.0044,0.0471,0.247947,1.508183
9,CTR,mind,400,0.01,10.0,0.01,1,0.01,10,0.0,...,0.6308,0.0075,0.0102,0.0246,0.0101,0.0205,0.0044,0.0488,0.254292,1.564987


In [39]:
df.to_csv('/Users/pigr/Desktop/cornac/cornac/dataset/ctr_mind.csv',header=-1,index=0)