## DEF

In [1]:
import os
import sys
from IPython.display import clear_output

# Locate the directory that holds this notebook/script.
# `__vsc_ipynb_file__` exists only when the notebook runs under VS Code's
# Jupyter extension and `__file__` only when executed as a script/module, so
# both are looked up defensively instead of being referenced as bare names
# (a bare reference raises NameError in every other environment).
_NOTEBOOK_FILE = globals().get("__vsc_ipynb_file__")
if _NOTEBOOK_FILE is not None:
    __FILE_DIR_PATH = os.path.dirname(_NOTEBOOK_FILE)
elif "__file__" in globals():
    __FILE_DIR_PATH = os.path.dirname(os.path.abspath(__file__))
else:
    # Fallback for kernels that expose neither name: use the working directory.
    __FILE_DIR_PATH = os.getcwd()

# The workspace root is the parent of the notebook directory; when that parent
# is the repository's `trunk` folder, step up once more. Path components are
# removed structurally (not with str.replace) so that unrelated occurrences of
# the same text elsewhere in the path are left untouched.
WORKSPACE_HOME = os.path.dirname(__FILE_DIR_PATH)
if os.path.basename(WORKSPACE_HOME) == "trunk":
    WORKSPACE_HOME = os.path.dirname(WORKSPACE_HOME)
DATASET_DIR_HOME = f"{WORKSPACE_HOME}/data/colley"

print(WORKSPACE_HOME)
# Make the project packages importable; avoid duplicate entries on cell re-runs.
if WORKSPACE_HOME not in sys.path:
    sys.path.append(WORKSPACE_HOME)

import copy

import numpy as np
from pandas import DataFrame
from tqdm import tqdm
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from core import *
from ipirec import *
from colley import *

# Figure font configuration. "NanumGothic" is the active font family; the
# previously used alternative was "AppleGothic". `axes.unicode_minus` is
# switched off so minus signs are drawn with the ASCII hyphen.
mpl.rcParams.update(
    {
        "font.family": "NanumGothic",
        "axes.unicode_minus": False,
    }
)

/home/taegyu/git_repo/ipirec


## OPT & ALLOC

In [2]:
# Fold to train/evaluate on and the top-N cut-offs (3, 5, ..., 35) to evaluate.
_FOLD_SET_ID = 2
top_n_conditions = list(range(3, 37, 2))

# Test-set CSV files of this fold: index 0 is E_LIKE, index 1 is E_PURCHASE.
_TEST_SET_FILES_LIST = [
    f"{DATASET_DIR_HOME}/test_{_FOLD_SET_ID}_{DecisionType.to_str(d)}_list.csv"
    for d in (DecisionType.E_LIKE, DecisionType.E_PURCHASE)
]

# Load the metadata, then append the training decisions of every decision type
# for the selected fold, and finally build the id <-> index mapping.
dataset = ColleyFilteredDataSet(dataset_dir_path=DATASET_DIR_HOME)
dataset._load_metadata_()
for decision_type in DecisionType:
    dataset.append_decisions(
        file_path=(
            f"{DATASET_DIR_HOME}/train_{_FOLD_SET_ID}"
            f"_{DecisionType.to_str(decision_type)}_list.csv"
        ),
        decision_type=decision_type,
    )
dataset.__id_index_mapping__()

[IO] /home/taegyu/git_repo/ipirec/data/colley/train_2_view_list.csv


[LOAD] train_2_view_list.csv: 100%|██████████| 46568/46568 [00:00<00:00, 84445.22it/s]


[IO] /home/taegyu/git_repo/ipirec/data/colley/train_2_like_list.csv


[LOAD] train_2_like_list.csv: 100%|██████████| 22362/22362 [00:00<00:00, 84211.22it/s]


[IO] /home/taegyu/git_repo/ipirec/data/colley/train_2_purchase_list.csv


[LOAD] train_2_purchase_list.csv: 100%|██████████| 12349/12349 [00:00<00:00, 83231.50it/s]


In [3]:
# Attach the users' interest tags to the dataset (the recorded output shows
# user_interest_tag_list.csv under the dataset directory being read).
dataset.append_interest_tags()

/home/taegyu/git_repo/ipirec/data/colley/user_interest_tag_list.csv


In [None]:
# Ad-hoc inspection of one user's `top_n_decision_tags_set` (this cell has no
# execution count, i.e. it was not run in the saved session).
# NOTE(review): this attribute is presumably filled by `model.analysis()`, which
# only runs in a later cell — confirm the intended execution order.
user: UserEntity = dataset.user_dict[692466]
user.top_n_decision_tags_set
# user.top_n_decision_tags_set.difference_update(user.set_of_interest_tags)

In [4]:
# Number of top decision tags handed to the model parameters.
top_n_tags = 5

# Build the IPIRec model on the loaded dataset and run its analysis step.
model_params = IPIRecModel.create_models_parameters(top_n_tags=top_n_tags, co_occur_items_threshold=4)
model = IPIRecModel(dataset=dataset, model_params=model_params)
model.analysis()

CorrelationModel.preprocess()
CorrelationModel.top_n_decision_tags()
CorrelationModel.mean_freq_tags()
CorrelationModel.item_based_tags_corr()
CorrelationModel.user_based_tags_corr()
CorrelationModel.tags_score()


In [5]:
# Tunable training settings; `score_iters` / `weight_iters` are also referenced
# by the disabled experiment cell further down.
frob_norm = 1.0
score_iters = 10
weight_iters = 5

# Estimator hyper-parameters, grouped by their name prefix.
estimator_params = AdjustedBiasedCorrelationEstimator.create_models_parameters(
    # score_* settings
    score_iterations=score_iters,
    score_learning_rate=10 ** -2,
    score_generalization=10 ** -4,
    # weight_* settings
    weight_iterations=weight_iters,
    weight_learning_rate=10 ** -3,
    weight_generalization=1.0,
    # remaining settings
    frob_norm=frob_norm,
    default_voting=0.0,
)
estimator = AdjustedBiasedCorrelationEstimator(model=model, model_params=estimator_params)

AdjustedBiasedCorrelationEstimator.append_biases()


In [None]:
"""
for decision_type in DecisionType:
    for _ in tqdm(
        iterable=range(score_iters),
        desc=f"{DecisionType.to_str(decision_type)}",
        total=score_iters,
    ):
        _L = estimator._adjust_tags_corr_(
            decision_type=decision_type,
        )
        print(f"{_L}")
        # estimator._personalization_(
        #     target_decision=decision_type,
        # )
"""

## FIT

In [None]:
# Look up the estimator's internal index for user id 692466 (-1 when the id is
# unknown) and, if found, print that user's row of `arr_users_tags_map`.
_uidx = estimator.user_id_to_idx.get(692466, -1)
if _uidx != -1:
    print(estimator.arr_users_tags_map[_uidx])

In [None]:
# Fetch the entity of user id 692466 from the dataset for interactive inspection.
user: UserEntity = dataset.user_dict[692466]

In [None]:
# Look up the estimator's internal index for item id 472 (-1 when the id is
# unknown) and, if found, print that item's row of `arr_items_tags_map`.
_iidx = estimator.item_id_to_idx.get(472, -1)
if _iidx != -1:
    print(estimator.arr_items_tags_map[_iidx])

In [6]:
# Alternating fit on the VIEW decisions. Every round calls
# `estimator.__append_biases__()`, then `_adjust_tags_corr_` (reported as the
# "S" step) and `_personalization_` (reported as the "W" step). The loop ends as
# soon as the W-step loss is worse than the best W-step loss recorded so far.
#
# `_MAX_FIT_ROUNDS` bounds the loop: without it a loss that plateaus (never
# becomes strictly worse than the best one) would keep the cell running forever.
# The cap matches the 100-iteration budget used by the other training cells.
_MAX_FIT_ROUNDS = 100


def _report_fit_step(step_name, round_no, loss):
    """Print one progress record and return ``(s_std, w_std, penalised_loss)``.

    ``s_std`` and ``w_std`` are the standard deviations of
    ``estimator.arr_tags_score`` and ``estimator.arr_user_idx_to_weights``.
    ``penalised_loss`` is ``loss + sqrt(w_std)``; it is only reported and does
    not take part in the stopping decision.
    """
    s_std = np.std(estimator.arr_tags_score)
    w_std = np.std(estimator.arr_user_idx_to_weights)
    print(f"[{round_no} {step_name}] L: {loss}\nS: {s_std}\nW: {w_std}")
    penalised_loss = loss + (w_std ** 0.5)
    print(penalised_loss)
    return s_std, w_std, penalised_loss


loss_log_list = list()
_ITER = 0
while _ITER < _MAX_FIT_ROUNDS:
    _ITER += 1
    estimator.__append_biases__()

    # "S" step.
    _L = estimator._adjust_tags_corr_(DecisionType.E_VIEW)
    _S, _W, __L = _report_fit_step("S", _ITER, _L)

    # "W" step.
    _L = estimator._personalization_(DecisionType.E_VIEW)
    _S, _W, __L = _report_fit_step("W", _ITER, _L)

    # Early stopping on the W-step loss only.
    loss_log_list.append(_L)
    _min = min(loss_log_list)
    if _min < _L:
        # NOTE(review): the estimator keeps the parameters of this last (worse)
        # round; consider checkpointing the best round, e.g. with copy.deepcopy.
        break

# Sanity check: estimate the score of a single (user, item) pair and display it.
inst = BaseAction(user_id=692466, item_id=472)
inst = estimator._estimate_(inst)
inst.estimated_score

AdjustedBiasedCorrelationEstimator.append_biases()
[1 S] L: 0.6894625961813272
S: 0.12315779947292778
W: 0.0
0.6894625961813272


[view] Adjust: 100%|██████████| 39992/39992 [00:06<00:00, 5919.08it/s]


[1 W] L: 0.9987437682391035
S: 0.09011785644077157
W: 0.02918587438762188
1.169582506198683
AdjustedBiasedCorrelationEstimator.append_biases()
[2 S] L: 0.7956773834984967
S: 0.12342493682073691
W: 0.02918587438762188
0.9665161214580762


[view] Adjust: 100%|██████████| 39992/39992 [00:06<00:00, 5951.53it/s]


[2 W] L: 0.9990000601528866
S: 0.10302121897816942
W: 0.02989768236875534
1.1719095230771446


0.0

# TRAIN(S)

## S(V)

In [None]:
# Values noted by the author from earlier runs (presumably losses):
#   21881.16485981459
#   22906.855353528248
#   [P -> S] 22847.84812020912 >> 25977.561489833748 >> 25564.655994604418
_ITER = 100
decision_type = DecisionType.E_VIEW
__E = []
for _ in tqdm(range(_ITER), desc=f"{DecisionType.to_str(decision_type)}", total=_ITER):
    _L = estimator._adjust_tags_corr_(decision_type=decision_type)
    print(f"{_L}")
    __E.append(_L)
    # Stop once the newest value is worse than the best value seen in this run.
    if _L > min(__E):
        __E.clear()
        break

## S(L)

In [None]:
# Values noted by the author from earlier runs (presumably losses):
#   7841.645698883881 >> 7956.079589877575
#   8582.921496806883
_ITER = 100
decision_type = DecisionType.E_LIKE
__E = []
for _ in tqdm(range(_ITER), desc=f"{DecisionType.to_str(decision_type)}", total=_ITER):
    _L = estimator._adjust_tags_corr_(decision_type=decision_type)
    print(f"{_L}")
    __E.append(_L)
    # Stop once the newest value is worse than the best value seen in this run.
    if _L > min(__E):
        __E.clear()
        break

## S(P)

In [None]:
# Values noted by the author from earlier runs (presumably losses):
#   8901.497879940547
#   8959.261042943654 >> 9056.971447119242
#   9147.675861340429
#   [P->S] 8965.367465171059 >> 9147.313206731318
#   9209.555742799665
_ITER = 100
__E = []
decision_type = DecisionType.E_PURCHASE
for _ in tqdm(range(_ITER), desc=f"{DecisionType.to_str(decision_type)}", total=_ITER):
    # Under IPython, wipe the cell output every fifth iteration.
    if __IPYTHON__ and _ % 5 == 0:
        clear_output(wait=True)
    _L = estimator._adjust_tags_corr_(decision_type=decision_type)
    print(f"{_L}")
    __E.append(_L)
    # Stop once the newest value is worse than the best value seen in this run.
    if _L > min(__E):
        __E.clear()
        break

# Recommendation

In [7]:
# Wrap the fitted estimator in a score-based recommender and run its prediction
# step (the recorded run took about four minutes).
recommender = ScoreBasedRecommender(estimator=estimator)
recommender.prediction()

Recommender.prediction(): 100%|██████████| 24270/24270 [04:09<00:00, 97.25it/s] 


In [None]:
"""
recommender = ELABasedRecommender(
    estimator=estimator,
)
recommender.prediction()
"""

# EVAL - Recommended items.

In [8]:
# Top-N IR metrics on the first test file (E_LIKE) for every cut-off in
# `top_n_conditions`; the summary table is printed as plain text.
evaluator = IRMetricsEvaluator(recommender=recommender, file_path=_TEST_SET_FILES_LIST[0])
evaluator.top_n_eval(top_n_conditions=top_n_conditions)
df: DataFrame = evaluator.evlautions_summary_df()
print(df)

[IO] /home/taegyu/git_repo/ipirec/data/colley/test_2_like_list.csv


[LOAD] test_2_like_list.csv: 100%|██████████| 5590/5590 [00:00<00:00, 76745.33it/s]


    Conditions  Precision    Recall  F1-score  Accuracy  Hits   TP     FP  \
0            3   0.047101  0.048964  0.048015  0.990661   182  182   3682   
1            5   0.034161  0.059188  0.043320  0.987426   220  220   6220   
2            7   0.028172  0.068335  0.039896  0.984181   254  254   8762   
3            9   0.024241  0.075599  0.036710  0.980917   281  281  11311   
4           11   0.022092  0.084208  0.035001  0.977667   313  313  13855   
5           13   0.020784  0.093624  0.034016  0.974424   348  348  16396   
6           15   0.019513  0.101426  0.032730  0.971166   377  377  18943   
7           17   0.018908  0.111380  0.032327  0.967928   414  414  21482   
8           19   0.018388  0.121065  0.031927  0.964688   450  450  24022   
9           21   0.017340  0.126177  0.030489  0.961404   469  469  26579   
10          23   0.016574  0.132096  0.029453  0.958128   491  491  29133   
11          25   0.016149  0.139898  0.028956  0.954869   520  520  31680   

In [9]:
# Top-N IR metrics on the second test file (E_PURCHASE) for every cut-off in
# `top_n_conditions`; the summary table is printed as plain text.
evaluator = IRMetricsEvaluator(recommender=recommender, file_path=_TEST_SET_FILES_LIST[1])
evaluator.top_n_eval(top_n_conditions=top_n_conditions)
df: DataFrame = evaluator.evlautions_summary_df()
print(df)

[IO] /home/taegyu/git_repo/ipirec/data/colley/test_2_purchase_list.csv


[LOAD] test_2_purchase_list.csv: 100%|██████████| 3087/3087 [00:00<00:00, 77840.87it/s]


    Conditions  Precision    Recall  F1-score  Accuracy  Hits  TP     FP  \
0            3   0.000909  0.001932  0.001236  0.992656     3   3   3297   
1            5   0.000727  0.002576  0.001134  0.989326     4   4   5496   
2            7   0.001039  0.005151  0.001729  0.986005     8   8   7692   
3            9   0.001515  0.009659  0.002619  0.982692    15  15   9885   
4           11   0.001818  0.014166  0.003223  0.979380    22  22  12078   
5           13   0.001678  0.015454  0.003028  0.976053    24  24  14276   
6           15   0.001636  0.017386  0.002991  0.972729    27  27  16473   
7           17   0.001604  0.019317  0.002963  0.969405    30  30  18670   
8           19   0.001435  0.019317  0.002672  0.966071    30  30  20870   
9           21   0.001558  0.023181  0.002921  0.962756    36  36  23064   
10          23   0.001739  0.028332  0.003277  0.959447    44  44  25256   
11          25   0.001891  0.033484  0.003580  0.956138    52  52  27448   
12          

# RAND >> W

In [None]:
# Replace the per-user weights with uniform random values in [0, 1) of shape
# (users_count, tags_count, tags_count).
tags_count = estimator.tags_count
users_count = estimator.users_count

# A fixed seed makes the random-weights experiment repeatable across kernel
# restarts; change `_RAND_SEED` to draw a different sample.
_RAND_SEED = 42
_rng = np.random.default_rng(_RAND_SEED)
_RAND_W: np.ndarray = _rng.random((users_count, tags_count, tags_count))
print(_RAND_W)
estimator.arr_user_idx_to_weights = _RAND_W

In [None]:
# Apply tanh element-wise to the tag scores and to the per-user weights and
# store the results back on the estimator.
# NOTE(review): the inputs are overwritten, so re-running this cell applies tanh
# a second time — the cell is not idempotent.
# NOTE(review): the scores are read via `estimator.arr_tags_score` but written
# to `estimator.model.arr_tags_score`; presumably both refer to the same array —
# confirm.
estimator.model.arr_tags_score = np.tanh(estimator.arr_tags_score)
estimator.arr_user_idx_to_weights = np.tanh(estimator.arr_user_idx_to_weights)

In [None]:
# Take independent copies of the current tag scores (S) and user weights (W) so
# they can be inspected without touching the estimator.
_S: np.ndarray = copy.deepcopy(estimator.arr_tags_score)
_W: np.ndarray = copy.deepcopy(estimator.arr_user_idx_to_weights)

# _S = np.tanh(_S)
# Bare expression: the notebook displays the copied S array.
_S
# model.arr_tags_score = _S
# estimator.model.arr_tags_score = _S

In [None]:
# _W = np.tanh(_W)
# Bare expression: the notebook displays `_W`, which must have been defined by
# an earlier cell.
_W

# [IO] np.ndarray -- S, W

## S, W >> 

In [None]:
# Persist the tag scores (S) and the per-user weights (W) as .npy files whose
# names carry the fold id and the current date-time string.
dt_str = DirectoryPathValidator.current_datetime_str()

file_path = f"{WORKSPACE_HOME}/resources/IPIRec/{_FOLD_SET_ID}_S_{dt_str}.npy"
__dir_path = os.path.dirname(file_path)
if not os.path.exists(__dir_path):
    DirectoryPathValidator.mkdir(__dir_path)

# The `with` statement closes the file handle when the block exits.
with open(file_path, "wb") as fout:
    np.save(fout, estimator.model.arr_tags_score)

file_path = f"{WORKSPACE_HOME}/resources/IPIRec/{_FOLD_SET_ID}_W_{dt_str}.npy"
with open(file_path, "wb") as fout:
    np.save(fout, estimator.arr_user_idx_to_weights)

## >> S, W

In [None]:
# Restore the tag scores (S) and per-user weights (W) from
# `<fold>_S.npy` / `<fold>_W.npy` under resources/IPIRec and install them on the
# estimator. Both files are checked up front so a missing file is reported with
# its path before anything is loaded.
# NOTE(review): these fixed file names carry no date-time suffix, unlike the
# names written by the save cell of this notebook; presumably a saved pair has
# to be copied/renamed first — confirm the intended workflow.
_npy_paths = {
    _name: f"{WORKSPACE_HOME}/resources/IPIRec/{_FOLD_SET_ID}_{_name}.npy"
    for _name in ("S", "W")
}
for file_path in _npy_paths.values():
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Saved array not found: {file_path}")

_S: np.ndarray = np.load(file=_npy_paths["S"])
_W: np.ndarray = np.load(file=_npy_paths["W"])
estimator.model.arr_tags_score = _S
estimator.arr_user_idx_to_weights = _W

# TRAIN(W)

## W(V)

In [None]:
# Values noted by the author from earlier runs (presumably losses):
#   39934.18908918701
#   [P->V] 39940.724917418236 >> 39939.05102884622
_ITER = 100
__E = []
decision_type = DecisionType.E_VIEW
for _ in range(_ITER):
    _L = estimator._personalization_(decision_type)
    print(f"[{_ + 1} | {_ITER}]: {_L}")
    __E.append(_L)
    # Stop once the newest value is worse than the best value seen in this run.
    if _L > min(__E):
        __E.clear()
        break

## W(L)

In [None]:
# Values noted by the author from earlier runs (presumably losses):
#   10070.10165995471
#   10062.877589213442 >> 10069.284461862013
#   10062.394683458622
# Local optima occur frequently; the objective function probably needs a
# momentum term (which would require deriving a second-order function first).
_ITER = 100
__E = []
decision_type = DecisionType.E_LIKE
for _ in range(_ITER):
    _L = estimator._personalization_(decision_type)
    __E.append(_L)
    print(f"[{_ + 1} | {_ITER}]: {_L}")
    # Stop once the newest value is worse than the best value seen in this run.
    if _L > min(__E):
        __E.clear()
        break

## W(P)

In [None]:
# Values noted by the author from earlier runs (presumably losses):
#   9959.982525242347
#   10004.336950298619 >> 10005.550312827972
#   [P->S] 10005.334460052087
#   10002.76615999305
# Local optima are even more frequent here than for V and L, so the objective
# function needs to be relaxed somewhat.
_ITER = 100
__E = []
decision_type = DecisionType.E_PURCHASE
for _ in range(_ITER):
    _L = estimator._personalization_(decision_type)
    __E.append(_L)
    print(f"[{_ + 1} | {_ITER}]: {_L}")
    # Stop once the newest value is worse than the best value seen in this run.
    if _L > min(__E):
        __E.clear()
        break

# Obs. Corr(T)

## S

In [None]:
# Clustered heat map of the tag-to-tag score matrix S, saved as an SVG file.
_tags_score: np.ndarray = copy.deepcopy(estimator.arr_tags_score)
_tag_names_list = list(estimator.tags_dict.keys())

# Colour limits are taken from the matrix before its diagonal is blanked;
# entries >= 1.0 are ignored when choosing the upper limit.
_min = np.min(_tags_score)
_max = np.max(_tags_score[_tags_score < 1.0])

# Zero the diagonal entries (tag paired with itself).
tags_count = estimator.tags_count
_diag_idx = np.arange(tags_count)
_tags_score[_diag_idx, _diag_idx] = 0.0

ax = sns.clustermap(
    _tags_score,
    vmin=_min,
    vmax=_max,
    # "Greys" is the canonical colormap name; the "Grays" spelling is an alias
    # that only newer Matplotlib releases register.
    cmap="Greys",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
    # Colour bar moved to the top; seaborn's default position is
    # cbar_pos=(0.02, 0.8, 0.05, 0.18).
    cbar_kws=dict(use_gridspec=False, location="top"),
    cbar_pos=(0.03, 0.85, 0.1, 0.01),  # (pos_x, pos_y, len_x, len_y)
)
# clustermap() creates its own figure, so the title must be set on that figure;
# a plt.title() call issued beforehand would land on a separate, empty figure.
ax.figure.suptitle("Tags scores", fontsize=8.0)
ax.tick_params(axis="x", labelsize=2.0)
ax.tick_params(axis="y", labelsize=2.0)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_cmap.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

ax.figure.savefig(_fig_file_path)
# plt.show()
# plt.show()

## W

In [None]:
## HIER_HEATMAP
# Clustered heat map of one user's tag-to-tag weight matrix W(u), saved as SVG.
user_id = 424169

# Resolve the user's internal index once; unknown ids are reported explicitly.
# (Indexing the mapping before the `.get(...) == -1` test would raise first and
# leave that test unreachable.)
uidx = estimator.user_id_to_idx.get(user_id, -1)
if uidx == -1:
    raise KeyError(f"user_id {user_id} is not present in estimator.user_id_to_idx")

# Copy only this user's matrix instead of duplicating the weights of all users.
__OBS_W: np.ndarray = np.array(estimator.arr_user_idx_to_weights[uidx], copy=True)
_tag_names_list = list(estimator.tags_dict.keys())

# Colour limits are taken from the matrix before its diagonal is blanked;
# entries >= 1.0 are ignored when choosing the upper limit.
_min = np.min(__OBS_W)
_max = np.max(__OBS_W[__OBS_W < 1.0])

# Zero the diagonal entries (tag paired with itself).
tags_count = estimator.tags_count
_diag_idx = np.arange(tags_count)
__OBS_W[_diag_idx, _diag_idx] = 0.0

ax = sns.clustermap(
    __OBS_W,
    vmin=_min,
    vmax=_max,
    # "Greys" is the canonical colormap name; the "Grays" spelling is an alias
    # that only newer Matplotlib releases register.
    cmap="Greys",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
    # Colour bar moved to the top; seaborn's default position is
    # cbar_pos=(0.02, 0.8, 0.05, 0.18).
    cbar_kws=dict(use_gridspec=False, location="top"),
    cbar_pos=(0.03, 0.85, 0.1, 0.01),  # (pos_x, pos_y, len_x, len_y)
)
# clustermap() creates its own figure, so the title must be set on that figure;
# a plt.title() call issued beforehand would land on a separate, empty figure.
ax.figure.suptitle(f"W(u) = {user_id}", fontsize=8.0)
ax.tick_params(axis="x", labelsize=2.0)
ax.tick_params(axis="y", labelsize=2.0)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_W_u{2}_cmap.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    user_id,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

ax.figure.savefig(_fig_file_path)
# plt.show()
# plt.show()

In [None]:
## HEATMAP
# Plain heat map of one user's tag-to-tag weight matrix W(u), saved as SVG.
user_id = 424169

# Resolve the user's internal index once; unknown ids are reported explicitly.
# (Indexing the mapping before the `.get(...) == -1` test would raise first and
# leave that test unreachable.)
uidx = estimator.user_id_to_idx.get(user_id, -1)
if uidx == -1:
    raise KeyError(f"user_id {user_id} is not present in estimator.user_id_to_idx")

# Copy only this user's matrix instead of duplicating the weights of all users.
__OBS_W: np.ndarray = np.array(estimator.arr_user_idx_to_weights[uidx], copy=True)
_tag_names_list = list(estimator.tags_dict.keys())

# Colour limits are taken from the matrix before its diagonal is blanked;
# entries >= 1.0 are ignored when choosing the upper limit.
_min = np.min(__OBS_W)
_max = np.max(__OBS_W[__OBS_W < 1.0])

# Zero the diagonal entries (tag paired with itself).
tags_count = estimator.tags_count
_diag_idx = np.arange(tags_count)
__OBS_W[_diag_idx, _diag_idx] = 0.0

# sns.heatmap() draws on the current axes, so the pyplot title/labels set here
# end up on the same figure as the heat map.
plt.title(label=f"W(u) = {user_id}", fontsize=8.0)
plt.xlabel(xlabel="Source tags name", fontsize=4.0)
plt.ylabel(ylabel="Target tags name", fontsize=4.0)
plt.xticks(fontsize=2.0)
plt.yticks(fontsize=2.0)
ax = sns.heatmap(
    data=__OBS_W,
    vmin=_min,
    vmax=_max,
    # "Greys" is the canonical colormap name; the "Grays" spelling is an alias
    # that only newer Matplotlib releases register.
    cmap="Greys",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
)
ax.tick_params(axis="x", labelsize=2.0)
ax.tick_params(axis="y", labelsize=2.0)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_W_u{2}.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    user_id,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

ax.figure.savefig(_fig_file_path)
# plt.show()
# plt.show()

## S * W(u)

In [None]:
## HIER_HEATMAP
# Clustered heat map of the element-wise product S * W(u) for one user, saved
# as an SVG file.
user_id = 424169

# Resolve the user's internal index once; unknown ids are reported explicitly.
# (Indexing the mapping before the `.get(...) == -1` test would raise first and
# leave that test unreachable.)
uidx = estimator.user_id_to_idx.get(user_id, -1)
if uidx == -1:
    raise KeyError(f"user_id {user_id} is not present in estimator.user_id_to_idx")

_tags_score: np.ndarray = copy.deepcopy(estimator.arr_tags_score)
# Copy only this user's matrix instead of duplicating the weights of all users.
__OBS_W: np.ndarray = np.array(estimator.arr_user_idx_to_weights[uidx], copy=True)
_tag_names_list = list(estimator.tags_dict.keys())

# Zero the diagonal entries (tag paired with itself) of both factors.
tags_count = estimator.tags_count
_diag_idx = np.arange(tags_count)
_tags_score[_diag_idx, _diag_idx] = 0.0
__OBS_W[_diag_idx, _diag_idx] = 0.0

# Colour limits come from the plotted product; entries >= 1.0 are ignored when
# choosing the upper limit.
_WS = _tags_score * __OBS_W
_min = np.min(_WS)
_max = np.max(_WS[_WS < 1.0])
ax = sns.clustermap(
    _WS,
    vmin=_min,
    vmax=_max,
    # "Greys" is the canonical colormap name; the "Grays" spelling is an alias
    # that only newer Matplotlib releases register.
    cmap="Greys",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
    # Colour bar moved to the top; seaborn's default position is
    # cbar_pos=(0.02, 0.8, 0.05, 0.18).
    cbar_kws=dict(use_gridspec=False, location="top"),
    cbar_pos=(0.03, 0.85, 0.1, 0.01),  # (pos_x, pos_y, len_x, len_y)
)
# clustermap() creates its own figure, so the title must be set on that figure;
# a plt.title() call issued beforehand would land on a separate, empty figure.
ax.figure.suptitle(f"S * W(u) = {user_id}", fontsize=8.0)
ax.tick_params(axis="x", labelsize=2.0)
ax.tick_params(axis="y", labelsize=2.0)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_WS_u{2}_cmap.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    user_id,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

ax.figure.savefig(_fig_file_path)
# plt.show()
# plt.show()

In [None]:

## HEATMAP
# Plain heat map of the element-wise product S * W(u) for one user, saved as an
# SVG file and then shown.
user_id = 424169

# Resolve the user's internal index once; unknown ids are reported explicitly.
# (Indexing the mapping before the `.get(...) == -1` test would raise first and
# leave that test unreachable.)
uidx = estimator.user_id_to_idx.get(user_id, -1)
if uidx == -1:
    raise KeyError(f"user_id {user_id} is not present in estimator.user_id_to_idx")

_tags_score: np.ndarray = copy.deepcopy(estimator.arr_tags_score)
# Copy only this user's matrix instead of duplicating the weights of all users.
__OBS_W: np.ndarray = np.array(estimator.arr_user_idx_to_weights[uidx], copy=True)
_tag_names_list = list(estimator.tags_dict.keys())

# Zero the diagonal entries (tag paired with itself) of both factors.
tags_count = estimator.tags_count
_diag_idx = np.arange(tags_count)
_tags_score[_diag_idx, _diag_idx] = 0.0
__OBS_W[_diag_idx, _diag_idx] = 0.0

# Colour limits must describe the matrix that is actually drawn (the product),
# matching the clustered S * W(u) plot of this notebook; entries >= 1.0 are
# ignored when choosing the upper limit.
_WS = _tags_score * __OBS_W
_min = np.min(_WS)
_max = np.max(_WS[_WS < 1.0])

# sns.heatmap() draws on the current axes, so the pyplot title/labels set here
# end up on the same figure as the heat map.
plt.title(label=f"W * S ({user_id})", fontsize=8.0)
plt.xlabel(xlabel="Source tags name", fontsize=4.0)
plt.ylabel(ylabel="Target tags name", fontsize=4.0)
plt.xticks(fontsize=2.0)
plt.yticks(fontsize=2.0)
ax = sns.heatmap(
    data=_WS,
    vmin=_min,
    vmax=_max,
    # "Greys" is the canonical colormap name; the "Grays" spelling is an alias
    # that only newer Matplotlib releases register.
    cmap="Greys",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_WS_u{2}.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    user_id,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

# Save first, then show: showing may release the figure from pyplot.
ax.figure.savefig(_fig_file_path)
plt.show()
# plt.show()

# EVAL - Tags scores.

In [10]:
# Tag-score RMSE evaluation on the first test file (E_LIKE); the recorded run
# prints a "Hits RMSE" and a "ForAll RMSE" value.
tags_scores_evaluator = TagsScoreRMSEEvaluator(recommender=recommender, file_path=_TEST_SET_FILES_LIST[0])
tags_scores_evaluator.eval()

[IO] /home/taegyu/git_repo/ipirec/data/colley/test_2_like_list.csv


[LOAD] test_2_like_list.csv: 100%|██████████| 5590/5590 [00:00<00:00, 77670.75it/s]

Hits RMSE: 0.9985384348943671
ForAll RMSE: 0.9997043455887041





In [11]:
# Tag-score RMSE evaluation on the second test file (E_PURCHASE); the recorded
# run prints a "Hits RMSE" and a "ForAll RMSE" value.
tags_scores_evaluator = TagsScoreRMSEEvaluator(recommender=recommender, file_path=_TEST_SET_FILES_LIST[1])
tags_scores_evaluator.eval()

[IO] /home/taegyu/git_repo/ipirec/data/colley/test_2_purchase_list.csv


[LOAD] test_2_purchase_list.csv: 100%|██████████| 3087/3087 [00:00<00:00, 73054.10it/s]

Hits RMSE: 0.9999572988390121
ForAll RMSE: 0.9999947147357442





# Diff. of Tags Freq. - Rec. Items;

In [None]:
from rec_tags_freq import CosineItemsTagsFreqAddPenalty

# Tag-frequency distance between each test set and the recommendations,
# reported as "L" for the first test file (E_LIKE) and "P" for the second
# (E_PURCHASE).
rec_tags_freq_dist = CosineItemsTagsFreqAddPenalty()
for _label, _test_file in zip(("L", "P"), _TEST_SET_FILES_LIST):
    evaluator = IRMetricsEvaluator(recommender=recommender, file_path=_test_file)
    avg_cos_dist = rec_tags_freq_dist.tags_freq_distance(
        test_set=evaluator.TEST_SET_LIST,
        recommender=recommender,
    )
    print(f"{_label}: {avg_cos_dist}")

# Clone -- estimator;

In [None]:
# Keep an independent deep copy of the current estimator under a separate name.
_dummy_estimator: AdjustedBiasedCorrelationEstimator = copy.deepcopy(estimator)