## DEF

In [None]:
import os
import sys
from IPython.display import clear_output

# Resolve the directory that contains this notebook (or script).
# `__vsc_ipynb_file__` is only defined when the notebook runs inside VS Code;
# a plain Python run only has `__file__`. Probing with try/except keeps the
# fallback reachable: the previous `if __IPYTHON__ else ...` expression raised
# NameError outside IPython before the fallback could ever be evaluated.
try:
    __FILE_DIR_PATH = os.path.dirname(os.path.abspath(__vsc_ipynb_file__))
except NameError:
    __FILE_DIR_PATH = os.path.dirname(os.path.abspath(__file__))

# Workspace root = parent of the notebook directory, stepping out of a
# `trunk` directory when the parent is one. Path components are used instead
# of `str.replace`, which removed *every* occurrence of the directory name
# (and of "/trunk") anywhere in the path.
WORKSPACE_HOME = os.path.dirname(__FILE_DIR_PATH)
if os.path.basename(WORKSPACE_HOME) == "trunk":
    WORKSPACE_HOME = os.path.dirname(WORKSPACE_HOME)
DATASET_DIR_HOME = f"{WORKSPACE_HOME}/data/colley"

print(WORKSPACE_HOME)
# Make the project packages importable; guarded so that re-running the cell
# does not append duplicate entries.
if WORKSPACE_HOME not in sys.path:
    sys.path.append(WORKSPACE_HOME)

import copy

import numpy as np
from pandas import DataFrame
from tqdm import tqdm
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from core import *
from ipirec import *
from colley import *

# Global matplotlib text settings for every figure in this notebook.
# The font is presumably chosen so that non-ASCII tag names render correctly
# ("AppleGothic" was the previously used alternative) -- TODO confirm.
mpl.rcParams.update(
    {
        "font.family": "NanumGothic",
        "axes.unicode_minus": False,
    }
)

## OPT & ALLOC

In [None]:
# Identifier of the train/test fold used by every cell below.
_FOLD_SET_ID = 2
# Top-N cut-offs passed to the evaluators: 3, 5, ..., 35.
top_n_conditions = list(range(3, 37, 2))

# Test-set CSV paths for this fold, in the order [LIKE, PURCHASE].
_TEST_SET_FILES_LIST = [
    "{0}/test_{1}_{2}_list.csv".format(
        DATASET_DIR_HOME,
        _FOLD_SET_ID,
        DecisionType.to_str(d),
    )
    for d in (DecisionType.E_LIKE, DecisionType.E_PURCHASE)
]

# Build the training data set for the selected fold: load the metadata, then
# append one training CSV per decision type, and finally build the id/index
# mapping.
dataset = ColleyFilteredDataSet(dataset_dir_path=DATASET_DIR_HOME)
dataset._load_metadata_()
for decision_type in DecisionType:
    _train_file_path = "{0}/train_{1}_{2}_list.csv".format(
        DATASET_DIR_HOME,
        _FOLD_SET_ID,
        DecisionType.to_str(decision_type),
    )
    dataset.append_decisions(file_path=_train_file_path, decision_type=decision_type)
dataset.__id_index_mapping__()

In [None]:
# Spot check: look up one hard-coded user (id 692466) and display its
# `set_of_interest_tags` attribute as the cell output.
# NOTE(review): this cell sits before the `dataset.append_interest_tags()`
# cell, so it presumably shows the state prior to that call -- confirm.
user: UserEntity = dataset.user_dict[692466]
user.set_of_interest_tags

In [None]:
# Value forwarded to the model parameter factory as `top_n_tags`.
top_n_tags = 5

# Create the parameter object, construct the correlation model on the loaded
# data set, and run its analysis step.
model_params = CorrelationModel.create_models_parameters(top_n_tags=top_n_tags, co_occur_items_threshold=4)
model = CorrelationModel(dataset=dataset, model_params=model_params)
model.analysis()

In [None]:
# Mutates `dataset` in place; it is executed after `model.analysis()` and
# before the estimator is constructed.
# NOTE(review): presumably populates the users' interest tags from the
# analysed model -- confirm against the data set implementation.
dataset.append_interest_tags()

In [None]:
# Hyper-parameters forwarded to the estimator's parameter factory below.
frob_norm, score_iters, weight_iters = 1.0, 10, 5

estimator_params = AdjustedBiasedCorrelationEstimator.create_models_parameters(
    # settings for the tag-score (S) updates
    score_iterations=score_iters,
    score_learning_rate=10 ** -2,
    score_generalization=10 ** -4,
    # settings for the per-user weight (W) updates
    weight_iterations=weight_iters,
    weight_learning_rate=10 ** -3,
    weight_generalization=1.0,
    # remaining settings
    frob_norm=frob_norm,
    default_voting=0.0,
)
estimator = AdjustedBiasedCorrelationEstimator(model=model, model_params=estimator_params)

In [None]:
# Disabled experiment, kept as a bare string literal so that none of it runs:
# for every decision type it would call `estimator._adjust_tags_corr_`
# `score_iters` times and print each returned loss.
"""
for decision_type in DecisionType:
    for _ in tqdm(
        iterable=range(score_iters),
        desc=f"{DecisionType.to_str(decision_type)}",
        total=score_iters,
    ):
        _L = estimator._adjust_tags_corr_(
            decision_type=decision_type,
        )
        print(f"{_L}")
        # estimator._personalization_(
        #     target_decision=decision_type,
        # )
"""

## FIT

In [None]:
# Spot check: print the row of `arr_users_tags_map` for user 692466.
# `.get(..., -1)` returns the -1 sentinel when the user id is not indexed, in
# which case nothing is printed.
_uidx = estimator.user_id_to_idx.get(692466, -1)
if _uidx != -1:
    print(estimator.arr_users_tags_map[_uidx])

In [None]:
# Re-bind `user` to the entity of the hard-coded user 692466 (raises KeyError
# when that id is not in `dataset.user_dict`).
user: UserEntity = dataset.user_dict[692466]

In [None]:
# Spot check: print the row of `arr_items_tags_map` for item 472.
# `.get(..., -1)` returns the -1 sentinel when the item id is not indexed, in
# which case nothing is printed.
_iidx = estimator.item_id_to_idx.get(472, -1)
if _iidx != -1:
    print(estimator.arr_items_tags_map[_iidx])

In [None]:
# Alternating fit on VIEW decisions. Each round:
#   1. appends the biases,
#   2. runs one tag-score adjustment (S step) and logs its loss,
#   3. runs one personalization pass (W step) and logs its loss.
# Only the W-step loss of each round is recorded in `loss_log_list`.
#
# Stopping rule: the loop ends on the first round whose W-step loss is not
# strictly lower than the best loss of the previous rounds. The earlier
# `min(...) < _L` test never fired when the loss plateaued at an equal value
# or became NaN, which left `while True` spinning forever.
loss_log_list = list()
_ITER = 0
while True:
    _ITER += 1
    estimator.__append_biases__()

    # --- S step -------------------------------------------------------------
    _L = estimator._adjust_tags_corr_(DecisionType.E_VIEW)
    _S = np.std(estimator.arr_tags_score)
    _W = np.std(estimator.arr_user_idx_to_weights)
    print(str.format("[{0} S] L: {1}\nS: {2}\nW: {3}", _ITER, _L, _S, _W))
    # Diagnostic only: loss plus the square root of the weights' std-dev.
    # (previous variant: _L + ((_S + _W) ** (2**-1)))
    __L = _L + (_W ** (2**-1))
    print(__L)

    # --- W step -------------------------------------------------------------
    _L = estimator._personalization_(DecisionType.E_VIEW)
    _S = np.std(estimator.arr_tags_score)
    _W = np.std(estimator.arr_user_idx_to_weights)
    # (previous variant of the spread measures: np.sum(np.abs(...)))
    print(str.format("[{0} W] L: {1}\nS: {2}\nW: {3}", _ITER, _L, _S, _W))
    __L = _L + (_W ** (2**-1))
    print(__L)

    # Best loss seen before this round (None on the first round).
    _prev_min = min(loss_log_list) if loss_log_list else None
    loss_log_list.append(_L)
    _min = min(loss_log_list)
    # `not (a < b)` is also true for NaN, so a diverged run terminates too.
    if _prev_min is not None and not (_L < _prev_min):
        # NOTE(review): this snapshot is taken after the non-improving round,
        # so it is not the best state seen -- confirm that this is intended.
        _estimator: AdjustedBiasedCorrelationEstimator = copy.deepcopy(estimator)
        break

# Spot check: estimated score for the hard-coded (user 692466, item 472) pair.
inst = BaseAction(user_id=692466, item_id=472)
inst = estimator._estimate_(inst)
inst.estimated_score

In [None]:
# Spot check: build an action for the hard-coded (user 692466, item 472) pair,
# run it through the estimator and display the resulting `estimated_score`.
inst = BaseAction(user_id=692466, item_id=472)
inst = estimator._estimate_(inst)
inst.estimated_score

# TRAIN(S)

## S(V)

In [None]:
# Tag-score (S) training on VIEW decisions: at most `_ITER` adjustment passes,
# stopping early as soon as a pass returns a loss above the best one so far.
# Values noted from earlier runs (presumably losses):
# 21881.16485981459
# 22906.855353528248
# [P -> S] 22847.84812020912 >> 25977.561489833748 >> 25564.655994604418
_ITER = 100
decision_type = DecisionType.E_VIEW
__E = list()
for _ in tqdm(iterable=range(_ITER), desc=f"{DecisionType.to_str(decision_type)}", total=_ITER):
    _L = estimator._adjust_tags_corr_(decision_type=decision_type)
    print(f"{_L}")
    __E.append(_L)
    if _L > min(__E):
        # The loss went up: drop the history and stop.
        __E.clear()
        break

## S(L)

In [None]:
# Tag-score (S) training on LIKE decisions: at most `_ITER` adjustment passes,
# stopping early as soon as a pass returns a loss above the best one so far.
# Values noted from earlier runs (presumably losses):
# 7841.645698883881 >> 7956.079589877575
# 8582.921496806883
_ITER = 100
decision_type = DecisionType.E_LIKE
__E = list()
for _ in tqdm(iterable=range(_ITER), desc=f"{DecisionType.to_str(decision_type)}", total=_ITER):
    _L = estimator._adjust_tags_corr_(decision_type=decision_type)
    print(f"{_L}")
    __E.append(_L)
    if _L > min(__E):
        # The loss went up: drop the history and stop.
        __E.clear()
        break

## S(P)

In [None]:
# Tag-score (S) training on PURCHASE decisions: at most `_ITER` adjustment
# passes, stopping early as soon as a pass returns a loss above the best one
# so far. The cell output is cleared every fifth pass.
# Values noted from earlier runs (presumably losses):
## 8901.497879940547
# 8959.261042943654 >> 9056.971447119242
# 9147.675861340429
# [P->S] 8965.367465171059 >> 9147.313206731318
# 9209.555742799665
_ITER = 100
decision_type = DecisionType.E_PURCHASE
__E = list()
for _ in tqdm(iterable=range(_ITER), desc=f"{DecisionType.to_str(decision_type)}", total=_ITER):
    if __IPYTHON__ and _ % 5 == 0:
        clear_output(wait=True)
    _L = estimator._adjust_tags_corr_(decision_type=decision_type)
    print(f"{_L}")
    __E.append(_L)
    if _L > min(__E):
        # The loss went up: drop the history and stop.
        __E.clear()
        break

# Recommendation

In [None]:
# Wrap the trained estimator in the score-based recommender and run its
# prediction step.
recommender = ScoreBasedRecommender(estimator=estimator)
recommender.prediction()

In [None]:
# Disabled alternative, kept as a bare string literal so that it never runs:
# it would rebuild `recommender` as an `ELABasedRecommender` instead.
"""
recommender = ELABasedRecommender(
    estimator=estimator,
)
recommender.prediction()
"""

# EVAL - Recommended items.

In [None]:
# Top-N IR metrics on the first test file (index 0 = LIKE list); the summary
# frame is printed as plain text.
evaluator = IRMetricsEvaluator(recommender=recommender, file_path=_TEST_SET_FILES_LIST[0])
evaluator.top_n_eval(top_n_conditions=top_n_conditions)
df: DataFrame = evaluator.evlautions_summary_df()
print(df)

In [None]:
# Top-N IR metrics on the second test file (index 1 = PURCHASE list); the
# summary frame is printed as plain text.
evaluator = IRMetricsEvaluator(recommender=recommender, file_path=_TEST_SET_FILES_LIST[1])
evaluator.top_n_eval(top_n_conditions=top_n_conditions)
df: DataFrame = evaluator.evlautions_summary_df()
print(df)

# RAND >> W

In [None]:
# Overwrite the estimator's per-user weights with random values: one
# (tags x tags) matrix per user, drawn uniformly from [0, 1).
# NOTE(review): no seed is set, so every run yields different weights.
tags_count = estimator.tags_count
users_count = estimator.users_count

_RAND_W: np.ndarray = np.random.random_sample((users_count, tags_count, tags_count))
print(_RAND_W)
estimator.arr_user_idx_to_weights = _RAND_W

In [None]:
# Squash the tag scores and the per-user weights element-wise with tanh.
# The scores are read through `estimator.arr_tags_score` but written back to
# `estimator.model.arr_tags_score`.
# NOTE(review): not idempotent -- each re-run of this cell applies tanh again
# to the already squashed arrays.
estimator.model.arr_tags_score = np.tanh(estimator.arr_tags_score)
estimator.arr_user_idx_to_weights = np.tanh(estimator.arr_user_idx_to_weights)

In [None]:
# Take independent copies of the current tag scores (`_S`) and per-user
# weights (`_W`) so they can be inspected without touching the estimator.
_S: np.ndarray = copy.deepcopy(estimator.arr_tags_score)
_W: np.ndarray = copy.deepcopy(estimator.arr_user_idx_to_weights)

# Disabled experiment: squash the copy with tanh and write it back.
# _S = np.tanh(_S)
_S
# model.arr_tags_score = _S
# estimator.model.arr_tags_score = _S

In [None]:
# Display the copied per-user weights (the tanh squashing is disabled).
# _W = np.tanh(_W)
_W

# [IO] np.ndarray -- S, W

## S, W >> 

In [None]:
# Persist the current tag scores (S) and per-user weights (W) as .npy files
# named "<fold>_S_<datetime>.npy" / "<fold>_W_<datetime>.npy" under
# "<workspace>/resources/IPIRec". Both files share the same datetime string.
dt_str = DirectoryPathValidator.current_datetime_str()

file_path = str.format(
    "{0}/resources/IPIRec/{1}_S_{2}.npy",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    dt_str,
)
# Both files go to the same directory, so it only has to be created once.
__dir_path = os.path.dirname(file_path)
if not os.path.exists(__dir_path):
    DirectoryPathValidator.mkdir(__dir_path)

# The `with` block closes the file on exit; no explicit close() is needed.
with open(file=file_path, mode="wb") as fout:
    np.save(fout, estimator.model.arr_tags_score)

file_path = str.format(
    "{0}/resources/IPIRec/{1}_W_{2}.npy",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    dt_str,
)
with open(file=file_path, mode="wb") as fout:
    np.save(fout, estimator.arr_user_idx_to_weights)

## >> S, W

In [None]:
# Restore the tag scores (S) and per-user weights (W) from
# "<workspace>/resources/IPIRec/<fold>_S.npy" and "<fold>_W.npy".
# NOTE(review): the save cell writes file names that also contain a datetime
# suffix, so these un-suffixed files presumably have to be put in place by
# hand -- confirm.
file_path = str.format(
    "{0}/resources/IPIRec/{1}_S.npy",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
)
if not os.path.exists(file_path):
    raise FileNotFoundError(file_path)
_S: np.ndarray = np.load(
    file=file_path,
)

file_path = str.format(
    "{0}/resources/IPIRec/{1}_W.npy",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
)
# Same explicit check as for S, so that a missing W file is reported with
# its path.
if not os.path.exists(file_path):
    raise FileNotFoundError(file_path)
_W: np.ndarray = np.load(
    file=file_path,
)

# The estimator is only touched once both arrays have been loaded.
estimator.model.arr_tags_score = _S
estimator.arr_user_idx_to_weights = _W

# TRAIN(W)

## W(V)

In [None]:
# Per-user weight (W) training on VIEW decisions: at most `_ITER`
# personalization passes, stopping early as soon as a pass returns a loss
# above the best one so far.
# Values noted from earlier runs (presumably losses):
# 39934.18908918701
# [P->V] 39940.724917418236 >> 39939.05102884622
_ITER = 100
__E = list()
decision_type = DecisionType.E_VIEW
for _ in range(_ITER):
    _L = estimator._personalization_(decision_type)
    print(f"[{_ + 1} | {_ITER}]: {_L}")
    __E.append(_L)
    if _L > min(__E):
        # The loss went up: drop the history and stop.
        __E.clear()
        break

## W(L)

In [None]:
# Per-user weight (W) training on LIKE decisions: at most `_ITER`
# personalization passes, stopping early as soon as a pass returns a loss
# above the best one so far.
# Values noted from earlier runs (presumably losses):
# 10070.10165995471
# 10062.877589213442 >> 10069.284461862013
# 10062.394683458622
# Local optima occur frequently; the objective function probably needs
# momentum (but that would require deriving a second-order function).
_ITER = 100
__E = list()
decision_type = DecisionType.E_LIKE
for _ in range(_ITER):
    _L = estimator._personalization_(decision_type)
    __E.append(_L)
    print(f"[{_ + 1} | {_ITER}]: {_L}")
    if _L > min(__E):
        # The loss went up: drop the history and stop.
        __E.clear()
        break

## W(P)

In [None]:
# Per-user weight (W) training on PURCHASE decisions: at most `_ITER`
# personalization passes, stopping early as soon as a pass returns a loss
# above the best one so far.
# Values noted from earlier runs (presumably losses):
## 9959.982525242347
# 10004.336950298619 >> 10005.550312827972
# [P->S] 10005.334460052087
# 10002.76615999305
# Local optima are even more frequent here than for V and L, so the
# objective function should be relaxed somewhat.
_ITER = 100
__E = list()
decision_type = DecisionType.E_PURCHASE
for _ in range(_ITER):
    _L = estimator._personalization_(decision_type)
    __E.append(_L)
    print(f"[{_ + 1} | {_ITER}]: {_L}")
    if _L > min(__E):
        # The loss went up: drop the history and stop.
        __E.clear()
        break

# Obs. Corr(T)

## S

In [None]:
# Clustered heat map of the tag-to-tag score matrix, saved as an SVG under
# "<workspace>/trunk/obs".
_tags_score: np.ndarray = copy.deepcopy(estimator.arr_tags_score)

# Colour range, taken before the diagonal is cleared; values >= 1.0 are
# excluded from the upper bound.
_min = np.min(_tags_score)
_max = np.max(_tags_score[_tags_score < 1.0])
_tag_names_list = list(estimator.tags_dict.keys())

# (A plain `sns.heatmap` rendering with the same vmin/vmax/cmap/tick labels
# and "Source tags name" / "Target tags name" axis labels used to be kept
# here as a disabled string block.)

# Clear the diagonal of the copy before clustering.
tags_count = estimator.tags_count
for _ in range(tags_count):
    _tags_score[_][_] = 0.0
ax = sns.clustermap(
    _tags_score,
    vmin=_min,
    vmax=_max,
    cmap="Grays",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
    ## defaults
    # cbar_kws=dict(use_gridspec=False, location="top"),
    # cbar_pos=(0.02, 0.8, 0.05, 0.18),
    cbar_kws=dict(use_gridspec=False, location="top"),
    cbar_pos=(0.03, 0.85, 0.1, 0.01),
    ## (pos_x, pos_y, len_x, len_y)
)
ax.tick_params(axis="x", labelsize=2.0)
ax.tick_params(axis="y", labelsize=2.0)
# `sns.clustermap` creates its own figure, so the title has to be attached to
# that figure afterwards; a `plt.title(...)` issued before the call ends up
# on a separate, empty figure and is missing from the saved file.
ax.figure.suptitle("Tags scores", fontsize=8.0)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_cmap.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

ax.figure.savefig(_fig_file_path)
# plt.show()

## W

In [None]:
## HIER_HEATMAP
# Clustered heat map of one user's weight matrix W(u), saved as an SVG under
# "<workspace>/trunk/obs".

_users_dist: np.ndarray = copy.deepcopy(estimator.arr_user_idx_to_weights)
user_id = 424169

# Look the user up with a sentinel so the guard is actually reachable; the
# previous direct `[...]` lookup raised before the guard could run.
uidx = estimator.user_id_to_idx.get(user_id, -1)
if uidx == -1:
    raise KeyError(f"user_id {user_id} is not indexed by the estimator")

# Colour range, taken before the diagonal is cleared; values >= 1.0 are
# excluded from the upper bound.
__OBS_W: np.ndarray = _users_dist[uidx]
_min = np.min(__OBS_W)
_max = np.max(__OBS_W[__OBS_W < 1.0])
_tag_names_list = list(estimator.tags_dict.keys())

# (A plain `sns.heatmap` rendering with the same vmin/vmax/cmap/tick labels
# and "Source tags name" / "Target tags name" axis labels used to be kept
# here as a disabled string block.)

# Clear the diagonal of the copy before clustering.
tags_count = estimator.tags_count
for _ in range(tags_count):
    __OBS_W[_][_] = 0.0
ax = sns.clustermap(
    __OBS_W,
    vmin=_min,
    vmax=_max,
    cmap="Grays",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
    ## defaults
    # cbar_kws=dict(use_gridspec=False, location="top"),
    # cbar_pos=(0.02, 0.8, 0.05, 0.18),
    cbar_kws=dict(use_gridspec=False, location="top"),
    cbar_pos=(0.03, 0.85, 0.1, 0.01),
    ## (pos_x, pos_y, len_x, len_y)
)
ax.tick_params(axis="x", labelsize=2.0)
ax.tick_params(axis="y", labelsize=2.0)
# `sns.clustermap` creates its own figure, so the title has to be attached to
# that figure afterwards; a `plt.title(...)` issued before the call ends up
# on a separate, empty figure and is missing from the saved file.
ax.figure.suptitle(f"W(u) = {user_id}", fontsize=8.0)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_W_u{2}_cmap.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    user_id,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

ax.figure.savefig(_fig_file_path)
# plt.show()

In [None]:
## HEATMAP
# Plain heat map of one user's weight matrix W(u), saved as an SVG under
# "<workspace>/trunk/obs".
_users_dist: np.ndarray = copy.deepcopy(estimator.arr_user_idx_to_weights)
user_id = 424169

# Look the user up with a sentinel so the guard is actually reachable; the
# previous direct `[...]` lookup raised before the guard could run.
uidx = estimator.user_id_to_idx.get(user_id, -1)
if uidx == -1:
    raise KeyError(f"user_id {user_id} is not indexed by the estimator")

# Colour range, taken before the diagonal is cleared; values >= 1.0 are
# excluded from the upper bound.
__OBS_W: np.ndarray = _users_dist[uidx]
_min = np.min(__OBS_W)
_max = np.max(__OBS_W[__OBS_W < 1.0])
_tag_names_list = list(estimator.tags_dict.keys())

# Clear the diagonal of the copy before plotting.
tags_count = estimator.tags_count
for _ in range(tags_count):
    __OBS_W[_][_] = 0.0

ax = sns.heatmap(
    data=__OBS_W,
    vmin=_min,
    vmax=_max,
    cmap="Grays",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
)
# Title, axis labels and tick sizes are applied after the heat map is drawn:
# `sns.heatmap` sets the axis labels itself (to empty strings for a plain
# array) and replaces the tick labels, so values set beforehand are lost.
ax.set_title(f"W(u) = {user_id}", fontsize=8.0)
ax.set_xlabel("Source tags name", fontsize=4.0)
ax.set_ylabel("Target tags name", fontsize=4.0)
ax.tick_params(axis="x", labelsize=2.0)
ax.tick_params(axis="y", labelsize=2.0)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_W_u{2}.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    user_id,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

ax.figure.savefig(_fig_file_path)
# plt.show()

## S * W(u)

In [None]:
## HIER_HEATMAP
# Clustered heat map of the element-wise product S * W(u) for one user, saved
# as an SVG under "<workspace>/trunk/obs".
user_id = 424169

_tags_score: np.ndarray = copy.deepcopy(estimator.arr_tags_score)
_tag_names_list = list(estimator.tags_dict.keys())

tags_count = estimator.tags_count

# Look the user up with a sentinel so the guard is actually reachable; the
# previous direct `[...]` lookup raised before the guard could run.
uidx = estimator.user_id_to_idx.get(user_id, -1)
if uidx == -1:
    raise KeyError(f"user_id {user_id} is not indexed by the estimator")

_users_dist: np.ndarray = copy.deepcopy(estimator.arr_user_idx_to_weights)
__OBS_W: np.ndarray = _users_dist[uidx]

# Clear the diagonals of both copies before multiplying.
for _ in range(tags_count):
    _tags_score[_][_] = 0.0
    __OBS_W[_][_] = 0.0

# Colour range is taken from the product itself; values >= 1.0 are excluded
# from the upper bound.
_WS = _tags_score * __OBS_W
_min = np.min(_WS)
_max = np.max(_WS[_WS < 1.0])
ax = sns.clustermap(
    _WS,
    vmin=_min,
    vmax=_max,
    cmap="Grays",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
    ## defaults
    # cbar_kws=dict(use_gridspec=False, location="top"),
    # cbar_pos=(0.02, 0.8, 0.05, 0.18),
    cbar_kws=dict(use_gridspec=False, location="top"),
    cbar_pos=(0.03, 0.85, 0.1, 0.01),
    ## (pos_x, pos_y, len_x, len_y)
)
ax.tick_params(axis="x", labelsize=2.0)
ax.tick_params(axis="y", labelsize=2.0)
# `sns.clustermap` creates its own figure, so the title has to be attached to
# that figure afterwards; a `plt.title(...)` issued before the call ends up
# on a separate, empty figure and is missing from the saved file.
ax.figure.suptitle(f"S * W(u) = {user_id}", fontsize=8.0)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_WS_u{2}_cmap.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    user_id,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

ax.figure.savefig(_fig_file_path)
# plt.show()

In [None]:

## HEATMAP
# Plain heat map of the element-wise product S * W(u) for one user, saved as
# an SVG under "<workspace>/trunk/obs".
user_id = 424169
_tags_score: np.ndarray = copy.deepcopy(estimator.arr_tags_score)
_tag_names_list = list(estimator.tags_dict.keys())

tags_count = estimator.tags_count
_users_dist: np.ndarray = copy.deepcopy(estimator.arr_user_idx_to_weights)

# Look the user up with a sentinel so the guard is actually reachable; the
# previous direct `[...]` lookup raised before the guard could run.
uidx = estimator.user_id_to_idx.get(user_id, -1)
if uidx == -1:
    raise KeyError(f"user_id {user_id} is not indexed by the estimator")

__OBS_W: np.ndarray = _users_dist[uidx]

# Clear the diagonals of both copies before multiplying.
for _ in range(tags_count):
    _tags_score[_][_] = 0.0
    __OBS_W[_][_] = 0.0

# The colour range is derived from the plotted product, matching the
# clustered S * W(u) figure; it was previously taken from W(u) alone (after a
# discarded first computation on S). Values >= 1.0 are excluded from the
# upper bound.
_WS = _tags_score * __OBS_W
_min = np.min(_WS)
_max = np.max(_WS[_WS < 1.0])

ax = sns.heatmap(
    data=_WS,
    vmin=_min,
    vmax=_max,
    cmap="Grays",
    xticklabels=_tag_names_list,
    yticklabels=_tag_names_list,
)
# Title, axis labels and tick sizes are applied after the heat map is drawn:
# `sns.heatmap` sets the axis labels itself (to empty strings for a plain
# array) and replaces the tick labels, so values set beforehand are lost.
ax.set_title(f"W * S ({user_id})", fontsize=8.0)
ax.set_xlabel("Source tags name", fontsize=4.0)
ax.set_ylabel("Target tags name", fontsize=4.0)
ax.tick_params(axis="x", labelsize=2.0)
ax.tick_params(axis="y", labelsize=2.0)

_fig_file_path = str.format(
    "{0}/trunk/obs/set{1}_WS_u{2}.svg",
    WORKSPACE_HOME,
    _FOLD_SET_ID,
    user_id,
)
__fig_dir_path = os.path.dirname(_fig_file_path)
if not os.path.exists(__fig_dir_path):
    DirectoryPathValidator.mkdir(__fig_dir_path)

# Save first, then show, so the file is written from the live figure.
ax.figure.savefig(_fig_file_path)
plt.show()

# EVAL - Tags scores.

In [None]:
# Tag-score RMSE evaluation on the first test file (index 0 = LIKE list); the
# value returned by `eval()` is the cell output.
tags_scores_evaluator = TagsScoreRMSEEvaluator(recommender=recommender, file_path=_TEST_SET_FILES_LIST[0])
tags_scores_evaluator.eval()

In [None]:
# Tag-score RMSE evaluation on the second test file (index 1 = PURCHASE
# list); the value returned by `eval()` is the cell output.
tags_scores_evaluator = TagsScoreRMSEEvaluator(recommender=recommender, file_path=_TEST_SET_FILES_LIST[1])
tags_scores_evaluator.eval()

# Diff. of Tags Freq. - Rec. Items;

In [None]:
from rec_tags_freq import CosineItemsTagsFreqAddPenalty

# Tags-frequency distance between each test set and the recommendations,
# printed once per test file: "L" for the LIKE list (index 0) and "P" for the
# PURCHASE list (index 1).
rec_tags_freq_dist = CosineItemsTagsFreqAddPenalty()
for _label, _test_file_path in zip(("L", "P"), _TEST_SET_FILES_LIST):
    evaluator = IRMetricsEvaluator(recommender=recommender, file_path=_test_file_path)
    avg_cos_dist = rec_tags_freq_dist.tags_freq_distance(
        test_set=evaluator.TEST_SET_LIST,
        recommender=recommender,
    )
    print(f"{_label}: {avg_cos_dist}")

# Clone -- estimator;

In [None]:
# Keep an independent deep copy of the estimator in its current state, so
# later changes to `estimator` do not affect `_dummy_estimator`.
_dummy_estimator: AdjustedBiasedCorrelationEstimator = copy.deepcopy(estimator)