In [26]:
import numpy as np
import pandas as pd
from news_recommendation.utils import get_topic_list, read_json, load_sparse, NPMI
from pathlib import Path
from collections import defaultdict


In [31]:
def get_mean_std(values, r: int = 2):
    """Format the mean and standard deviation of ``values`` as ``"mean±std"``.

    Args:
        values: Numeric values accepted by ``np.mean`` / ``np.std``.
        r: Number of decimals both statistics are rounded to.

    Returns:
        A string made of the rounded mean, the plus-minus sign, and the
        rounded (population, i.e. NumPy default) standard deviation.
    """
    rounded_mean = np.round(np.mean(values), r)
    rounded_std = np.round(np.std(values), r)
    plus_minus = u"\u00B1"
    return f"{rounded_mean}{plus_minus}{rounded_std}"
def cal_random_score(
    wd_path,
    ref_data_path,
    run_num=5,
    topic_nums=(10, 50, 100, 200),
    top_n=10,
    seed=None,
):
    """Score randomly generated topics with NPMI as a chance-level baseline.

    For each of ``run_num`` repetitions and each topic count in ``topic_nums``,
    a ``(topic_num, len(word_dict))`` matrix is drawn from a standard normal
    distribution, converted to topics of ``top_n`` words by ``get_topic_list``,
    and scored by ``NPMI.compute_npmi`` against the binarized reference data.

    Args:
        wd_path: Path of the JSON word dictionary (read with ``read_json``);
            used here as a word -> index mapping.
        ref_data_path: Path of the reference data (read with ``load_sparse``).
        run_num: Number of repetitions per topic count.
        topic_nums: Topic counts to evaluate; each becomes one output column.
        top_n: Number of words kept per topic.
        seed: Optional seed. When given, the draws come from a private
            ``np.random.RandomState(seed)`` so the call is reproducible; when
            ``None`` the global NumPy random state is used, as before.

    Returns:
        ``pandas.DataFrame`` with one column per topic count and one row per
        repetition; each cell is the mean NPMI over that repetition's topics.
    """
    word_dict = read_json(wd_path)
    reverse_dict = {index: word for word, index in word_dict.items()}
    vocab_size = len(word_dict)

    # The reference data and the scorer do not depend on the loop variables, so
    # they are built once instead of once per (run, topic_num) pair.
    # NOTE(review): assumes NPMI scores do not depend on earlier calls to the
    # same scorer instance - confirm against the NPMI implementation.
    ref_texts = load_sparse(ref_data_path)
    scorer = NPMI((ref_texts > 0).astype(int))

    # Both the np.random module and RandomState expose randn(), so the draw
    # below is identical in form whether or not a seed was supplied.
    rng = np.random if seed is None else np.random.RandomState(seed)

    random_scores = defaultdict(list)
    for _ in range(run_num):
        for topic_num in topic_nums:
            topic_dist = rng.randn(topic_num, vocab_size)
            topic_list = get_topic_list(topic_dist, top_n, reverse_dict)
            # presumably the dictionary indices are 1-based while the reference
            # matrix columns are 0-based, hence the -1 - TODO confirm
            topic_index = [[word_dict[word] - 1 for word in topic] for topic in topic_list]
            npmi_values = scorer.compute_npmi(topics=topic_index, n=top_n)
            random_scores[topic_num].append(np.mean(npmi_values))
    return pd.DataFrame(random_scores)

In [32]:
# Random-topic NPMI baseline using the MIND_large_40910 word dictionary and the
# MIND-large-original training reference file.
wd_40910_path = Path(
    r"C:\Users\Rui\Documents\Explainable_AI\bi_attention\dataset\utils\word_dict\MIND_large_40910.json"
)
ref_40910_path = Path(
    r"C:\Users\Rui\Documents\Explainable_AI\bi_attention\dataset\data\processed\MIND-large-original\train.dtm.npz"
)
mean_npmi_40910 = cal_random_score(
    wd_path=wd_40910_path,
    ref_data_path=ref_40910_path,
)

In [33]:
# Random-topic NPMI baseline using the MIND_large_32774 word dictionary and the
# MIND-large-10 training reference file.
wd_32774_path = Path(
    r"C:\Users\Rui\Documents\Explainable_AI\bi_attention\dataset\utils\word_dict\MIND_large_32774.json"
)
ref_32774_path = Path(
    r"C:\Users\Rui\Documents\Explainable_AI\bi_attention\dataset\data\processed\MIND-large-10\train.dtm.npz"
)
mean_npmi_32774 = cal_random_score(
    wd_path=wd_32774_path,
    ref_data_path=ref_32774_path,
)

In [36]:
# Average each topic-count column over the runs and write the result to Excel.
(
    mean_npmi_40910
    .mean(axis="index")
    .to_excel("mean_npmi_40910.xlsx")
)

In [37]:
# Average each topic-count column over the runs and write the result to Excel.
(
    mean_npmi_32774
    .mean(axis="index")
    .to_excel("mean_npmi_32774.xlsx")
)
