# Validate action_level_novelty writer id

- Amazon writer ids in total (KNOWN + UNKNOWN) -> 310
- KNOWN writer id: max -> 100, min -> 1
- UNKNOWN writer id: max -> 310, min -> 101
- SHIPPING writer id : max -> 1034, min -> 311
- STORY writer id: max -> 1079, min -> 1035

In [21]:
import json
import numpy as np

In [22]:
def load_shipping_writer_id_set(
        reviewer_id_stats_file="./shipping_review_input/shipping_reviewer_stats.txt"):
    """Load the set of writer ids listed in the shipping reviewer stats file.

    Every non-blank line is split on whitespace and its first field is kept
    as the writer id; any remaining fields on the line are ignored.

    Args:
        reviewer_id_stats_file: Path of the stats file to read. Defaults to
            the shipping reviewer stats file used by this notebook.

    Returns:
        A set of strings: the first whitespace-separated field of every
        non-blank line in the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    shipping_writer_id_set = set()
    with open(reviewer_id_stats_file, mode="r") as fin:
        for line in fin:
            # split() with no argument already discards surrounding whitespace
            parts = line.split()
            # A blank / whitespace-only line (e.g. a trailing empty line) has
            # no fields; skip it instead of failing on parts[0].
            if not parts:
                continue
            # endif
            shipping_writer_id_set.add(parts[0])
        # endfor
    # endwith
    return shipping_writer_id_set


def load_story_writer_id_set(
        reviewer_id_stats_file="../5_Novelist_Dataset/output/author_to_num_dict.txt"):
    """Load the set of writer ids listed in the story (novelist) author file.

    Every non-blank line is split on whitespace and its first field is kept
    as the writer id; any remaining fields on the line are ignored.

    NOTE(review): because lines are split on whitespace, an author name that
    itself contains spaces would be truncated to its first word - confirm the
    file format guarantees space-free ids.

    Args:
        reviewer_id_stats_file: Path of the author file to read. Defaults to
            the novelist dataset output used by this notebook.

    Returns:
        A set of strings: the first whitespace-separated field of every
        non-blank line in the file.

    Raises:
        OSError: If the file cannot be opened.
    """
    story_writer_id_set = set()
    with open(reviewer_id_stats_file, mode="r") as fin:
        for line in fin:
            # split() with no argument already discards surrounding whitespace
            parts = line.split()
            # A blank / whitespace-only line (e.g. a trailing empty line) has
            # no fields; skip it instead of failing on parts[0].
            if not parts:
                continue
            # endif
            story_writer_id_set.add(parts[0])
        # endfor
    # endwith
    return story_writer_id_set

In [23]:
# Dataset configuration: these two values select the output directory below.
known_writer_num, known_writer_review_num = 100, 40

# Read the writer-id -> number mapping stored for this configuration.
writer_id_mapping_path = f"./output/writer_{known_writer_num}_review_{known_writer_review_num}/writer_id_to_num_mapping_dict.json"
with open(writer_id_mapping_path, mode="r") as mapping_file:
    all_writer_id_to_num_dict = json.load(mapping_file)

In [24]:
# Report how many writer ids the loaded mapping contains.
print(f"All Amazon review writer id size: {len(all_writer_id_to_num_dict)}")

All Amazon review writer id size: 310


In [25]:
# Build the directory from the configured writer / review counts instead of
# hard-coding "writer_100_review_40", so these lists always come from the same
# configuration as the id mapping loaded into all_writer_id_to_num_dict.
writer_output_dir = f"./output/writer_{known_writer_num}_review_{known_writer_review_num}"

# known writer id list (writer id strings)
with open(f"{writer_output_dir}/known_writer_id_list.json", mode="r") as fin:
    known_writer_list = json.load(fin)
# endwith

# unknown writer id list (writer id strings)
with open(f"{writer_output_dir}/unknown_writer_id_list.json", mode="r") as fin:
    unknown_writer_list = json.load(fin)
# endwith

# ########## known writer ############
# Translate every known writer id string into its number and report the range.
known_writer_id_set = {
    all_writer_id_to_num_dict[writer_str] for writer_str in known_writer_list
}
known_writer_id_arr = np.array(list(known_writer_id_set))
print(f"KNOWN writer id: max -> {np.amax(known_writer_id_arr)}, min -> {np.amin(known_writer_id_arr)}")

# ########## unknown writer ##########
# Same for the unknown writers.
unknown_writer_id_set = {
    all_writer_id_to_num_dict[writer_str] for writer_str in unknown_writer_list
}
unknown_writer_id_arr = np.array(list(unknown_writer_id_set))
print(f"UNKNOWN writer id: max -> {np.amax(unknown_writer_id_arr)}, min -> {np.amin(unknown_writer_id_arr)}")


KNOWN writer id: max -> 100, min -> 1
UNKNOWN writer id: max -> 310, min -> 101


In [26]:
# ####################### add more writer mapping to dict #######################
# (1) add writer for shipping service
# (2) add writer for story novelist
#
# Each new writer receives the next free number (current dict size + 1), so the
# shipping writers are numbered right after the writers already in the mapping
# and the story writers right after the shipping writers.

shipping_review_writer_id_set = load_shipping_writer_id_set()
story_writer_id_set = load_story_writer_id_set()

# Iterate in sorted order: the iteration order of a set of strings can differ
# between interpreter runs (string hash randomization), which would otherwise
# hand a different number to the same writer on every run.
for tmp_review_id in sorted(shipping_review_writer_id_set):
    # Leave an already-mapped id untouched: overwriting it would not grow the
    # dict, so the next new writer would be given the very same number.
    if tmp_review_id not in all_writer_id_to_num_dict:
        all_writer_id_to_num_dict[tmp_review_id] = len(
            all_writer_id_to_num_dict) + 1
    # endif
# endfor

for tmp_review_id in sorted(story_writer_id_set):
    # Same guard as above (also covers ids shared with the shipping set).
    if tmp_review_id not in all_writer_id_to_num_dict:
        all_writer_id_to_num_dict[tmp_review_id] = len(
            all_writer_id_to_num_dict) + 1
    # endif
# endfor


# ########## shipping writer range ##########
# Numbers now assigned to the shipping writers, and their min / max.
shipping_writer_num_set = {
    all_writer_id_to_num_dict[writer_str]
    for writer_str in shipping_review_writer_id_set
}
shipping_writer_num_arr = np.array(list(shipping_writer_num_set))
print(f"SHIPPING writer id : max -> {np.amax(shipping_writer_num_arr)}, min -> {np.amin(shipping_writer_num_arr)}")

# ########## story writer range ###########
# Numbers now assigned to the story writers, and their min / max.
story_writer_num_set = {
    all_writer_id_to_num_dict[writer_str]
    for writer_str in story_writer_id_set
}
story_writer_num_arr = np.array(list(story_writer_num_set))
print(f"STORY writer id: max -> {np.amax(story_writer_num_arr)}, min -> {np.amin(story_writer_num_arr)}")



SHIPPING writer id : max -> 1034, min -> 311
STORY writer id: max -> 1079, min -> 1035
