In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import os, json
import numpy as np

# The project packages (`utils`, `recommender`) are imported while the working
# directory is `src` -- presumably because they live there and are resolved
# relative to the cwd; confirm against the repository layout.
if not os.getcwd().endswith("src"):
    os.chdir("src")
    print("jumping into src")

try:
    from utils.data.data_module import DataModule
    from utils.data.testbench import TestBench
    from recommender.run_pipeline import Models
finally:
    # Step back out even when an import fails, so the kernel is never left
    # stranded inside `src` and the relative config paths below keep working.
    if os.getcwd().endswith("src"):
        os.chdir("..")
        print("jumping out of src")

# Run configuration for this notebook session.
# NOTE(review): "output_dir" points at models/popularity while "model" is
# "tower" with a twotower config -- confirm this is the intended destination.
args = {
    "output_dir": "models/popularity",
    "dataset_config": "configs/datasets/masked_is_negative.json",
    "model_config": "configs/twotower/2_user_id_anime_id_title_fresh.json",
    "model": "tower",
    "should_return_ids": True,
}
dataset_config = {}
model_config = {}
output_dir = args["output_dir"]

# Load the optional JSON configs. The encoding is pinned to UTF-8 so parsing
# does not depend on the platform's locale default.
if args["dataset_config"]:
    with open(args["dataset_config"], "r", encoding="utf-8") as f:
        dataset_config.update(json.load(f))
if args["model_config"]:
    with open(args["model_config"], "r", encoding="utf-8") as f:
        model_config = json.load(f)

os.makedirs(output_dir, exist_ok=True)

# CUDA debugging switches, exported as environment variables for this process.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"

jumping into src
jumping out of src


In [10]:
# Build the data module from the dataset config loaded in the setup cell.
datamodule = DataModule(**dataset_config)

# Wrap it in the evaluation harness; `should_return_ids` falls back to False
# when the key is absent from `args`.
testbench = TestBench(
    datamodule,
    should_return_ids=(
        args["should_return_ids"] if "should_return_ids" in args else False
    ),
)

Parsing animes...: 100%|██████████| 12294/12294 [00:00<00:00, 19828.60it/s]
Parsing users...: 100%|██████████| 73515/73515 [00:39<00:00, 1871.48it/s]
Resetting Train to k=0 ...: 100%|██████████| 48669/48669 [00:14<00:00, 3306.90it/s]

Number of Users: 54077, Hash[:8]: 9f0cd3, Hash: 9f0cd3119bd9ee7279856737c33aebb8
Total Animes: 12294, Total Users: 54077





In [None]:
# Size parameters taken from the data module; they are merged over the
# file-based model config, so they win on key collisions.
auxiliary_args = {
    "n_users": datamodule.max_user_count,
    "n_anime": datamodule.max_anime_count,
}
model_config = model_config | auxiliary_args

# Resolve the model class from its upper-cased name and instantiate it.
model = Models.from_string(args["model"].upper())(datamodule=datamodule, **model_config)

model.train()

# Evaluate, then persist every non-array metric as one "key: value" line.
metrics = testbench.full_evaluation(model)
with open(os.path.join(output_dir, "output.txt"), "w") as f:
    for k, v in metrics.items():
        # isinstance (not an exact type comparison) also skips ndarray
        # subclasses, which would otherwise be dumped into the text report.
        if isinstance(v, np.ndarray):
            continue
        f.write(f"{k}: {v}\n")

Epoch 1 / 100


In [None]:
# Pull the score matrix out of the evaluation metrics and report its shape.
scores = metrics["scores"]
print(f"{scores.shape=}")

# Per-row item indices ordered from highest to lowest score
# (ascending argsort of the negated scores).
shows = np.argsort(-scores, axis=1)

# First entry of the data module's test ids -- presumably canonical user ids;
# confirm against DataModule.
test_cuids = datamodule.test_cuids
first = test_cuids[0]
print(first)

scores.shape=(5408, 12294)
48669


In [None]:
# Display the first sample of the model's dataset as a sanity check.
# The recorded output shows three lists of index tensors (1, 10 and 20 entries)
# -- presumably user id, positives and negatives; TODO confirm.
model.dataset[0]

([tensor([0])],
 [tensor([2855, 4551,  760, 1087, 3978, 3560,  425, 3337,  449, 2620])],
 [tensor([ 1469,   596,  4644,  9903,   105,  7615,  5438,  4645,   287,  7656,
          10739,  5982,  5852,  2715,  6476,  5459,  9709,  2710,  9693,  3762])])

In [None]:
# Inspect where the first test user's preserved items land in the ranking.
print("Get positive recommendations")
positives_for_first = datamodule.canonical_user_mapping[first].preserved_cais

# Row 0 of `scores` is used for this user; compare the overall mean score with
# the mean over the preserved items only.
first_user_scores = scores[0]
print(f"Average score: {first_user_scores.mean()}")
print(f"Average scores of positive: {first_user_scores[positives_for_first].mean()}")

# Mark which ranked positions hold a preserved item; the indices of the True
# entries are the 0-based ranks (0 = highest score).
shows_in_preserved = np.isin(shows[0], positives_for_first)
ranks = shows_in_preserved.nonzero()[0]
print(f"Ranks: {ranks}")

Get positive recommendations
Average score: -0.023083681240677834
Average scores of positive: 0.2732205390930176
Ranks: [  942  1053  1121  1182  1508  1638  1653  1738  1810  1882  1986  2258
  2468  2603  2701  2722  2939  3039  3373  3445  3721  3749  3767  3851
  4314  4431  4539  4561  4678  4865  5073  5397  5399  5450  5525  5787
  6015  6149  6237  6379  6436  6847  7479  7493  7703  7724  8118  8174
  8190  8866  8905  9056  9212  9428  9566  9730  9928  9956 10371 10379
 10544 11243 11678 11712 11933 12157]


In [None]:
# Print the five highest-scoring titles for each of the first five score rows.
for i in range(5):
    print(f"User {i}")
    for j in range(5):
        # shows[i, j] is the item index at rank j for row i.
        anime_idx = shows[i, j]
        print(
            f"[{j}] - [{scores[i, anime_idx]}]: {datamodule.canonical_anime_mapping[anime_idx].name}, ",
            end="",
        )
    # Terminate the line: the original bare `print` only referenced the
    # function without calling it, so every user's list ran into the next
    # "User N" header.
    print()

User 0
[0] - [13.575464248657227]: Rockman.EXE Stream, [1] - [13.480500221252441]: Kumori Nochi Hare, [2] - [12.464479446411133]: Hwanggeum Cheolin, [3] - [11.939308166503906]: Himawari no You ni, [4] - [11.854948997497559]: Aniyome, User 1
[0] - [13.505234718322754]: Nyamen: Tenkai Daiichi Joshi Koukou Bunka Matsuri Tokubetsu Eizou, [1] - [12.39642333984375]: Ashita no Joe 2 (Movie), [2] - [12.390143394470215]: Angelium, [3] - [12.329965591430664]: Kizuoibito, [4] - [12.233551979064941]: Sanrio Anime Sekai Meisaku Gekijou, User 2
[0] - [11.632704734802246]: Ghost Messenger Movie, [1] - [10.786734580993652]: Tondera House no Daibouken, [2] - [10.354955673217773]: Urusei Yatsura Movie 6: Itsudatte My Darling, [3] - [10.296066284179688]: Cofun Gal no Coffy: Cofunderella, [4] - [10.26324462890625]: Soul Worker: Your Destiny Awaits, User 3
[0] - [14.320096015930176]: Kaijuu no Ballad, [1] - [13.771485328674316]: Fuyu no Yoru no Ohanashi, [2] - [13.52220630645752]: Sansha Sanyou, [3] - [13.

48669


In [9]:
# Count the preserved items of the first test user.
# NOTE(review): depends on `first` and `datamodule` from earlier cells, and the
# recorded execution count is out of sequence -- re-run from a fresh kernel.
preserved_for_first = datamodule.canonical_user_mapping[first].preserved_cais
print(len(preserved_for_first))

66


In [15]:
# Sanity check: count infinite entries in the first score row and show the row
# length for comparison.
# NOTE(review): the recorded output reports all 12294 entries as infinite,
# which conflicts with the finite averages printed by an earlier cell --
# presumably the outputs come from different runs; confirm before trusting
# either result.
print(f"{np.isinf(scores[0]).sum()=}")
print(f"{len(scores[0])=}")

np.isinf(scores[0]).sum()=np.int64(12294)
len(scores[0])=12294
