In [48]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [63]:
import os, json
import numpy as np

# Import the project packages while the working directory is `src`, then step
# back up one level. Statement order matters here: the three imports must run
# between the two chdir calls.
# NOTE(review): presumably `utils` and `recommender` live under `src` and are
# only importable when it is the cwd — confirm. Adding `src` to sys.path would
# avoid mutating the working directory.
if not os.getcwd().endswith("src"):
    os.chdir("src")
    print("jumping into src")

from utils.data.data_module import DataModule
from utils.data.testbench import TestBench
from recommender.run_pipeline import Models

# Leave `src` again. This branch also fires when the kernel was started inside
# `src` (the first branch is skipped in that case), so the cell always ends one
# level above `src`; the relative "configs/..." and "models/..." paths used
# below are resolved from there.
if os.getcwd().endswith("src"):
    os.chdir("..")
    print("jumping out of src")

# Run configuration for this notebook.
# NOTE(review): "output_dir" points at models/popularity although the model
# trained below is the two-tower one ("model": "tower" with a twotower config).
# The metrics file written after evaluation lands in that directory — confirm
# this is intended and not a leftover from the popularity baseline.
args = {
    "output_dir": "models/popularity",
    "dataset_config": "configs/datasets/id_dataset.json",
    "model_config": "configs/twotower/fresh_user_embedder.json",
    "model": "tower",
    "should_return_ids": True,
}

# Both configs default to empty dicts so later cells still work when a config
# path is left empty.
dataset_config = {}
model_config = {}
output_dir = args["output_dir"]

# Read the JSON configs as UTF-8 explicitly instead of relying on the
# platform's default text encoding.
if args["dataset_config"]:
    with open(args["dataset_config"], "r", encoding="utf-8") as f:
        dataset_config.update(json.load(f))
if args["model_config"]:
    with open(args["model_config"], "r", encoding="utf-8") as f:
        model_config = json.load(f)

os.makedirs(output_dir, exist_ok=True)

# CUDA debugging aids, exported for any CUDA work started later in this kernel.
# CUDA_LAUNCH_BLOCKING=1 makes kernel launches synchronous so errors surface at
# the offending call.
# NOTE(review): TORCH_USE_CUDA_DSA is presumably meant to enable device-side
# assertions — confirm that setting it at runtime has any effect.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["TORCH_USE_CUDA_DSA"] = "1"

jumping into src
jumping out of src


In [50]:
# Build the data module from the dataset config, then wrap it in the test
# bench that is used for evaluation further down.
return_ids = args.get("should_return_ids", False)
datamodule = DataModule(**dataset_config)
testbench = TestBench(datamodule, should_return_ids=return_ids)

Parsing animes...: 100%|██████████| 12294/12294 [00:01<00:00, 10675.44it/s]
Parsing users...: 100%|██████████| 73515/73515 [00:50<00:00, 1447.37it/s]
Resetting Train to k=0 ...: 100%|██████████| 48669/48669 [00:35<00:00, 1388.03it/s]

Number of Users: 54077, Hash[:8]: 9f0cd3, Hash: 9f0cd3119bd9ee7279856737c33aebb8
Total Animes: 12294, Total Users: 54077





In [65]:
# Sizes the model needs but that are only known once the data is loaded.
auxiliary_args = dict(
    n_users=datamodule.max_user_count,
    n_anime=datamodule.max_anime_count,
)
# The auxiliary values win over same-named keys from the JSON model config.
model_config = {**model_config, **auxiliary_args}

# Look up the model class by its upper-cased name, then instantiate it.
model_cls = Models.from_string(args["model"].upper())
model = model_cls(datamodule=datamodule, **model_config)

# Runs training; the per-epoch loss log below is its output.
model.train()

metrics = testbench.full_evaluation(model)

# Persist the metrics as one "name: value" line each. NumPy arrays are skipped
# so only compact values end up in the text file. isinstance() is used instead
# of an exact type comparison so ndarray subclasses are skipped as well, and
# the file encoding is pinned rather than left to the platform default.
with open(os.path.join(output_dir, "output.txt"), "w", encoding="utf-8") as f:
    for k, v in metrics.items():
        if isinstance(v, np.ndarray):
            continue
        f.write(f"{k}: {v}\n")

Epoch 1 / 20


L:0.804, +:0.001, -:0.003: 100%|██████████| 106/106 [00:08<00:00, 12.04it/s] 


[0] Loss: 0.8056946632997046
Epoch 2 / 20


L:0.789, +:0.043, -:-0.002: 100%|██████████| 106/106 [00:08<00:00, 12.51it/s]


[1] Loss: 0.7900619304405069
Epoch 3 / 20


L:0.770, +:0.098, -:-0.012: 100%|██████████| 106/106 [00:09<00:00, 11.51it/s]


[2] Loss: 0.7736590566500176
Epoch 4 / 20


L:0.747, +:0.179, -:-0.010: 100%|██████████| 106/106 [00:08<00:00, 11.97it/s]


[3] Loss: 0.7533867376030616
Epoch 5 / 20


L:0.716, +:0.300, -:-0.021: 100%|██████████| 106/106 [00:09<00:00, 11.63it/s]


[4] Loss: 0.7231887910725937
Epoch 6 / 20


L:0.669, +:0.486, -:-0.053: 100%|██████████| 106/106 [00:09<00:00, 11.61it/s]


[5] Loss: 0.6787641380193099
Epoch 7 / 20


L:0.590, +:0.819, -:-0.156: 100%|██████████| 106/106 [00:08<00:00, 11.93it/s]


[6] Loss: 0.6082205918599974
Epoch 8 / 20


L:0.505, +:1.230, -:-0.320: 100%|██████████| 106/106 [00:09<00:00, 11.68it/s]


[7] Loss: 0.5214384774554449
Epoch 9 / 20


L:0.418, +:1.669, -:-0.602: 100%|██████████| 106/106 [00:08<00:00, 12.85it/s]


[8] Loss: 0.43429486369186976
Epoch 10 / 20


L:0.354, +:2.040, -:-0.938: 100%|██████████| 106/106 [00:08<00:00, 12.56it/s]


[9] Loss: 0.3667648116934975
Epoch 11 / 20


L:0.313, +:2.321, -:-1.261: 100%|██████████| 106/106 [00:09<00:00, 11.75it/s]


[10] Loss: 0.3216134517822627
Epoch 12 / 20


L:0.291, +:2.489, -:-1.529: 100%|██████████| 106/106 [00:08<00:00, 12.20it/s]


[11] Loss: 0.29469611734714146
Epoch 13 / 20


L:0.271, +:2.572, -:-1.786: 100%|██████████| 106/106 [00:09<00:00, 11.70it/s]


[12] Loss: 0.27714147798295286
Epoch 14 / 20


L:0.267, +:2.606, -:-1.946: 100%|██████████| 106/106 [00:08<00:00, 12.11it/s]


[13] Loss: 0.265396366844762
Epoch 15 / 20


L:0.253, +:2.636, -:-2.126: 100%|██████████| 106/106 [00:09<00:00, 11.58it/s]


[14] Loss: 0.25502855049551654
Epoch 16 / 20


L:0.245, +:2.649, -:-2.257: 100%|██████████| 106/106 [00:09<00:00, 11.60it/s]


[15] Loss: 0.24672871083021164
Epoch 17 / 20


L:0.235, +:2.660, -:-2.391: 100%|██████████| 106/106 [00:09<00:00, 11.61it/s]


[16] Loss: 0.23890111215834348
Epoch 18 / 20


L:0.233, +:2.667, -:-2.481: 100%|██████████| 106/106 [00:09<00:00, 11.60it/s]


[17] Loss: 0.23220976287463926
Epoch 19 / 20


L:0.225, +:2.677, -:-2.601: 100%|██████████| 106/106 [00:08<00:00, 11.92it/s]


[18] Loss: 0.2250507225124341
Epoch 20 / 20


L:0.215, +:2.678, -:-2.706: 100%|██████████| 106/106 [00:09<00:00, 11.68it/s]


[19] Loss: 0.21780629399812446
Saving model to models/twotower/user_id__anime_id_fresh.pt
Start Time: 2024-11-12 17:33:40


User Embeddings...: 11it [00:00, 1222.05it/s]
Anime Embeddings...: 100%|██████████| 25/25 [00:00<00:00, 223.21it/s]


user_embeddings.shape=(5408, 256) anime_embeddings.shape=(256, 12294)
Commence God Operation
Commence Big Sort Energy
End Time: 2024-11-12 17:33:42
This model took 1.7963 seconds.
Out of an optimal score of 1.0, you scored 0.0606.
Your DEI score is 8890.3589.
Your Pseudo-IOU score is 0.0141.


In [None]:
# First canonical user id of the test split.
test_cuids = datamodule.test_cuids
first = test_cuids[0]

# Per-row ranking of column indices, highest score first (argsort on the
# negated scores).
scores = metrics["scores"]
shows = np.argsort(-scores, axis=1)

print(f"{scores.shape=}")
print(first)

scores.shape=(5408, 12294)
48669


In [None]:
# Display the first item of the model's dataset as a sanity check.
# NOTE(review): the saved output is a 3-tuple of tensor lists; presumably
# (user id, positive anime ids, negative anime ids) — confirm against the
# dataset class.
model.dataset[0]

([tensor([0])],
 [tensor([2855, 4551,  760, 1087, 3978, 3560,  425, 3337,  449, 2620])],
 [tensor([ 1469,   596,  4644,  9903,   105,  7615,  5438,  4645,   287,  7656,
          10739,  5982,  5852,  2715,  6476,  5459,  9709,  2710,  9693,  3762])])

In [None]:
# Where do the first test user's preserved items land in that user's ranking?
print("Get positive recommendations")
positives_for_first = datamodule.canonical_user_mapping[first].preserved_cais

# NOTE(review): row 0 of `scores` / `shows` is assumed to belong to `first`
# (i.e. rows follow the order of datamodule.test_cuids) — confirm.
first_scores = scores[0]
print(f"Average score: {first_scores.mean()}")
print(f"Average scores of positive: {first_scores[positives_for_first].mean()}")

# Positions (0 = best) within the descending ranking at which the preserved
# items appear.
shows_in_preserved = np.isin(shows[0], positives_for_first)
ranks = np.nonzero(shows_in_preserved)[0]
print(f"Ranks: {ranks}")

Get positive recommendations
Average score: -0.023083681240677834
Average scores of positive: 0.2732205390930176
Ranks: [  942  1053  1121  1182  1508  1638  1653  1738  1810  1882  1986  2258
  2468  2603  2701  2722  2939  3039  3373  3445  3721  3749  3767  3851
  4314  4431  4539  4561  4678  4865  5073  5397  5399  5450  5525  5787
  6015  6149  6237  6379  6436  6847  7479  7493  7703  7724  8118  8174
  8190  8866  8905  9056  9212  9428  9566  9730  9928  9956 10371 10379
 10544 11243 11678 11712 11933 12157]


In [None]:
# Print the five highest-scored titles for each of the first five score rows.
for i in range(5):
    print(f"User {i}")
    for j in range(5):
        print(
            f"[{j}] - [{scores[i, shows[i,j]]}]: {datamodule.canonical_anime_mapping[shows[i, j]].name}, ",
            end="",
        )
    # Bug fix: a bare `print` only references the function and emits nothing,
    # which made the next "User N" header run onto the same line. Call it to
    # terminate the line.
    print()

User 0
[0] - [13.575464248657227]: Rockman.EXE Stream, [1] - [13.480500221252441]: Kumori Nochi Hare, [2] - [12.464479446411133]: Hwanggeum Cheolin, [3] - [11.939308166503906]: Himawari no You ni, [4] - [11.854948997497559]: Aniyome, User 1
[0] - [13.505234718322754]: Nyamen: Tenkai Daiichi Joshi Koukou Bunka Matsuri Tokubetsu Eizou, [1] - [12.39642333984375]: Ashita no Joe 2 (Movie), [2] - [12.390143394470215]: Angelium, [3] - [12.329965591430664]: Kizuoibito, [4] - [12.233551979064941]: Sanrio Anime Sekai Meisaku Gekijou, User 2
[0] - [11.632704734802246]: Ghost Messenger Movie, [1] - [10.786734580993652]: Tondera House no Daibouken, [2] - [10.354955673217773]: Urusei Yatsura Movie 6: Itsudatte My Darling, [3] - [10.296066284179688]: Cofun Gal no Coffy: Cofunderella, [4] - [10.26324462890625]: Soul Worker: Your Destiny Awaits, User 3
[0] - [14.320096015930176]: Kaijuu no Ballad, [1] - [13.771485328674316]: Fuyu no Yoru no Ohanashi, [2] - [13.52220630645752]: Sansha Sanyou, [3] - [13.

48669


In [9]:
# Size of the `preserved_cais` collection of the first test user.
first_user = datamodule.canonical_user_mapping[first]
print(len(first_user.preserved_cais))

66


In [15]:
# Sanity check: count the infinite entries in the first score row and compare
# that with the row length.
# NOTE(review): the saved output reports every entry as infinite, which
# conflicts with the finite average score printed for the same row earlier in
# this notebook. The execution count suggests this output comes from an older
# kernel session — re-run the notebook top to bottom and confirm.
print(f"{np.isinf(scores[0]).sum()=}")
print(f"{len(scores[0])=}")

np.isinf(scores[0]).sum()=np.int64(12294)
len(scores[0])=12294
