In [None]:
import os, sys
import numpy as np
import pandas as pd
import random
from scipy import stats as st
import itertools
import operator
import heapq as hq
import torch

from tqdm.notebook import trange
from tqdm import tqdm

In [None]:
# Current working directory of the kernel; the relative paths built in this
# notebook are resolved against it.
base_dir = os.getcwd()

# Load functions from another notebook: helpers.ipynb contains a set of tools and
# functions used in this notebook (presumably including load_model,
# compute_sim_matrix and gpu_embeddings_to_cosine_similarity_matrix, which are
# called below but not defined here; verify against helpers.ipynb).
# Backslashes are replaced with forward slashes so a Windows-style path is passed
# to %run in a uniform form.
helpers_file = os.path.join(base_dir, 'helpers.ipynb').replace("\\", "/")
# IPython magic: $helpers_file is expanded to the variable's value before %run executes.
%run $helpers_file

In [None]:
# Make extra package directories importable: each relative path is resolved
# against base_dir and appended to sys.path unless it is already listed.
for p in ['../spotlight_ext']:
    module_path = os.path.abspath(os.path.join(base_dir, p))
    if module_path in sys.path:
        continue  # already registered, e.g. when this cell is re-run
    sys.path.append(module_path)

# NumPy random generator with a fixed seed (2020).
random_state = np.random.RandomState(2020)

In [None]:
# Load the pretrained models present in the models folder:
#   - "lstm"    : entire_model_1m_20interactions.pt
#                 (the code to create this model is in misc.ipynb, section "train model")
#   - "pooling" : pooling_model_1m_20interactions.pt
#                 (the code to create this model is in brute_force_rec_expl.ipynb,
#                  section "train and save pooling model")

# implicit_model = load_model('implicit_factorization')
lstm_model = load_model(model_type='entire')
pooling_model = load_model('pooling')

# Lookup table from short model name to the loaded model object.
pretrained_models = dict(lstm=lstm_model, pooling=pooling_model)

In [None]:
# Fetch the MovieLens dataset (1M variant), split it into a training set and a
# testing set, then convert both sets to sequences of at most 20 elements.

# NOTE(review): these imports sit here rather than in the first cell, presumably
# because spotlight is resolved through the sys.path entry added in an earlier
# cell; confirm before moving them.
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset

# More information about the dataset: https://grouplens.org/datasets/movielens/
dataset = get_movielens_dataset(variant='1M')

# Maximum sequence length. Subsequences shorter than this will be left-padded with zeros.
max_sequence_length = 20

# Split first, then turn each part into a SequenceInteractions object (train
# first, test second). Documentation on this object:
# https://maciejkula.github.io/spotlight/interactions.html
train, test = (
    split.to_sequence(max_sequence_length=max_sequence_length)
    for split in random_train_test_split(dataset, random_state=random_state)
)

In [None]:
# Cosine similarity matrix between item embeddings of the pooling model.
# The embedding of every item id in [0, dataset.num_items) is looked up and passed
# to the helper; the returned tensor is detached from the autograd graph and
# converted to a NumPy array.
# `.cpu()` is required before `.numpy()` when the tensor lives on a CUDA device
# (the helper's name suggests it may compute on GPU) and is a no-op for a tensor
# that is already in host memory.
pooling_sims_matrix = gpu_embeddings_to_cosine_similarity_matrix(
    pooling_model._net.item_embeddings(
        torch.arange(0, dataset.num_items, dtype=torch.int64)
    )).detach().cpu().numpy()

# Item-item similarity matrix computed by the helper with the 'jaccard' option.
jaccard_sims_matrix = compute_sim_matrix(dataset, 'jaccard')

In [None]:
# def find_interesting_case(uid):
    