## Notebook Settings

In [1]:
import os
import mlflow
import dotenv
import torch
import pandas as pd
import numpy as np
from torch.utils.data import DataLoader
from mlflow.tracking import MlflowClient
import subprocess
import random

In [2]:
# Work from the parent of the launch directory so the relative paths used
# below ("./.env", "./mlflow_artifacts", "ml-1m/") resolve from there.
# The target is computed once and cached: a bare os.chdir("./../") would
# climb one more level every time this cell is re-run in the same kernel.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
os.chdir(PROJECT_ROOT)

In [3]:
# Load environment variables from the .env file in the current working
# directory; the boolean result is shown as the cell output.
dotenv.load_dotenv(dotenv_path="./.env")

True

In [4]:
# Address of the MLflow tracking server. MLFLOW_TRACKING_URI (for example
# from the .env file loaded above) takes precedence when it is set, so the
# notebook can be pointed at another server without editing code; the
# previous hard-coded address remains the default.
TRACKING_URL = os.environ.get(
    "MLFLOW_TRACKING_URI", "http://175.41.182.223:5050/")
# Name of the MLflow experiment this notebook selects.
EXPERIMENT_NAME = "bst-movielens1m-recommender-training"

In [5]:
# Name of the registered model to look up, and the registry stage of interest.
model_name, stage = "movielens1m-recommender-model", "Production"

In [6]:
# Point MLflow at the tracking server, then select the experiment.
# set_experiment stays the last expression so the Experiment is displayed.
mlflow.set_tracking_uri(uri=TRACKING_URL)
mlflow.set_experiment(experiment_name=EXPERIMENT_NAME)

<Experiment: artifact_location='s3://s3-mlflow-artifacts-storage/mlflow/15', creation_time=1690551278624, experiment_id='15', last_update_time=1690551278624, lifecycle_stage='active', name='bst-movielens1m-recommender-training', tags={}>

## Get Registered Model

In [7]:
# Client used for the registry and run lookups in the cells below.
client = MlflowClient()
# Experiment record looked up by name.
experiment = client.get_experiment_by_name(name=EXPERIMENT_NAME)

In [8]:
# Registry entry for `model_name`; its `latest_versions` are inspected next.
registered_models = client.get_registered_model(name=model_name)

In [9]:
# Select the registered version whose current stage equals `stage`
# (set in the settings section) rather than a second hard-coded literal.
matching_versions = [
    meta for meta in registered_models.latest_versions
    if meta.current_stage == stage
]
if not matching_versions:
    # Fail here with a readable message instead of an IndexError below.
    raise LookupError(
        f"Model Name:{model_name} has no registered {stage} model")
# `production_model` is always a single model-version object, never a list.
production_model = matching_versions[0]

In [10]:
# Id of the run recorded on the selected model version; used below to find
# that run's artifacts.
run_id = production_model.run_id
print(f"run_id:{run_id}")

run_id:6714af877d514c1a8123f44ca6cd3a77


In [11]:
# Resolve where the run's artifacts are stored. Nothing is downloaded here;
# the copy to local disk happens in the next cell.
artifact_uri = client.get_run(run_id).info.artifact_uri
print(f"artifact_uri:{artifact_uri}")

artifact_uri:s3://s3-mlflow-artifacts-storage/mlflow/15/6714af877d514c1a8123f44ca6cd3a77/artifacts


In [12]:
# Copy the run's artifacts into ./mlflow_artifacts with the AWS CLI.
# Arguments are passed as a list, so no shell is involved.
output = subprocess.run(
    ["aws", "s3", "cp", artifact_uri, "./mlflow_artifacts", "--recursive"],
    capture_output=True,
)
# stdout/stderr are captured, so a failed copy would otherwise go unnoticed
# and the imports from mlflow_artifacts below would fail (or silently use a
# stale copy). Surface the CLI's own error message instead.
if output.returncode != 0:
    raise RuntimeError(
        f"aws s3 cp failed with exit code {output.returncode}: "
        f"{output.stderr.decode(errors='replace')}"
    )

## Load Model From Artifacts 

In [13]:
from mlflow_artifacts.src.model import BSTRecommenderModel
from mlflow_artifacts.src.dataset import RatingDataset
from mlflow_artifacts.src import utils

In [14]:
# Rebuild the model from the training configuration stored with the run.
config_dict = utils.open_json("./mlflow_artifacts/artifacts/config.json")
config = utils.Config(config_dict)
recommender = BSTRecommenderModel(config)

# map_location="cpu" lets the checkpoint load on a machine without the
# device it was saved from.
state_dict = torch.load(
    "./mlflow_artifacts/model/pytorch_model.pt", map_location="cpu")
load_result = recommender.load_state_dict(state_dict)

# Modules are created in training mode. Switch to eval mode so that any
# dropout / normalisation layers behave deterministically at inference.
recommender.eval()

# Keep the key-matching report as the cell output.
load_result

<All keys matched successfully>

In [15]:
# Test split that was downloaded together with the run's artifacts.
test_data = pd.read_parquet("./mlflow_artifacts/artifacts/test_data.parquet")

In [16]:
# Turn every element of each genre sequence into a plain Python list.
# (Presumably the elements are numpy arrays after the parquet round trip.)
# np.asarray(...) accepts both arrays and lists, so re-running this cell no
# longer fails with "'list' object has no attribute 'tolist'".
test_data["genres_ids_sequence"] = test_data["genres_ids_sequence"].apply(
    lambda x: [np.asarray(array).tolist() for array in x]
)

In [17]:
# Wrap the test frame in the project's dataset class, which is imported from
# the downloaded artifacts.
test_dataset = RatingDataset(data=test_data)

In [18]:
# Batched, shuffled iterator over the test dataset, using the batch size
# from the loaded training configuration.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=config.batch_size,
    shuffle=True,
)

In [19]:
# Smoke test: run the first batch through the loaded model without tracking
# gradients, then stop. `inputs`, `probs` and `ratings` stay available.
with torch.no_grad():
    for inputs in test_loader:
        probs = recommender(inputs)
        # Targets reshaped to a single column.
        ratings = inputs["target_rating"].view(-1, 1)
        break

## Prepare Test Data 

In [20]:
# Feature names contained in one batch. The inference frame assembled below
# uses the same names for its columns (all except target_rating).
inputs.keys()

dict_keys(['movie_sequence', 'genres_ids_sequence', 'sex', 'age_group_index', 'target_movie', 'target_rating'])

In [21]:
# Empty frame that the following cells fill column by column, one row per
# candidate target movie.
df_input = pd.DataFrame()

In [22]:
# Lookup tables saved with the training run.
# NOTE(review): the .pkl extension suggests utils.open_object unpickles these
# files — only load them from a trusted artifact store.

# Raw movie id -> remapped index used by the model; also contains an "UNK" key.
movie_id_map_dict = utils.open_object(
    "./mlflow_artifacts/artifacts/movie_id_map_dict.pkl")
# Remapped movie index -> list of genre ids.
movies_to_genres_dict = utils.open_object(
    "./mlflow_artifacts/artifacts/movies_to_genres_dict.pkl")
# Genre lookup; only its "UNK" entry is used in this notebook.
genres_map_dict = utils.open_object(
    "./mlflow_artifacts/artifacts/genres_map_dict.pkl")
# Age group -> index fed to the model.
age_group_id_map_dict = utils.open_object(
    "mlflow_artifacts/artifacts/age_group_id_map_dict.pkl")

### List Target Movies 

In [23]:
# Candidates to score: every remapped movie index except the one stored
# under "UNK".
unk_movie_index = movie_id_map_dict["UNK"]
target_movies = list(movie_id_map_dict.values())
target_movies.remove(unk_movie_index)
df_input["target_movie"] = target_movies

### User Movie View Sequence

In [24]:
sequence_length = config_dict['sequence_length']
# History length for the simulated user. np.random.randint excludes the upper
# bound, so this draws from 1 .. sequence_length - 2. With the target movie
# appended later, at least one padding slot is left over.
# NOTE(review): confirm that always leaving one padded slot is intended.
random_length = np.random.randint(1, sequence_length-1)
# Sample the history from real movies only. "UNK" is the padding key and must
# not appear as a watched movie.
candidate_movie_ids = [id_ for id_ in movie_id_map_dict if id_ != "UNK"]
movie_ids = random.sample(candidate_movie_ids, k=random_length)

In [25]:
# Translate the sampled raw movie ids into the remapped indices.
movie_sequence_ids = [movie_id_map_dict[raw_id] for raw_id in movie_ids]

In [26]:
# Every candidate row starts from the same viewing history. Each row gets
# its own list object so later per-row edits do not leak between rows.
df_input['movie_sequence'] = [
    list(movie_sequence_ids) for _ in df_input.index
]

In [27]:
# Preview: the same sampled history is repeated for every candidate movie.
df_input.head()

Unnamed: 0,target_movie,movie_sequence
0,1,"[937, 1, 2136, 1371]"
1,2,"[937, 1, 2136, 1371]"
2,3,"[937, 1, 2136, 1371]"
3,4,"[937, 1, 2136, 1371]"
4,5,"[937, 1, 2136, 1371]"


In [28]:
# Put the candidate movie at the end of each row's history.
# The column is rebuilt from `movie_sequence_ids` instead of appending to the
# stored lists through a side-effecting apply, so re-running this cell cannot
# append the target a second time.
df_input["movie_sequence"] = [
    movie_sequence_ids + [target_id]
    for target_id in df_input["target_movie"]
]

In [29]:
# Right-pad every sequence with `sequence_length` copies of the "UNK" index.
# This over-pads on purpose; the next cell trims back to the final length.
unk_padding = sequence_length * [movie_id_map_dict["UNK"]]
df_input["movie_sequence"] = df_input["movie_sequence"].apply(
    lambda sequence: sequence + unk_padding
)

In [30]:
# Trim every padded sequence to exactly `sequence_length` entries.
padded_sequences = df_input["movie_sequence"]
df_input["movie_sequence"] = padded_sequences.apply(
    lambda sequence: sequence[:sequence_length]
)

In [31]:
# Preview: history, then the candidate movie, then "UNK" padding.
df_input.head()

Unnamed: 0,target_movie,movie_sequence
0,1,"[937, 1, 2136, 1371, 1, 0]"
1,2,"[937, 1, 2136, 1371, 2, 0]"
2,3,"[937, 1, 2136, 1371, 3, 0]"
3,4,"[937, 1, 2136, 1371, 4, 0]"
4,5,"[937, 1, 2136, 1371, 5, 0]"


### Movie Genres

In [32]:
# Give the "UNK" movie an all-"UNK" genre list with the same length as the
# entry stored under key 1, so padded positions can be looked up as well.
genres_per_movie = len(movies_to_genres_dict[1])
unk_genres = [genres_map_dict['UNK']] * genres_per_movie
movies_to_genres_dict[movie_id_map_dict['UNK']] = unk_genres

# Genre ids for every position of every movie sequence.
df_input['genres_ids_sequence'] = df_input['movie_sequence'].apply(
    lambda sequence: [movies_to_genres_dict[movie] for movie in sequence])

### Sex

In [33]:
# A single value is drawn and broadcast to every row (one simulated user).
# NOTE(review): random.randint includes both endpoints, so this yields 0, 1
# or 2 — confirm all three are valid sex indices for the model (0 may be an
# "unknown" slot).
df_input['sex'] = random.randint(0, 2)

### Age Group

In [34]:
# Draw one age-group key at random for the simulated user.
(age_group,) = random.sample(list(age_group_id_map_dict.keys()), k=1)

In [35]:
# Index that the age-group map assigns to the sampled age group.
age_group_index = age_group_id_map_dict[age_group]

In [36]:
# Display the sampled age-group index.
age_group_index

4

In [37]:
# The same age-group index is broadcast to every candidate row.
df_input['age_group_index'] = age_group_index

## Prepare Dataset

In [38]:
# Wrap the assembled candidate frame in the same dataset class as the test data.
# NOTE(review): df_input has no target_rating column, although test batches
# expose one — confirm RatingDataset handles its absence.
inference_dataset = RatingDataset(data=df_input)

In [39]:
# No shuffling: predictions are written back to df_input by position, so the
# batch order has to follow the row order.
inference_loader = DataLoader(
    dataset=inference_dataset,
    batch_size=config.batch_size,
    shuffle=False,
)

## Inference

In [40]:
# Score every candidate batch without tracking gradients and collect the
# outputs as numpy arrays, in loader order.
probs_list = []
with torch.no_grad():
    for inputs in inference_loader:
        probs = recommender(inputs)
        probs_list.append(probs.cpu().numpy())

In [41]:
# Stack the per-batch outputs and keep the first column as each row's score.
all_probs = np.concatenate(probs_list, axis=0)
df_input['rating'] = all_probs[:, 0]

In [42]:
# Rank the candidates, highest predicted score first.
df_input = df_input.sort_values("rating", ascending=False)

In [43]:
# Inverse lookup: remapped index -> original key of movie_id_map_dict.
reverse_movie_id_map_dict = {
    model_index: original_id
    for original_id, model_index in movie_id_map_dict.items()
}

In [44]:
# Translate each candidate's remapped index back to its original movie id,
# which is the key used to join with the movie metadata below.
df_input["movie_id"] = df_input["target_movie"].map(reverse_movie_id_map_dict)

In [45]:
# Ranked candidates; the display is truncated to the top and bottom rows.
df_input

Unnamed: 0,target_movie,movie_sequence,genres_ids_sequence,sex,age_group_index,rating,movie_id
908,909,"[937, 1, 2136, 1371, 909, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,920
911,912,"[937, 1, 2136, 1371, 912, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,923
3020,3021,"[937, 1, 2136, 1371, 3021, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,3089
315,316,"[937, 1, 2136, 1371, 316, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,318
1205,1206,"[937, 1, 2136, 1371, 1206, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,1223
...,...,...,...,...,...,...,...
3870,3871,"[937, 1, 2136, 1371, 3871, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,2.140236e-18,3940
3868,3869,"[937, 1, 2136, 1371, 3869, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.917609e-19,3938
1909,1910,"[937, 1, 2136, 1371, 1910, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,2.821471e-22,1978
3869,3870,"[937, 1, 2136, 1371, 3870, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,5.821185e-25,3939


In [46]:
# Movie metadata. The file has no header row and uses "::" as the field
# separator, which requires the Python parsing engine.
movie_columns = ["movie_id", "title", "genres"]
movies = pd.read_csv(
    "ml-1m/movies.dat",
    sep="::",
    engine="python",
    names=movie_columns,
    encoding="ISO-8859-1",
)

In [47]:
# Attach title and genres to the ranked candidates. "movie_id" is the only
# column the two frames share, so naming it gives the same inner join.
df_input.merge(movies, on="movie_id", how="inner")

Unnamed: 0,target_movie,movie_sequence,genres_ids_sequence,sex,age_group_index,rating,movie_id,title,genres
0,909,"[937, 1, 2136, 1371, 909, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,920,Gone with the Wind (1939),Drama|Romance|War
1,912,"[937, 1, 2136, 1371, 912, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,923,Citizen Kane (1941),Drama
2,3021,"[937, 1, 2136, 1371, 3021, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,3089,"Bicycle Thief, The (Ladri di biciclette) (1948)",Drama
3,316,"[937, 1, 2136, 1371, 316, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,318,"Shawshank Redemption, The (1994)",Drama
4,1206,"[937, 1, 2136, 1371, 1206, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.000000e+00,1223,"Grand Day Out, A (1992)",Animation|Comedy
...,...,...,...,...,...,...,...,...,...
3878,3871,"[937, 1, 2136, 1371, 3871, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,2.140236e-18,3940,"Slumber Party Massacre III, The (1990)",Horror
3879,3869,"[937, 1, 2136, 1371, 3869, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,1.917609e-19,3938,"Slumber Party Massacre, The (1982)",Horror
3880,1910,"[937, 1, 2136, 1371, 1910, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,2.821471e-22,1978,Friday the 13th Part V: A New Beginning (1985),Horror
3881,3870,"[937, 1, 2136, 1371, 3870, 0]","[[8, 0, 0, 0], [3, 4, 5, 0], [16, 0, 0, 0], [1...",2,4,5.821185e-25,3939,"Slumber Party Massacre II, The (1987)",Horror
