# Test for the Updated Online Pipeline
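This notebook re-implements, from scratch, the tests for the updated `online_pipeline.py`. Each pipeline step is called directly through its `__wrapped__` attribute, in order, and the output of one step is passed as input to the next.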

In [1]:
import random
import sys
import os
from pathlib import Path
# Root of the local recsys checkout. Defaults to the original hard-coded
# location; set the RECSYS_ROOT environment variable to run from elsewhere.
PROJECT_ROOT = Path(os.environ.get("RECSYS_ROOT", "D:\\Internship\\recsys"))

# Project directories that must be importable for the imports that follow.
for _subdir_parts in (
    ("data_pipelines",),
    ("data_pipelines", "dags"),
    ("movie_recommendation_system", "src"),
    ("db_handlers",),
    ("data",),
):
    _sys_path_entry = str(PROJECT_ROOT.joinpath(*_subdir_parts))
    # Skip entries that are already present so re-running this cell does not
    # keep growing sys.path with duplicates.
    if _sys_path_entry not in sys.path:
        sys.path.append(_sys_path_entry)

from db_handlers.user_postgres_sql_db_handler import store_new_user
from db_handlers.user_mongodb_nosql_db_handler import store_user_movie_rating
from movie_recommender.data.tabular_dataset_handler import TabularDatasetHandler
from online_pipeline import (
    load_last_user_ratings,
    load_user_online_gnn_model,
    online_train_gnn_model,
    deploy_model,
    pre_compute_user_movie_ratings,
    deploy_pre_computed_user_movie_ratings,
    cleanup_temp_files,
)

OSError while attempting to symlink the latest log directory


[LOG] Detected local environment, with Docker: [False]
[LOG] Detected local environment, with Docker: [False]
[INFO] Running locally
[LOG] Detected local environment, with Docker: [False]
[LOG] Detected local environment, with Docker: [False]


## Simulate new ratings generation for a test user

In [2]:
import uuid

# Identity of the test user. The ID is pinned to a fixed value; swap in
# str(uuid.uuid4()) to generate a fresh user instead.
# NOTE(review): the name and e-mail below look like real personal data;
# consider replacing them with placeholders before sharing the notebook.
test_user_id = "bdd62e39-8999-468b-be8a-c36277a93bdc"
user_name, user_surname, user_email = (
    "Christian Francesco",
    "Russo",
    "rus.chr.20499@gmail.com",
)

# Persist the user record in the PostgreSQL database.
store_new_user(test_user_id, user_name, user_surname, user_email)

[LOG] User bdd62e39-8999-468b-be8a-c36277a93bdc already has an associated model ID 1000. Skipping insertion.
[LOG] New user bdd62e39-8999-468b-be8a-c36277a93bdc stored successfully in 'user_register' table


In [3]:
# Restore the dataset handler from disk and fetch the movies DataFrame.
# NOTE(review): the handler is loaded from a .pkl file, presumably via pickle;
# only load files from a trusted source.
tdh_filepath = "D:\\Internship\\recsys\\data\\movielens_processed\\tdh_instance.pkl"
tdh = TabularDatasetHandler.load_class_instance(tdh_filepath)
movies_df = tdh.get_movies_df_deepcopy()
test_user_id = "bdd62e39-8999-468b-be8a-c36277a93bdc"

# Pick 5 random movies and store one synthetic rating (1.0-5.0, one decimal)
# for each of them on behalf of the test user.
# NOTE(review): neither the sample nor the ratings are seeded, so every run
# produces different movies and values.
sampled_movies = movies_df.sample(5)
for movie_title, movie_id in zip(sampled_movies['title'], sampled_movies['id']):
    rating_value = round(random.uniform(1.0, 5.0), 1)
    print(f"Movie '{movie_title}' => rating {rating_value}")
    store_user_movie_rating(
        user_id=test_user_id,
        movie_id=int(movie_id),
        rating=rating_value,
    )

Movie 'We Are from the Future 2' => rating 3.2
[LOG] New user rating stored successfully in 'user_movie_ratings' collection.
Movie 'Gangster Squad' => rating 1.1
[LOG] New user rating stored successfully in 'user_movie_ratings' collection.
Movie 'Mickey's House of Villains' => rating 3.3
[LOG] New user rating stored successfully in 'user_movie_ratings' collection.
Movie 'Wonderful Days' => rating 4.9
[LOG] New user rating stored successfully in 'user_movie_ratings' collection.
Movie 'Hacksaw Ridge' => rating 2.8
[LOG] New user rating stored successfully in 'user_movie_ratings' collection.


## Define Test Parameters

In [4]:
# Parameters shared by every pipeline-step test below: the user under test and
# how many of that user's latest ratings the pipeline should pick up.
test_user_id, test_num_new_ratings = "bdd62e39-8999-468b-be8a-c36277a93bdc", 5
print(f"Testing user {test_user_id} with {test_num_new_ratings} new ratings.")

Testing user bdd62e39-8999-468b-be8a-c36277a93bdc with 5 new ratings.


## Test: Load Last User Ratings

In [5]:
# Run the "load last user ratings" step directly via the undecorated callable.
last_user_ratings_filepath = load_last_user_ratings.__wrapped__(
    user_id=test_user_id,
    num_new_ratings=test_num_new_ratings
)
# Verify the step actually produced its output file, so a failure surfaces
# here instead of in a later cell that consumes the path.
assert Path(last_user_ratings_filepath).is_file(), (
    f"Ratings file was not created: {last_user_ratings_filepath}"
)
print("Loaded ratings for", test_user_id, "=>", last_user_ratings_filepath)

[LOG] Loading the last 5 user-movie ratings for user bdd62e39-8999-468b-be8a-c36277a93bdc
[LOG] Saved ratings for user bdd62e39-8999-468b-be8a-c36277a93bdc to D:\Internship\recsys\data\temp\online\user_bdd62e39-8999-468b-be8a-c36277a93bdc_ratings.csv
Loaded ratings for bdd62e39-8999-468b-be8a-c36277a93bdc => D:\Internship\recsys\data\temp\online\user_bdd62e39-8999-468b-be8a-c36277a93bdc_ratings.csv


## Test: Load Online GNN Model

In [6]:
# Run the "load online GNN model" step directly via the undecorated callable.
old_model_filepath = load_user_online_gnn_model.__wrapped__(
    user_id=test_user_id
)
# Verify the model file is really on disk before the training step uses it.
# NOTE(review): assumes the step always returns a local file path — confirm
# for users that have no previously stored model.
assert Path(old_model_filepath).is_file(), (
    f"Model file was not downloaded: {old_model_filepath}"
)
print("Loaded model for", test_user_id, "=>", old_model_filepath)

[LOG] Loading user bdd62e39-8999-468b-be8a-c36277a93bdc online GNN model
[LOG] Attempting to download model 'old_GNN_model_user_bdd62e39-8999-468b-be8a-c36277a93bdc' for user bdd62e39-8999-468b-be8a-c36277a93bdc to D:\Internship\recsys\data\temp\online
[LOG] Online user model for user 'bdd62e39-8999-468b-be8a-c36277a93bdc' downloaded as 'old_GNN_model_user_bdd62e39-8999-468b-be8a-c36277a93bdc.pth'.
[LOG] Successfully downloaded existing model for user bdd62e39-8999-468b-be8a-c36277a93bdc
Loaded model for bdd62e39-8999-468b-be8a-c36277a93bdc => D:\Internship\recsys\data\temp\online\old_GNN_model_user_bdd62e39-8999-468b-be8a-c36277a93bdc.pth


## Test: Online Train GNN Model

In [7]:
# Run the online training step on the ratings file and base model produced by
# the two previous cells.
new_model_filepath = online_train_gnn_model.__wrapped__(
    user_id=test_user_id,
    new_user_ratings_df_filepath=last_user_ratings_filepath,
    old_user_online_model_filepath=old_model_filepath
)
# Verify training wrote the updated model file; the deploy and pre-compute
# cells both read it.
assert Path(new_model_filepath).is_file(), (
    f"Updated model file was not created: {new_model_filepath}"
)
print("Trained new model for", test_user_id, "=>", new_model_filepath)

[LOG] Online training GNN model for user bdd62e39-8999-468b-be8a-c36277a93bdc, using ratings file: D:\Internship\recsys\data\temp\online\user_bdd62e39-8999-468b-be8a-c36277a93bdc_ratings.csv and base model file: D:\Internship\recsys\data\temp\online\old_GNN_model_user_bdd62e39-8999-468b-be8a-c36277a93bdc.pth
[LOG] Running script: D:\Internship\recsys\data_pipelines\scripts\online\train_online_user_gnn_script.py with args: ['--user_id', 'bdd62e39-8999-468b-be8a-c36277a93bdc', '--new_user_ratings_df_filepath', 'D:\\Internship\\recsys\\data\\temp\\online\\user_bdd62e39-8999-468b-be8a-c36277a93bdc_ratings.csv', '--old_user_online_model_filepath', 'D:\\Internship\\recsys\\data\\temp\\online\\old_GNN_model_user_bdd62e39-8999-468b-be8a-c36277a93bdc.pth', '--new_user_online_model_name', 'online_updated_GNN_user_bdd62e39-8999-468b-be8a-c36277a93bdc']
[INFO] Running CUDA script [train_online_user_gnn_script.py] as a subprocess
[SUCCESS] Script executed successfully as subprocess: D:\Internship\r

## Test: Deploy Model

In [8]:
# Upload the newly trained model for the test user by calling the undecorated
# deploy step directly.
_run_deploy_model = deploy_model.__wrapped__
_run_deploy_model(
    user_id=test_user_id,
    new_user_online_model_filepath=new_model_filepath,
)
print("Deployed new model for", test_user_id)

[LOG] Deploying the updated GNN model for user bdd62e39-8999-468b-be8a-c36277a93bdc from D:\Internship\recsys\data\temp\online\online_updated_GNN_user_bdd62e39-8999-468b-be8a-c36277a93bdc.pth
[LOG] Online user model for user 'bdd62e39-8999-468b-be8a-c36277a93bdc' uploaded.
[LOG] Successfully deployed model for user bdd62e39-8999-468b-be8a-c36277a93bdc
Deployed new model for bdd62e39-8999-468b-be8a-c36277a93bdc


## Test: Pre-compute Ratings

In [9]:
# Run the pre-compute step with the newly trained model.
precomputed_filepath = pre_compute_user_movie_ratings.__wrapped__(
    user_id=test_user_id,
    new_user_online_model_filepath=new_model_filepath
)
# Verify the predictions file exists before the next cell tries to deploy it.
assert Path(precomputed_filepath).is_file(), (
    f"Pre-computed ratings file was not created: {precomputed_filepath}"
)
print("Pre-computed ratings for", test_user_id, "=>", precomputed_filepath)

[LOG] Pre-computing user-movie ratings for user bdd62e39-8999-468b-be8a-c36277a93bdc using model D:\Internship\recsys\data\temp\online\online_updated_GNN_user_bdd62e39-8999-468b-be8a-c36277a93bdc.pth
[LOG] Running script: D:\Internship\recsys\data_pipelines\scripts\online\pre_compute_user_movie_ratings_script.py with args: ['--user_id', 'bdd62e39-8999-468b-be8a-c36277a93bdc', '--model_id', '1000', '--new_user_online_model_filepath', 'D:\\Internship\\recsys\\data\\temp\\online\\online_updated_GNN_user_bdd62e39-8999-468b-be8a-c36277a93bdc.pth', '--precomputed_ratings_filepath', 'D:\\Internship\\recsys\\data\\temp\\online\\precomputed_ratings_user_bdd62e39-8999-468b-be8a-c36277a93bdc.csv']
[INFO] Running CUDA script [pre_compute_user_movie_ratings_script.py] as a subprocess
[SUCCESS] Script executed successfully as subprocess: D:\Internship\recsys\data_pipelines\scripts\online\pre_compute_user_movie_ratings_script.py
[INFO] Running locally

[LOG] Finished pre-computing ratings for user bd

## Test: Deploy Pre-computed Ratings

In [10]:
# Store the pre-computed predictions for the test user by calling the
# undecorated deploy step directly.
_run_deploy_ratings = deploy_pre_computed_user_movie_ratings.__wrapped__
_run_deploy_ratings(
    user_id=test_user_id,
    precomputed_ratings_filepath=precomputed_filepath,
)
print("Deployed pre-computed ratings for", test_user_id)

[LOG] Deploying pre-computed ratings for user bdd62e39-8999-468b-be8a-c36277a93bdc from D:\Internship\recsys\data\temp\online\precomputed_ratings_user_bdd62e39-8999-468b-be8a-c36277a93bdc.csv
[LOG] Storing 45433 predictions for user bdd62e39-8999-468b-be8a-c36277a93bdc.
[LOG] Stored 45433 predictions successfully.
[LOG] Successfully deployed pre-computed ratings for user bdd62e39-8999-468b-be8a-c36277a93bdc
Deployed pre-computed ratings for bdd62e39-8999-468b-be8a-c36277a93bdc


## Test: Cleanup Temporary Files

In [12]:
# Call the cleanup step WITHOUT assigning its result back to
# `cleanup_temp_files`: rebinding that name replaced the imported task with the
# call's return value, so the cell could not be run a second time.
cleanup_temp_files.__wrapped__(
    new_user_ratings_df_filepath=last_user_ratings_filepath,
    old_user_online_model_filepath=old_model_filepath,
    new_user_online_model_filepath=new_model_filepath,
    precomputed_ratings_filepath=precomputed_filepath
)
# Verify every temporary artefact produced by the earlier cells is gone.
for _temp_filepath in (
    last_user_ratings_filepath,
    old_model_filepath,
    new_model_filepath,
    precomputed_filepath,
):
    assert not Path(_temp_filepath).exists(), (
        f"Temporary file was not removed: {_temp_filepath}"
    )
print("Cleaned up temporary files for", test_user_id)

[LOG] Cleaning up temporary files.
[LOG] Removed temporary ratings file: D:\Internship\recsys\data\temp\online\user_bdd62e39-8999-468b-be8a-c36277a93bdc_ratings.csv
[LOG] Removed temporary old model file: D:\Internship\recsys\data\temp\online\old_GNN_model_user_bdd62e39-8999-468b-be8a-c36277a93bdc.pth
[LOG] Removed temporary model file: D:\Internship\recsys\data\temp\online\online_updated_GNN_user_bdd62e39-8999-468b-be8a-c36277a93bdc.pth
[LOG] Removed temporary pre-computed ratings file: D:\Internship\recsys\data\temp\online\precomputed_ratings_user_bdd62e39-8999-468b-be8a-c36277a93bdc.csv
Cleaned up temporary files for bdd62e39-8999-468b-be8a-c36277a93bdc
