In [1]:
import os
import sys
import subprocess
from IPython.display import display, HTML
import pandas as pd

In [2]:
BASE_PATH = "external_repo"
RETRAIN_IF_EXISTS = False
RERUN_EVAL_IF_EXISTS = False

### Dataset Curation & Preprocessing

The repository already provides the preprocessed datasets with the exceptoin of portfolios and demographic attributes used to implement the insurance baseline models SVD and demographic, due to privacy protection.

In [3]:
purchase_events_train = pd.read_csv(os.path.join(BASE_PATH,"purchase_events_train.csv"))
purchase_events_test = pd.read_csv(os.path.join(BASE_PATH,"purchase_events_test.csv"))
sessions_train = pd.read_csv(os.path.join(BASE_PATH,"sessions_train.csv"))
sessions_test = pd.read_csv(os.path.join(BASE_PATH,"sessions_test.csv"))
filter_train = pd.read_csv(os.path.join(BASE_PATH,"filter_train.csv"))
filter_test = pd.read_csv(os.path.join(BASE_PATH,"filter_test.csv"))

In [4]:
print("============ PURCHASE EVENTS")
display(purchase_events_train.head(3))
print("\n============ SESSIONS")
display(sessions_train.head(3))
print("\n============ FILTER")
display(filter_train.head(3))



Unnamed: 0,event_id,valid,item_id
0,1,0,item_15
1,2,0,item_1
2,3,0,item_11





Unnamed: 0,event_id,valid,session_id,action_time,action_section,action_object,action_type
0,33257,0,67298,2020-01-20 18:44:18,personal_account,service_6,click
1,1001,0,1911,2018-10-22 02:34:32,personal_account,service_14,click
2,35229,0,73022,2020-02-14 10:16:45,information_2,item_13,click





Unnamed: 0,event_id,valid,item_id
0,1,0,item_1
1,1,0,item_3
2,1,0,item_6


### Model Training

In [5]:
def run_module(module: str):
    """Run a Python script in specified directory."""
    result = subprocess.run(
        [sys.executable, module],
        capture_output=True,
        text=True,
        cwd=BASE_PATH  # Set working directory for subprocess
    )
    if result.returncode != 0:
        print("Error output:", result.stderr)
    return result.stdout

In [6]:
# Random - No training required
# Popularity - No training required
# SVD - Not reproducable due to missing data
# Demographic - not reproducable due to missing data and missing file

In [7]:
# GRU4REC
if RETRAIN_IF_EXISTS or not os.path.exists(os.path.join(BASE_PATH, "model_GRU4REC.h5")):
    _ = run_module("GRU4REC.py")

In [8]:
# GRU4REC Concat
if RETRAIN_IF_EXISTS or not os.path.exists(os.path.join(BASE_PATH, "model_GRU4REC_concat.h5")):
    _ = run_module("GRU4REC_concat.py")

In [9]:
# SKNN_E - not reproducable due to missing file
# SKNN_EB - not reproducable due to missing file

In [10]:
# Cross Session Auto
if RETRAIN_IF_EXISTS or not os.path.exists(os.path.join(BASE_PATH, "model_auto.h5")):
    _ = run_module("cross_sessions_auto.py")

In [11]:
# Cross Session Concat
if RETRAIN_IF_EXISTS or not os.path.exists(os.path.join(BASE_PATH, "model_concat.h5")):
    _ = run_module("cross_sessions_concat.py")

In [12]:
# Cross Session Encode
if RETRAIN_IF_EXISTS or not os.path.exists(os.path.join(BASE_PATH, "model_encode.h5")):
    _ = run_module("cross_sessions_encode.py")

### Evaluation

In [28]:
def eval_exists(module: str):
    stat_sign_exists = os.path.exists(os.path.join(BASE_PATH, f"statistical_significans_{module}.csv"))
    var_thresholds_exists = os.path.exists(os.path.join(BASE_PATH, f"varying_thresholds_{module}.csv"))
    return stat_sign_exists and var_thresholds_exists

In [14]:
# Random
if RERUN_EVAL_IF_EXISTS or not eval_exists("random"):
    _ = run_module("random_evaluation.py")

In [15]:
# Popularity
if RERUN_EVAL_IF_EXISTS or not eval_exists("popular"):
    _ = run_module("popular_evaluation.py")

In [16]:
# SVD - not reproducable due to missing data
# Demographic - not reproducable due to missing data & file

In [17]:
# GRU4REC
if RERUN_EVAL_IF_EXISTS or not eval_exists("GRU4REC"):
    _ = run_module("GRU4REC_evaluation.py")

In [18]:
# GRU4REC
if RERUN_EVAL_IF_EXISTS or not eval_exists("GRU4REC_concat"):
    _ = run_module("GRU4REC_concat_evaluation.py")

In [19]:
# SKNN_E - not reproducable due to missing file
# SKNN_EB - not reproducable due to missing file

In [29]:
# Cross Session Auto
if RERUN_EVAL_IF_EXISTS or not eval_exists("auto"):
    _ = run_module("cross_sessions_auto.py")

In [30]:
# Cross Session Concat
if RERUN_EVAL_IF_EXISTS or not eval_exists("concat"):
    _ = run_module("cross_sessions_concat.py")

In [31]:
# Cross Session Encode
if RERUN_EVAL_IF_EXISTS or not eval_exists("encode"):
    _ = run_module("cross_sessions_encode.py")