In [None]:
import polars as pl
import glob
import os
import json
from model import UserBasedCF, ItemBasedCF, MatrixFactorizationCF
from sklearn.model_selection import train_test_split

In [None]:
# Filesystem locations used by the rest of the notebook
# (all relative to the working directory the kernel was started in).

# -- outputs --
model_dir = 'data/model'  # trained models are saved under this directory

# -- inputs --
selection_file = 'data/selection.txt'  # Path to the selection file (if any)
json_path = 'data/game_details/SteamMostPlayed_game_details.json'
parquet_dir = 'data/parquets'

In [None]:
# Load data
# NOTE(review): `load_parquets` and `load_json` are neither defined nor imported
# in the cells above (the import cell only pulls the three CF classes from
# `model`), so this cell raises NameError on a fresh kernel unless they come
# from somewhere not shown here. `glob`, `os` and `json` are imported but unused,
# which suggests the helpers lived in a cell that was removed -- TODO confirm and
# restore/import them.
# Both results are treated as polars DataFrames by the next cell
# (`reviews.join(...)`, polars expressions on `game_details`).
reviews = load_parquets(parquet_dir, selection_file)
game_details = load_json(json_path)

In [None]:
# Integrate game details with reviews.
# `appid` is cast to string on the game-details side so both join keys have the
# same dtype (presumably `reviews.appid` is already a string column -- confirm).
# `with_columns` replaces the deprecated/removed singular `DataFrame.with_column`.
game_details = game_details.with_columns(pl.col('appid').cast(pl.Utf8))

# Inner join: reviews whose appid has no matching game-details row are dropped.
# NOTE: this rebinds `reviews`, so re-running this cell without re-running the
# load cell joins the already-joined frame again.
reviews = reviews.join(game_details, on='appid', how='inner')

# Split the data into train and test sets (80/20, fixed seed for reproducibility).
# scikit-learn operates on the pandas conversion of the joined frame.
train_data, test_data = train_test_split(reviews.to_pandas(), test_size=0.2, random_state=42)

In [None]:
# Train the models on the training data
# User-based model: fit on the training split, then save under model_dir.
# Make sure the output directory exists first; nothing earlier in the notebook
# creates it, and save() is handed a path inside it.
os.makedirs(model_dir, exist_ok=True)

user_based_model = UserBasedCF()
user_based_model.fit(train_data)
user_based_model.save(f'{model_dir}/user_based_cf_model.pkl')

In [None]:
# Item-based model: fit on the training split, then save under model_dir.
# Make sure the output directory exists first so this cell also works when run
# on its own against a fresh checkout; save() is handed a path inside it.
os.makedirs(model_dir, exist_ok=True)

item_based_model = ItemBasedCF()
item_based_model.fit(train_data)
item_based_model.save(f'{model_dir}/item_based_cf_model.pkl')

In [None]:
# Matrix-factorization model: fit on the training split, then save under model_dir.
# Make sure the output directory exists first so this cell also works when run
# on its own against a fresh checkout; save() is handed a path inside it.
os.makedirs(model_dir, exist_ok=True)

matrix_factorization_model = MatrixFactorizationCF()
matrix_factorization_model.fit(train_data)
matrix_factorization_model.save(f'{model_dir}/matrix_factorization_cf_model.pkl')