In [9]:
import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
import os 
import psycopg
from implicit.als import AlternatingLeastSquares 
import scipy
import numpy as np
import mlflow

# load .env contstants
load_dotenv()

# gloabal vars upload
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "https://storage.yandexcloud.net" #endpoint –±–∞–∫–µ—Ç–∞ –æ—Ç YandexCloud
os.environ["AWS_ACCESS_KEY_ID"] = os.getenv("AWS_ACCESS_KEY_ID") # –ø–æ–ª—É—á–∞–µ–º id –∫–ª—é—á–∞ –±–∞–∫–µ—Ç–∞, –∫ –∫–æ—Ç–æ—Ä–æ–º—É –ø–æ–¥–∫–ª—é—á—ë–Ω MLFlow, –∏–∑ .env
os.environ["AWS_SECRET_ACCESS_KEY"] = os.getenv("AWS_SECRET_ACCESS_KEY") 

EXPERIMENT_NAME = "e_commerce"
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000
REGISTRY_MODEL_NAME = "ALS_1STEP"
pip_requirements = '../config/requirements.txt'

# —É—Å—Ç–∞–Ω–∞–≤–ª–∏–≤–∞–µ–º host, –∫–æ—Ç–æ—Ä—ã–π –±—É–¥–µ—Ç –æ—Ç—Å–ª–µ–∂–∏–≤–∞—Ç—å –Ω–∞—à–∏ —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã
mlflow.set_tracking_uri(f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}")
mlflow.set_registry_uri(f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}")

# fix random seed for experiemnts reproduction
SEED = 42 
np.random.seed(seed=SEED)

connection = {"sslmode": "require", "target_session_attrs": "read-write"}
postgres_credentials = {
    "host": os.getenv("DB_DESTINATION_HOST"),
    "port": os.getenv("DB_DESTINATION_PORT"),
    "dbname": os.getenv("DB_DESTINATION_NAME"),
    "user": os.getenv("DB_DESTINATION_USER"),
    "password": os.getenv("DB_DESTINATION_PASSWORD"),
}

# Create a connection string
connection_string = (
    f"postgresql://{postgres_credentials['user']}:{postgres_credentials['password']}"
    f"@{postgres_credentials['host']}:{postgres_credentials['port']}/{postgres_credentials['dbname']}"
)

# Create a SQLAlchemy engine
engine = create_engine(connection_string)


def load(TABLE_NAME):
    connection.update(postgres_credentials)
    with psycopg.connect(**connection) as conn:
        with conn.cursor() as cur:
            cur.execute(f"SELECT * FROM {TABLE_NAME}")
            data = cur.fetchall()
            columns = [col[0] for col in cur.description]
    df = pd.DataFrame(data, columns=columns)

    return df

  from .autonotebook import tqdm as notebook_tqdm


–û–±—É—á–µ–Ω–∏–µ –±—É–¥–µ—Ç –ø–æ –∫–∞—Ç–µ–≥–æ—Ä–∏—è–º —Ç–æ–≤–∞—Ä–æ–≤,—Ç–∞–∫ –∫–∞–∫ –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏—è —Å –∞—Ç–µ–º–∞–º–∏ —Ä–∞–∑—Ä—è–∂–µ–Ω—ã —Å–∏–ª—å (EDA: <20% users interact >=2 )

In [10]:
from sklearn.preprocessing import LabelEncoder

events = load("events")
categories = load("item_categories")
events = events.merge(categories,how="left",on="itemid")

category_encoder = LabelEncoder()
user_encoder = LabelEncoder()
events["timestamp"] = pd.to_datetime(events['timestamp'], unit='ms')

user_encoder.fit(events["visitorid"])
category_encoder.fit(events['categoryid'])

events.loc[:, 'user_id_enc'] = user_encoder.transform(events['visitorid'])
events.loc[:, 'categoryid_enc'] = category_encoder.fit_transform(events['categoryid'])

–û—Å—Ç–∞–≤–ª—è–µ–º –ø–æ—Å–ª–µ–Ω–∏–∏ 4 –Ω–µ–¥–µ–ª–∏ –Ω–∞ —Ç–µ—Å—Ç

In [21]:
train_test_global_time_split_date = pd.to_datetime("2015-09-18")

train_test_global_time_split_idx = events["timestamp"] < train_test_global_time_split_date
events_train = events[train_test_global_time_split_idx]
events_test = events[~train_test_global_time_split_idx]

add_to_cart = events_train[events_train["event"]=="addtocart"]
view_but_no_cart = events_train[(events_train["event"]!="addtocart") & (events_train["event"]=="view")]

–°–æ—Ö—Ä–∞–Ω–∏–º —Å–∞–º—ã–º–µ –ø–æ–ø—É–ª—è—Ä–Ω—ã–µ –∏–∑ –¥–æ–±–∞–≤–ª–µ–Ω–Ω—ã—Ö –≤ –∫–æ—Ä–∑–∏–Ω—É

In [22]:
top_100_pop = events_train[events_train["event"]=="addtocart"].groupby("categoryid_enc").agg("count").sort_values("event",ascending=False).reset_index()[["categoryid_enc","event"]].loc[:99]
top_100_pop["rank"] = range(1,101)
top_100_pop.to_parquet("../models/production/top_popular.parquet")

–ú–æ–¥–µ–ª—å –æ–±—É—á–µ–Ω–∏—è

In [4]:
class ALSRecommender:
    def __init__(self, config, view_but_no_cart, add_to_cart, user_encoder, category_encoder, events_train, events):
        """
        Parameters:
        - config: –ì–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –º–æ–¥–µ–ª–∏ (K, FACTORS, ITERATIONS, REGULARIZATION)
        - view_but_no_cart: –î–∞—Ç–∞—Ñ—Ä–µ–π–º –≥–¥–µ —é–∑–µ—Ä—ã —Å–º–æ—Ç—Ä–µ–ª–∏ –Ω–æ –Ω–µ –¥–æ–±–∞–≤–∏–ª–∏
        - add_to_cart: –î–∞—Ç–∞—Ñ—Ä—ç–π–º –≥–¥–µ —é–∑–µ—Ä—ã –¥–æ–±–∞–≤–∏–ª–∏ –≤ –∫–æ—Ä–∑–∏–Ω—É —Ç–æ–≤–∞—Ä—ã
        - user_encoder: –≠–Ω–∫–æ–¥–µ—Ä –¥–ª—è —é–∑–µ—Ä–æ–≤
        - category_encoder: –≠–Ω–∫–æ–¥–µ—Ä –∫–∞—Ç–µ–≥–æ—Ä–∏–π 
        - events_train: –û–±—É—á–∞–µ–º–∞—è –≤—ã–±–æ—Ä–∫–∞
        - events: –í—Å–µ –µ–≤–µ–Ω—Ç—ã —Ç–∞–º –µ—Å—Ç—å –≥–¥–µ –±—ã–ª–∏ –ø–æ–∫—É–ø–∫–∏
        """
        self.config = config
        self.view_but_no_cart = view_but_no_cart
        self.add_to_cart = add_to_cart
        self.user_encoder = user_encoder
        self.category_encoder = category_encoder
        self.events_train = events_train
        self.events = events
        self.als_model = None

    def build_interaction_matrix(self):
        """–°–æ–∑–¥–∞–Ω–∏–µ –º–∞—Ç—Ä–∏—Ü—ã –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–∏—è –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π —Å –∫–∞—Ç–µ–≥–æ—Ä–∏—è–º–∏ —Ç–æ–≤–∞—Ä–æ–≤."""
        # –ü—Ä–∏—Å–≤–æ–µ–Ω–∏–µ –≤–µ—Å–æ–≤: K –¥–æ–±–∞–≤–∏–ª –≤ –∫–æ—Ä–∑–∏–Ω—É, 1 —Å–º–æ—Ç—Ä–µ–ª —Ç–æ–ª—å–∫–æ, 0 –∏–Ω–∞—á–µ
        # –¢–æ–≤–∞—Ä—ã –∫–æ—Ç–æ—Ä—ã–µ –∫—É–ø–∏–ª–∏ –±—ã–ª–∏ –≤ –∫–æ—Ä–∑–∏–Ω–µ –∏ –∏–º –Ω–æ–ª—å –Ω–µ –±—É–¥–µ—Ç –ø—Ä–∏—Å—É—â–µ–Ω
        scores = list(np.ones(self.view_but_no_cart.shape[0])) + list(np.ones(self.add_to_cart.shape[0]) * self.config["K"])

        idx_users = list(self.view_but_no_cart["user_id_enc"].values.astype(int)) + list(self.add_to_cart["user_id_enc"].values.astype(int))
        idx_items = list(self.view_but_no_cart["categoryid_enc"].values.astype(int)) + list(self.add_to_cart["categoryid_enc"].values.astype(int))
        
        user_item_matrix_train = scipy.sparse.csr_matrix((scores, (idx_users, idx_items)), dtype=np.int8)
        
        return user_item_matrix_train

    def train_als_model(self):
        """–û–±—É—á–µ–Ω–∏–µ."""
        user_item_matrix_train = self.build_interaction_matrix()

        # Train ALS model
        self.als_model = AlternatingLeastSquares(
            factors=self.config["FACTORS"], 
            iterations=self.config["ITERATIONS"], 
            regularization=self.config["REGULARIZATION"], 
            random_state=0
        )
        self.als_model.fit(user_item_matrix_train)


    def sim_item_chunk(self, chunk_idx, max_similar_items=10):
        """–ù–∞—Ö–æ–¥–∏–º –ø–æ—Ö–æ–∂–∏–µ –∫–∞—Ç–µ–≥–æ—Ä–∏–π –¥–ª—è –ª–∏—Å—Ç–∞ –∫–∞—Ç–µ–≥–æ—Ä–∏–π."""
        similar_items = self.als_model.similar_items(chunk_idx, N=max_similar_items+1)
        
        # Convert to DataFrame
        sim_item_item_ids_enc = similar_items[0]
        sim_item_scores = similar_items[1]
        similar_items_df = pd.DataFrame({
            "item_id_enc": chunk_idx,
            "sim_item_id_enc": sim_item_item_ids_enc.tolist(), 
            "score": sim_item_scores.tolist()
        })
        
        similar_items_df = similar_items_df.explode(["sim_item_id_enc", "score"], ignore_index=True)

        return similar_items_df

    def get_similar_categories(self, chunk_size=10000, max_similar_items=10):
        """–Ω–∞—Ö–æ–¥–∏–º –æ–±—â–∏–µ –∞–π—Ç–µ–º—ã –¥–ª—è –≤—Å–µ—Ö –∫–∞—Ç–µ–≥–æ—Ä–∏–π."""
        unique_categories_train = self.events_train['categoryid_enc'].unique()
        num_rows = len(unique_categories_train)
        chunks = []
        
        for start in range(0, num_rows, chunk_size):
            end = min(start + chunk_size, num_rows)
            chunk_idx = unique_categories_train[start:end]
            chunk = self.sim_item_chunk(chunk_idx, max_similar_items=max_similar_items)
            chunks.append(chunk)
        
        # —Å–æ–±–∏—Ä–∞–µ–º –≤—Å–µ –≤–º–µ—Å—Ç–µ
        similar_categories = pd.concat(chunks, axis=0)
        
        return similar_categories

    def recommend_items(self, N=30):
        """Generate ALS recommendations for all users."""
        if self.als_model is None:
            raise ValueError("ALS model has not been trained yet. Call `train_als_model` first.")
        
        user_ids_encoded = range(self.events_train['user_id_enc'].max() + 1)
        user_item_matrix_train = self.build_interaction_matrix()
        
        # Generate ALS recommendations
        als_recommendations = self.als_model.recommend(
            user_ids_encoded, 
            user_item_matrix_train[user_ids_encoded], 
            filter_already_liked_items=False, N=N # –º–∞–ª–∞—è —Ö–∏—Ç—Ä–æ—Å—Ç—å —Å—Ç–∞–≤–∏–º False —á—Ç–æ–±—ã –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ –∫–æ—Ç–æ—Ä—ã–µ —Å–º–æ—Ç—Ä–µ–ª–∏
        )                                         # –∞–ª—Å –º–æ–¥–µ–ª—å –Ω–µ –≤—ã–∫–∏–Ω—É–ª–∞, –∞ —Ç–æ —á—Ç–æ –∫—É–ø–∏–ª–∏ –æ—Ç—Ñ–∏–ª—å—Ç—Ä—É–µ—Ç—Å—è –Ω–∏–∂–µ

        item_ids_enc = als_recommendations[0]
        als_scores = als_recommendations[1]

        # Format recommendations into a DataFrame
        als_recommendations_df = pd.DataFrame({
            "user_id_enc": user_ids_encoded,
            "categoryid_enc": item_ids_enc.tolist(), 
            "score": als_scores.tolist()
        })

        chunk_size = 10000
        num_rows = len(als_recommendations_df)
        chunks = []
        count=0
        for start in range(0, num_rows, chunk_size):
            count+=1
            end = min(start + chunk_size, num_rows)
            chunk = als_recommendations_df.iloc[start:end]
            exploded = chunk.explode(['categoryid_enc','score'],ignore_index=True)
            chunks.append(exploded)

        als_predictions = pd.concat(chunks,axis=0)

        return als_predictions

    def filter_already_bought(self, als_recommendations): # –Ω–∞ –±—É–¥—É—â–∏–µ –Ω—É–∂–Ω–æ –µ—â–µ —É–±—Ä–∞—Ç—å –∏–∑ –ø—Ä–µ–¥—Å–∫–∞–∑–∞–Ω–∏–π —Ç–æ–≤–∞—Ä—ã –∫–æ—Ç–æ—Ä—ã–µ –Ω–µ –¥–æ—Å—Ç—É–ø–Ω—ã (not available)
        """–£–±–∏—Ä–∞–µ–º –∏–∑ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–π –∫–∞—Ç–µ–≥–æ—Ä–∏–∏ –∫–æ—Ç–æ—Ä—ã–µ —é–∑–µ—Ä—ã —É–∂–µ –ø–æ–∫—É–ø–∞–ª–∏."""
        already_bought = self.events_train[self.events_train["event"] == "transaction"][["categoryid", "visitorid"]]

        # –∫–æ–¥–∏—Ä–æ–≤–∫–∞ —é–∑–µ—Ä–æ–≤ –∏ –∫–∞—Ç–µ–≥–æ—Ä–∏–π —Ç–æ–≤–∞—Ä–æ–≤ –∫–æ—Ç–æ—Ä—ã–µ —É–∂–µ –∫—É–ø–∏–ª–∏
        already_bought["user_id_enc"] = self.user_encoder.transform(already_bought["visitorid"])
        already_bought["categoryid_enc"] = self.category_encoder.transform(already_bought["categoryid"])

        already_bought = already_bought.drop(columns=["visitorid", "categoryid"])

        # Filter out already bought categories
        filtered_recommendations = als_recommendations.merge(already_bought, on=['user_id_enc', 'categoryid_enc'], how='left', indicator=True)
        filtered_recommendations = filtered_recommendations[filtered_recommendations['_merge'] == 'left_only'].drop(columns=['_merge'])
        
        # Rank the filtered recommendations by score
        filtered_recommendations["rank"] = filtered_recommendations.groupby("user_id_enc")["score"].rank(method="first", ascending=False)
        
        return filtered_recommendations

    def get_filtered_recommendations(self):
        """Train ALS, generate recommendations, and filter out already bought items."""
        self.train_als_model()
        als_recommendations = self.recommend_items()
        return self.filter_already_bought(als_recommendations)


–ú–µ—Ç—Ä–∏–∫–∏

In [5]:
def process_events_recs_for_binary_metrics(events_train, events_test, recs, top_k=None):
    "—Ñ—É–Ω–∫—Ü–∏—è —Å—á–∏—Ç–∞–µ—Ç –±–∏–Ω–∞—Ä–Ω—ã–µ —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏ –∫–æ—Ç–æ—Ä—ã–µ –Ω–∞—Ö–æ–¥—è—Ç —Å–ª—É—à–∞–ª –ª–∏ —á–µ–ª–æ–≤–µ–∫ —Ç—Ä–µ–∫ –∏–∑ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–π"

    "–ø–æ—Å—Ç–∞–≤–∏–º —Ñ–ª–∞–≥ —Ç—Ä—É –≤ —Ç–µ—Å—Ç–µ –¥–ª—è –≤—Å–µ—Ö –µ–≤–µ–Ω—Ç–æ–≤ –≤ —Ç–µ—Å—Ç–µ –∑–∞—Ñ–∏–∫—Å–∏—Ä—É—è —Ñ–∞–∫—Ç —á—Ç–æ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å –≤–∑–∞–∏–º–æ–¥–µ–π—Å—Ç–≤–æ–≤–∞–ª —Å —Ç—Ä–µ–∫–æ–º"
    events_test["gt"] = True
    "–≤—ã–±—Ä–µ–º –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –∫–æ—Ç–æ—Ä—ã–µ –ø—Ä–∏—Å—É—Ç—Å—Ç–≤—É—é—Ç –∏ –≤ –æ–±—É—á–µ–Ω–∏–∏ –∏ —Ç–µ—Å—Ç–µ"
    common_users = set(events_test["user_id_enc"]) & set(recs["user_id_enc"])
    
    "–≤–æ–∑—å–º–µ–º –∏–∑ —Ç–µ—Å—Ç–∞ –µ–≤–µ–Ω—Ç—ã –≥–¥–µ –ø—Ä–∏—Å—É—Ç—Å—Ç–≤—É—é—Ç –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–∏ –∏–∑ –æ–±—É—á–∞—é—â–µ–π –≤—ã–±–æ—Ä–∫–∏"
    events_for_common_users = events_test[events_test["user_id_enc"].isin(common_users)].copy()
    "–≤–æ–∑—å–º–µ–º —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏ –ø–æ–ª—É—á–µ–Ω–Ω—ã–µ –¥–ª—è –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –∫–æ—Ç–æ—Ä—ã–µ –ø—Ä–∏—Å—É—Ç—Å—Ç–≤—É—é—Ç –≤ —Ç–µ—Å—Ç–æ–≤–æ–π –≤—ã–±–æ—Ä–∫–µ"
    recs_for_common_users = recs[recs["user_id_enc"].isin(common_users)].copy()

    "–æ—Å—Ç–∞–≤–∏–º —ç–≤–µ–Ω—Ç—ã –∏–∑ —Ç–µ—Å—Ç–∞ –≥–¥–µ –µ—Å—Ç—å –ø–µ—Å–Ω–∏ –∏–∑ –æ–±—É—á–∞—é—â–µ–π –≤—ã–±–æ—Ä–∫–∏. –º–æ–¥–µ–ª—å –Ω–µ –º–æ–∂–µ—Ç –≤—ã–¥–∞—Ç—å —Ç—Ä–µ–∫ –∫–æ—Ç–æ—Ä—ã–π –Ω–µ –≤–∏–¥–µ–ª–∞ –≤ –æ–±—É—á–µ–Ω–∏–∏"
    events_for_common_users = events_for_common_users[events_for_common_users["categoryid_enc"].isin(events_train["categoryid_enc"].unique())]
    
    "–≤–æ–∑—å–º–µ–º –ª—É—á—à–∏–µ top_k —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–π –∏–∑ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–π"
    if top_k is not None:
        recs_for_common_users = recs_for_common_users[recs_for_common_users["rank"]<=top_k]

    events_recs_common = events_for_common_users[["user_id_enc", "categoryid_enc", "gt"]].merge(
        recs_for_common_users[["user_id_enc", "categoryid_enc", "rank"]], 
        on=["user_id_enc", "categoryid_enc"], how="outer") 
    
    events_recs_common["gt"] = events_recs_common["gt"].fillna(False)
    events_recs_common["pr"] = ~events_recs_common["rank"].isnull()

    "TP - —Å–∫–æ–ª—å–∫–æ –ø–µ—Å–µ–Ω –±—ã–ª–æ –æ–±—â–∏—Ö –≤ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏—è—Ö –∏ –ø–æ —Ñ–∞–∫—Ç—É –ø—Ä–æ—Å–ª—É—à–∫–∏"
    "FP - —Å–∫–æ–ª—å–∫–æ –ø–µ—Å–µ–Ω —Ä–µ–∫–æ–º–µ–Ω–¥–æ–≤–∞–ª–∏ –∫–æ—Ç–æ—Ä—ã—Ö —á–µ–ª–æ–≤–µ–∫ –Ω–µ —Å–ª—É—à–∞–ª"
    "FN - —Å–∫–æ–ª—å–∫–æ –ø–µ—Å–µ–Ω –±—ã–ª–æ –ø—Ä–æ—Å–ª—É—à–µ–Ω–Ω–æ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–º, –Ω–æ –∏—Ö –Ω–µ –±—ã–ª–æ –≤ —Ä–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏—è—Ö"
    
    events_recs_common["tp"] = events_recs_common["gt"] & events_recs_common["pr"]
    events_recs_common["fp"] = ~events_recs_common["gt"] & events_recs_common["pr"]
    events_recs_common["fn"] = events_recs_common["gt"] & ~events_recs_common["pr"]

    return events_recs_common



def compute_cls_metrics(events_recs_for_binary_metric):
    "–ø–æ–¥—Å—á–µ—Ç precision recall"
    
    groupper = events_recs_for_binary_metric.groupby("user_id_enc")

    # precision = tp / (tp + fp)
    precision = groupper["tp"].sum()/(groupper["tp"].sum()+groupper["fp"].sum())
    precision = precision.fillna(0).mean()
    
    # recall = tp / (tp + fn)
    recall = groupper["tp"].sum()/(groupper["tp"].sum()+groupper["fn"].sum())
    recall = recall.fillna(0).mean()

    return precision, recall

–≠–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç—ã

In [6]:
config1 = {"K": 5, "FACTORS": 50, "REGULARIZATION": 0.05, "ITERATIONS": 30}
config2 = {"K": 5, "FACTORS": 30, "REGULARIZATION": 0.1, "ITERATIONS": 20}
config3 = {"K": 3, "FACTORS": 70, "REGULARIZATION": 0.05, "ITERATIONS": 50}
config4 = {"K": 7, "FACTORS": 50, "REGULARIZATION": 0.01, "ITERATIONS": 50}
config5 =  {"K": 7, "FACTORS": 50, "REGULARIZATION": 0.01, "ITERATIONS": 70}

–õ–æ–≥–∏—Ä–æ–≤–∞–Ω–∏–µ —Ä–µ–∑—É–ª—å—Ç–∞—Ç–æ–≤

In [7]:
import mlflow
import joblib

RUN_NAME = 'ALS_5_STEP_MODEL'
config = config5

# Check if the experiment exists
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)

# If not, create a new one
if experiment is None:
    experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
else:
    experiment_id = experiment.experiment_id

als_recommender = ALSRecommender(
    config=config, 
    view_but_no_cart=view_but_no_cart, 
    add_to_cart=add_to_cart, 
    user_encoder=user_encoder, 
    category_encoder=category_encoder, 
    events_train=events_train, 
    events=events
)

als_recommender.train_als_model()

filtered_recommendations = als_recommender.get_filtered_recommendations()
similar_categories = als_recommender.get_similar_categories(chunk_size=1000, max_similar_items=10)

filtered_recommendations.to_parquet("../models/staging/offline_5.parquet")
similar_categories.to_parquet("../models/staging/online_5.parquet")

precision_at5, recall_at5 = compute_cls_metrics(
    process_events_recs_for_binary_metrics(events_train, events_test, filtered_recommendations, 5)
)

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    mlflow.log_params(config)

    model_path = "../models/als_model5.joblib"
    joblib.dump(als_recommender.als_model, model_path)
    mlflow.log_artifact(model_path, artifact_path="model")  
    mlflow.log_metric("precision_at5", precision_at5)
    mlflow.log_metric("recall_at5", recall_at5)

    mlflow.log_artifact("../models/staging/offline_5.parquet")
    mlflow.log_artifact("../models/staging/online_5.parquet")

  check_blas_config()
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [06:12<00:00,  5.33s/it]
100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 70/70 [06:05<00:00,  5.22s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  events_test["gt"] = True
  events_recs_common["gt"] = events_recs_common["gt"].fillna(False)
2024/10/26 16:58:04 INFO mlflow.tracking._tracking_service.client: üèÉ View run ALS_5_STEP_MODEL at: http://127.0.0.1:5000/#/experiments/15/runs/582f51904ca749a5b469029610b16030.
2024/10/26 16:58:04 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/15.


–†–µ–∑—É–ª—å—Ç–∞—Ç—ã

![Parameter Grid Result](/home/mle-user/mle_projects/mle-pr-final/mlflow_server/assets/param_grid_result.png)
