**Load libraries and config**

In [1]:
import sys
import os

# Make the parent directory importable so the `src` package imported below can be
# resolved — presumably the notebook lives one level below the project root (confirm).
sys.path.append(os.path.abspath('..'))

import numpy as np
import yaml
from src.data.data_loader import load_data, prepare_data, load_queries
from src.data.data_preparation import feature_selection

In [2]:
# Parse the project configuration file (YAML) with the safe loader.
with open('../config/config.yml', 'r') as file:
    config = yaml.safe_load(file)
# Drop the closed file handle so it does not linger in the notebook namespace.
del file

**Load visitorid features**

In [3]:
# Load the inputs listed under config['data_loader'] and pass them straight
# through the preparation step configured under config['data_preparation'].
data = prepare_data(
    data=load_data(data_paths=config['data_loader']),
    config=config['data_preparation'],
)

In [4]:
# Run the feature queries configured under config['features'] against the
# prepared data and pick out the visitor-level result.
data_features = load_queries(
    data=data,
    data_paths=config['features'],
)
features_visitor = data_features['visitor']

# Feature selection on the visitor-level table; the outcome is stored in the
# model section of the config.
config['model']['features'] = feature_selection(
    dataframe=features_visitor,
)

In [None]:
# Build one rating per (visitorid, itemid) pair from the event log:
# the highest rating the visitor ever gave the item, together with the
# timestamp of the most recent event that produced that rating.

# Step 1: Keep only events of visitors that have features, and only the needed columns.
# A single .loc call with a row mask and a column list replaces the chained
# `.loc[mask][[cols]]` indexing.
ratings = data['events'].loc[
    data['events']['visitorid'].isin(features_visitor.index),
    ['timestamp', 'visitorid', 'event', 'itemid'],
]

# Step 2: Map each event to an ordinal rating: view -> 1, addtocart -> 2, anything else -> 3.
# NOTE(review): the catch-all 3 is presumably meant for 'transaction'; any other
# (or missing) event value silently receives 3 as well — confirm the event vocabulary.
# .assign returns a new frame, so the slice taken in Step 1 is never mutated.
ratings = ratings.assign(
    rating=np.where(
        ratings['event'] == 'view', 1,
        np.where(ratings['event'] == 'addtocart', 2, 3)
    )
)

# Step 3: Reduce to one row per (visitorid, itemid).
# Within each pair the rows are ordered by rating (highest first) and then by
# timestamp (latest first), so the first row of each pair is the one to keep.
# This ordering also makes a separate "latest row per (visitorid, itemid, event)"
# de-duplication pass unnecessary: the row selected here is the same either way.
#
# drop_duplicates(keep='first') keeps that row intact. GroupBy.first() is NOT
# equivalent: it returns the first non-null value of each column independently,
# so a missing timestamp on the top-rated row would be replaced by the timestamp
# of a different (lower-rated) event.
ratings = (
    ratings
    # Rows with a missing key cannot form a (visitorid, itemid) pair; a groupby
    # on these keys would have dropped them too.
    .dropna(subset=['visitorid', 'itemid'])
    .sort_values(
        by=['visitorid', 'itemid', 'rating', 'timestamp'],
        ascending=[True, True, False, False],
    )
    .drop_duplicates(subset=['visitorid', 'itemid'], keep='first')
    .reset_index(drop=True)  # fresh 0..n-1 index, ordered by (visitorid, itemid)
)

# Final output
ratings = ratings[['visitorid', 'itemid', 'rating', 'timestamp']]

In [None]:
# Number of rated items per visitor, as a two-column frame (visitorid, size).
aux = (
    ratings
    .groupby(by='visitorid')
    .size()
    .reset_index(name='size')
)

# Shape of the subset of visitors that rated more than one item.
aux.loc[aux['size'] > 1].shape