# Odporúčanie

In [19]:
# Enable IPython's autoreload extension in "all" mode so that edits to the
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload all

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# dependencies
import pandas as pd
import wandb

In [21]:
# local modules
# Put the parent directory on the import path; presumably that is where the
# `_helpers` and `models` packages imported below live — TODO confirm.
import sys
sys.path.append("..")

from _helpers import constants
from _helpers.verify_submission.verify_subm import main as verify_subm
from _helpers.score_submission.score_subm import main as score_subm

from models.model_random import ModelRandom
from models.model_nochange import ModelNoChange
from models.model_popular import ModelPopular
from models.model_log_reg import ModelLogisticRegression
from models.model_cheapest import ModelCheapest
from models.model_xgboost import ModelXGBoost
from models.model_mlp import ModelMLP

Initialize wandb, the experiment-logging tool

In [22]:
# Log in to Weights & Biases before any run is created.
wandb.login()

True

Load the train and test data (the subset files when `constants.SUBSET` is set)

In [23]:
# Default to the full train/test files; when a subset is configured,
# swap both paths for that subset's files.
train_file, test_file = constants.DROPPED_TRAIN, constants.DROPPED_TEST
if constants.SUBSET is not None:
    train_file, test_file = [
        constants.DROPPED_SUBSET(constants.SUBSET, split)
        for split in ('train', 'test')
    ]

# Both inputs are stored as parquet.
df_train = pd.read_parquet(train_file)
df_test = pd.read_parquet(test_file)

In [24]:
# Registry of model instances keyed by the name used in `params['model']`.
# One instance of every model is created here, in the order listed.
models = {
    name: model_cls()
    for name, model_cls in (
        ('random', ModelRandom),
        ('nochange', ModelNoChange),
        ('popular', ModelPopular),
        ('log-reg', ModelLogisticRegression),
        ('cheapest', ModelCheapest),
        ('xgboost', ModelXGBoost),
        ('mlp', ModelMLP),
    )
}

Choose the model and set up its parameters

In [37]:
# Experiment settings: edit these before each run.
run = 4
notes = 'RandomizedSearchCV'
params = dict(
    model='mlp',
    subset=constants.SUBSET,
    features=[
        "impressed_item_position",
        # "impressed_item_rating",
        # "user_impressed_item_interaction_count",
        "price",
        "price_above_impression_mean",
        "is_last_interacted",
    ],
)

# Open a W&B run named after the model and run number, then store the
# settings in the run's config.
wandb_run = wandb.init(
    entity='mcfreddie777',
    project="dp-recsys",
    name=f'model_{params["model"]}_run_{run}',
    notes=notes,
)
wandb_run.config.update(params)

# Take the selected model from the registry, hand it the settings and
# train it on the training frame.
model = models[params['model']]
model.update(params)
model.fit(df_train)

Predikujeme odporúčania

In [38]:
# Run the fitted model on the test set to obtain the recommendations frame.
df_recommendations = model.predict(df_test)

Verifikujeme predikcie

In [39]:
# Validate the recommendations against the test set; the helper prints the
# outcome of each check it performs.
verify_subm(df_subm=df_recommendations, df_test=df_test)

Checking for required columns in the submission file...
> check passed
Checking for duplicate sessions in the submission file...
> check passed
Checking that all the required sessions are present in submission...
> check passed
All checks passed


In [40]:
# Save the recommendations as CSV, naming the file after the selected model
# and the run number. The name used to contain a hard-coded "popular" token
# regardless of which model produced the predictions, which mislabeled the
# output of every other model.
df_recommendations.to_csv(constants.OUTPUT_DIR / f'submission_{params["model"]}_{run}.csv', index=False)

Vypočítame si MRR a MAP@3 na dátach

In [41]:
# Reload the test set from disk.
# NOTE(review): df_test is not used again in this cell — presumably this
# resets the frame in case prediction modified it; confirm or remove.
df_test = pd.read_parquet(test_file)

# Score the recommendations against the ground truth and log both metrics
# to the open W&B run.
df_gt = pd.read_csv(constants.GROUND_TRUTH)
mrr, map3 = score_subm(df_subm=df_recommendations, df_gt=df_gt)
wandb_run.log(dict(mrr=mrr, map3=map3))

Mean reciprocal rank:      0.2687
Mean average precision @3: 0.0955


In [42]:
# Mark the W&B run as finished now that the metrics have been logged.
wandb_run.finish()


0,1
map3,▁
mrr,▁

0,1
map3,0.0955
mrr,0.2687
