# Odporúčanie

In [1]:
# Enable IPython's autoreload extension in "all" mode so that edits to the
# imported local modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload all

In [2]:
# dependencies
import pandas as pd
import wandb

In [3]:
#local modules
import sys
sys.path.append("..")

from _helpers import constants
from _helpers.verify_submission.verify_subm import main as verify_subm
from _helpers.score_submission.score_subm import main as score_subm

from models.model_random import ModelRandom
from models.model_nochange import ModelNoChange
from models.model_popular import ModelPopular
from models.model_log_reg import ModelLogisticRegression
from models.model_cheapest import ModelCheapest
from models.model_xgboost import ModelXGBoost
from models.model_mlp import ModelMLP
from models.model_neural import ModelNeural

ModuleNotFoundError: No module named 'mdoels'

Initialize wandb – the experiment logging tool

In [None]:
# Log in to Weights & Biases; a run is created later in this notebook via wandb.init().
wandb.login()

Load the correct data

In [None]:
# Pick the parquet files to read: the full "dropped" train/test files by default,
# or the files of the configured subset when constants.SUBSET is set.
if constants.SUBSET is None:
    train_file = constants.DROPPED_TRAIN
    test_file = constants.DROPPED_TEST
else:
    train_file = constants.DROPPED_SUBSET(constants.SUBSET, 'train')
    test_file = constants.DROPPED_SUBSET(constants.SUBSET, 'test')

# Load both splits into DataFrames used by the cells below.
df_train = pd.read_parquet(train_file)
df_test = pd.read_parquet(test_file)

In [None]:
# Registry of available recommenders, keyed by the name used in params['model'].
# Every model is instantiated eagerly, in the order listed here.
_model_classes = {
    'random': ModelRandom,
    'nochange': ModelNoChange,
    'popular': ModelPopular,
    'log-reg': ModelLogisticRegression,
    'cheapest': ModelCheapest,
    'xgboost': ModelXGBoost,
    'mlp': ModelMLP,
    'neural': ModelNeural,
}
models = {key: model_cls() for key, model_cls in _model_classes.items()}

Choose the model and set up its parameters

In [13]:
# Experiment configuration - tweak these values between runs.
run = 2                    # run number, becomes part of the W&B run name
notes = 'With gridsearch'  # free-text note attached to the W&B run
params = dict(
    model='neural',        # key into the `models` registry
    subset=constants.SUBSET,
    features=[
        "impressed_item_position",
        # "impressed_item_rating",
        # "user_impressed_item_interaction_count",
        "price",
        "price_above_impression_mean",
        "is_last_interacted",
    ],
)

# Start a W&B run named after the chosen model and run number, and record the
# parameters in its config.
run_name = f'model_{params["model"]}_run_{run}'
wandb_run = wandb.init(
    entity='mcfreddie777',
    project="dp-recsys",
    name=run_name,
    notes=notes,
)
wandb_run.config.update(params)

# Look up the selected model, hand it the parameters and fit it on the training data.
model = models[params['model']]
model.update(params)
model.fit(df_train)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Predikujeme odporúčania

In [14]:
# Generate recommendations for the test set with the model fitted above.
df_recommendations = model.predict(df_test)



Verifikujeme predikcie

In [15]:
# Run the submission verifier on the predictions against the test set; its output
# reports checks for required columns, duplicate sessions and session coverage.
verify_subm(df_subm=df_recommendations, df_test=df_test)

Checking for required columns in the submission file...
> check passed
Checking for duplicate sessions in the submission file...
> check passed
Checking that all the required sessions are present in submission...
> check passed
All checks passed


In [16]:
# Persist the verified recommendations as CSV. The file name is derived from the
# selected model key and the run number, so each model/run pair gets its own file
# (the previous name always contained a hard-coded "popular" regardless of the model).
submission_path = constants.OUTPUT_DIR / f'submission_{params["model"]}_{run}.csv'
df_recommendations.to_csv(submission_path, index=False)

Vypočítame si MRR na dátach

In [17]:
# Load the ground truth the recommendations are scored against.
df_gt = pd.read_csv(constants.GROUND_TRUTH)
# NOTE(review): df_test is re-read here but not used in the visible cells - confirm
# whether this reload is still needed.
df_test = pd.read_parquet(test_file)

# Score the submission and log both metrics to the active W&B run.
mrr, map3 = score_subm(df_subm=df_recommendations, df_gt=df_gt)
metrics = {"mrr": mrr, "map3": map3}
wandb_run.log(metrics)

Mean reciprocal rank:      0.161
Mean average precision @3: 0.0289


In [18]:
# Mark the W&B run started earlier in this notebook as finished.
wandb_run.finish()


0,1
map3,▁
mrr,▁

0,1
map3,0.0289
mrr,0.161
