# Odporúčanie

In [1]:
# Enable the IPython autoreload extension so edits to the local helper/model modules
# are picked up without restarting the kernel.
%load_ext autoreload
# "all" mode: reload every imported module before each cell is executed.
%autoreload all

In [2]:
# dependencies
import pandas as pd
import wandb
from dotenv import load_dotenv

from _helpers import constants
from _helpers import functions as hf
from _helpers.verify_submission.verify_subm import main as verify_subm
from _helpers.score_submission.score_subm import main as score_subm
from _helpers.drop import drop

from models.model_random import ModelRandom
from models.model_nochange import ModelNoChange
from models.model_popular import ModelPopular
from models.model_log_reg import ModelLogisticRegression

Spin up the project: load environment variables from the `.env` file and log in to wandb

In [3]:
# override=True lets values from .env take precedence over variables already set in the environment.
load_dotenv(override=True) # Load env variables from .env file
# Authenticate with Weights & Biases; left as the last expression so the cell displays its result.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmcfreddie777[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

Load the training data: the preprocessed full dataset, or the subset selected by the `SUBSET` environment variable

In [4]:
# The optional SUBSET setting picks a pre-built subset of the training data;
# presumably it is None when the variable is not set, which selects the full dataset.
subset = hf.get_env('SUBSET',None)
if subset is None:
    target_file = constants.DROPPED
else:
    target_file = constants.DROPPED_SUBSET(subset)

if target_file.exists():
    # A cached parquet file is available - reuse it.
    df_train = pd.read_parquet(target_file)
elif subset is None:
    # No cache for the full dataset yet: build it from the raw CSV via `drop`
    # and store the result so later runs can skip this step.
    df_train = drop(pd.read_csv(constants.TRAIN))
    df_train.to_parquet(constants.DROPPED, index=False)
else:
    # Subset files are not generated in this notebook, so a missing one is an error.
    raise FileNotFoundError(constants.DROPPED_SUBSET(subset))

In [5]:
# Instantiate every available recommender once, keyed by the name that `params['model']` selects.
models = {
    name: model_cls()
    for name, model_cls in (
        ('random', ModelRandom),
        ('nochange', ModelNoChange),
        ('popular', ModelPopular),
        ('log-reg', ModelLogisticRegression),
    )
}

Choose the model and set up its parameters

In [6]:
# Tinker with the parameters
run = 2
notes = 'Subset 200, with user modelling'
params = {
    'model': 'popular',
    'subset': 200,
}

wandb_run = wandb.init(entity='mcfreddie777', project="dp-recsys", name=f'model_{params["model"]}_run_{run}', notes=notes)
wandb_run.config.update(params)

model = models[params['model']]
model.update(params)
model.fit(df_train)

Predikujeme odporúčania

In [7]:
# Read the test set and let the fitted model produce recommendations for it.
df_test = pd.read_csv(constants.TEST)
df_recommendations = model.predict(df_test)

Verifikujeme predikcie

In [8]:
# Validate the recommendations against the test set; the helper prints the outcome of each check.
verify_subm(df_subm=df_recommendations,df_test=df_test)

Checking for required columns in the submission file...
> check passed
Checking for duplicate sessions in the submission file...
> check passed
Checking that all the required sessions are present in submission...
> check passed
All checks passed


In [9]:
# Persist the recommendations as CSV (without the index column).
# The file name is derived only from the selected model and the run number; the previous
# literal "popular" prefix duplicated the model name and mislabelled files of other models.
df_recommendations.to_csv(constants.OUTPUT_DIR / f'submission_{params["model"]}_{run}.csv', index=False)

Vypočítame si MRR a MAP@3 na dátach

In [10]:
# Score the recommendations against the ground truth and send both metrics to the W&B run.
df_gt = pd.read_csv(constants.GROUND_TRUTH)
mrr, map3 = score_subm(
    df_subm=df_recommendations,
    df_gt=df_gt,
)
wandb_run.log(dict(mrr=mrr, map3=map3))

Mean reciprocal rank:      0.2242
Mean average precision @3: 0.0668


In [11]:
# Mark the W&B run as finished; the run history and summary are shown in the cell output.
wandb_run.finish()


0,1
map3,▁
mrr,▁

0,1
map3,0.0668
mrr,0.2242
