# Odporúčanie

In [1]:
# Load IPython's autoreload extension and reload all imported modules before
# each cell is executed, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload all

In [2]:
# dependencies
import pandas as pd
import wandb

In [3]:
# Local modules.
# Make the parent directory importable; presumably some of the project
# packages imported below live there -- TODO confirm which ones.
import sys
sys.path.append("..")

from _helpers import constants
from _helpers.verify_submission.verify_subm import main as verify_subm
from _helpers.score_submission.score_subm import main as score_subm

from drop import main as drop
from subset import main as subset

from models.model_random import ModelRandom
from models.model_nochange import ModelNoChange
from models.model_popular import ModelPopular
from models.model_log_reg import ModelLogisticRegression
from models.model_cheapest import ModelCheapest

Initialize wandb - the logging tool

In [4]:
# Authenticate with Weights & Biases before any run is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mmcfreddie777[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

Load the correct data

In [5]:
# Resolve the input files. The full "dropped" dataset is the default; when a
# subset is configured its files take precedence.
train_file = constants.DROPPED_TRAIN
test_file = constants.DROPPED_TEST
ground_truth_file = constants.DROPPED_GROUND_TRUTH

use_subset = constants.SUBSET is not None
if use_subset:
    train_file = constants.DROPPED_SUBSET(constants.SUBSET, 'train')
    test_file = constants.DROPPED_SUBSET(constants.SUBSET, 'test')
    # NOTE(review): the ground-truth path is built with 'test', making it the
    # same file as `test_file` -- looks like a copy-paste slip; confirm which
    # argument DROPPED_SUBSET expects for the ground truth.
    ground_truth_file = constants.DROPPED_SUBSET(constants.SUBSET, 'test')

# If any required file is missing, run the matching preparation step
# (presumably it regenerates the files -- verify against `subset` / `drop`).
required_files = (train_file, test_file, ground_truth_file)
if not all(path.exists() for path in required_files):
    if use_subset:
        subset()
    else:
        drop()

df_train = pd.read_parquet(train_file)

In [6]:
# Registry of the available recommenders, keyed by the name selected through
# `params['model']`. Every model is instantiated up front, in listing order.
models = {
    name: model_cls()
    for name, model_cls in (
        ('random', ModelRandom),
        ('nochange', ModelNoChange),
        ('popular', ModelPopular),
        ('log-reg', ModelLogisticRegression),
        ('cheapest', ModelCheapest),
    )
}

Choose the model and set up its parameters

In [7]:
# Experiment settings -- adjust these between runs.
run = 9
notes = 'Cached preprocessing and features tracked'
params = dict(
    model='log-reg',
    subset=constants.SUBSET,
    features=[
        "impressed_item_position",
        # "impressed_item_rating",
        # "user_impressed_item_interaction_count",
        "price",
        "price_above_impression_mean",
        "is_last_interacted",
    ],
)

# Open a wandb run named after the model and run number, then record the
# parameters in the run's config.
run_name = f'model_{params["model"]}_run_{run}'
wandb_run = wandb.init(
    entity='mcfreddie777',
    project="dp-recsys",
    name=run_name,
    notes=notes,
)
wandb_run.config.update(params)

# Look up the selected model, hand it the parameters and fit it on the
# training data.
model = models[params['model']]
model.update(params)
model.fit(df_train)

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  3.50109D+03    |proj g|=  3.46890D+05

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5     44     55      1     0     0   1.372D-02   7.379D+02
  F =   737.93636866388385     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
 This problem is unconstrained.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


Predikujeme odporúčania

In [8]:
# Read the test split and let the fitted model produce predictions for it.
df_test = pd.read_parquet(test_file)
df_recommendations = model.predict(df_test)

Verifikujeme predikcie

In [9]:
# Validate the submission against the test set. Judging by the printed output,
# the checks cover required columns, duplicate sessions and the presence of
# all required sessions.
verify_subm(df_subm=df_recommendations, df_test=df_test)

Checking for required columns in the submission file...
> check passed
Checking for duplicate sessions in the submission file...
> check passed
Checking that all the required sessions are present in submission...
> check passed
All checks passed


In [10]:
# Persist the recommendations as a CSV named after the selected model and the
# run number. The name is derived from `params["model"]` alone; a hard-coded
# "popular" tag would mislabel submissions produced by any other model.
submission_dir = constants.OUTPUT_DIR
# Create the output directory if needed so `to_csv` does not fail on a fresh
# checkout.
submission_dir.mkdir(parents=True, exist_ok=True)
df_recommendations.to_csv(submission_dir / f'submission_{params["model"]}_{run}.csv', index=False)

Vypočítame si MRR na dátach

In [11]:
# Score the recommendations against the ground truth and log both metrics to
# the active wandb run. The test split is not needed here, so it is not
# re-read.
# NOTE(review): this reads `constants.GROUND_TRUTH` (CSV) rather than the
# `ground_truth_file` resolved during data loading -- confirm this is intended
# when a subset is configured.
df_gt = pd.read_csv(constants.GROUND_TRUTH)
mrr, map3 = score_subm(df_subm=df_recommendations, df_gt=df_gt)
wandb_run.log({"mrr": mrr, "map3": map3})

Mean reciprocal rank:      0.287
Mean average precision @3: 0.1033


In [12]:
# Mark the wandb run as finished; wandb prints the logged metric history and
# summary as the cell output.
wandb_run.finish()


0,1
map3,▁
mrr,▁

0,1
map3,0.1033
mrr,0.287
