# Odporúčanie

In [9]:
%load_ext autoreload
%autoreload all

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# dependencies
import pandas as pd
import wandb
from dotenv import load_dotenv

from _helpers import constants
from _helpers import functions as hf
from _helpers.verify_submission.verify_subm import main as verify_subm
from _helpers.score_submission.score_subm import main as score_subm
from _helpers.drop import drop

from models.model_random import ModelRandom
from models.model_nochange import ModelNoChange
from models.model_item_pop import ModelItemPopularity
from models.model_user_item_pop import ModelUserItemPopularity
from models.model_popular import ModelPopular
from models.model_log_reg import ModelLogisticRegression

Set up the project: load environment variables from the `.env` file and log in to Weights & Biases (wandb).

In [23]:
# Load variables from the .env file; override=True lets them replace values
# that are already set in the process environment.
load_dotenv(override=True)
# Log in to Weights & Biases before any run is created.
wandb.login()



True

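Load the training data: the cached, preprocessed full training set, or the pre-generated subset selected by the `SUBSET` setting.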

In [24]:
# Decide which cached training set to use: the full one, or the subset named
# by the SUBSET setting (looked up through hf.get_env, defaulting to None).
subset = hf.get_env('SUBSET', None)
if subset is None:
    target_file = constants.DROPPED
else:
    target_file = constants.DROPPED_SUBSET(subset)

if target_file.exists():
    # The cached parquet file is available - just load it.
    df_train = pd.read_parquet(target_file)
elif subset is None:
    # No cache for the full dataset yet: build it from the raw CSV and store it.
    df_train = pd.read_csv(constants.TRAIN)
    df_train = drop(df_train)
    df_train.to_parquet(target_file, index=False)
else:
    # Subset files are not generated in this notebook, so a missing one is an error.
    raise FileNotFoundError(constants.DROPPED_SUBSET(subset))

In [25]:
# Registry of the available recommenders; one instance of each is created up front,
# in the order listed, and stored under its lookup name.
models = {
    model_name: model_cls()
    for model_name, model_cls in (
        ('random', ModelRandom),
        ('nochange', ModelNoChange),
        ('item-popularity', ModelItemPopularity),
        ('user-item-popularity', ModelUserItemPopularity),
        ('popular', ModelPopular),
        ('log-reg', ModelLogisticRegression),
    )
}

Choose the model and set up its parameters

In [40]:
# Tinker with the parameters
run = 1
notes = 'Subset 200'
params = {
    'model': 'log-reg',
    'subset': 200,
}

wandb_run = wandb.init(entity='mcfreddie777', project="dp-recsys", name=f'model_{params["model"]}_run_{run}', notes=notes)
wandb_run.config.update(params)

model = models[params['model']]
model.update(params)
model.fit(df_train)

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =            5     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  6.23417D+03    |proj g|=  4.72559D+05

At iterate   50    f=  1.16870D+03    |proj g|=  1.03679D+02

           * * *

Tit   = total number of iterations
Tnf   = total number of function evaluations
Tnint = total number of segments explored during Cauchy searches
Skip  = number of BFGS updates skipped
Nact  = number of active bounds at final generalized Cauchy point
Projg = norm of the final projected gradient
F     = final function value

           * * *

   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F
    5     54     71      1     0     0   9.092D-02   1.169D+03
  F =   1168.6507938624907     

CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH             


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
 This problem is unconstrained.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


Predikujeme odporúčania

In [41]:
# Load the test set and let the model produce recommendations for it.
df_test = pd.read_csv(constants.TEST)
df_recommendations = model.predict(df_test)

Verifikujeme predikcie

In [42]:
# Run the submission checks on the recommendations, using the test set as reference.
verify_subm(df_test=df_test, df_subm=df_recommendations)

Checking for required columns in the submission file...
> check passed
Checking for duplicate sessions in the submission file...
> check passed
Checking that all the required sessions are present in submission...
> check passed
All checks passed


In [43]:
# Write the recommendations to a CSV file in the output directory (without the index).
# NOTE(review): the file name always contains "popular", whichever model is selected -
# possibly a leftover; confirm before renaming, since other tooling may expect this path.
df_recommendations.to_csv(
    constants.OUTPUT_DIR / f'submission_popular_{params["model"]}_{run}.csv',
    index=False,
)

Vypočítame si MRR a MAP@3 na dátach

In [44]:
# Score the recommendations against the ground truth and log both metrics to the wandb run.
df_gt = pd.read_csv(constants.GROUND_TRUTH)
mrr, map3 = score_subm(df_gt=df_gt, df_subm=df_recommendations)
wandb_run.log(dict(mrr=mrr, map3=map3))

Mean reciprocal rank:      0.2426
Mean average precision @3: 0.0807


In [45]:
# Mark the wandb run as finished.
wandb_run.finish()

0,1
map3,▁
mrr,▁

0,1
map3,0.0807
mrr,0.2426
