In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import os
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from tqdm import tqdm, tqdm_notebook
from pathlib import Path
# pd.set_option('display.max_columns', 1000)
# pd.set_option('display.max_rows', 400)
sns.set()

os.chdir('..')

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from project.ranker.ranker import RankingPredictor

In [3]:
%%time
from sklearn.model_selection import train_test_split
rp = Pipeline([
    ('scale', StandardScaler()),
    ('estimator', RankingPredictor("ma_100", n_neighbors=15)),
])
df_mf, df_rank, df_scores = rp.named_steps['estimator'].get_data()

X, _, y, _, y_scores, _ = train_test_split(df_mf.values,
                                           df_rank.values,
                                           df_scores.values,
                                           test_size=0,
                                           random_state=42)
print(X.shape, y.shape, y_scores.shape)

(60, 39) (60, 13) (60, 13)
CPU times: user 45.6 s, sys: 4.49 s, total: 50.1 s
Wall time: 51.2 s


In [71]:
%%time
from sklearn.model_selection import train_test_split
rp = Pipeline([
    ('scale', StandardScaler()),
    ('estimator', RankingPredictor("ma_100_42", n_neighbors=15)),
])
df_mf, df_rank, df_scores = rp.named_steps['estimator'].get_data()

X, _, y, _, y_scores, _ = train_test_split(df_mf.values,
                                           df_rank.values,
                                           df_scores.values,
                                           test_size=0,
                                           random_state=42)
print(X.shape, y.shape, y_scores.shape)

(60, 39) (60, 12) (60, 12)
CPU times: user 41.4 s, sys: 2.84 s, total: 44.2 s
Wall time: 44.2 s


## ma_100_42

In [72]:
%%time
import lightgbm
from sklearn.model_selection import RepeatedKFold
from project.ranker.neural_ranker import cv_neuralnet

kfolds = RepeatedKFold(10, n_repeats=10, random_state=42)
params = {'latent_sz': 30, 'batch_sz': 40, 'epochs': 29, 
          'learning_rate': 0.0023670978180025}
results, models = cv_neuralnet(X, y, y_scores, kfolds, params, 
                               verbose_folds=True)

Fold   1 | Trn_Spearman:  0.7123 | Val_Spearman:  0.2552 | Trn_ACCLoss:  0.0381 | Val_ACCLoss:  0.0898
Fold   2 | Trn_Spearman:  0.6994 | Val_Spearman:  0.2028 | Trn_ACCLoss:  0.0273 | Val_ACCLoss:  0.2074
Fold   3 | Trn_Spearman:  0.7435 | Val_Spearman:  0.0524 | Trn_ACCLoss:  0.0363 | Val_ACCLoss:  0.0845
Fold   4 | Trn_Spearman:  0.7066 | Val_Spearman:  0.2308 | Trn_ACCLoss:  0.0405 | Val_ACCLoss:  0.0913
Fold   5 | Trn_Spearman:  0.7100 | Val_Spearman:  0.2494 | Trn_ACCLoss:  0.0435 | Val_ACCLoss:  0.0539
Fold   6 | Trn_Spearman:  0.7069 | Val_Spearman:  0.2261 | Trn_ACCLoss:  0.0394 | Val_ACCLoss:  0.1601
Fold   7 | Trn_Spearman:  0.7161 | Val_Spearman:  0.3217 | Trn_ACCLoss:  0.0398 | Val_ACCLoss:  0.2051
Fold   8 | Trn_Spearman:  0.7120 | Val_Spearman:  0.1946 | Trn_ACCLoss:  0.0405 | Val_ACCLoss:  0.0520
Fold   9 | Trn_Spearman:  0.7124 | Val_Spearman:  0.0676 | Trn_ACCLoss:  0.0372 | Val_ACCLoss:  0.0836
Fold  10 | Trn_Spearman:  0.7147 | Val_Spearman:  0.3298 | Trn_ACCLoss:  

In [73]:
%%time
import lightgbm
from project.ranker.lambdarank import cv_lgbm
from sklearn.model_selection import RepeatedKFold
kfolds = RepeatedKFold(10, n_repeats=10, random_state=42)
params = {'objective': 'lambdarank', 'metric': 'ndcg', 'ndcg_at': 13, 'max_depth': 50, 
          'num_leaves': 37, 'min_sum_hessian_in_leaf': 2.838955470674485, 'min_data_in_leaf': 4, 
          'bagging_fraction': 0.41761591951124705, 'bagging_freq': 4, 'feature_fraction': 0.6258773979549109, 
          'scale_pos_weight': 1.1049055615452428, 'learning_rate': 0.02887716145050293, 'lambda_l2': 29.235934351290478}
tuned_results, tuned_models = cv_lgbm(lightgbm, X, y, y_scores, kfolds, 
                  params, num_rounds=1000, early_stopping_rounds=50, 
                  verbose_eval=False)

Fold   1 | #Est:   7 | Trn_Spearman:  0.3544 | Val_Spearman:  0.0408 | Trn_ACCLoss:  0.0426 | Val_ACCLoss:  0.1018 | Trn_NDCG:  0.7319 | Val_NDCG:  0.6322
Fold   2 | #Est: 125 | Trn_Spearman:  0.5233 | Val_Spearman:  0.1410 | Trn_ACCLoss:  0.0011 | Val_ACCLoss:  0.1758 | Trn_NDCG:  0.9235 | Val_NDCG:  0.6071
Fold   3 | #Est:   2 | Trn_Spearman:  0.1412 | Val_Spearman:  0.2191 | Trn_ACCLoss:  0.0769 | Val_ACCLoss:  0.0549 | Trn_NDCG:  0.6135 | Val_NDCG:  0.5887
Fold   4 | #Est:  51 | Trn_Spearman:  0.4729 | Val_Spearman:  0.3147 | Trn_ACCLoss:  0.0022 | Val_ACCLoss:  0.0951 | Trn_NDCG:  0.8889 | Val_NDCG:  0.6824
Fold   5 | #Est:   5 | Trn_Spearman:  0.3244 | Val_Spearman:  0.0676 | Trn_ACCLoss:  0.0343 | Val_ACCLoss:  0.0435 | Trn_NDCG:  0.7435 | Val_NDCG:  0.6759
Fold   6 | #Est:   1 | Trn_Spearman:  0.0559 | Val_Spearman:  0.0711 | Trn_ACCLoss:  0.0935 | Val_ACCLoss:  0.0866 | Trn_NDCG:  0.6009 | Val_NDCG:  0.6238
Fold   7 | #Est:  12 | Trn_Spearman:  0.3503 | Val_Spearman:  0.2517 |

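## ma_100_rs_871

> **Note:** No cell in this part of the notebook loads a `ma_100_rs_871` dataset, and the cells in this section carry execution counts (67, 68, 62) lower than the `ma_100_42` loading cell (71). The outputs below presumably come from an earlier kernel state; load the intended dataset explicitly before re-running this section.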

In [67]:
%%time
import lightgbm
from sklearn.model_selection import RepeatedKFold
from project.ranker.neural_ranker import cv_neuralnet

kfolds = RepeatedKFold(10, n_repeats=10, random_state=42)
params = {'latent_sz': 30, 'batch_sz': 40, 'epochs': 29, 
          'learning_rate': 0.0023670978180025}
results, models = cv_neuralnet(X, y, y_scores, kfolds, params, 
                               verbose_folds=True)

Fold   1 | Trn_Spearman:  0.6983 | Val_Spearman:  0.3485 | Trn_ACCLoss:  0.0345 | Val_ACCLoss:  0.0550
Fold   2 | Trn_Spearman:  0.7099 | Val_Spearman:  0.0664 | Trn_ACCLoss:  0.0164 | Val_ACCLoss:  0.1989
Fold   3 | Trn_Spearman:  0.7433 | Val_Spearman: -0.0023 | Trn_ACCLoss:  0.0280 | Val_ACCLoss:  0.0538
Fold   4 | Trn_Spearman:  0.6977 | Val_Spearman:  0.2249 | Trn_ACCLoss:  0.0335 | Val_ACCLoss:  0.0962
Fold   5 | Trn_Spearman:  0.6966 | Val_Spearman:  0.3030 | Trn_ACCLoss:  0.0354 | Val_ACCLoss:  0.0451
Fold   6 | Trn_Spearman:  0.7058 | Val_Spearman:  0.3275 | Trn_ACCLoss:  0.0258 | Val_ACCLoss:  0.1345
Fold   7 | Trn_Spearman:  0.7133 | Val_Spearman:  0.2436 | Trn_ACCLoss:  0.0287 | Val_ACCLoss:  0.1367
Fold   8 | Trn_Spearman:  0.7068 | Val_Spearman:  0.2121 | Trn_ACCLoss:  0.0297 | Val_ACCLoss:  0.0452
Fold   9 | Trn_Spearman:  0.6971 | Val_Spearman:  0.1270 | Trn_ACCLoss:  0.0294 | Val_ACCLoss:  0.0886
Fold  10 | Trn_Spearman:  0.7116 | Val_Spearman:  0.2168 | Trn_ACCLoss:  

In [68]:
%%time
import lightgbm
from project.ranker.lambdarank import cv_lgbm
from sklearn.model_selection import RepeatedKFold
kfolds = RepeatedKFold(10, n_repeats=10, random_state=42)
params = {'objective': 'lambdarank', 'metric': 'ndcg', 'ndcg_at': 13, 'max_depth': 50, 
          'num_leaves': 37, 'min_sum_hessian_in_leaf': 2.838955470674485, 'min_data_in_leaf': 4, 
          'bagging_fraction': 0.41761591951124705, 'bagging_freq': 4, 'feature_fraction': 0.6258773979549109, 
          'scale_pos_weight': 1.1049055615452428, 'learning_rate': 0.02887716145050293, 'lambda_l2': 29.235934351290478}
tuned_results, tuned_models = cv_lgbm(lightgbm, X, y, y_scores, kfolds, 
                  params, num_rounds=1000, early_stopping_rounds=50, 
                  verbose_eval=False)

Fold   1 | #Est:   1 | Trn_Spearman: -0.0464 | Val_Spearman:  0.0629 | Trn_ACCLoss:  0.1062 | Val_ACCLoss:  0.0638 | Trn_NDCG:  0.5797 | Val_NDCG:  0.6406
Fold   2 | #Est:   8 | Trn_Spearman:  0.4075 | Val_Spearman:  0.0932 | Trn_ACCLoss:  0.0202 | Val_ACCLoss:  0.1784 | Trn_NDCG:  0.7826 | Val_NDCG:  0.5979
Fold   3 | #Est:   2 | Trn_Spearman:  0.1659 | Val_Spearman: -0.1422 | Trn_ACCLoss:  0.0688 | Val_ACCLoss:  0.0462 | Trn_NDCG:  0.6919 | Val_NDCG:  0.5683
Fold   4 | #Est:  48 | Trn_Spearman:  0.4742 | Val_Spearman:  0.1072 | Trn_ACCLoss:  0.0140 | Val_ACCLoss:  0.0650 | Trn_NDCG:  0.8780 | Val_NDCG:  0.6430
Fold   5 | #Est:   1 | Trn_Spearman: -0.0122 | Val_Spearman:  0.0070 | Trn_ACCLoss:  0.0933 | Val_ACCLoss:  0.0326 | Trn_NDCG:  0.6252 | Val_NDCG:  0.5781
Fold   6 | #Est:   1 | Trn_Spearman: -0.0111 | Val_Spearman: -0.1142 | Trn_ACCLoss:  0.1026 | Val_ACCLoss:  0.1162 | Trn_NDCG:  0.5846 | Val_NDCG:  0.6457
Fold   7 | #Est:  24 | Trn_Spearman:  0.4325 | Val_Spearman:  0.1632 |

In [62]:
%%time
import lightgbm
from sklearn.model_selection import RepeatedKFold
from project.ranker.neural_ranker import cv_neuralnet

kfolds = RepeatedKFold(10, n_repeats=1, random_state=42)
params = {'latent_sz': 30, 'batch_sz': 40, 'epochs': 29, 
          'learning_rate': 0.0023670978180025}
results, models = cv_neuralnet(X, y, y_scores, kfolds, params, 
                               verbose_folds=True)

Fold   1 | Trn_Spearman:  0.7081 | Val_Spearman:  0.3636 | Trn_ACCLoss:  0.0303 | Val_ACCLoss:  0.0529
Fold   2 | Trn_Spearman:  0.7126 | Val_Spearman:  0.0742 | Trn_ACCLoss:  0.0168 | Val_ACCLoss:  0.1856
Fold   3 | Trn_Spearman:  0.7434 | Val_Spearman: -0.0568 | Trn_ACCLoss:  0.0318 | Val_ACCLoss:  0.0721
Fold   4 | Trn_Spearman:  0.7127 | Val_Spearman:  0.2628 | Trn_ACCLoss:  0.0292 | Val_ACCLoss:  0.1037
Fold   5 | Trn_Spearman:  0.7208 | Val_Spearman:  0.2674 | Trn_ACCLoss:  0.0342 | Val_ACCLoss:  0.0429
Fold   6 | Trn_Spearman:  0.7107 | Val_Spearman:  0.3901 | Trn_ACCLoss:  0.0276 | Val_ACCLoss:  0.1034
Fold   7 | Trn_Spearman:  0.7221 | Val_Spearman:  0.1905 | Trn_ACCLoss:  0.0305 | Val_ACCLoss:  0.1667
Fold   8 | Trn_Spearman:  0.7178 | Val_Spearman:  0.2930 | Trn_ACCLoss:  0.0332 | Val_ACCLoss:  0.0662
Fold   9 | Trn_Spearman:  0.7204 | Val_Spearman:  0.1804 | Trn_ACCLoss:  0.0276 | Val_ACCLoss:  0.0776
Fold  10 | Trn_Spearman:  0.7315 | Val_Spearman:  0.1566 | Trn_ACCLoss:  