### Схема тетрадки:
* [Импорт](#first-bullet)
* [Префильтрация](#second-bullet)
* [Модель 1 уровня: рекомендательная](#third-bullet)
* [Модель 2 уровня: ранжирующая](#forth-bullet)
* [Оценка результатов](#fith-bullet)
* [Сохранение результатов](#sixth-bullet)
---

***

***
# Блок импорта <a class="anchor" id="first-bullet"></a>
* импорт библиотек
* загрузка констант из файла `settings.yaml`

In [1]:
# Wildcard import pulls the project's helpers into the notebook namespace; the
# cell output below lists Dataset, MainRecommender, load_settings, pprint and
# precision_at_k as the loaded names.
from src.utils import *
# Merge the mapping returned by load_settings into the notebook globals, so the
# settings (ITEM_COL, USER_COL, N_PREDICT, ...) become plain global names.
# NOTE(review): the positional True presumably switches on the listing printed
# below -- confirm against load_settings in src.utils.
globals().update(load_settings(True))

Loaded following classes:
['Dataset', 'MainRecommender']

Loaded following functions:
['load_settings', 'pprint', 'precision_at_k']

Loaded following constants:
ITEM_COL          = item_id    | <class 'str'>
USER_COL          = user_id    | <class 'str'>
ACTUAL_COL        = actual     | <class 'str'>
TAKE_N_POPULAR    =          5 | <class 'int'>
N_PREDICT         =         50 | <class 'int'>
TOPK_PRECISION    =          5 | <class 'int'>
VAL_MATCHER_WEEKS =          6 | <class 'int'>
VAL_RANKER_WEEKS  =          3 | <class 'int'>
CATBOOST_RANKER   = YetiRankPairwise | <class 'str'>
CATBOOST_ITERATIONS =       2000 | <class 'int'>


***
# Блок предобработки данных <a class="anchor" id="second-bullet"></a>
* загрузка датасетов
* первичная трансформация датасетов
* добавление новых фич
* конвертация текстовых фич в цифровые для модели второго уровня
* разделение и подготовка датасетов для формирования рекомендаций и последующего ранжирования

In [2]:
# Single object that owns all data handling for the rest of the notebook.
# NOTE(review): Dataset() is called without arguments, so input locations
# presumably come from the loaded settings or from defaults in src.utils -- confirm.
data = Dataset()
# Pre-filtering step; the method body is not visible in this notebook, see the
# section text above for the intended scope (loading, transforms, new features).
data.data_prefilter()
# Split step. Later cells read data.data_train_lvl_1 and data.result_lvl_1,
# which are presumably populated here -- confirm in the Dataset class.
data.data_split()

***
# Блок формирования рекомендаций <a class="anchor" id="third-bullet"></a>
* train_test_split
* получение рекомендаций по собственным покупкам (с добавлением популярных товаров)

In [8]:
# Bare expression: the notebook echoes the table, which the captured output
# shows as one row per user_id with a list-valued `actual` column.
# NOTE(review): this cell has execution count 8 but sits before cell 3, so it
# was run out of order -- re-run the notebook top to bottom to confirm the
# attribute is already available at this point.
data.result_lvl_1

Unnamed: 0,user_id,actual
0,1,"[999999, 874905, 5978648, 15596126, 835108, 13..."
1,7,"[999999, 15452140, 13511134]"
2,8,"[999999, 6602265, 15778504, 12948295]"
3,13,"[999999, 908317, 877374, 13158898, 15627237, 1..."
4,16,[999999]
...,...,...
786,2494,"[999999, 5978656, 9859236, 5978648]"
787,2496,"[999999, 5978648, 12263901, 827666]"
788,2497,"[999999, 921366, 9523067, 13417270, 15452266, ..."
789,2498,"[999999, 15972819, 966718, 16220496]"


In [3]:
# Build the first-level recommender from the level-1 training split.
# Construction itself produces the log and progress bars captured below
# (including the notice that the factor count was raised from 20 to 32 for GPU
# training), so model fitting presumably happens inside __init__ -- confirm.
recommender = MainRecommender(data.data_train_lvl_1)


GPU training requires factor size to be a multiple of 32. Increasing factors from 20 to 32.


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/20597 [00:00<?, ?it/s]

***
# Блок ранжирования полученных рекомендаций <a class="anchor" id="forth-bullet"></a>
* форматируем датасеты под формат (class Pool) CatBoost
* обучаем CatBoostRanker с функцией потерь YetiRankPairwise
* получаем предсказанные вероятности для ранжирования
* ранжируем рекомендации от модели первого уровня

In [4]:
# Prepare the ranker inputs for each split; `t` selects which split is built.
# NOTE(review): per the section text these are formatted for CatBoost's Pool
# class -- the conversion itself lives in MainRecommender.preprocessing.
recommender.preprocessing(data, t='train')
recommender.preprocessing(data, t='test')
# Fit the second-level ranker. This is the last expression of the cell, so its
# return value is what the notebook displays below.
# NOTE(review): the loss is presumably taken from the CATBOOST_RANKER setting
# (YetiRankPairwise) -- confirm in ranker_fit.
recommender.ranker_fit()

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Unnamed: 0,user_id,item_id,manufacturer,brand,commodity_type,household_size_desc,kid_category_desc,age,income,has_kids,adults_num,Homeowner,Probable Owner,Probable Renter,Renter,Unknown,A,B,U,proba
0,1364,14025076,5456,0,238,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.504457
1,1364,1139027,69,1,88,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.496487
2,1364,1025265,5024,0,109,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.441980
3,1364,1086885,4311,0,140,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.487136
4,1364,6602718,5072,0,72,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.450730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39822,2483,1034745,5015,0,114,1,0,49.0,87.0,0.0,1.0,1,0,0,0,0,0,1,0,0.434715
39823,2483,924881,4951,0,89,1,0,49.0,87.0,0.0,1.0,1,0,0,0,0,0,1,0,0.427326
39824,2483,5567814,1075,0,12,1,0,49.0,87.0,0.0,1.0,1,0,0,0,0,0,1,0,0.470063
39825,2483,12330764,520,0,4,1,0,49.0,87.0,0.0,1.0,1,0,0,0,0,0,1,0,0.472886


In [5]:
# Score candidates with the fitted ranker; the echoed table carries the result
# in the `proba` column next to the user/item features.
# NOTE(review): the input is recommender.X_train, which judging by the name is
# the training feature set, so these scores are not a held-out estimate --
# confirm whether a test-set call was intended here.
recommender.ranker_predict(recommender.X_train)

Unnamed: 0,user_id,item_id,manufacturer,brand,commodity_type,household_size_desc,kid_category_desc,age,income,has_kids,adults_num,Homeowner,Probable Owner,Probable Renter,Renter,Unknown,A,B,U,proba
0,1364,14025076,5456,0,238,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.504457
1,1364,1139027,69,1,88,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.496487
2,1364,1025265,5024,0,109,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.441980
3,1364,1086885,4311,0,140,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.487136
4,1364,6602718,5072,0,72,1,0,75.0,112.0,0.0,1.0,1,0,0,0,0,0,1,0,0.450730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39822,2483,1034745,5015,0,114,1,0,49.0,87.0,0.0,1.0,1,0,0,0,0,0,1,0,0.434715
39823,2483,924881,4951,0,89,1,0,49.0,87.0,0.0,1.0,1,0,0,0,0,0,1,0,0.427326
39824,2483,5567814,1075,0,12,1,0,49.0,87.0,0.0,1.0,1,0,0,0,0,0,1,0,0.470063
39825,2483,12330764,520,0,4,1,0,49.0,87.0,0.0,1.0,1,0,0,0,0,0,1,0,0.472886


***
# Блок оценки полученных результатов <a class="anchor" id="fith-bullet"></a>

***
# Блок сохранения результата <a class="anchor" id="sixth-bullet"></a>

***