In [None]:
# %pip install cython
# %pip install sortedcontainers
# %pip install zipp==3.1.0 --upgrade
# %pip install scorer

In [None]:

import sys

import pyximport
# import scorer
import pystan
import os
import numpy as np
import pandas as pd
import lightgbm as lg
import catboost
import zipp
from catboost.utils import get_gpu_device_count
from tqdm import tqdm
from pathlib import Path

# When running on DataSphere (home directory is /home/jupyter) the project lives
# under a fixed location: switch into the example directory and expose the
# scorer directory through its absolute path.
if str(Path.home()) == "/home/jupyter":
    os.chdir("/home/jupyter/work/resources/wunder_challenge/examples/ml_example")
    sys.path.append("/home/jupyter/work/resources/wunder_challenge/scorer")

# The relative scorer directory has to be on sys.path BEFORE the project
# imports below; previously it was appended only after `import orderbook_fast`,
# so outside DataSphere the import could not see that directory.
# NOTE(review): assumes orderbook_fast / scorer are located in ../../scorer — confirm.
sys.path.append("../../scorer/")

import orderbook_fast as ob
from my_orderbook import MyOrderBook

# Single shared feature extractor; the cells below use it both while collecting
# the datasets and inside the prediction callback.
catboost_myob = MyOrderBook()

print("curdir = ", Path.cwd())
print("homedir = ", Path.home())


# Numeric codes for the two order-book sides (not referenced by the visible cells).
SIDE_BID = 0
SIDE_ASK = 1

## Собираем датасет для тренировки модели

In [None]:
%time

def collect_dataset(data_path):
    '''
    Replay an event dump and build the feature matrix and label vector.

    Every event is applied to a fresh ``ob.OrderBook``. DEAL events are
    forwarded to the shared ``catboost_myob`` extractor and NEW_CHUNK events
    reset it. For each event with ``need_prediction`` set, one feature row is
    taken from ``catboost_myob.get_features`` and the event's ``Y`` is stored
    as its label.

    Args:
        data_path: location of the event dump handed to ``ob.EventPlayer``.

    Returns:
        ``(X, Y)`` as numpy arrays: one feature row and one label per event
        that required a prediction, in replay order.
    '''
    # The extractor is shared between calls. Reset it so that deal state left
    # over from a previous dump cannot leak into the first rows of this one
    # (this restores the per-call reset the old local `last_deal` list had).
    catboost_myob.clear()

    event_player = ob.EventPlayer(data_path)
    orderbook = ob.OrderBook()

    feature_rows = []
    labels = []

    for ev in tqdm(event_player.iget_events(),
                   total=len(event_player),
                   desc="collecting dataset"):

        # Update the extractor state before the book is advanced.
        if ev.action == ob.Action.DEAL:
            catboost_myob.set_last_deal(ev)
        elif ev.action == ob.Action.NEW_CHUNK:
            catboost_myob.clear()

        orderbook.apply_event(ev)

        # Features are computed against the book that already contains `ev`.
        if ev.need_prediction:
            feature_rows.append(catboost_myob.get_features(ev, orderbook))
            labels.append(ev.Y)

    print(f"Dataset collected: len(X) = {len(feature_rows)}")
    return np.array(feature_rows), np.array(labels)


# Build the fitting set from dump A and the evaluation set from dump B.
# Both calls go through the same module-level `catboost_myob` extractor,
# so they are executed strictly one after the other.
X_train, Y_train = collect_dataset("../../data/train_small_A.npz")
X_test, Y_test = collect_dataset("../../data/train_small_B.npz")

## Обучаем модель градиентного бустинга

In [None]:
#!L
# Wrap the collected matrices into CatBoost pools; the categorical feature
# list is supplied by the shared feature extractor.
train_pool = catboost.Pool(X_train, Y_train, cat_features=catboost_myob.cat_features)
test_pool = catboost.Pool(X_test, Y_test, cat_features=catboost_myob.cat_features)

# Train on GPU when CatBoost can see one, otherwise fall back to CPU so the
# cell does not fail on machines without a GPU.
training_device = "GPU" if get_gpu_device_count() > 0 else "CPU"

clf = catboost.CatBoostClassifier(
    learning_rate=0.02,
    depth=6,
    l2_leaf_reg=2,
    bootstrap_type="Bayesian",
    loss_function="Logloss",
    # AUC drives best-iteration selection; the hint also reports it on the train set.
    eval_metric="AUC:hints=skip_train~false",
    custom_metric=["Recall", "Precision", "Accuracy", "F1", "Kappa", "MCC"],
    iterations=2000,
    # Overfitting detector: stop after 30 iterations without improvement.
    od_type="Iter",
    od_wait=30,
    rsm=1,
    task_type=training_device,
    boosting_type="Ordered",
    logging_level="Verbose",
    train_dir="grid",
)

# The evaluation pool is used for early stopping and for picking the best iteration.
clf.fit(
    X=train_pool,
    plot=False,
    eval_set=test_pool,
    use_best_model=True,
)


## Тестируем получившийся классификатор

In [None]:
import pystan

In [None]:
# %pip install cmake
# %pip install scorer==2.0.2 --upgrade --use-feature=2020-resolver --force-reinstall
# %pip install pystan


In [None]:
def process_event_and_predict_proba(ev, orderbook):
    """
    Prediction callback: update the extractor state and score one event.

    DEAL events are passed to ``catboost_myob.set_last_deal`` and NEW_CHUNK
    events reset the extractor via ``catboost_myob.clear``.

    Args:
        ev: the event being processed.
        orderbook: order book object forwarded to the feature extractor.

    Returns:
        ``None`` when the event does not need a prediction; otherwise the
        value at column 1 of ``clf.predict_proba`` for the event's feature row.
    """
    action = ev.action
    if action == ob.Action.DEAL:
        catboost_myob.set_last_deal(ev)
    elif action == ob.Action.NEW_CHUNK:
        catboost_myob.clear()

    if ev.need_prediction:
        row = catboost_myob.get_features(ev, orderbook)
        # predict_proba gets a one-row batch; take row 0, column 1.
        class_probas = clf.predict_proba([row])
        return class_probas[0, 1]

    return None

from scorer import Scorer

# Score the classifier on a third dump (C) that was used neither for fitting
# nor for early stopping. `score` receives the prediction callback and returns
# the ROC AUC together with a (true labels, predicted probabilities) pair.
# NOTE(review): presumably the scorer replays the dump and calls the callback
# once per event — confirm in the scorer module.
scoring = Scorer("../../data/train_small_C.npz")
roc_auc, (true_ys, pred_probas) = scoring.score(process_event_and_predict_proba)

## Сохраним модель, и решение для отправки готово

In [None]:
# Save the trained model.
# Earlier variant, presumably left over from a LightGBM version of this notebook:
# clf.booster_.save_model("wunder.model")
print(Path.cwd())  # show the directory the relative model path resolves against
clf.save_model("wunder.model", format="cbm")

'''Посмотрите код файла solution.py.
Он использует те же функции, что и этот ноутбук, но уже готов к отправке на сервер.
Попробуйте создать архив с файлами solution.py и wunder.model и отправить их на проверку.'''