In [2]:
# Load IPython's autoreload extension and switch it to mode 2, so that edits
# to imported project modules are picked up without restarting the kernel.
%load_ext IPython.extensions.autoreload
%autoreload 2

In [3]:
import sys

sys.path.append('../..')

import pandas as pd
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import datetime as dt

from model import FinData
from model import train_valid_split, train_valid_test_split
from model import CatboostFinModel

from datetime import datetime

  from .autonotebook import tqdm as notebook_tqdm


#### Oil and gas

Будем предсказывать направление движения цены акций Татнефти, используя в качестве признаков также свечи других акций из категории Oil and gas.

In [12]:
# Tickers that make up the joint dataset, the ticker whose direction is
# predicted, and the earliest timestamp kept for every ticker.
names = ['Tatneft', 'Bashneft', 'Gazprom', 'Lukoil', 'Novatek', 'Rosneft']
target_name = "Tatneft"
start_date = datetime(2024, 1, 1)

# Arguments for the FinData feature builders; identical for every ticker,
# so they are defined once instead of on each loop iteration.
windows_ma = [2, 3, 5, 7, 9, 18, 21, 28, 30, 50, 500]
shifts_norms = [2, 3, 4, 5, 6]

# Per-ticker accumulators: prepared frames and the feature-name lists.
dfs, numerics, cats = [], [], []

for name in names:
    data = FinData(f"../../datasets/{name}_10_min.csv")
    data.restrict_time_down(start_date)

    # data.insert_time_features()  # currently disabled
    data.insert_rolling_means(windows_ma)
    data.insert_shifts_norms(shifts_norms)
    data.insert_exp_rolling_means(windows_ma)

    # Index by timestamp so the column-wise concat below aligns rows on 'utc'.
    data.df.set_index('utc', inplace=True)

    if name != target_name:
        # Non-target tickers get a "_<ticker>" suffix on every column and on the
        # tracked feature names, keeping them distinct in the joint frame.
        suffix = '_' + name
        data.df.rename(columns=lambda feature: feature + suffix, inplace=True)
        data.numeric_features = [feature + suffix for feature in data.numeric_features]
        data.cat_features = [feature + suffix for feature in data.cat_features]
    else:
        # The target ticker keeps its original column names and receives the label.
        data.make_binary_class_target(target_name="direction_binary")

    dfs.append(data.df)
    numerics.extend(data.numeric_features)
    cats.extend(data.cat_features)

# Join all tickers side by side, turn 'utc' back into a regular column, and wrap
# the result in a FinData that carries the combined feature lists.
joint_frame = pd.concat(dfs, axis=1).reset_index()
joint_data = FinData(joint_frame)
joint_data.numeric_features = numerics
joint_data.cat_features = cats

In [13]:
# Feature-name lists as reported by the joint FinData object.
numeric, cat = joint_data.get_numeric_features(), joint_data.get_cat_features()

# Calendar date handed to train_valid_split.
# NOTE(review): presumably rows before this date form the training part and the
# rest the validation part — confirm in model.train_valid_split.
split_date = {"year": 2024, "month": 11, "day": 1}

X_train, X_val, y_train, y_val = train_valid_split(
    data=joint_data.df,
    numeric=numeric,
    cat=cat,
    target="direction_binary",
    **split_date,
)

In [15]:
# Settings passed to CatboostFinModel via its `args` parameter.
# `cat` is the categorical feature list produced by the split cell.
args = dict(
    iterations=10000,
    depth=5,
    use_best_model=True,
    l2_leaf_reg=200,
    loss_function='Logloss',
    eval_metric='Accuracy',
    cat_features=cat,
    random_state=42,
    early_stopping_rounds=1000,
)

In [16]:
model = CatboostFinModel(args=args)

# Keyword bundles for the two setters; call order is kept: datasets first,
# then feature lists.
split_parts = {"X_train": X_train, "X_val": X_val, "y_train": y_train, "y_val": y_val}
feature_lists = {"numeric_features": numeric, "cat_features": cat}

model.set_datasets(**split_parts)
model.set_features(**feature_lists)

# Train; as the last expression, the return value of fit() is the cell output.
model.fit()

0:	learn: 0.6020640	test: 0.5912807	best: 0.5912807 (0)	total: 40.3ms	remaining: 6m 43s
1:	learn: 0.6077711	test: 0.5960891	best: 0.5960891 (1)	total: 75.6ms	remaining: 6m 18s
2:	learn: 0.6083183	test: 0.5962494	best: 0.5962494 (2)	total: 111ms	remaining: 6m 10s
3:	learn: 0.6082011	test: 0.5965700	best: 0.5965700 (3)	total: 146ms	remaining: 6m 5s
4:	learn: 0.6083183	test: 0.5994550	best: 0.5994550 (4)	total: 180ms	remaining: 6m
5:	learn: 0.6090611	test: 0.5981728	best: 0.5994550 (4)	total: 217ms	remaining: 6m 1s
6:	learn: 0.6101165	test: 0.6002565	best: 0.6002565 (6)	total: 259ms	remaining: 6m 10s
7:	learn: 0.6124619	test: 0.6016990	best: 0.6016990 (7)	total: 297ms	remaining: 6m 11s
8:	learn: 0.6125401	test: 0.6002565	best: 0.6016990 (7)	total: 338ms	remaining: 6m 15s
9:	learn: 0.6133219	test: 0.5991345	best: 0.6016990 (7)	total: 376ms	remaining: 6m 15s
10:	learn: 0.6148073	test: 0.6023401	best: 0.6023401 (10)	total: 421ms	remaining: 6m 22s
11:	learn: 0.6144555	test: 0.5988139	best: 0.

<model.model.CatboostFinModel at 0x1c003b9a6b0>