In [1]:
# Enable autoreload so that edits made in other project files are picked up
# by already-imported modules without restarting the kernel.
%load_ext IPython.extensions.autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../..')
from model import FinData
from model import train_valid_split
from model import CatboostFinModel

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import pandas as pd
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import datetime as dt

In [4]:
# Build the FinData wrapper from the CSV dataset.
data = FinData("../../datasets/T_yandex_10min.csv")

# NOTE(review): presumably keeps only the most recent 6 months of rows — confirm in FinData.
# NOTE(review): here the restriction runs BEFORE the feature helpers, while a later
# cell of this notebook calls it after them — confirm which order is intended.
data.restrict_time_down(months=6)

# Feature helpers from the project's `model` package; each call presumably adds
# derived columns to data.df — TODO confirm against their definitions.
data.insert_shifts_norms()
data.insert_rolling_means()
data.insert_exp_rolling_means()
data.insert_stochastic_oscillator()
data.insert_high_low_diff()
data.insert_bollinger()


In [5]:
# CatBoost hyper-parameters used for the cross-validation sweep below.
args = {
    "iterations": 10000,
    "depth": 5,
    "learning_rate": 0.01,  # these values are somewhat arbitrary and are meant to be tuned
    "use_best_model": True,
    "reg_lambda": 0.1,  # L2 leaf regularization (CatBoost alias of l2_leaf_reg), not L1
    "loss_function": 'CrossEntropy',
    "eval_metric": 'Accuracy',
    "cat_features": data.cat_features,
    "random_state": 42,
    "early_stopping_rounds": 2500,
}

model = CatboostFinModel(args)
model.cat = data.cat_features
model.numeric = data.numeric_features

# Feature matrix restricted to the declared categorical + numeric columns, and the binary target.
X, y = data.df[model.cat + model.numeric], data.df['direction_binary']

# Run the model's cross-validation once per n_samples value and keep the results in order.
samples_list = [5, 10, 15]
answers = [model.cross_validation(X, y, n_samples=n) for n in samples_list]

# Pair every setting with its result directly instead of indexing back into `answers`.
for n, answer in zip(samples_list, answers):
    print(f"N_samples: {n} - avg accuracy {answer}")

0:	learn: 0.6051937	test: 0.5589789	best: 0.5589789 (0)	total: 196ms	remaining: 32m 39s
1000:	learn: 0.8551937	test: 0.5629401	best: 0.5827465 (211)	total: 5.69s	remaining: 51.2s
2000:	learn: 0.9555458	test: 0.5550176	best: 0.5827465 (211)	total: 10.8s	remaining: 43s
Stopped by overfitting detector  (2500 iterations wait)

bestTest = 0.5827464789
bestIteration = 211

Shrink model to first 212 iterations.
0:	learn: 0.5875880	test: 0.5787852	best: 0.5787852 (0)	total: 11.4ms	remaining: 1m 54s
1000:	learn: 0.7585827	test: 0.5748239	best: 0.5915493 (284)	total: 5.8s	remaining: 52.2s
2000:	learn: 0.8653169	test: 0.5695423	best: 0.5915493 (284)	total: 11.4s	remaining: 45.5s
Stopped by overfitting detector  (2500 iterations wait)

bestTest = 0.5915492958
bestIteration = 284

Shrink model to first 285 iterations.
0:	learn: 0.5837735	test: 0.6034331	best: 0.6034331 (0)	total: 9.69ms	remaining: 1m 36s
1000:	learn: 0.7177230	test: 0.5897887	best: 0.6043134 (95)	total: 7.22s	remaining: 1m 4s
2000:

In [7]:
# Repeat the cross-validation sweep after calling insert_random_prediction()
# (presumably adds a random baseline column — TODO confirm). Reuses the `args`
# dict defined in the earlier sweep cell.
# NOTE(review): the log recorded under this cell shows exactly the same
# learn/test accuracies as the earlier sweep — confirm the new column actually
# reaches data.cat_features / data.numeric_features.
data.insert_random_prediction()

model = CatboostFinModel(args)
model.cat = data.cat_features
model.numeric = data.numeric_features

feature_columns = model.cat + model.numeric
X = data.df[feature_columns]
y = data.df['direction_binary']

samples_list = [5, 10, 15]
answers = []
for n_samples in samples_list:
    cv_result = model.cross_validation(X, y, n_samples=n_samples)
    answers.append(cv_result)

for n, answer in zip(samples_list, answers):
    print(f"N_samples: {n} - avg accuracy {answer}")

0:	learn: 0.6051937	test: 0.5589789	best: 0.5589789 (0)	total: 8.43ms	remaining: 1m 24s
1000:	learn: 0.8551937	test: 0.5629401	best: 0.5827465 (211)	total: 7.85s	remaining: 1m 10s
2000:	learn: 0.9555458	test: 0.5550176	best: 0.5827465 (211)	total: 21.9s	remaining: 1m 27s
Stopped by overfitting detector  (2500 iterations wait)

bestTest = 0.5827464789
bestIteration = 211

Shrink model to first 212 iterations.
0:	learn: 0.5875880	test: 0.5787852	best: 0.5787852 (0)	total: 17.3ms	remaining: 2m 52s
1000:	learn: 0.7585827	test: 0.5748239	best: 0.5915493 (284)	total: 16.2s	remaining: 2m 25s
2000:	learn: 0.8653169	test: 0.5695423	best: 0.5915493 (284)	total: 34.2s	remaining: 2m 16s
Stopped by overfitting detector  (2500 iterations wait)

bestTest = 0.5915492958
bestIteration = 284

Shrink model to first 285 iterations.
0:	learn: 0.5837735	test: 0.6034331	best: 0.6034331 (0)	total: 20.1ms	remaining: 3m 21s
1000:	learn: 0.7177230	test: 0.5897887	best: 0.6043134 (95)	total: 20.5s	remaining: 3m 4

In [8]:
# Rebuild the FinData wrapper from the CSV dataset (replaces the earlier `data`).
data = FinData("../../datasets/T_yandex_10min.csv")
# Feature helpers from the project's `model` package; each call presumably adds
# derived columns to data.df — TODO confirm. [3, 6, 18] is passed to
# insert_shifts_norms; its meaning (likely shift lengths) should be verified there.
data.insert_shifts_norms([3, 6, 18])
data.insert_rolling_means()
data.insert_exp_rolling_means()
data.insert_stochastic_oscillator()
data.insert_high_low_diff()
data.insert_butter_filter()

# NOTE(review): presumably keeps only the most recent 12 months of rows; here it runs
# AFTER the feature helpers, unlike the first data-preparation cell — confirm intent.
data.restrict_time_down(months=12)

In [10]:
# Display the dimensions of the prepared frame (rows, columns).
data.df.shape

(28013, 53)

In [11]:
numeric = data.numeric_features
cat = data.cat_features

# CatBoost settings for a single fit evaluated on a held-out validation slice.
args = dict(
    iterations=10000,
    depth=5,
    learning_rate=0.01,  # these values are somewhat arbitrary and are meant to be tuned
    use_best_model=True,
    l2_leaf_reg=200,
    verbose=100,  # print a progress line every 100 iterations
    loss_function='CrossEntropy',
    eval_metric='Accuracy',
    cat_features=cat,
    random_state=42,
    early_stopping_rounds=5000,
)

# Alternative (disabled): a date-based split via
# train_valid_split(data=data.df, year=2024, month=12, day=5,
#                   numeric=numeric, cat=cat, target="direction_binary").
target_column = "direction_binary"
X = data.df.drop(columns=target_column)
y = data.df[target_column]

# Positional hold-out: the last 1000 rows form the test set, the 1000 rows
# before them the validation set, and everything earlier the training set.
train_rows = slice(None, -2000)
val_rows = slice(-2000, -1000)
test_rows = slice(-1000, None)

X_train, X_val, X_test = X[train_rows], X[val_rows], X[test_rows]
y_train, y_val, y_test = y[train_rows], y[val_rows], y[test_rows]

model = CatboostFinModel(args)
model.set_datasets(X_train, X_val, y_train, y_val)
model.set_features(numeric, cat)

# Kept as the cell's last expression so the returned object is displayed.
model.fit()


0:	learn: 0.6716642	test: 0.6780000	best: 0.6780000 (0)	total: 27.8ms	remaining: 4m 37s
100:	learn: 0.6763541	test: 0.6910000	best: 0.6940000 (82)	total: 1.47s	remaining: 2m 23s
200:	learn: 0.6795448	test: 0.6940000	best: 0.6940000 (82)	total: 3s	remaining: 2m 26s
300:	learn: 0.6836966	test: 0.7010000	best: 0.7010000 (299)	total: 4.58s	remaining: 2m 27s
400:	learn: 0.6860416	test: 0.6990000	best: 0.7010000 (299)	total: 6.07s	remaining: 2m 25s
500:	learn: 0.6875408	test: 0.6980000	best: 0.7010000 (299)	total: 7.33s	remaining: 2m 18s
600:	learn: 0.6887710	test: 0.7000000	best: 0.7010000 (299)	total: 8.65s	remaining: 2m 15s
700:	learn: 0.6905778	test: 0.7000000	best: 0.7010000 (299)	total: 9.88s	remaining: 2m 11s
800:	learn: 0.6920002	test: 0.7010000	best: 0.7020000 (780)	total: 11.1s	remaining: 2m 7s
900:	learn: 0.6930765	test: 0.7000000	best: 0.7020000 (780)	total: 12.4s	remaining: 2m 5s
1000:	learn: 0.6949987	test: 0.6990000	best: 0.7020000 (780)	total: 13.7s	remaining: 2m 3s
1100:	lea

<model.model.CatboostFinModel at 0x2af5a3e5190>

In [22]:
# Sanity check: predict the first test row and print it next to its true label.
first_test_row = X_test[numeric + cat].iloc[0]
y_pred = model.predict(first_test_row)
y_true = y_test.iloc[0]
print(y_pred, y_true)

0 0


In [35]:
# Naive single-share trading simulation over the test rows.
# At most one share is held at a time; the run stops early when cash drops
# below zero or when cash reaches the initial amount plus `win`.
initial = 5000  # starting cash
win = 100       # profit target that ends the simulation
money = initial
stock = 0       # number of shares currently held (0 or 1)
last_time_stock_zero = (0, money)  # (row position, cash) at the last step that started with no share

# Loop-invariant selections, hoisted so the feature frame is not rebuilt on every iteration.
test_features = X_test[numeric + cat]
close_prices = X_test['close']

for i in range(X_test.shape[0]):
    y_pred = model.predict(test_features.iloc[i])
    close_now = close_prices.iloc[i]
    if stock == 0:
        last_time_stock_zero = (i, money)

    # NOTE(review): a share is bought on prediction 0 and sold on prediction 1 —
    # confirm this matches the encoding of `direction_binary`.
    if stock == 0 and y_pred == 0:  # buying
        # The purchase is not checked against available cash, so `money` may go
        # negative here; the "broke" check below relies on that.
        money -= close_now
        stock = 1
    elif stock == 1 and y_pred == 1:  # selling
        stock = 0
        money += close_now

    if money < 0:
        print("I am broke on interation:", i)
        break
    if money >= initial + win:
        print(f"I gained {win} money on iteration: {i}")
        break
else:
    # Runs only when the loop finished without hitting either stop condition.
    # The previous post-loop guard `0 < money < initial + win` printed nothing
    # when the run ended with exactly zero cash.
    print(f"I traided a lot, now I have {stock} stocks and {money} money")
    print(f"Last time I had no stocks was on {last_time_stock_zero[0]} and I had {last_time_stock_zero[1]} money")

I am broke on interation: 652


In [87]:
# Rebuild the dataset wrapper and apply the feature helpers in this order
# (each presumably adds derived columns to data.df — TODO confirm).
data = FinData("../../datasets/T_yandex_10min.csv")
data.insert_shifts_norms([3, 6, 9, 12, 18])
data.insert_rolling_means()
data.insert_exp_rolling_means()
data.insert_butter_filter()
data.insert_high_low_diff()
data.insert_random_prediction()
data.insert_hull_moving_average()

# CatBoost settings for the trading back-test; training output is silenced.
args = dict(
    iterations=10000,
    depth=5,
    learning_rate=0.01,
    use_best_model=True,
    l2_leaf_reg=200,
    verbose=False,
    loss_function='CrossEntropy',
    eval_metric='Accuracy',
    cat_features=data.cat_features,
    random_state=42,
    early_stopping_rounds=2000,
)
model = CatboostFinModel(args)

# Kept as the cell's last expression, as in the original cell.
model.test_trading(
    data.df,
    start_date=dt.datetime(2024, 8, 1),
    proportion=[3, 1, 1],
    initial_budget=10000,
    cat=data.cat_features,
    num=data.numeric_features,
)

My budget before 10000 and after trading 6893.0
Mommy, are you prod of me?


In [89]:
# Same back-test as before, this time asking test_trading to print its actions.
some_date = dt.datetime(2024, 8, 1)
model.test_trading(
    data.df,
    start_date=some_date,
    proportion=[3, 1, 1],
    initial_budget=10000,
    cat=data.cat_features,
    num=data.numeric_features,
    print_actions=True,
)

0.6999803265787921
Date&Time: 2024-11-12 13:10:00 - I bought Yandex for 3789.5 and sold for 3772.5 -> budget: 9983.0 Daaaaaaaaaamn I was wrong
Date&Time: 2024-11-12 14:10:00 - I bought Yandex for 3778.5 and sold for 3772.0 -> budget: 9976.5 Daaaaaaaaaamn I was wrong
Date&Time: 2024-11-12 14:20:00 - I bought Yandex for 3772.5 and sold for 3770.0 -> budget: 9974.0 Daaaaaaaaaamn I was wrong
Date&Time: 2024-11-12 15:10:00 - I bought Yandex for 3773.0 and sold for 3764.0 -> budget: 9965.0 Daaaaaaaaaamn I was wrong
Date&Time: 2024-11-12 15:20:00 - I bought Yandex for 3765.0 and sold for 3763.5 -> budget: 9963.5 Daaaaaaaaaamn I was wrong
Date&Time: 2024-11-12 15:30:00 - I bought Yandex for 3763.5 and sold for 3762.0 -> budget: 9962.0 Daaaaaaaaaamn I was wrong
Date&Time: 2024-11-12 16:20:00 - I bought Yandex for 3756.5 and sold for 3758.5 -> budget: 9964.0
Date&Time: 2024-11-12 16:30:00 - I bought Yandex for 3759.0 and sold for 3759.0 -> budget: 9964.0
Date&Time: 2024-11-12 16:40:00 - I bought