In [1]:
%load_ext IPython.extensions.autoreload
%autoreload 2

In [4]:
import backtrader as bt
import datetime as dt
import sys
sys.path.append('../..')
from model import FinData
from model import merged_split
from model import CatboostFinModel
from model import train_valid_test_split

In [8]:
data = FinData(df="../../datasets/Yandex_10_min.csv")


windows_shifts_norms = [2, 3, 4, 5, 6]
windows_ma = [2, 4, 6, 8, 18, 30, 50]
windows_ema = [3, 5, 7, 9, 20, 35, 100]

data.insert_shifts_norms(windows_shifts_norms=windows_shifts_norms)
data.insert_rolling_means(windows_ma=windows_ma)
data.insert_exp_rolling_means(windows_ema=windows_ema)
data.insert_bollinger()
data.insert_rsi()
data.insert_high_low_diff()
data.insert_stochastic_oscillator()
data.insert_random_prediction()

num = data.get_numeric_features()
cat = data.get_cat_features()
target = data.target


X_train, X_val, X_test, y_train, y_val, y_test = train_valid_test_split(data.df, 
                                                                        start_period=dt.datetime(2023, 7, 1), 
                                                                        train_size=3000, 
                                                                        val_size=200, 
                                                                        test_size=200, 
                                                                        numeric=num, cat=cat, target=target, silenced=False)

Начало тренировочного периода: 2023-07-01 07:00:00. Конец тренировочного периода: 2023-08-10 16:40:00 
                     Начало валидационного периода: 2023-08-10 16:50:00. Конец валидационного периода: 2023-08-13 13:30:00 
                     Начало тестового периода: 2023-08-13 13:40:00. Конец тестового периода: 2023-08-16 10:30:00 
 


In [11]:
X_train, X_val, X_test, y_train, y_val, y_test = train_valid_test_split(data.df, 
                                                                        start_period=dt.datetime(2023, 7, 1), 
                                                                        train_size=1000, 
                                                                        val_size=500, 
                                                                        test_size=500, 
                                                                        numeric=num, cat=cat, target=target, silenced=False)

Начало тренировочного периода: 2023-07-01 07:00:00. Конец тренировочного периода: 2023-07-14 14:30:00 
                     Начало валидационного периода: 2023-07-14 14:40:00. Конец валидационного периода: 2023-07-21 11:30:00 
                     Начало тестового периода: 2023-07-21 11:40:00. Конец тестового периода: 2023-07-28 08:30:00 
 


In [12]:
args = args = {"iterations" : 3000, 
        "depth" : 5, 
        "learning_rate" : 0.01, 
        "use_best_model" : True, 
        "l2_leaf_reg" : 3,
        "loss_function" : 'Logloss', 
        "eval_metric" : 'Logloss', 
        "cat_features" : cat, 
        "random_state" : 42,
        "early_stopping_rounds" : 1000}

model = CatboostFinModel(args = args)

model.set_datasets(X_train=X_train, X_val=X_val, y_train=y_train, y_val=y_val)
model.set_features(numeric_features=num, cat_features=cat)

model.fit()

0:	learn: 0.6921667	test: 0.6923157	best: 0.6923157 (0)	total: 11.1ms	remaining: 33.4s
1:	learn: 0.6910118	test: 0.6918945	best: 0.6918945 (1)	total: 18.1ms	remaining: 27.1s
2:	learn: 0.6899846	test: 0.6912008	best: 0.6912008 (2)	total: 25.2ms	remaining: 25.2s
3:	learn: 0.6890973	test: 0.6905404	best: 0.6905404 (3)	total: 30ms	remaining: 22.5s
4:	learn: 0.6880961	test: 0.6899959	best: 0.6899959 (4)	total: 34.6ms	remaining: 20.7s
5:	learn: 0.6871700	test: 0.6895506	best: 0.6895506 (5)	total: 42.2ms	remaining: 21s
6:	learn: 0.6862305	test: 0.6890767	best: 0.6890767 (6)	total: 49.1ms	remaining: 21s
7:	learn: 0.6853893	test: 0.6883621	best: 0.6883621 (7)	total: 54.9ms	remaining: 20.5s
8:	learn: 0.6845373	test: 0.6877163	best: 0.6877163 (8)	total: 61.6ms	remaining: 20.5s
9:	learn: 0.6836791	test: 0.6872440	best: 0.6872440 (9)	total: 68.1ms	remaining: 20.4s
10:	learn: 0.6830464	test: 0.6868485	best: 0.6868485 (10)	total: 76.1ms	remaining: 20.7s
11:	learn: 0.6824825	test: 0.6864936	best: 0.68

<model.model.CatboostFinModel at 0x1fe6b477d70>

In [13]:
print(model.score(X_test, y_test))

              precision    recall  f1-score   support

           0       0.59      0.70      0.64       247
           1       0.64      0.53      0.58       253

    accuracy                           0.61       500
   macro avg       0.61      0.61      0.61       500
weighted avg       0.61      0.61      0.61       500



In [18]:
def mama_train_valid_test_split(data, start_period : dt.datetime, train_size, val_size, test_size, numeric, cat, target, silenced = True):
        cutted_df = data[data["utc"] >= start_period]
        train_df = cutted_df[:train_size]
        val_df = cutted_df[train_size : train_size+val_size]
        test_df = cutted_df[train_size+val_size : train_size+val_size+test_size]

        if not silenced:
            train_sd, val_sd, test_sd = train_df["utc"].iloc[0], val_df["utc"].iloc[0], test_df["utc"].iloc[0]
            train_ed, val_ed, test_ed = train_df["utc"].iloc[-1], val_df["utc"].iloc[-1], test_df["utc"].iloc[-1]
            print(f"Начало тренировочного периода: {train_sd}. Конец тренировочного периода: {train_ed} \n \
                    Начало валидационного периода: {val_sd}. Конец валидационного периода: {val_ed} \n \
                    Начало тестового периода: {test_sd}. Конец тестового периода: {test_ed} \n ")
        
        X_train, y_train = train_df[numeric + cat], train_df[target]
        X_val, y_val = val_df[numeric + cat], val_df[target]
        X_test, y_test = test_df[numeric + cat], test_df[target]

        return X_train, X_val, y_train, y_val, test_df

In [19]:
X_train, X_val, y_train, y_val, test_df = mama_train_valid_test_split(data.df, 
                                                                        start_period=dt.datetime(2023, 7, 1), 
                                                                        train_size=1000, 
                                                                        val_size=500, 
                                                                        test_size=500, 
                                                                        numeric=num, cat=cat, target=target, silenced=False)

Начало тренировочного периода: 2023-07-01 07:00:00. Конец тренировочного периода: 2023-07-14 14:30:00 
                     Начало валидационного периода: 2023-07-14 14:40:00. Конец валидационного периода: 2023-07-21 11:30:00 
                     Начало тестового периода: 2023-07-21 11:40:00. Конец тестового периода: 2023-07-28 08:30:00 
 


In [61]:
res = model.test_trading(df = data.df, start_date=dt.datetime(2022, 4, 1), end_date=dt.datetime(2022, 7, 25), train_size=1000, val_size=500, num=num, cat=cat)

3493
0.512167191525909
0:	learn: 0.6927391	test: 0.6931736	best: 0.6931736 (0)	total: 6.58ms	remaining: 19.7s
1:	learn: 0.6921326	test: 0.6930285	best: 0.6930285 (1)	total: 15.6ms	remaining: 23.4s
2:	learn: 0.6916335	test: 0.6929671	best: 0.6929671 (2)	total: 28.5ms	remaining: 28.5s
3:	learn: 0.6910770	test: 0.6927462	best: 0.6927462 (3)	total: 35.6ms	remaining: 26.6s
4:	learn: 0.6907719	test: 0.6925701	best: 0.6925701 (4)	total: 44.4ms	remaining: 26.6s
5:	learn: 0.6904504	test: 0.6925890	best: 0.6925701 (4)	total: 51.6ms	remaining: 25.7s
6:	learn: 0.6898221	test: 0.6924885	best: 0.6924885 (6)	total: 59.5ms	remaining: 25.4s
7:	learn: 0.6894789	test: 0.6922210	best: 0.6922210 (7)	total: 64.9ms	remaining: 24.3s
8:	learn: 0.6889218	test: 0.6922977	best: 0.6922210 (7)	total: 71.3ms	remaining: 23.7s
9:	learn: 0.6883353	test: 0.6921236	best: 0.6921236 (9)	total: 81.5ms	remaining: 24.4s
10:	learn: 0.6878024	test: 0.6920339	best: 0.6920339 (10)	total: 89.2ms	remaining: 24.2s
11:	learn: 0.68740

In [62]:
res

Unnamed: 0,datetime,budget
0,2022-05-17 09:40:00,10000.0
1,2022-05-17 09:50:00,10000.0
2,2022-05-17 10:00:00,10007.0
3,2022-05-17 10:10:00,10025.0
4,2022-05-17 10:20:00,10085.0
...,...,...
3488,2022-07-24 15:10:00,16928.6
3489,2022-07-24 15:20:00,16872.8
3490,2022-07-24 15:30:00,16890.8
3491,2022-07-24 15:40:00,16898.0


In [33]:
model = CatboostFinModel(args=args)
model.set_datasets(X_train=X_train, X_val=X_val, y_train=y_train, y_val=y_val)
model.set_features(numeric_features=num, cat_features=cat)
model.fit()

0:	learn: 0.6921667	test: 0.6923157	best: 0.6923157 (0)	total: 14.6ms	remaining: 43.8s
1:	learn: 0.6910118	test: 0.6918945	best: 0.6918945 (1)	total: 22.5ms	remaining: 33.7s
2:	learn: 0.6899846	test: 0.6912008	best: 0.6912008 (2)	total: 36.1ms	remaining: 36s
3:	learn: 0.6890973	test: 0.6905404	best: 0.6905404 (3)	total: 49.9ms	remaining: 37.4s
4:	learn: 0.6880961	test: 0.6899959	best: 0.6899959 (4)	total: 60.8ms	remaining: 36.4s
5:	learn: 0.6871700	test: 0.6895506	best: 0.6895506 (5)	total: 73.5ms	remaining: 36.7s
6:	learn: 0.6862305	test: 0.6890767	best: 0.6890767 (6)	total: 82.5ms	remaining: 35.3s
7:	learn: 0.6853893	test: 0.6883621	best: 0.6883621 (7)	total: 98.7ms	remaining: 36.9s
8:	learn: 0.6845373	test: 0.6877163	best: 0.6877163 (8)	total: 112ms	remaining: 37.2s
9:	learn: 0.6836791	test: 0.6872440	best: 0.6872440 (9)	total: 121ms	remaining: 36.1s
10:	learn: 0.6830464	test: 0.6868485	best: 0.6868485 (10)	total: 131ms	remaining: 35.5s
11:	learn: 0.6824825	test: 0.6864936	best: 0.6

<model.model.CatboostFinModel at 0x1fe0b8ada00>

In [34]:
import backtrader as bt

class MLStrategy(bt.Strategy):
    def __init__(self, model, num):
        self.model = model

    def next(self):
        # Формируем входные данные на основе текущих данных
        data = self.data

        # Делаем предсказание
        prediction = self.model.predict(X_test)[0]

        # Торговая логика
        if prediction == 1:
            self.buy()
        else:
            self.sell()

# def testing_one_period(X_train, X_val, y_train, y_val, test_df, num, cat, args):
#     # Создаем и обучаем модель
#     model = CatboostFinModel(args=args)
#     model.set_datasets(X_train=X_train, X_val=X_val, y_train=y_train, y_val=y_val)
#     model.set_features(numeric_features=num, cat_features=cat)
#     model.fit()

# Инициализируем Backtrader
cerebro = bt.Cerebro()

# Загружаем тестовые данные
test_data = bt.feeds.PandasData(dataname=test_df, datetime=0)
cerebro.adddata(test_data)

# Добавляем стратегию с моделью и списком колонок
cerebro.addstrategy(MLStrategy, model=model, num=num)

# Запускаем бэктест
cerebro.run()


AttributeError: 'Lines_LineSeries_DataSeries_OHLC_OHLCDateTime_AbstractDataBase_DataBase_PandasData' object has no attribute 'close_norms_2'

In [26]:
test_df

Unnamed: 0,utc,open,close,high,low,volume,direction_binary,close_norms_2,close_high_norms_2,high_norms_2,...,close_normed_hl_diff_18,stochastic_oscillator,close_normed_stochastic_oscillator,stochastic_oscillator_ma_3,close_normed_stochastic_oscillator_ma_3,stochastic_oscillator_ma_6,close_normed_stochastic_oscillator_ma_6,stochastic_oscillator_ma_18,close_normed_stochastic_oscillator_ma_18,target_predict
97014,2023-07-21 11:40:00,2535.4,2533.8,2537.6,2532.0,16290,0,1.004998,1.004998,1.006106,...,575.863636,2.545455,995.421429,2.306263,1098.660652,1.073265,2360.833047,2.151486,1177.697813,2391.898512
97015,2023-07-21 11:50:00,2532.2,2533.4,2535.8,2531.8,5726,0,0.999369,0.999369,0.999763,...,145.597701,1.482759,1708.572093,2.787182,908.946688,1.320392,1918.673135,2.013861,1257.981449,2410.295097
97016,2023-07-21 12:00:00,2534.0,2526.4,2534.6,2526.4,7236,1,0.997079,0.997079,0.998818,...,350.888889,1.472222,1716.045283,1.833478,1377.927285,1.606412,1572.696990,1.936701,1304.486555,2407.658387
97017,2023-07-21 12:10:00,2527.0,2528.4,2532.2,2526.2,6830,0,0.998026,0.998026,0.998580,...,407.806452,1.580645,1599.600000,1.511875,1672.360109,1.909069,1324.415213,1.691181,1495.049936,2408.981012
97018,2023-07-21 12:20:00,2528.0,2519.4,2529.0,2516.0,31435,1,0.997229,0.997229,0.997791,...,572.590909,-0.590909,-4263.600000,0.820653,3069.995143,1.803917,1396.627090,1.341686,1877.786579,2347.343998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97509,2023-07-28 07:50:00,2635.6,2635.0,2636.2,2630.2,10578,0,0.998182,0.998182,0.998561,...,941.071429,3.500000,752.857143,2.741176,961.266094,1.769073,1489.480578,,,2401.735597
97510,2023-07-28 08:00:00,2635.8,2632.6,2637.0,2632.0,5572,0,0.998862,0.998862,0.998864,...,626.809524,2.714286,969.905263,2.871429,916.825871,2.188121,1203.132897,0.300723,8754.231204,2406.972593
97511,2023-07-28 08:10:00,2632.0,2621.4,2633.0,2618.2,32802,1,0.994839,0.994839,0.998786,...,1092.250000,0.333333,7864.200000,2.182540,1201.077818,2.143676,1222.852560,0.380244,6893.997119,2348.332672
97512,2023-07-28 08:20:00,2621.4,2623.6,2626.8,2618.6,12273,1,0.996581,0.996581,0.996132,...,409.937500,0.468750,5597.013333,1.172123,2238.331612,1.956650,1340.863386,0.495891,5290.676126,2390.856595


In [23]:
class MLStrategy(bt.Strategy):
    def next(self):
        global num 
        global model
        X_test = self.data[num].iloc[0]
        prediction = model.predict(X_test)
        if prediction == 1:
            self.buy()
        else:
            self.sell()

def testing_one_period(X_train, X_val, y_train, y_val, test_df, num, cat, args):
    model = CatboostFinModel(args = args)
    model.set_datasets(X_train=X_train, X_val=X_val, y_train=y_train, y_val=y_val)
    model.set_features(numeric_features=num, cat_features=cat)
    model.fit()
    cerebro = bt.Cerebro()
    test_data = bt.feeds.PandasData(dataname=test_df)
    cerebro.adddata(test_data)
    cerebro.addstrategy(MLStrategy)
    cerebro.run()
        
testing_one_period(X_train, X_val, y_train, y_val, test_df, num, cat, args)

0:	learn: 0.6921667	test: 0.6923157	best: 0.6923157 (0)	total: 9.92ms	remaining: 29.7s
1:	learn: 0.6910118	test: 0.6918945	best: 0.6918945 (1)	total: 31ms	remaining: 46.5s
2:	learn: 0.6899846	test: 0.6912008	best: 0.6912008 (2)	total: 40.4ms	remaining: 40.3s
3:	learn: 0.6890973	test: 0.6905404	best: 0.6905404 (3)	total: 50.2ms	remaining: 37.6s
4:	learn: 0.6880961	test: 0.6899959	best: 0.6899959 (4)	total: 60.1ms	remaining: 36s
5:	learn: 0.6871700	test: 0.6895506	best: 0.6895506 (5)	total: 69.1ms	remaining: 34.5s
6:	learn: 0.6862305	test: 0.6890767	best: 0.6890767 (6)	total: 83.2ms	remaining: 35.6s
7:	learn: 0.6853893	test: 0.6883621	best: 0.6883621 (7)	total: 99.6ms	remaining: 37.2s
8:	learn: 0.6845373	test: 0.6877163	best: 0.6877163 (8)	total: 114ms	remaining: 37.8s
9:	learn: 0.6836791	test: 0.6872440	best: 0.6872440 (9)	total: 124ms	remaining: 37.2s
10:	learn: 0.6830464	test: 0.6868485	best: 0.6868485 (10)	total: 131ms	remaining: 35.6s
11:	learn: 0.6824825	test: 0.6864936	best: 0.686

  '''Adds a callback to get messages which would be handled by the
  '''Adds a callback to get messages which would be handled by the


AttributeError: 'numpy.int64' object has no attribute 'to_pydatetime'

In [None]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier

# Загружаем основной датасет
df = pd.read_csv('data.csv', parse_dates=['utc'])

# Разделяем train и test по дате
train_df = df[df['utc'] < '2023-01-01']
test_df = df[df['utc'] >= '2023-01-01']

# Обучаем модель на train_df
X_train = train_df[['open', 'high', 'low', 'close', 'volume']]
y_train = (train_df['close'].shift(-1) > train_df['close']).astype(int)  # Целевая переменная – рост цены

model = GradientBoostingClassifier()
model.fit(X_train, y_train)

# Переобучаем на новых данных каждый день
def daily_update(new_data_path):
    new_data = pd.read_csv(new_data_path, parse_dates=['utc'])
    global train_df, model

    # Добавляем новые данные
    train_df = pd.concat([train_df, new_data])
    X_train = train_df[['open', 'high', 'low', 'close', 'volume']]
    y_train = (train_df['close'].shift(-1) > train_df['close']).astype(int)

    # Обучаем новую модель
    model.fit(X_train, y_train)

# Использование новой модели в Backtrader
class MLStrategy(bt.Strategy):
    def next(self):
        features = [self.data.open[0], self.data.high[0], self.data.low[0], self.data.close[0], self.data.volume[0]]
        prediction = model.predict([features])[0]
        if prediction == 1:
            self.buy()
        else:
            self.sell()

# Тестируем стратегию
cerebro = bt.Cerebro()
test_data = bt.feeds.PandasData(dataname=test_df)
cerebro.adddata(test_data)
cerebro.addstrategy(MLStrategy)
cerebro.run()


In [None]:
import backtrader as bt
import datetime

class MyStrategy(bt.Strategy):
    def next(self):
        # Здесь можно использовать предсказания градиентного бустинга
        if self.data.close[0] > self.data.close[-1]:
            self.buy()
        else:
            self.sell()

# Создаем движок Backtrader
cerebro = bt.Cerebro()

# Загружаем CSV-файл
data = bt.feeds.GenericCSVData(
    dataname='data.csv',  # Укажите путь к вашему CSV-файлу
    dtformat='%Y-%m-%dT%H:%M:%S.%f%z'
    timeframe=bt.TimeFrame.Minutes,  # Указываем, что у нас минутные свечи
    compression=10,  # Если у вас 10-минутные свечи
    openinterest=-1  # Если столбца open interest нет, указываем -1
)

cerebro.adddata(data)
cerebro.addstrategy(MyStrategy)
cerebro.run()
cerebro.plot()
