# Итоговый проект по курсу "ML для финансового анализа"

*Автор - Карданов М. Т.*

## Задача проекта:

Разработать торгового робота, представляющего из себя ансамбль моделей технического анализа и машинного обучения, анализирующего данные о стоимости акций компании Google за последний год и формулирующего торговые действия.

## План проекта:

1. Загрузка данных;
1. Feature engineering;
1. Построение модели технического анализа;
1. Построение модели машинного обучения;
1. Проверка качества ансамбля на тестовой выборке;
1. Составление презентации, защита.

## 0: Импорты

In [500]:
import pandas as pd
import sklearn
import numpy as np
import yfinance
import os
import warnings
from backtesting import Strategy, Backtest
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
import itertools
import talib

In [501]:
RANDOM_STATE = 12345

## 1: Загрузка данных

In [502]:
def download_data() -> pd.DataFrame:

    data = yfinance.download(tickers="GOOG", 
                             period="5y", 
                             interval="1d", 
                             multi_level_index=False)
    data.index = pd.to_datetime(data.index)

    return data

## 2: Feature-engineering

In [503]:
def add_date_features(data: pd.DataFrame) -> pd.DataFrame:

    data["Date_time"] = data.index

    data['Year']=data['Date_time'].dt.strftime('%Y')
    data['Month']=data['Date_time'].dt.strftime('%m')
    data['Day']=data['Date_time'].dt.strftime('%d')

    return data

В качестве признаков технического анализа я решил выбрать 5-ти, 10-ти и 20-барные MA и EMA, в основном из-за того, что они просты в реализации и, одновременно с этим, несут в себе много информации о более долгосрочном поведении цены.

In [504]:
def add_price_features(data: pd.DataFrame) -> pd.DataFrame:

    data["5_day_MA"] = data["Close"].rolling(window=5, min_periods=1).mean()
    data["10_day_MA"] = data["Close"].rolling(window=10, min_periods=1).mean()
    data["20_day_MA"] = data["Close"].rolling(window=20, min_periods=1).mean()

    data["5_day_EMA"] = data["Close"].ewm(span=5, min_periods=1).mean()
    data["10_day_EMA"] = data["Close"].ewm(span=10, min_periods=1).mean()
    data["20_day_EMA"] = data["Close"].ewm(span=20, min_periods=1).mean()

    return data

In [505]:
def add_target(data: pd.DataFrame) -> pd.DataFrame:
    
    data["Target"] = pd.Series(0)
    data["Close_in_3_days"] = data["Close"].shift(-3)

    data.loc[(data["Close_in_3_days"] - data["Close"]) >= 3, "Target"] = 1
    data.loc[(data["Close_in_3_days"] - data["Close"]) <= -3, "Target"] = -1
    data.loc[(-3 < (data["Close_in_3_days"] - data["Close"])) & ((data["Close_in_3_days"] - data["Close"]) < 3), "Target"] = 0

    data = data.dropna(subset=["Target"])

    data = data.drop("Close_in_3_days", axis=1)

    return data

In [506]:
def save_data(data: pd.DataFrame, filename: str = "data/processed_data.csv") -> None:

    if os.path.exists(filename):
        data.to_csv(filename, mode='a', header=False, index=False)
    else:
        data.to_csv(filename, mode='w', header=True, index=False)

In [507]:
def load_data():
    data = download_data()
    data = add_date_features(data)
    data = add_price_features(data)
    data = add_target(data)
    save_data(data)

## 3: Модель технического анализа

В качестве модели технического анализа я выбрал стохастический осцилятор, поскольку, **ДОБАВИТЬ**

In [508]:
class StochOscilatorStrategy(Strategy):

    def init(self):
        self.signal = self.I(lambda: self.data.Signal)
        self.previous_signal = 0
        self.size = 0.1

    def next(self):
        current_signal = self.signal[-1]

        if current_signal != self.previous_signal:
            if current_signal == 1:
                if self.position.is_short:
                    self.position.close()
                    
                if not self.position.is_long:
                    self.buy(size=self.size)
                    
            elif current_signal == -1:
                if self.position.is_long:
                    self.position.close()
                   
                if not self.position.is_short:
                    self.sell(size=self.size)
                    
            elif current_signal == 0:
                if self.position:
                    self.position.close()

        self.previous_signal = current_signal  

In [509]:
def prepare_data_for_backtest():

    data = pd.read_csv("data/processed_data.csv")

    return data

In [510]:
def apply_strategy(data: pd.DataFrame, params: dict) -> pd.DataFrame:
    
    df = data.copy()

    fastk_period = params["fastk_period"]
    slowk_period = params["slowk_period"] 
    slowd_period = params["slowd_period"]

    df["Slowk"], df["Slowd"] = talib.STOCH(df["High"], df["Low"], df["Close"],
                                                        fastk_period=fastk_period, slowk_period=slowk_period, 
                                                        slowd_period=slowd_period)
    df["Signal"] = 0

    df.loc[df['Slowk'] > df['Slowd'], 'Signal'] = 1
    df.loc[df['Slowk'] < df['Slowd'], 'Signal'] = -1

    return df[["Open", "High", "Low", "Close", "Volume", "Signal"]]

In [511]:
def backtest_strategy(data, params, strategy_class=StochOscilatorStrategy, plot=False):
    
    data = apply_strategy(data, params)
    
    bt_data = data.copy()

    bt = Backtest(bt_data, strategy_class, cash=500000, commission=.002, exclusive_orders=True, margin=0.1)

    stats = bt.run()
    if plot:
        bt.plot(
        plot_equity=True,
        plot_drawdown=True,
        relative_equity=False,
        )

    return stats

In [512]:
def get_best_strategy(data):
    
    fastk_period = [2, 3, 5, 10, 20, 30, 40]
    slowk_period = [2, 3, 5, 10, 20, 30, 40]
    slowd_period = [3, 6, 9, 12, 18, 24, 36]

    best_params = None
    best_performance = -float('inf') 

    for fastk_period_curr, slowk_period_curr, slowd_period_curr in itertools.product(fastk_period, slowk_period, slowd_period):
        
        params = {
            "fastk_period": fastk_period_curr, 
            "slowk_period": slowk_period_curr, 
            "slowd_period": slowd_period_curr
        }
        
        stats = backtest_strategy(data.copy(), params)

        performance = stats['Return [%]']
        
        if performance > best_performance:
            best_performance = performance
            best_params = params

    print(f"Best Performance: {best_performance}")
    print(f"Best Parameters: {best_params}")

    return best_params

In [513]:
def run_optimization(data, train_size, test_size):

    signals = pd.Series()

    num_iterations = (len(data) - train_size) // test_size

    for i in range(num_iterations + 1):
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            start_train = i * test_size
            end_train = start_train + train_size
            start_test = end_train
            end_test = start_test + test_size
    
            if end_test > len(data):
                end_test = len(data)
    
            train_data = data.iloc[start_train:end_train].copy()
            test_data = data.iloc[start_test:end_test].copy()
    
            best_params = get_best_strategy(train_data)
    
            combined_data = pd.concat([train_data, test_data]).reset_index(drop=True)
            combined_with_signal = apply_strategy(combined_data.copy(), best_params)
            test_with_signal = combined_with_signal.iloc[-test_size:].copy()

            signals = pd.concat([signals, test_with_signal], ignore_index=True)

        return best_params

In [514]:
def optimize_strategy():
    load_data()
    data = prepare_data_for_backtest()
    best_params = run_optimization(data=data, train_size=200, test_size=50)
    signals_osc = apply_strategy(data=data, params=best_params)
    return signals_osc, best_params

In [515]:
def backtest_best_model(data_with_signal, params):
    
    stats = backtest_strategy(data_with_signal, params)
    print(stats[:27])

    return stats

In [516]:
with warnings.catch_warnings():
    warnings.simplefilter(action="ignore")
    preds_osc, best_params = optimize_strategy()
    backtest_best_model(data_with_signal=preds_osc, params=best_params)

[*********************100%***********************]  1 of 1 completed


Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Backtest.run:   0%|          | 0/199 [00:00<?, ?bar/s]

Best Performance: 46.123430642921456
Best Parameters: {'fastk_period': 5, 'slowk_period': 20, 'slowd_period': 12}


Backtest.run:   0%|          | 0/3758 [00:00<?, ?bar/s]

Start                               0.0
End                              3758.0
Duration                         3758.0
Exposure Time [%]             98.935887
Equity Final [$]           35889.232266
Equity Peak [$]           826527.901108
Commissions [$]           335489.724154
Return [%]                   -92.822154
Buy & Hold Return [%]        141.922149
Return (Ann.) [%]                   0.0
Volatility (Ann.) [%]               NaN
Sharpe Ratio                        NaN
Sortino Ratio                       NaN
Calmar Ratio                        0.0
Alpha [%]                    -65.744637
Beta                          -0.190791
Max. Drawdown [%]            -95.898477
Avg. Drawdown [%]             -6.508115
Max. Drawdown Duration           3505.0
Avg. Drawdown Duration       154.333333
# Trades                          325.0
Win Rate [%]                  36.307692
Best Trade [%]                57.433036
Worst Trade [%]              -11.280413
Avg. Trade [%]                 -0.41887


In [517]:
preds_osc = preds_osc["Signal"]
preds_osc

0       0
1       0
2       0
3       0
4       0
       ..
3754   -1
3755   -1
3756   -1
3757   -1
3758   -1
Name: Signal, Length: 3759, dtype: int64

## 4: Модель машинного обучения

Мне кажется, что мне, как начинающему кванту, можно для начала построить систему с не-нейросетевой моделью, чтобы на ограниченных ресурсах, имеющихся в моём распоряжении, добиться, насколько это возможно, быстрой выдачи качественных предсказаний. Поэтому я остановился на модели градиентного бустинга из библиотеки sklearn.

In [518]:
def prepare_data_for_training(test_days: int=500) -> list:

    data = pd.read_csv("data/processed_data.csv")
    data["Date_time"] = pd.to_datetime(data["Date_time"])

    test_start_date = data["Date_time"].max() - pd.DateOffset(days=test_days)

    train_data = data[data["Date_time"] < test_start_date]
    test_data = data[data["Date_time"] >= test_start_date]

    train_data = train_data.drop("Date_time", axis=1)
    test_data = test_data.drop("Date_time", axis=1)

    y_train = train_data["Target"]
    y_test = test_data["Target"]
    X_train = train_data.drop("Target", axis=1)
    X_test = test_data.drop("Target", axis=1)

    return [X_train, y_train, X_test, y_test]

In [519]:
def train_model(X_train: pd.DataFrame, y_train: pd.Series) -> GradientBoostingClassifier:

    model = GradientBoostingClassifier(random_state=RANDOM_STATE, max_depth=10)
    cv = GridSearchCV(estimator=model, param_grid={
                                       "n_estimators" : [5, 20, 50, 75, 100],
                                       "learning_rate" : [0.0001, 0.001, 0.01, 0.05, 0.08, 0.1, 0.2, 0.5, 1]
                                       }, verbose=2, n_jobs=-1, cv=5)

    cv.fit(X_train, y_train)
    best_model = cv.best_estimator_

    return best_model

In [520]:
def test_model(X_test: pd.DataFrame, y_test: pd.Series, model: GradientBoostingClassifier) -> GradientBoostingClassifier:

    preds = model.predict(X_test)

    test_acc = sklearn.metrics.accuracy_score(y_test, preds)
    test_prec = sklearn.metrics.precision_score(y_test, preds, average="weighted")
    test_rec = sklearn.metrics.recall_score(y_test, preds, average="weighted")

    print(f"Точность модели на тестовой выборке составила {test_acc}")
    print(f"Precision модели на тестовой выборке составила {test_prec}")
    print(f"Recall модели на тестовой выборке составил {test_rec}")

    return model, preds, y_test

In [521]:
def train_evaluate() -> GradientBoostingClassifier:
    data_list = prepare_data_for_training()
    trained_model = train_model(X_train=data_list[0], y_train=data_list[1])
    tested_model, preds_model, y_test = test_model(X_test=data_list[2], y_test=data_list[3], model=trained_model)
    return tested_model, preds_model, y_test

In [522]:
model_boosting, preds_boosting, y_test = train_evaluate()

Fitting 5 folds for each of 45 candidates, totalling 225 fits
Точность модели на тестовой выборке составила 0.2807017543859649
Precision модели на тестовой выборке составила 0.3261706887437882
Recall модели на тестовой выборке составил 0.2807017543859649


In [523]:
preds_boosting = pd.Series(preds_boosting)
preds_boosting

0      -1.0
1       0.0
2       0.0
3       0.0
4       1.0
       ... 
1021   -1.0
1022   -1.0
1023   -1.0
1024   -1.0
1025   -1.0
Length: 1026, dtype: float64

## 5: Проверка качества работы ансамбля на тестовой выборке

In [None]:
def generate_ensemble_predictions(preds_boosting, preds_osc):
    
    preds_osc = pd.Series(preds_osc.iloc[(preds_osc.shape[0] - preds_boosting.shape[0]):]).reset_index(drop=True)
    preds_ensemble = round(preds_boosting*0.5 + preds_osc*0.5)

    return preds_ensemble

In [563]:
def calculate_metrics(preds_ensemble, y_test):

    test_acc = sklearn.metrics.accuracy_score(y_test, preds_ensemble)
    test_prec = sklearn.metrics.precision_score(y_test, preds_ensemble, average="weighted")
    test_rec = sklearn.metrics.recall_score(y_test, preds_ensemble, average="weighted")

    print(f"Точность ансамбля на тестовой выборке составила {test_acc}")
    print(f"Precision ансамбля на тестовой выборке составила {test_prec}")
    print(f"Recall ансамбля на тестовой выборке составил {test_rec}")

In [564]:
def evaluate_ensemble(preds_boosting, preds_osc, y_test):
    preds_ensemble = generate_ensemble_predictions(preds_boosting=preds_boosting, preds_osc=preds_osc)
    calculate_metrics(preds_ensemble=preds_ensemble, y_test=y_test)

In [569]:
evaluate_ensemble(preds_boosting, preds_osc, y_test)

Точность ансамбля на тестовой выборке составила 0.2807017543859649
Precision ансамбля на тестовой выборке составила 0.3261706887437882
Recall ансамбля на тестовой выборке составил 0.2807017543859649
