In [30]:
from tester_trade_log.data_iterator import DataIterator

from ta.momentum import RSIIndicator
from tqdm import tqdm

import datetime
import pandas as pd

# Sizes of the train and test splits, counted in per-day frames (see the note below).
# Their sum is also used as the progress-bar total in get_dfs.
TRAIN_LENGTH = 199
TEST_LENGTH = 50


# 249 days in total:
#   train = 80% = 199 days
#   test  = 20% = 50 days
def get_dfs(ticker, length, alpha, predict_period=10, period=datetime.timedelta(minutes=1), directory="analysis/data/tickers_trade_log"):
    """Build one feature/target DataFrame per item yielded by ``DataIterator``.

    Each yielded item is unpacked as ``(day, (time, close, high, low, volume))``;
    only ``close`` is used. Presumably one item corresponds to one trading
    day -- confirm against ``DataIterator``.

    Args:
        ticker: Ticker passed through to ``DataIterator``.
        length: Window passed to ``RSIIndicator``; also embedded in the RSI
            column name ``"rsi ({length} length)"``.
        alpha: Multiplier of the frame's ``price_change`` standard deviation.
            Rows whose absolute ``price_change`` does not exceed
            ``alpha * std`` are discarded.
        predict_period: Number of rows to look ahead. ``price_change`` in a
            row is the close ``predict_period`` rows later minus the close
            in that row.
        period: Passed through to ``DataIterator``.
        directory: Passed through to ``DataIterator``.

    Returns:
        A list of DataFrames with columns ``price_change`` and
        ``"rsi ({length} length)"``, filtered as described above and with
        rows containing NaN removed.
    """
    rsi_column = f"rsi ({length} length)"
    # The progress bar total is the expected number of items, not a hard limit.
    expected_items = TRAIN_LENGTH + TEST_LENGTH
    source = DataIterator(directory, ticker, period)

    daily_frames = []
    for _index, (_day, (_time, close, _high, _low, _volume)) in tqdm(enumerate(source), total=expected_items):
        closes = pd.Series(close)
        # diff() looks backwards; shifting by -predict_period turns it into a forward-looking change.
        future_change = closes.diff(periods=predict_period).shift(-predict_period)
        frame = pd.DataFrame({"price_change": future_change})
        frame[rsi_column] = RSIIndicator(closes, length).rsi()

        # Keep only moves that are large relative to this frame's own volatility.
        threshold = alpha * frame.price_change.std()
        significant = frame[frame.price_change.abs() > threshold]
        daily_frames.append(significant.dropna())
    return daily_frames


def get_X_y(df, is_train):
    """Split a frame with a ``price_change`` column into features and class labels.

    ``price_change`` is replaced by its sign: ``1`` for a positive value,
    ``-1`` for a negative value and ``0`` otherwise. Rows labelled ``0`` are
    then removed. The input frame is not modified.

    Args:
        df: DataFrame containing a ``price_change`` column plus feature columns.
        is_train: When true, print the count and percentage of every class
            (including ``0``) before the ``0`` rows are dropped.

    Returns:
        ``(X, y)`` where ``X`` is the remaining rows without ``price_change``
        and ``y`` is the corresponding ``price_change`` label Series.
    """
    def direction(change):
        # +1 / -1 / 0; a value failing both comparisons (e.g. NaN) maps to 0.
        return int(change > 0) - int(change < 0)

    labelled = df.copy()
    labelled.price_change = labelled.price_change.apply(direction)

    class_counts = labelled.price_change.value_counts()
    n_rows = class_counts.sum()
    if is_train:
        for label, count in class_counts.items():
            print(f"{label}:\t{count}\t({count / n_rows * 100:.1f}%)")

    directional = labelled[labelled.price_change != 0]
    features = directional.drop(columns=["price_change"])
    target = directional.price_change
    return features, target

In [31]:
import sklearn.tree as tree
import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

In [32]:
# Load the per-day frames for one ticker and split them into train and test sets.
print("Read data")
# Experiment parameters.
ticker = "SBER"
# Multiplier of the price_change standard deviation used by get_dfs to drop small moves.
alpha = 0.01
# RSI window length; also part of the RSI column name built in get_dfs.
length = 14
# Number of minutes wrapped into the timedelta handed to get_dfs as `period`
# (presumably the bar size used by DataIterator -- TODO confirm).
period = 14
dfs = get_dfs(ticker, length, alpha, directory="../../analysis/data/tickers_trade_log", period=datetime.timedelta(minutes=period))
# Split the list of frames in order (shuffle=False): the first TRAIN_LENGTH frames
# go to train and the remainder to test.
train, test = train_test_split(dfs, train_size=TRAIN_LENGTH, shuffle=False)
# Merge each split into a single frame with a fresh 0..n-1 index.
train_df = pd.concat(train, ignore_index=True)
test_df = pd.concat(test, ignore_index=True)

Read data


KeyboardInterrupt: 

In [None]:
# Convert both splits into features and +1/-1 labels.
# Only the training call (is_train=True) prints the class distribution.
X_train, y_train = get_X_y(train_df, True)
X_test, y_test = get_X_y(test_df, False)

In [None]:
# Shallow (depth 2) decision tree; class_weight="balanced" reweights the classes
# so that an uneven up/down ratio does not dominate the fit.
model = tree.DecisionTreeClassifier(max_depth=2, class_weight="balanced")
print("Train model")
model.fit(X_train, y_train)
print("Test model")
# For a classifier, score() is the mean accuracy on the given data.
score = model.score(X_test, y_test)
print(f"Score = {score}")
# normalize="pred" normalizes the confusion matrix over the predicted labels (columns).
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test, normalize="pred", cmap="CMRmap")
plt.show()

In [None]:
# Print the fitted tree as text rules.
# Feature names come from the training columns rather than being rebuilt from the
# global `length`: if `length` is edited and this cell is re-run without refitting,
# the rebuilt name would no longer match what the model was trained on.
text = tree.export_text(model, feature_names=list(X_train.columns))
print(text)