In [None]:
# Environment bootstrap: detect whether the notebook runs on Google Colab,
# pick the directory that later cells read from and write to (dataset,
# tensorboard logs, rl-model checkpoints, backtest results, pictures), and
# make sure that directory exists.
import os

try:
    import google.colab  # imported only to probe for the Colab runtime
    IN_COLAB = True
except ImportError:
    # A missing package is the only condition that means "not on Colab";
    # any other failure should surface instead of being silently ignored.
    IN_COLAB = False

if IN_COLAB:
    computed_data_dir = "/content/drive/MyDrive/colab/computed-data"

    from google.colab import drive

    # The data directory lives on Google Drive, so Drive has to be mounted
    # before the directory can be created below.
    drive.mount('/content/drive')
else:
    computed_data_dir = "./computed-data"

# exist_ok=True keeps the cell re-runnable while still reporting genuine
# failures (e.g. permission errors) that a bare `except: pass` would hide.
os.makedirs(computed_data_dir, exist_ok=True)

In [None]:
!pip install binance_historical_data mplfinance stable-baselines3 backtesting tensorboard binance ta scikit-learn line_profiler

In [None]:
# On Colab, fetch a fresh copy of the project repository and put it on the
# import path. Nothing happens when IN_COLAB is False.
if IN_COLAB:
    # Remove any earlier checkout first so `git clone` does not fail on an
    # already existing directory when this cell is re-run.
    !rm -rf ./python-price-predict-experiment
    !git clone https://github.com/Klok-e/python-price-predict-experiment.git

    import sys

    # Prepend the checkout to sys.path -- presumably so that the later
    # `from util import ...` / `from model import ...` cells resolve to it.
    sys.path.insert(0, 'python-price-predict-experiment')

In [None]:
from util import download_and_process_data_if_available, create_synthetic_price_data, create_random_walk_price_data, split_tickers_train_test
import numpy as np

# Seed NumPy's global random generator so code that relies on it is repeatable.
np.random.seed(42)

# Load the ticker data kept under `<computed_data_dir>/dataset`.
# NOTE(review): `reload=False` presumably reuses an already downloaded and
# processed copy when one is available -- confirm in util.
df_tickers = download_and_process_data_if_available(
    f"{computed_data_dir}/dataset", reload=False)
# Alternative data sources for experiments (uncomment one to use it instead):
# df_tickers = create_synthetic_price_data()
# df_tickers = create_random_walk_price_data()
# Last expression of the cell: shows element [0][0] of the loaded data as a
# quick visual sanity check.
df_tickers[0][0]

In [None]:

# Split the loaded tickers into a training set and a test set.
# NOTE(review): the meaning of the second argument (16) is defined by
# `split_tickers_train_test` in util -- confirm before changing it.
df_tickers_train, df_tickers_test = split_tickers_train_test(
    df_tickers, 16)
# Drop the combined reference; later cells only use the two splits.
# Side effect: this cell cannot be re-run on its own afterwards -- the
# loading cell has to be executed again first.
del df_tickers

In [None]:
# Load the TensorBoard notebook extension and open TensorBoard on the
# `tensorboard` sub-directory of computed_data_dir.
# `$computed_data_dir` is expanded by IPython from the Python variable.
%load_ext tensorboard
%tensorboard --logdir "$computed_data_dir/tensorboard"

In [None]:
from model import (
    MLPExtractor,
    LSTMExtractor,
    SequenceCNNExtractor,
    TransformerExtractor,
)
from train_model import train_model, model_eval_dead_relu
from torch import nn

# ---------------------------------------------------------------------------
# Settings shared by every feature-extractor variant below.
# ---------------------------------------------------------------------------
arch = [512]
window_size = 128

# --- Variant: MLP feature extractor (currently disabled) -------------------
# hidden_sizes = [1024, 2048, 1024, 512]
# model_save_name = f"hs{hidden_sizes}_net{arch}_ws{window_size}"
# policy_kwargs = dict(
#     features_extractor_class=MLPExtractor,
#     features_extractor_kwargs=dict(hidden_sizes=hidden_sizes),
# )

# --- Variant: LSTM feature extractor (active) ------------------------------
hidden_size = 512
lstm_layers = 1
linear_arch = [64, 64]
linear_arch_after = [1024, 2048, 1024]
# NOTE(review): activation_fn only contributes to model_save_name in this
# cell; it is not forwarded through policy_kwargs. Confirm that the extractor
# really uses this activation, otherwise the saved name is misleading.
activation_fn = nn.SiLU

# The save name encodes the hyper-parameters. The "ws<digits>" suffix is
# parsed back by the backtest and plotting cells to recover the window size.
model_save_name = (
    f"hs{hidden_size}_lstm{lstm_layers}"
    f"_before{linear_arch}_after{linear_arch_after}"
    f"_act[{activation_fn.__name__}]"
    f"_net{arch}_ws{window_size}"
)
policy_kwargs = {
    "features_extractor_class": LSTMExtractor,
    "features_extractor_kwargs": {
        "lstm_hidden_size": hidden_size,
        "lstm_layers": lstm_layers,
        "linear_arch": linear_arch,
        "linear_arch_after": linear_arch_after,
    },
}

# --- Variant: 1-D CNN feature extractor (currently disabled) ---------------
# cnn_channels = [32, 64, 128]
# model_save_name = f"cnn{cnn_channels}_net{arch}_ws{window_size}"
# policy_kwargs = dict(
#     features_extractor_class=SequenceCNNExtractor,
#     features_extractor_kwargs=dict(cnn_channels=cnn_channels, kernel_size=5, stride=2),
# )

# Train with the active configuration.
# NOTE(review): 10_000_000 and 16 are passed positionally; their meaning is
# defined by `train_model` in train_model.py -- confirm before changing them.
train_model(
    df_tickers_train, df_tickers_test,
    arch, 10_000_000, window_size, 16,
    computed_data_dir, model_save_name, policy_kwargs,
)

# Follow-up evaluation of the same model on the test tickers.
model_eval_dead_relu(
    df_tickers_test,
    arch, window_size,
    computed_data_dir, model_save_name, policy_kwargs,
    time_delta_days=3,
)

In [None]:
from backtest import create_backtest_model_with_data, create_buy_and_hold_strategy
from model_load_backtest import run_backtest_on_all_tickers, print_metrics
from stable_baselines3 import PPO
from util import get_name_max_timesteps
import re
import os

!rm -rf "$computed_data_dir/backtest-results/"

dirnames = next(os.walk(f"{computed_data_dir}/rl-model/"), (None, [], None))[
    1
]  # [] if no file
for i, dir in enumerate(sorted(dirnames)):
    models_dir = f"{computed_data_dir}/rl-model/{dir}/checkpoints"
    model_name = get_name_max_timesteps(models_dir)
    print(model_name)

    window_size = int(re.match(".*?ws(\\d+).*", dir).group(1))
    print(window_size)

    rl_model = PPO.load(f"{models_dir}/{model_name}")
    metrics = run_backtest_on_all_tickers(
        df_tickers_test,
        f"Model {dir}",
        window_size,
        lambda df, scaler, model_in_observations, start, end: create_backtest_model_with_data(
            rl_model, df, scaler, start, end, model_in_observations, print_actions=True
        ),
        computed_data_dir,
    )
    print_metrics(*metrics)

metrics = run_backtest_on_all_tickers(
    df_tickers_test,
    "Buy and Hold",
    64,
    lambda df, scaler, model_in_observations, start, end: create_buy_and_hold_strategy(
        df, start, end
    ),
    computed_data_dir,
)
print_metrics(*metrics)

In [None]:
import numpy as np
from util import load_pickle
import os
import matplotlib.pyplot as plt
from matplotlib import dates as mdates
from trading_metrics import calculate_metrics
import re
import pandas as pd


def plot_equity(model_name, model_equities, ax1, ax2, plot_hist,
                start_cash=1_000_000, tickers_count=5):
    """Plot the summed, smoothed equity curve of one strategy and print its metrics.

    Args:
        model_name: Label used for the plotted line and the printed header.
            If it contains "ws<digits>", those digits are used as the
            observation window size; otherwise 64 is assumed.
        model_equities: Sequence of per-ticker results. For every item,
            ``item[1]`` is the trades table (its length and its "EntryTime" /
            "ExitTime" columns are read) and ``item[2]`` holds the equity
            curve in its "Equity" column.
        ax1: Axes that receives the relative-return line.
        ax2: Axes that receives the trade-time histogram.
        plot_hist: When true (and at least one trade exists), draw the
            histogram of entry and exit times on ``ax2``.
        start_cash: Starting cash assumed for each summed equity curve.
            NOTE(review): must match the cash configured in the backtest.
        tickers_count: Number of equity curves the baseline capital assumes.
            NOTE(review): the baseline is only correct when this equals
            ``len(model_equities)`` -- confirm for the data set in use.

    Raises:
        ValueError: If ``model_equities`` is empty.
    """
    ws_match = re.search("ws(\\d+)", model_name)
    in_obs = int(ws_match.group(1)) if ws_match else 64
    # Leading rows dropped from every equity curve: 1024 plus the window size.
    # NOTE(review): presumably a warm-up period of the backtest -- confirm.
    skip_steps = 1024 + in_obs

    sum_equity = None
    buy_trades = []
    sell_trades = []
    for item in model_equities:
        trades = item[1]
        equity = item[2]["Equity"].iloc[skip_steps:]

        if sum_equity is None:
            # Copy so the in-place additions below never touch the caller's data.
            sum_equity = equity.copy()
        else:
            # Kept as an in-place add on purpose: the running sum retains the
            # index of the first curve.
            sum_equity += equity

        if len(trades) > 0:
            buy_trades.append(trades["EntryTime"])
            sell_trades.append(trades["ExitTime"])

    if sum_equity is None:
        # Without this guard the code below fails with an opaque error on None.
        raise ValueError(f"no backtest results to plot for '{model_name}'")

    if len(buy_trades) > 0:
        buy_trades = pd.concat(buy_trades)
        sell_trades = pd.concat(sell_trades)

        combined_trades = pd.DataFrame({
            "Buy trades": buy_trades,
            "Sell trades": sell_trades
        })

    initial_capital = start_cash * tickers_count
    trades_count = sum(len(item[1]) for item in model_equities)
    # Metrics are computed on the raw summed curve, before smoothing.
    metrics = calculate_metrics(sum_equity, trades_count, initial_capital)

    # Smooth with a 256-step rolling mean and drop the leading rows.
    sum_equity = sum_equity.rolling(window=256).mean()[256:]

    # Return relative to the initial capital.
    y = (sum_equity - initial_capital) / initial_capital

    # Plot line on the primary axis
    ax1.plot(y, markevery=1024, label=model_name)

    # Plot histogram on the secondary axis; combined_trades only exists when
    # at least one trade was collected, hence the second condition.
    if plot_hist and len(buy_trades) > 0:
        ax2.hist(combined_trades, bins=32, alpha=0.5, label=["Buy trades", "Sell trades"], color=['green', 'red'])

    print()
    print(f"{model_name} metrics:")
    print(f"cumulative_return={metrics[0]:.4f}, "
          f"max_earning_rate={metrics[1]:.4f}, "
          f"maximum_pullback={metrics[2]:.4f}, "
          f"average_profitability_per_trade={metrics[3]:.4f}, "
          f"sharpe_ratio={metrics[4]:.4f}")
    print()


# Group the pickled backtest results by strategy name, then save one figure
# per model (the model plotted together with the buy-and-hold baseline) under
# `<computed_data_dir>/pics`.
backtest_dir = f"{computed_data_dir}/backtest-results"
pics_dir = f"{computed_data_dir}/pics"

filenames = next(os.walk(f"{backtest_dir}/"), (None, None, []))[2]  # [] if the directory is missing
results = {}
for filename in filenames:
    (trades, equity_curve) = load_pickle(f"{backtest_dir}/{filename}")
    # The text after the last "_" (minus the extension) is the coin; everything
    # before it is the strategy name, which may itself contain "_".
    parts = filename.split("_")
    model_name = "_".join(parts[:-1])
    coin = parts[-1].split('.')[0]

    results.setdefault(model_name, []).append((coin, trades, equity_curve, filename))

buy_hold = "Buy and Hold"
if buy_hold not in results:
    # Same exception type as a plain dict lookup, but with an actionable message.
    raise KeyError(
        f"no '{buy_hold}' results found in {backtest_dir}; run the backtest cell first"
    )
# Take the baseline out of `results` so the loop below only sees real models.
buy_hold_res = results.pop(buy_hold)

# Create the output directory once. exist_ok=True keeps re-runs working while
# genuine failures (e.g. permissions) are reported instead of being swallowed.
os.makedirs(pics_dir, exist_ok=True)

plt.figure(figsize=(10, 7))

# A single pair of axes is reused for every figure and cleared after each save.
ax1 = plt.gca()
ax2 = ax1.twinx()

for i, model in enumerate(sorted(results)):
    plot_equity(model, results[model], ax1, ax2, True)

    plot_equity(buy_hold, buy_hold_res, ax1, ax2, False)

    ax2.set_ylim([0, max(ax2.get_ylim()[1], ax1.get_ylim()[1])])

    ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    ax1.xaxis.set_major_locator(mdates.AutoDateLocator())
    plt.gcf().autofmt_xdate()
    ax1.legend(loc="upper left")
    ax2.legend(loc="upper right")
    plt.tight_layout()

    # plt.show()

    plt.savefig(f"{pics_dir}/figure{i}.png")
    # Reset both axes so the next model starts from an empty plot.
    ax1.cla()
    ax2.cla()