In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import pandas as pd
import numpy as np
import torch.nn as nn
import torch as th
from torch.nn import functional as F
from trade_tester.env import TradingEnv2Actions
import talib
import os

In [None]:
# Load the 15-minute DOGEUSDT candles and keep only the columns used below.
klines = pd.read_csv('klines/DOGEUSDT_15m.csv')
klines = klines[['open_time', 'open', 'close', 'high', 'low', 'vol', 'trades']]
klines = klines.rename(columns={'open_time': 'date'})
# The timestamp column is parsed as epoch milliseconds.
klines['date'] = pd.to_datetime(klines['date'], unit='ms')

# Bollinger Bands on the close price: 200-bar period, 2 deviations up and down.
klines['bb_upper'], klines['bb_middle'], klines['bb_lower'] = talib.BBANDS(
    klines['close'], timeperiod=200, nbdevup=2, nbdevdn=2
)

# Chronological split: the last 10000 rows are held out for validation and the
# 40000 rows right before them are used for training.
n_rows = len(klines)
klines_train = klines.iloc[n_rows - 50000:n_rows - 10000]
klines_validate = klines.iloc[n_rows - 10000:]

In [None]:
class SaveBestModelCallback(BaseCallback):
    """Checkpoint the model whenever the current rollout's mean reward improves.

    Every ``check_freq`` calls of ``_on_step`` the mean per-step reward of the
    transitions collected so far in the current rollout is compared with the
    best value seen by this callback. On improvement the model is saved to
    ``<save_path>/best_model``.

    Args:
        save_path: Directory that receives the ``best_model`` archive.
        verbose: Verbosity level forwarded to ``BaseCallback``.
        check_freq: Number of ``_on_step`` calls between two checks
            (defaults to the previously hard-coded 1000).

    Raises:
        ValueError: If ``check_freq`` is not a positive integer.
    """

    def __init__(self, save_path: str, verbose: int = 0, check_freq: int = 1000):
        super().__init__(verbose)
        if check_freq <= 0:
            raise ValueError(f"check_freq must be positive, got {check_freq}")
        self.save_path = save_path
        self.check_freq = check_freq
        self.best_mean_reward = -np.inf

    def _on_step(self) -> bool:
        """Save the model if the rollout's mean reward beats the best so far.

        Returns:
            Always ``True`` so that training is never interrupted.
        """
        if self.n_calls % self.check_freq != 0:
            return True

        rollout_buffer = self.locals["rollout_buffer"]
        # Stable-Baselines3 zero-fills the rollout buffer on reset and advances
        # `pos` as transitions are added, so only the first `pos` rows hold
        # rewards of the current rollout. Averaging the whole array (as before)
        # dilutes the mean with the still-empty rows and makes the comparison
        # depend on how full the buffer happens to be.
        if rollout_buffer.full:
            rewards = rollout_buffer.rewards
        else:
            rewards = rollout_buffer.rewards[: rollout_buffer.pos]
        if rewards.size == 0:
            # Nothing collected yet in this rollout; skip instead of averaging
            # an empty array (which would yield NaN and a runtime warning).
            return True

        mean_reward = float(np.mean(rewards))
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            self.model.save(os.path.join(self.save_path, "best_model"))
        return True

In [None]:
class CustomPolicy(BaseFeaturesExtractor):
    """1-D convolutional features extractor for windowed observations.

    Despite its name this class is a features extractor, not a policy: it is
    meant to be passed as ``features_extractor_class`` in ``policy_kwargs``.

    The observation space is read as ``(window_length, channels)``; ``forward``
    swaps these two axes so that ``nn.Conv1d`` convolves along the window.

    Args:
        observation_space: Observation space whose ``shape`` is
            ``(window_length, channels)``.
        features_dim: Size of the feature vector produced by the extractor.

    Raises:
        ValueError: If the window is too short for the three convolutions
            followed by the pooling layer.
    """

    def __init__(self, observation_space, features_dim: int) -> None:
        super().__init__(observation_space, features_dim)
        seq_len = observation_space.shape[0]
        channels = observation_space.shape[1]
        kernel_conv = 8
        pooling_kernel = 2
        conv_out = 32
        dropout_p = 0.2

        # Each unpadded stride-1 convolution shortens the sequence by
        # (kernel_conv - 1); the pooling layer then needs at least
        # `pooling_kernel` elements.
        min_len = 3 * (kernel_conv - 1) + pooling_kernel
        if seq_len < min_len:
            raise ValueError(
                f"window length {seq_len} is too short for the conv stack; "
                f"need at least {min_len}"
            )

        conv_stack = [
            nn.Conv1d(in_channels=channels, out_channels=8, kernel_size=kernel_conv),
            nn.ReLU(),
            nn.Dropout(p=dropout_p),
            nn.Conv1d(in_channels=8, out_channels=16, kernel_size=kernel_conv),
            nn.ReLU(),
            nn.Dropout(p=dropout_p),
            nn.Conv1d(in_channels=16, out_channels=conv_out, kernel_size=kernel_conv),
            nn.ReLU(),
            nn.Dropout(p=dropout_p),
            nn.MaxPool1d(kernel_size=pooling_kernel),
            nn.Flatten(),
        ]

        # Measure the flattened size with a dummy pass instead of computing it
        # by hand: the previous formula accounted for a single convolution and
        # then subtracted a constant, which never matches the real size of the
        # three-convolution stack and made the first forward pass fail.
        with th.no_grad():
            probe = th.zeros(1, channels, seq_len)
            n_flatten = nn.Sequential(*conv_stack)(probe).shape[1]

        # Keep everything in one Sequential named `nn` with the original layer
        # order so that parameter names in the state dict do not change.
        self.nn = nn.Sequential(
            *conv_stack,
            nn.Linear(n_flatten, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.Dropout(p=dropout_p),
            nn.ReLU(),
            nn.Linear(64, features_dim),
        )

    def forward(self, x: th.Tensor) -> th.Tensor:
        """Map a batch of observations to feature vectors.

        Args:
            x: Tensor of shape ``(batch, window_length, channels)``.

        Returns:
            Tensor of shape ``(batch, features_dim)``.
        """
        # Conv1d expects (batch, channels, length).
        x = x.transpose(1, 2)
        return self.nn(x)

In [None]:
# Build the training environment.
# Indicator specs handed to the tester: the three Bollinger Band columns.
indicators = [
    {'name': band, 'color': 'yellow'}
    for band in ('bb_upper', 'bb_middle', 'bb_lower')
]

env_kwargs = {
    'klines': klines_train,
    'window': 300,
    'b_size': 1000,
    'tester': 'BBFutureTester',
    # optional: 'features': ['open', 'high', 'low', 'close'],
    'tester_kwargs': {
        'depo': 1000,
        'TP': 0.5,
        'SL': 0.25,
        'indicators': indicators,
    },
}
num_envs = 1
# One Monitor-wrapped env per slot, all run in the current process.
env = DummyVecEnv(
    [lambda: Monitor(TradingEnv2Actions(**env_kwargs)) for _ in range(num_envs)]
)
# Alternatives that were tried:
#   env = make_vec_env(TradingEnv2Actions, n_envs=4, env_kwargs=env_kwargs)
#   env = TradingEnv2Actions(**env_kwargs)

In [None]:
# Policy configuration: use the 1-D CNN defined above as the features extractor.
policy_kwargs = dict(
    features_extractor_class=CustomPolicy,
    # NOTE(review): the feature vector is sized to the number of actions,
    # which is a very narrow bottleneck — confirm this is intended.
    features_extractor_kwargs=dict(features_dim=env.action_space.n),
    # Stable-Baselines3 forwards `optimizer_kwargs` verbatim to the optimizer
    # constructor, so nesting `optim_cls` / `optim_kwargs` in it raises a
    # TypeError. The optimizer class goes in `optimizer_class`; the learning
    # rate is set through PPO's `learning_rate` argument below.
    optimizer_class=th.optim.Adam,
    normalize_images=False,
)
save_path = "./ppo_best"
best_model_path = os.path.join(save_path, "best_model")

# Resume from the saved checkpoint when one exists. An explicit existence check
# replaces the former bare `except:`, which also swallowed genuine load errors
# and KeyboardInterrupt and silently started training from scratch.
if os.path.exists(best_model_path + ".zip"):
    model = PPO.load(best_model_path, env=env)
else:
    model = PPO(
        policy="CnnPolicy",
        env=env,
        tensorboard_log='tblog',
        policy_kwargs=policy_kwargs,
        learning_rate=3e-4,
        gamma=0.8,
        verbose=1,
        batch_size=256,
        n_steps=4096,
    )
callback = SaveBestModelCallback(save_path)
model.learn(total_timesteps=int(1e5), callback=callback)


In [None]:
# Build the validation environment on the held-out candles.
indicators = [
    dict(name='bb_upper', color='yellow'),
    dict(name='bb_middle', color='yellow'),
    dict(name='bb_lower', color='yellow'),
]

# NOTE(review): unlike the training env, TP/SL and b_size are not passed here
# while `risk` is — confirm the env/tester defaults match the training setup.
env_kwargs = dict(
        klines=klines_validate,
        window=300,
        # b_size=3000,
        tester='BBFutureTester',
        # features=['open', 'high', 'low', 'close'],
        tester_kwargs = dict(
            depo=1000,
            indicators = indicators,
        ),
        risk=0.1,
    )
env = TradingEnv2Actions(**env_kwargs)

# Run one full episode with the trained policy, rendering every step.
obs = env.reset()
done = False
while not done:
    # Use the deterministic action so the backtest is reproducible; by
    # default `predict` samples from the action distribution, giving a
    # different validation run every time.
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()

In [None]:
# Reset the environment to obtain a fresh initial observation for inspection.
obs = env.reset()

In [None]:
# Check that the observation holds exactly 9 * 300 values.
# NOTE(review): reshape re-reads the buffer in memory order and is not a
# transpose — presumably obs is (300, 9), in which case obs.T (as done by
# transpose(1, 2) in CustomPolicy.forward) gives the channel-major view; confirm.
obs.reshape(9, 300).shape


In [None]:
import torch
import torch.nn as nn

# Generate random data for a toy time series (three variables, 168 samples each)
temperature, humidity, pressure = (torch.randn(168) for _ in range(3))

# Combine the three variables into one multivariate series, one column each
data = torch.stack((temperature, humidity, pressure), dim=1)
data.shape
# data[-6:][:, None].reshape(1, 3, 6)
# NOTE(review): `plt` is not imported in the cells visible here — confirm that
# matplotlib.pyplot is imported as `plt` before this cell runs.
plt.plot(temperature)

In [None]:
# Two toy channels (sine and cosine) sampled on a shared time grid.
time = np.arange(0, 100, 0.1)
data_sin = np.sin(time)
data_cos = np.cos(time)
data = np.column_stack((data_sin, data_cos))  # shape: (n_samples, 2)
# Conv1d consumes (channels, length). Transpose the last six samples so that
# row 0 is the sine channel and row 1 the cosine channel; reshape(2, 6) kept
# the same shape but interleaved the two channels within each row.
tens = torch.from_numpy(np.ascontiguousarray(data[-6:].T)).float()
tens.shape

In [None]:
# Hand-computed element count: 16 * ((length - kernel_size) / stride + 1)
# with length=6, kernel_size=3, stride=1 — presumably the expected output size
# of the Conv1d(2, 16, 3) applied to `tens` in the next cell.
16 * ((6 - 3) / 1 + 1)

In [None]:
# Conv1d output length:
#   (sequence_length + 2 * padding - kernel_size) // stride + 1

# Two input channels, sixteen filters of width three; count the output elements.
m = nn.Conv1d(in_channels=2, out_channels=16, kernel_size=3)
m(tens).numel()

In [None]:
# Max-pool the convolution output with a window of two and count what is left.
mp = nn.MaxPool1d(2)
mp(m(tens)).numel()

In [None]:
# Two 9-element ramps placed side by side as columns, then given a leading
# batch axis: the resulting float tensor has shape (1, 9, 2).
a = np.arange(1, 10)
b = np.arange(100, 1000, 100)
c = th.tensor(np.column_stack((a, b))[np.newaxis, ...], dtype=th.float32)
c

In [None]:
# Swap axes 1 and 2 of `c`: shape (1, 9, 2) becomes (1, 2, 9).
c.transpose(1, 2)