In [None]:
from tensortrade.oms.instruments import Instrument
from tensortrade.env.default.actions import BSH
from tensortrade.env.default.rewards import PBR
from tensortrade.env import default
from tensortrade.env.generic import Renderer
from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio

from tensortradeExtension.env.generic.components.renderer.positionChangeChart import PositionChangeChart

from tensortrade.agents import DQNAgent

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Notebook-wide settings.
# n_steps, n_episodes, render_interval, memory_capacity and save_path are the
# arguments of the (currently commented-out) agent.train(...) call below.
n_steps = 1000
n_episodes = 10
render_interval = 1000
# Passed as window_size to default.create() inside create_env().
window_size = 30
# Replay-memory size, derived as ten times n_steps.
memory_capacity = n_steps * 10
save_path = "models/tests/sinewave/dqn"

In [None]:
# Set up the two instruments traded in this notebook.
# create_env() funds a wallet with 1000 * USD and an empty wallet with 0 * TTC,
# and uses USD as the first argument of the Portfolio.
# NOTE(review): the second argument (2 / 8) is presumably the decimal
# precision of the instrument -- confirm against the TensorTrade Instrument API.
USD = Instrument("USD", 2, "U.S. Dollar")
TTC = Instrument("TTC", 8, "TensorTrade Coin")

In [None]:
# Generate the renderer
class PositionChangeChart(Renderer):
    """Renderer that plots the price with position changes next to portfolio performance.

    NOTE: this notebook-local class shadows the ``PositionChangeChart`` imported
    from ``tensortradeExtension`` in the imports cell.

    Parameters
    ----------
    color : str
        Matplotlib color used for the price line (default ``"orange"``).
    """

    def __init__(self, color: str = "orange"):
        # Honor the caller's choice; this used to be hard-coded to "orange",
        # which silently ignored the argument.
        self.color = color

    def render(self, env, **kwargs):
        """Draw a two-panel "Performance" figure for the given environment.

        Left panel: the ``price`` column of ``env.observer.renderer_history``
        with a green marker wherever the recorded action goes from 0 to 1
        (buy) and a red marker for every other action change (sell).
        Right panel: ``env.action_scheme.portfolio.performance`` plotted as a
        DataFrame, titled "Net Worth".

        Parameters
        ----------
        env
            Environment exposing ``observer.renderer_history`` (records with
            ``action`` and ``price`` fields) and ``action_scheme.portfolio``.
        **kwargs
            Accepted for interface compatibility; not used.
        """
        history = pd.DataFrame(env.observer.renderer_history)

        actions = list(history.action)
        prices = list(history.price)

        buy = {}
        sell = {}

        # Compare each action with its successor; a change marks a trade at step i.
        for i in range(len(actions) - 1):
            current_action = actions[i]
            next_action = actions[i + 1]

            if current_action != next_action:
                if current_action == 0 and next_action == 1:
                    buy[i] = prices[i]
                else:
                    sell[i] = prices[i]

        # Explicit dtype keeps an episode without trades from producing a
        # dtype-less empty Series (which pandas warns about).
        buy = pd.Series(buy, dtype="float64")
        sell = pd.Series(sell, dtype="float64")

        fig, axs = plt.subplots(1, 2, figsize=(15, 5))

        fig.suptitle("Performance")

        axs[0].plot(np.arange(len(prices)), prices, label="price", color=self.color)
        axs[0].scatter(buy.index, buy.values, marker="^", color="green")
        axs[0].scatter(sell.index, sell.values, marker="^", color="red")
        axs[0].set_title("Trading Chart")

        # from_dict is a classmethod; call it on the class rather than on a
        # throwaway empty DataFrame instance.
        performance_df = pd.DataFrame.from_dict(env.action_scheme.portfolio.performance, orient='index')
        performance_df.plot(ax=axs[1])
        axs[1].set_title("Net Worth")

        plt.show()

In [None]:
# Generate the data stream
def generate_data_stream(n_points=1001):
    """Build the synthetic sine-wave price series and its TensorTrade stream.

    Parameters
    ----------
    n_points : int
        Number of equal steps used to sample one period ``[0, 2*pi)``.
        Defaults to 1001, the value previously hard-coded here.

    Returns
    -------
    list
        ``[price_stream, y]`` where ``y`` is the numpy array of prices and
        ``price_stream`` is a ``Stream`` sourced from ``y`` and renamed
        "USD-TTC".
    """
    # Sample one full period in n_points equal steps.
    x = np.arange(0, 2 * np.pi, 2 * np.pi / n_points)
    # Three oscillations with amplitude 50 around 100, so prices stay in [50, 150].
    y = 50 * np.sin(3 * x) + 100

    price_stream = Stream.source(y, dtype="float").rename("USD-TTC")

    return [price_stream, y]

# Smoke check: as the last expression of the cell, the returned
# [price_stream, y] pair is displayed as the cell output. The result is not
# stored; create_env() calls generate_data_stream() again on its own.
generate_data_stream()

In [None]:
# Create the environment
def create_env():
    """Assemble the sine-wave trading environment.

    Wires together, in order: the simulated "sine-wave" exchange fed by the
    generated price stream, a USD wallet holding 1000 and an empty TTC wallet,
    the observation feed (price, three rolling means and log returns), a PBR
    reward scheme attached to a BSH action scheme, and a renderer feed that
    records price and the action scheme's current action.

    Reads the notebook-level ``USD``, ``TTC`` and ``window_size``.

    Returns
    -------
    The environment produced by ``default.create``.
    """
    price_stream, prices = generate_data_stream()

    exchange = Exchange("sine-wave", service=execute_order)(price_stream)

    # Start fully in cash: 1000 USD and no TTC.
    cash = Wallet(exchange, 1000 * USD)
    asset = Wallet(exchange, 0 * TTC)
    portfolio = Portfolio(USD, [cash, asset])

    # Observation features: raw price, rolling means over three horizons,
    # and log returns (first value filled with 0).
    rolling_means = [
        price_stream.rolling(window=span).mean().rename(label)
        for label, span in (("fast", 10), ("medium", 50), ("slow", 100))
    ]
    log_returns = price_stream.log().diff().fillna(0).rename("lr")
    feed = DataFeed([price_stream, *rolling_means, log_returns])

    reward_scheme = PBR(price=price_stream)
    action_scheme = BSH(cash=cash, asset=asset).attach(reward_scheme)

    # Separate feed for the renderer: price plus the action scheme's latest action.
    price_for_render = Stream.source(prices, dtype="float").rename("price")
    action_for_render = Stream.sensor(action_scheme, lambda s: s.action, dtype="float").rename("action")
    renderer_feed = DataFeed([price_for_render, action_for_render])

    return default.create(
        feed=feed,
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        renderer_feed=renderer_feed,
        renderer=PositionChangeChart(),
        window_size=window_size,
        max_allowed_loss=0.6
    )

# Instantiate the environment.
# This notebook-level `env` is the object the later cells inspect, step
# through in the evaluation loop, and pass to PPO.
env = create_env()

In [None]:
# calculate the batch size
def get_optimal_batch_size(window_size=30, n_steps=1000, batch_factor=4, stride=1):
    """Derive a batch size from the sample count.

    Args:
        window_size: Look-back length (number of past observations); it is
            subtracted from the sample count.
        n_steps: Total number of samples.
        batch_factor: Divisor applied to the remaining sample count.
        stride: Forward shift of the time series; also subtracted from the
            sample count.

    Returns:
        ``(n_steps - window_size - stride) // batch_factor`` (floor division).
    """
    usable_samples = n_steps - window_size - stride
    return usable_samples // batch_factor

# Derive the batch size from the notebook configuration instead of the
# function's built-in defaults, so editing window_size / n_steps in the config
# cell keeps batch_size in sync (the result is unchanged for the current values).
batch_size = get_optimal_batch_size(window_size=window_size, n_steps=n_steps)

In [None]:
# Environment details: print both spaces, then show one observation.
print(f"Action Space: {env.action_space}")
print(f"State Space: {env.observation_space}")
print("Next observation")
# Last expression of the cell: pulls the next item from the observer's feed
# so that it is displayed as the cell output.
env.observer.feed.next()

In [None]:
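# DQN training is currently disabled. Uncomment this cell to create and train
# `agent`, which the evaluation cell below uses via agent.get_action().
# agent = DQNAgent(env)

# agent.train(
#   batch_size=batch_size,
#   n_steps=n_steps,
#   n_episodes=n_episodes,
#   memory_capacity=memory_capacity,
#   render_interval=render_interval,
#   save_path=save_path
# )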

In [None]:
# Test the environment
# Run one episode until it ends, then render it.
#
# The DQN training cell is commented out, so `agent` does not exist on a fresh
# kernel and the original loop raised NameError. Use the trained agent when it
# is available, otherwise fall back to random actions as a smoke test.
if "agent" in globals():
    choose_action = agent.get_action
else:
    def choose_action(_obs):
        """Random policy: sample uniformly from the environment's action space."""
        return env.action_space.sample()

episode_reward = 0
done = False
obs = env.reset()

while not done:
    action = choose_action(obs)
    obs, reward, done, info = env.step(action)
    episode_reward += reward

env.render()

In [None]:
from stable_baselines3 import PPO

# Train a PPO policy on the sine-wave environment.
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)

# Evaluate the trained model for one episode.
# Reset the per-episode state explicitly: the previous evaluation loop leaves
# `done` set to True (so this loop would never run) and `episode_reward`
# holding the earlier episode's total.
episode_reward = 0
done = False
obs = env.reset()
while not done:
    # Query the PPO model that was just trained (not the DQN `agent`);
    # predict() returns (action, state), and the action is converted to a
    # plain int before it is handed to the environment.
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(int(action))
    episode_reward += reward

env.render()