In [948]:
import numpy as np
import random
import numpy as np
import matplotlib.pyplot as plt
import os
import gymnasium as gym
import gym_anytrading
from gym_anytrading.envs import TradingEnv, ForexEnv, StocksEnv, Actions, Positions 
from stable_baselines3.common.vec_env import DummyVecEnv
from gym_anytrading.datasets import FOREX_EURUSD_1H_ASK, STOCKS_GOOGL
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
import quantstats as qs
import pandas as pd
from stable_baselines3 import A2C, PPO
import warnings 
import plotly.express as px
import plotly.graph_objects as go
from finta import TA

px.template = 'plotly_dark'

warnings.filterwarnings(action='ignore',category=FutureWarning)

warnings.filterwarnings(action='ignore',category=RuntimeWarning)

device = 'mps'

In [949]:
import plotly.graph_objects as go
import numpy as np

def render_all(self, title=None):
    window_ticks = np.arange(len(self._position_history))

    # Prices trace
    prices_trace = go.Scatter(
        x=window_ticks,
        y=self.prices,
        mode='lines',
        name='Prices'
    )

    short_ticks = []
    long_ticks = []
    for i, tick in enumerate(window_ticks):
        if self._position_history[i] == Positions.Short:
            short_ticks.append(tick)
        elif self._position_history[i] == Positions.Long:
            long_ticks.append(tick)

    # Short positions trace
    short_trace = go.Scatter(
        x=short_ticks,
        y=np.array(self.prices)[short_ticks],
        mode='markers',
        marker=dict(color='red'),
        name='Short'
    )

    # Long positions trace
    long_trace = go.Scatter(
        x=long_ticks,
        y=np.array(self.prices)[long_ticks],
        mode='markers',
        marker=dict(color='green'),
        name='Long'
    )

    layout = go.Layout(
        title=f"{title} - Total Reward: {self._total_reward:.6f} ~ Total Profit: {self._total_profit:.6f}",
        xaxis_title="Ticks",
        yaxis_title="Price"
    )

    fig = go.Figure(data=[prices_trace, short_trace, long_trace], layout=layout)
    fig.show()


In [950]:
# df = gym_anytrading.datasets.STOCKS_GOOGL.copy()

In [951]:
df = pd.read_csv('../monty_python/monty_python/datasets/ethereum_daily_indicators.csv',low_memory=False)

renamer = {'date':'Date','open':'Open','high':'High','low':'Low','close':'Close','volume':'Volume'}
df.rename(columns=renamer,inplace=True)
df.set_index('Date',inplace=True)

df.sort_index(inplace=True)
df.fillna(0, inplace=True)
#Add momentum, volatitlity, & distance to the df_frame
df['return'] = np.log(df['Close'] / df['Close'].shift(1))

df['momentum'] = df['return'].rolling(5).mean().shift(1)
df['volatility'] = df['return'].rolling(20).std().shift(1)
df['distance'] = (df['Close'] - df['Close'].rolling(50).mean()).shift(1)
df['RSI'] = TA.RSI(df,16)
df['SMA'] = TA.SMA(df, 20)
df['SMA_L'] = TA.SMA(df, 41)
df['OBV'] = TA.OBV(df)
df['VWAP'] = TA.VWAP(df)
df['EMA'] = TA.EMA(df)
df['ATR'] = TA.ATR(df)
df.fillna(0, inplace=True)

In [952]:
df = df.head(128).copy()


In [953]:

#Create a function to properly format data frame to be passed through environment
def signals(env):
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]
    prices = env.df.loc[:,'Close'].to_numpy()[start:end]
    signal_features = env.df.loc[:, ['Open','High','Low','Close','Volume','return','momentum','volatility','distance','RSI','OBV','SMA','SMA_L','VWAP','EMA','ATR', 'Prediction']].to_numpy()[start:end]
    return prices, signal_features

#Perform a simple linear regression direction prediction
lags = 5

cols = []
for lag in range(1, lags + 1):
  col = f'lag_{lag}'
  df[col] = df['Close'].shift(lag)
  cols.append(col)

df.dropna(inplace=True)

reg = np.linalg.lstsq(df[cols], df['Close'], rcond=None)[0]
df['Prediction'] = np.dot(df[cols], reg)

#Replace default df process with custom function from above
class MyCustomEnv(StocksEnv):
    _process_df = signals
    
#Initialize an environment setting the window size and train df
window_size = 5
start_index = window_size
end_train_index = round(len(df)*0.70)
end_val_index = len(df)    

In [954]:
# get the list of models in the folder
dir = "models/eth_daily_100"
models = os.listdir(dir)

# sort the models by modified date
models.sort(key=lambda x: os.path.getmtime(os.path.join(dir, x)))
model_path = os.path.join(dir, models[-1])

# choose the latest model
model = PPO.load(model_path)

# env = gym.make("stocks-v0", df=df, frame_bound=(start_index, end_index), window_size=window_size)
env = MyCustomEnv(df=df, window_size=window_size, frame_bound=(end_train_index, end_val_index))

obs, info = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, term, trunc, info = env.step(action)
    done = term or trunc
    if done:
        print("info", info)
        break
env.close()

#visualize the environment
render_all(env.unwrapped)


info {'total_reward': -155.3988037109375, 'total_profit': 0.7225332450511992, 'position': <Positions.Long: 1>}


In [955]:
# model_path 'models/nvidia3/1600000.zip' is really good

In [956]:
from stable_baselines3.common.evaluation import evaluate_policy

eval_env = Monitor(gym.make("stocks-v0", df=df, frame_bound=(end_train_index, end_val_index), window_size=window_size))
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=False)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}") 

mean_reward=-144.86 +/- 37.22383804927026


In [957]:
qs.extend_pandas()
df.index = pd.to_datetime(df.index, utc=True, errors='ignore')

In [958]:
net_worth = pd.Series(
    env.unwrapped.history["total_profit"], index=df.index[end_train_index+1:end_val_index])
returns = net_worth.pct_change().iloc[1:]

qs.reports.html(returns, output="SB3_a2c_quantstats.html")
from IPython.display import HTML

HTML(filename="SB3_a2c_quantstats.html")

Metric,Strategy
Risk-Free Rate,0.0%
Time in Market,26.0%
,
Cumulative Return,-27.75%
CAGR﹪,-91.01%
,
Sharpe,-3.45
Prob. Sharpe Ratio,2.53%
Smart Sharpe,-3.31
Sortino,-3.53

Year,Return,Cumulative
2018,-29.53%,-27.75%

Started,Recovered,Drawdown,Days
2018-02-21,2018-03-16,-26.79,24


In [959]:
df['net_worth'] = np.nan
df.loc[df.index[end_train_index+1:end_val_index],'net_worth'] = net_worth

In [960]:

#the close price and the net worth amounts need to be normalized to the same scale
# We will get the z-scores for each column and then plot them
df['close_z'] = (df['Close'] - df['Close'].mean())/df['Close'].std(ddof=0)
df['net_worth_z'] = (df['net_worth'] - df['net_worth'].mean())/df['net_worth'].std(ddof=0)
px.line(df, x=df.index, y=['close_z','net_worth_z'],template=px.template)

In [961]:
sum(env.unwrapped.history['total_profit'])

32.35521443207511