<a href="https://colab.research.google.com/github/Pfannen/AI-Stock-Agent/blob/main/notebooks/RL_trading_bot_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install data and TA dependencies
!pip install yahoo_fin
!pip install requests_html
!pip install pandas-ta

# Install RL dependencies
!pip install stable-baselines3 gym-anytrading gym #gym-trading-env

In [70]:
# Standard imports
import pandas as pd
import numpy as np
import torch
from matplotlib import pyplot as plt

# Data and TA imports
import pandas_ta as ta
from yahoo_fin.stock_info import get_data

# RL imports
import gymnasium as gym
import gym_anytrading
#import gym_trading_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3 import A2C
from gym_anytrading.envs import StocksEnv

In [3]:
# Download one year of daily AMZN candles to use as the working data set
test_df = get_data(
    ticker="AMZN",
    interval="1d",
    start_date="12/04/2020",
    end_date="12/04/2021",
    index_as_date=True,
)

# Preview the first rows
test_df.head()

  and should_run_async(code)


Unnamed: 0,open,high,low,close,adjclose,volume,ticker
2020-12-04,159.910507,159.910507,157.938004,158.128998,158.128998,58272000,AMZN
2020-12-07,157.824005,159.037994,157.084503,157.899994,157.899994,55026000,AMZN
2020-12-08,157.945007,159.206497,156.001007,158.864502,158.864502,65726000,AMZN
2020-12-09,158.394501,158.721497,154.399994,155.210007,155.210007,82016000,AMZN
2020-12-10,154.449493,157.104996,153.800003,155.074493,155.074493,60604000,AMZN


In [4]:
# Bundle the indicators we want (10-period SMA and RSI) into one pandas-ta strategy
TestStrategy = ta.Strategy(
    name="Test Strategy",
    ta=[{"kind": "sma", "length": 10}, {"kind": "rsi", "length": 10}],
)

# Run the strategy; append=True writes the indicator columns onto test_df
test_df.ta.strategy(TestStrategy, length=10, append=True)

# Drop every row that contains a NaN (mutates test_df in place)
test_df.dropna(inplace=True)

# Preview the enriched frame
test_df.head()

  and should_run_async(code)


Unnamed: 0,open,high,low,close,adjclose,volume,ticker,SMA_10,RSI_10
2020-12-18,162.199493,162.470993,158.580002,160.082504,160.082504,119914000,AMZN,158.2909,59.065015
2020-12-21,160.000504,161.348495,158.300003,160.309006,160.309006,76736000,AMZN,158.531801,60.13189
2020-12-22,160.141998,161.100006,159.003998,160.326004,160.326004,47388000,AMZN,158.677951,60.218346
2020-12-23,160.25,160.5065,159.208496,159.263504,159.263504,41876000,AMZN,159.083301,52.336089
2020-12-24,159.695007,160.100006,158.449997,158.634506,158.634506,29038000,AMZN,159.439302,48.187206


In [None]:
# Create method to return indicators on given environment

def signals(env):
  """Return the price series and feature matrix for an environment's frame.

  Written to be installed as the `_process_data` hook of a trading
  environment, so `env` is the environment instance itself.

  Args:
    env: Object exposing `df` (a DataFrame with 'close', 'volume', 'SMA_10'
      and 'RSI_10' columns), `frame_bound` (a `(start, end)` pair of row
      positions) and `window_size` (int).

  Returns:
    A `(prices, signal_features)` tuple of NumPy arrays covering row
    positions `frame_bound[0] - window_size` up to (not including)
    `frame_bound[1]`. `prices` is the 'close' column; `signal_features` has
    one column per feature, in the order close, volume, SMA_10, RSI_10.

  Raises:
    ValueError: If `frame_bound[0]` is smaller than `window_size`.
  """
  feature_columns = ['close', 'volume', 'SMA_10', 'RSI_10']
  start = env.frame_bound[0] - env.window_size
  end = env.frame_bound[1]
  # A negative start would be read as "count from the end" by the slice below
  # and silently yield an empty or wrong window, so reject it explicitly.
  if start < 0:
    raise ValueError(
        f"frame_bound[0] ({env.frame_bound[0]}) must be >= window_size "
        f"({env.window_size})"
    )
  prices = env.df.loc[:, 'close'].to_numpy()[start:end]
  signal_features = env.df.loc[:, feature_columns].to_numpy()[start:end]
  return prices, signal_features

# Create custom environment

class CustomEnv(StocksEnv):
  """StocksEnv subclass whose `_process_data` attribute is set to `signals`."""
  # A plain function assigned as a class attribute is bound like a method, so
  # the environment instance is what `signals` receives as its `env` argument.
  _process_data = signals

In [83]:
# Training environment: frame_bound (10, 200) with a 5-step window,
# wrapped in a single-env DummyVecEnv for stable-baselines3
env = CustomEnv(frame_bound=(10, 200), window_size=5, df=test_df)

def env_lambda():
  return env

test_vec_env = DummyVecEnv([env_lambda])

In [84]:
# Create RL model

# A fixed seed makes training start from the same random state on every run,
# so a Restart & Run All reproduces the same agent.
test_model = A2C('MlpPolicy', test_vec_env, verbose=0, seed=42)
test_model.learn(total_timesteps=10000)

<stable_baselines3.a2c.a2c.A2C at 0x7cd96c18edd0>

In [98]:
# Evaluate the trained model on the rows after the training frame
env = CustomEnv(df=test_df, window_size=5, frame_bound=(200, len(test_df)-1))
obs, info = env.reset()

while True:
  action, _states = test_model.predict(obs)
  # Gymnasium's step() returns (obs, reward, terminated, truncated, info).
  # The episode is over when either flag is set; checking both stops the loop
  # on the final tick instead of stepping past the end of the data.
  obs, reward, terminated, truncated, info = env.step(action)
  if terminated or truncated:
    print("info", info)
    break

info {'total_reward': 1.42401123046875, 'total_profit': 0.9324345387442089, 'position': <Positions.Long: 1>}


In [None]:
# Draw the evaluation environment's full render on a wide, white-background figure
plt.figure(facecolor='w', figsize=(15, 6))
plt.cla()  # clear the current axes before rendering
env.render_all()
plt.show()