<a href="https://colab.research.google.com/github/eminshall/Reinforcment-Learning-Agent-for-Stock-Trading-AAPL/blob/main/RL_Trading_Agent_4_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
# !pip install git+https://github.com/DLR-RM/stable-baselines3@feat/gymnasium-support
# !pip install git+https://github.com/Stable-Baselines-Team/stable-baselines3-contrib@feat/gymnasium-support

In [36]:
# pip install gym-anytrading

In [37]:
# pip install shimmy>=0.2.1

In [38]:
# !pip install finta

In [39]:
# !pip install quantstats

In [40]:
import warnings

# Suppress the specific DeprecationWarning
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [41]:
# Gym stuff
import gym
import gym_anytrading
from gym_anytrading.envs import StocksEnv

# Stable baselines - rl stuff
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3 import A2C

#Quant Finance
from finta import TA
import quantstats as qs

# Processing libraries
import os
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [42]:
data = pd.read_csv("tickers.csv")

In [43]:
data = data[data.tic == "^TNX"]

In [44]:
data.head()

Unnamed: 0.1,Unnamed: 0,date,open,high,low,close,volume,tic,day
1,2,2000-01-04,6.53,6.548,6.485,6.485,0,^TNX,1
3,4,2000-01-05,6.521,6.599,6.508,6.599,0,^TNX,2
5,6,2000-01-06,6.558,6.585,6.54,6.549,0,^TNX,3
7,8,2000-01-07,6.545,6.595,6.504,6.504,0,^TNX,4
9,11,2000-01-11,6.6,6.664,6.595,6.664,0,^TNX,6


In [45]:
data = data.drop(["Unnamed: 0", "tic", "day"], axis=1)

In [46]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5727 entries, 1 to 11453
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    5727 non-null   object 
 1   open    5727 non-null   float64
 2   high    5727 non-null   float64
 3   low     5727 non-null   float64
 4   close   5727 non-null   float64
 5   volume  5727 non-null   int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 313.2+ KB


In [47]:
data = data.rename(columns = {'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close', 'volume': 'Volume'})

data['date'] = pd.to_datetime(data['date'])
data.set_index('date', inplace=True)
data.sort_values('date', ascending=True, inplace=True)

data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2000-01-04,6.53,6.548,6.485,6.485,0
2000-01-05,6.521,6.599,6.508,6.599,0
2000-01-06,6.558,6.585,6.54,6.549,0
2000-01-07,6.545,6.595,6.504,6.504,0
2000-01-11,6.6,6.664,6.595,6.664,0


## Add Custom Indicators

In [48]:
data['return'] = np.log(data['Close'] / data['Close'].shift(1))

In [49]:
#Create columns for technical indicators & add them to the dataframe
data['RSI'] = TA.RSI(data,16)
data['SMA'] = TA.SMA(data, 20)
data['SMA_L'] = TA.SMA(data, 41)
data['OBV'] = TA.OBV(data)
data['VWAP'] = TA.VWAP(data)
data['EMA'] = TA.EMA(data)
data['ATR'] = TA.ATR(data)
data.fillna(0, inplace=True)

In [50]:
#Add momentum, volatitlity, & distance to the data_frame
data['momentum'] = data['return'].rolling(5).mean().shift(1)
data['volatility'] = data['return'].rolling(20).std().shift(1)
data['distance'] = (data['Close'] - data['Close'].rolling(50).mean()).shift(1)

In [51]:
#Perform a simple linear regression direction prediction
lags = 5

cols = []
for lag in range(1, lags + 1):
  col = f'lag_{lag}'
  data[col] = data['Close'].shift(lag)
  cols.append(col)

data.dropna(inplace=True)

reg = np.linalg.lstsq(data[cols], data['Close'], rcond=None)[0]
data['Prediction'] = np.dot(data[cols], reg)

In [52]:
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,return,RSI,SMA,SMA_L,OBV,...,ATR,momentum,volatility,distance,lag_1,lag_2,lag_3,lag_4,lag_5,Prediction
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-09-13,4.312,4.344,4.235,4.249,0,-0.003524,56.143707,4.24285,4.10639,631500.0,...,0.077429,-0.000188,0.013608,0.19996,4.264,4.288,4.258,4.26,4.29,4.261579
2023-09-14,4.229,4.298,4.223,4.288,0,0.009137,58.51678,4.2462,4.118366,631500.0,...,0.072929,-0.001921,0.013631,0.17534,4.249,4.264,4.288,4.258,4.26,4.248713
2023-09-15,4.32,4.338,4.303,4.322,0,0.007898,60.504121,4.2494,4.131366,631500.0,...,0.073,0.00131,0.013641,0.20574,4.288,4.249,4.264,4.288,4.258,4.2875
2023-09-18,4.337,4.359,4.311,4.319,0,-0.000694,60.232514,4.24995,4.145439,631500.0,...,0.070643,0.002984,0.013617,0.2322,4.322,4.288,4.249,4.264,4.288,4.319068
2023-09-19,4.333,4.365,4.325,4.365,0,0.010594,62.952358,4.25565,4.157902,631500.0,...,0.070714,0.001441,0.013374,0.22364,4.319,4.322,4.288,4.249,4.264,4.316102


In [53]:
#Create a function to properly format data frame to be passed through environment
def signals(env):
    start = env.frame_bound[0] - env.window_size
    end = env.frame_bound[1]
    prices = env.df.loc[:,'Close'].to_numpy()[start:end]
    signal_features = env.df.loc[:, ['Open','High','Low','Close','Volume','return','momentum','volatility','distance','RSI','OBV','SMA','SMA_L','VWAP','EMA','ATR', 'Prediction']].to_numpy()[start:end]
    return prices, signal_features

In [54]:
#Replace default data process with custom function from above
class MyCustomEnv(StocksEnv):
    _process_data = signals

#Initialize an environment setting the window size and train data
window_size = 65
start_index = window_size
end_train_index = round(len(data)*0.70)
end_val_index = len(data)

env2 = MyCustomEnv(df=data, window_size=window_size, frame_bound=(start_index, end_train_index))

In [55]:
#Create a Dummy Vector of our environment
env_maker = lambda: env2
env = DummyVecEnv([env_maker])

## Train Test

In [56]:
#initialize our model and train
policy_kwargs = dict(optimizer_class='RMSpropTFLike', optimizer_kwargs=dict(eps=1e-5))
actor_critic = A2C('MlpPolicy', env, verbose=1)
actor_critic.learn(total_timesteps=500000)

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
|    value_loss         | 6.44e-12  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 403       |
|    iterations         | 64400     |
|    time_elapsed       | 797       |
|    total_timesteps    | 322000    |
| train/                |           |
|    entropy_loss       | -6.78e-05 |
|    explained_variance | -1.66e+03 |
|    learning_rate      | 0.0007    |
|    n_updates          | 64399     |
|    policy_loss        | -1.4e-08  |
|    value_loss         | 1.45e-05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 404       |
|    iterations         | 64500     |
|    time_elapsed       | 798       |
|    total_timesteps    | 322500    |
| train/                |           |
|    entropy_loss       | -6.69e-05 |
|    explained_variance | -

<stable_baselines3.a2c.a2c.A2C at 0x7c3eec375a20>

In [89]:
#Create a new environment with validation data
env = MyCustomEnv(df=data, window_size=window_size, frame_bound=(end_train_index, end_val_index))
obs = env.reset()

In [90]:
# type(obs)

In [91]:
# obs = np.array(obs)
# obs = obs[np.newaxis, ...]

In [92]:
# type(obs)

In [93]:
# #Create a new environment with validation data
# env = MyCustomEnv(df=data, window_size=window_size, frame_bound=(end_train_index, end_val_index))
# obs = env.reset()

In [94]:
while True:
    obs = obs[np.newaxis, ...]
    action, _states = actor_critic.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        print("info", info)
        break

TypeError: ignored

In [None]:
#Plot the results
plt.figure(figsize=(16,9))
env.render_all()
plt.show()

In [None]:
qs.extend_pandas()

net_worth = pd.Series(env.history['total_profit'], index=data.index[end_train_index+1:end_val_index])
returns = net_worth.pct_change().iloc[1:]

qs.reports.full(returns)