In [1]:
import pandas as pd
import talib

import glob

In [2]:
def create_df_from_file(file_name):
    print(file_name)
    df = pd.read_csv(file_name)

    df['time'] = pd.to_datetime(df['time'], unit='ms')

    # Apply Min-Max scaling to the selected columns
    columns_to_normalize = df.columns.difference(['time'])
    df = df.copy()  # Create a copy to preserve the original DataFrame
    df[columns_to_normalize] = (df[columns_to_normalize]) / (df[columns_to_normalize].iloc[0])

    # Calculate Moving Averages
    df['SMA_10'] = talib.SMA(df['close'], timeperiod=10)  # Simple Moving Average for 10 periods
    df['EMA_10'] = talib.EMA(df['close'], timeperiod=10)  # Exponential Moving Average for 10 periods
    df['SMA_day'] = talib.SMA(df['close'], timeperiod=6*60)  # Simple Moving Average for day periods
    df['EMA_day'] = talib.EMA(df['close'], timeperiod=6*60)  # Exponential Moving Average dayr 10 periods


    # Calculate RSI (Relative Strength Index)
    df['RSI_14'] = talib.RSI(df['close'], timeperiod=14)  # RSI over 14 periods

    # Calculate MACD (Moving Average Convergence Divergence)
    df['MACD'], df['MACD_signal'], df['MACD_hist'] = talib.MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
    df = df.dropna()

    # Select columns to normalize (all except 'time')

    # Display the first few rows of the DataFrame with columns (except 'time')
    df = df
    # print(df)
    # Display the first few rows of the DataFrame
    return df

In [3]:
# Use glob to find all files matching the pattern 'Records*.csv'
file_list = glob.glob('Records/*.csv')

# Initialize an empty list to store dataframes
dfs = []
# print(file_list)
# Loop over the list of file names
for file_name in file_list:
    dfs.append(create_df_from_file(file_name))

Records/Records-ABCAPITAL-1677748500000-1709383680000.csv
Records/Records-CONCOR-1677748500000-1709383560000.csv
Records/Records-PEL-1677748500000-1709383500000.csv
Records/Records-PETRONET-1677748500000-1709383740000.csv
Records/Records-DEEPAKNTR-1677748500000-1709383620000.csv
Records/Records-LODHA-1677748500000-1709383740000.csv
Records/Records-IDEA-1677748500000-1709383740000.csv
Records/Records-MAZDOCK-1677748500000-1709383680000.csv
Records/Records-IRFC-1677748500000-1709383740000.csv
Records/Records-LUPIN-1677748500000-1709383620000.csv
Records/Records-TATACOMM-1677748500000-1709383680000.csv
Records/Records-NAVINFLUOR-1677748500000-1709383500000.csv
Records/Records-TIINDIA-1677748500000-1709383740000.csv
Records/Records-ACC-1677748500000-1709383500000.csv
Records/Records-TORNTPOWER-1677748500000-1709383740000.csv
Records/Records-VOLTAS-1677748500000-1709383740000.csv
Records/Records-RAMCOCEM-1677748500000-1709383560000.csv
Records/Records-MSUMI-1677748500000-1709383740000.csv
R

In [112]:
display(dfs[0])

Unnamed: 0,time,open,high,low,close,volume,SMA_10,EMA_10,SMA_day,EMA_day,RSI_14,MACD,MACD_signal,MACD_hist
359,2023-03-02 15:14:00,0.997048,0.994768,0.999343,0.996723,0.067736,0.996232,0.996186,0.993510,0.993510,56.309484,0.000243,0.000219,0.000024
360,2023-03-02 15:15:00,0.997704,0.994768,0.998356,0.996723,0.208290,0.996298,0.996284,0.993501,0.993528,56.309484,0.000271,0.000230,0.000042
361,2023-03-02 15:16:00,0.997048,0.995422,0.999014,0.997379,0.135095,0.996429,0.996483,0.993482,0.993549,60.490521,0.000343,0.000252,0.000090
362,2023-03-02 15:17:00,0.998360,0.995422,1.000000,0.996723,0.056762,0.996461,0.996527,0.993460,0.993566,54.838933,0.000342,0.000270,0.000072
363,2023-03-02 15:18:00,0.997704,0.999673,1.000000,1.000655,0.569823,0.996887,0.997277,0.993458,0.993606,71.839404,0.000652,0.000346,0.000305
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92835,2024-03-02 12:42:00,1.224992,1.221387,1.227811,1.223788,0.000000,1.223788,1.223660,1.221711,1.220160,50.223898,-0.000061,-0.000016,-0.000046
92836,2024-03-02 12:43:00,1.224992,1.221387,1.227811,1.223788,0.000000,1.223788,1.223683,1.221741,1.220180,50.223898,-0.000048,-0.000022,-0.000026
92837,2024-03-02 12:44:00,1.224992,1.221387,1.227811,1.223788,0.000000,1.223755,1.223702,1.221770,1.220200,50.223898,-0.000038,-0.000025,-0.000012
92838,2024-03-02 12:45:00,1.224992,1.221387,1.227811,1.223788,0.000000,1.223689,1.223718,1.221788,1.220220,50.223898,-0.000029,-0.000026,-0.000003


In [5]:
# !python -m pip install "stable-baselines3" "huggingface_sb3"
# !conda search -c conda-forge *baseline*


In [27]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import random
# import gym
# from gym import spaces
# import numpy as np

class ContinuousStockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym with continuous action space"""
    metadata = {'render.modes': ['human']}
    
    def __init__(self, dfs):
        super(ContinuousStockTradingEnv, self).__init__()
        self.dfs = dfs
        self.df = self.dfs[random.randint(1, len(self.dfs))]
        
        # Initial conditions
        self.initial_balance = 1000
        self.current_balance = self.initial_balance
        
        self.df = self.df.drop('time', axis=1)

        # self.df = df  # DataFrame containing the stock data
        self.max_steps = len(self.df)
        self.current_step = 0
        
        # Define action and observation space
        # Actions of the format: number of shares to buy (positive) or sell (negative)
        current_price = self.df.iloc[self.current_step]['close']
        max_buyable_shares = int(self.current_balance / current_price)
        
        self.action_space = spaces.Discrete(11)  # 21 discrete actions

        # Map index to action value (-20 to +20 with gaps of 2)
        # self.action_map = np.arange(-20, 21, 2)  # This creates an array [-20, -18, ..., 18, 20]

        # Observation space using the five provided features (Open, High, Low, Close, Volume)
        self.observation_space = spaces.Box(low=0, high=float('inf'), shape=(13,), dtype=np.float32)
        
        self.holding = 0

    def step(self, action):
        truncated = False
        # print('action', action)
        # Update the current step
        self.current_step += 1

        action = (action - 20)*2
        
        # Get the current stock data
        current_data = self.df.iloc[self.current_step]
        current_price = current_data['close']  # Assuming we use the closing price for transactions

        # action = int(round(action))
    
        # Calculate the maximum number of shares we can buy
        current_price = self.df.iloc[self.current_step]['close']
        # print(current_price, self.current_balance)
        # print(self.current_balance, current_price)
        if current_price > 0:
            max_buyable_shares = int(self.current_balance / current_price)
        else:
            max_buyable_shares = 20
        # if max_buyable_shares == 0:
        #     truncated = True
    
        # Ensure the action is within bounds
        # action = np.clip(action, -self.holding, max_buyable_shares)
        
        # Execute the trade
        trade_value = action * current_price
        self.current_balance -= trade_value  # If action is negative, this increases balance
        self.holding += action  # Update the number of shares held

        print('self.holding, trade_value, self.current_balance', self.holding, trade_value, self.current_balance)
        portfolio_value = self.current_balance + self.holding * current_price

        reward = portfolio_value - (self.initial_balance + self.holding * current_data['open'])
        
        # Check if the episode is done
        done = self.current_step >= self.max_steps - 1

        # Optionally include additional info
        info = {}
        return current_data.values, reward, done, truncated, info

    def reset(self, **kwargs):
        # Reset the environment state for a new episode
        self.current_step = 0
        self.current_balance = self.initial_balance
        self.holding = 0
        
        self.df = self.dfs[random.randint(1, len(self.dfs))]
        self.df = self.df.drop('time', axis=1)
        
        self.max_steps = len(self.df)
        # print(self.df.iloc[self.current_step].values.shape)
        return [self.df.iloc[self.current_step].values, None]

    def render(self, mode='human', close=False):
        # Render the environment to the screen (optional)
        pass


In [17]:
env = ContinuousStockTradingEnv(dfs)

observations = env.reset()
print(observations)
done = False
while not done:
    # Random action: buy/sell a random number of shares
    action = np.random.uniform(0, 11, (1, 1))[0][0]  # Adjust the range based on expected max trading volume
    # print(env.action_map)
    # print(env.action_space)
    # print(action)
    observations, reward, done, t, info = env.step(action)
    # print(observations)
    print(f"Step: {env.current_step}, Action: {action}, Reward: {reward}, Portfolio Value: {env.current_balance + env.holding * observations[3]}")  # Observations[3] is the 'Close' price


IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [21]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

env = ContinuousStockTradingEnv(dfs)

model = PPO("MlpPolicy", env, verbose=1, device='mps')
# model = PPO.load("ppo_stock_trading")

# Train the model
print('starting training')
total_timesteps = 20000  # Adjust based on your needs
model.learn(total_timesteps=total_timesteps)

model.save("ppo_stock_trading_v2")
print('saved model')


Using mps device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
starting training
-----------------------------
| time/              |      |
|    fps             | 60   |
|    iterations      | 1    |
|    time_elapsed    | 33   |
|    total_timesteps | 2048 |
-----------------------------
--------------------------------------------
| time/                   |                |
|    fps                  | 46             |
|    iterations           | 2              |
|    time_elapsed         | 87             |
|    total_timesteps      | 4096           |
| train/                  |                |
|    approx_kl            | -2.0198058e-08 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -2.4           |
|    explained_variance   | 0              |
|    learning_rate        | 0.0003         |
|    loss                 | 1.88e+11       |
|    n_updates            | 10             |
|  

In [32]:
# Optionally, test the trained model
env = ContinuousStockTradingEnv(dfs)

model = PPO.load("ppo_stock_trading_v2")

obs, _  = env.reset()
print(obs)
for _ in range(1000):
    # print(obs)
    action, _states = model.predict(obs, deterministic=True)
    print(type(action), action)
    # print((action[0]))
    # print(type(action))
    # print(round(action))
    # print(action)
    obs, rewards, done, truncated, info = env.step(action)
    print(reward)
    # env.render()  # If your environment supports rendering

[ 9.91695244e-01  9.94383415e-01  9.93871636e-01  9.94699793e-01
  4.98809524e-01  9.94749482e-01  9.94781171e-01  9.95670232e-01
  9.95670232e-01  4.58324260e+01 -4.80164684e-04 -3.80674822e-04
 -9.94898626e-05]
<class 'numpy.ndarray'> 4
self.holding, trade_value, self.current_balance -32 -31.83966873706004 1031.83966873706
3017015.5548087754
<class 'numpy.ndarray'> 4
self.holding, trade_value, self.current_balance -64 -31.850269151138715 1063.6899378881988
3017015.5548087754
<class 'numpy.ndarray'> 4
self.holding, trade_value, self.current_balance -96 -31.801242236024844 1095.4911801242235
3017015.5548087754
<class 'numpy.ndarray'> 4
self.holding, trade_value, self.current_balance -128 -31.85424430641822 1127.3454244306417
3017015.5548087754
<class 'numpy.ndarray'> 4
self.holding, trade_value, self.current_balance -160 -31.83701863354037 1159.182443064182
3017015.5548087754
<class 'numpy.ndarray'> 4
self.holding, trade_value, self.current_balance -192 -31.825093167701866 1191.0075362