In [7]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load data
#C:\Users\rohit\OneDrive\Documents\Asset-Pricing-with-Reinforcement-Learning\XOM_30_minute_6_month_data.csv
df = pd.read_csv('XOM_30_minute_6_month_data.csv', parse_dates=['Date'])
df.sort_values('Date', inplace=True)

# Check for missing values
# df.isnull().sum()

# Fill missing values if any
# df.fillna(method='ffill', inplace=True)

# Normalize
scaler = MinMaxScaler()
df[['Last Price', 'Volume', 'SMAVG (15)']] = scaler.fit_transform(df[['Last Price', 'Volume', 'SMAVG (15)']])

# Split into training and testing sets
train_size = int(len(df) * 0.8)
train_df = df[:train_size]
test_df = df[train_size:]


In [8]:
import gym
import numpy as np
from gym import spaces
from enum import Enum

# Define Actions and Positions
class Actions(Enum):
    Sell = 0
    Buy = 1

class Positions(Enum):
    Short = 0
    Long = 1

    def opposite(self):
        return Positions.Short if self == Positions.Long else Positions.Long

# Define the Trading Environment
class CustomTradingEnv(gym.Env):
    metadata = {'render_modes': ['human'], 'render_fps': 3}

    def __init__(self, df, window_size, render_mode=None):
        super(CustomTradingEnv, self).__init__()
        assert df.ndim == 2
        assert render_mode is None or render_mode in self.metadata['render_modes']

        self.df = df
        self.window_size = window_size
        self.render_mode = render_mode

        self.prices, self.signal_features = self._process_data()
        self.shape = (window_size, self.signal_features.shape[1])

        # Define action and observation spaces
        self.action_space = spaces.Discrete(len(Actions))
        INF = np.inf
        self.observation_space = spaces.Box(low=-INF, high=INF, shape=self.shape, dtype=np.float32)

        # Initialize state
        self.reset()

    def reset(self):
        self.current_step = self.window_size
        self.position = Positions.Short
        self.total_profit = 1.0  # Starting with unit profit
        self.history = []
        self.balance = 10000  # Example starting balance
        self.shares_held = 0
        self.portfolio_value = self.balance  # Initialize portfolio value
        self.trade_amount = 100  # Example trade amount
        self.previous_portfolio_value = self.portfolio_value
        return self._get_observation()

    def _process_data(self):
        # Read CSV file
        df = pd.read_csv('XOM_30_minute_6_month_data.csv', parse_dates=['Date'])
        df.sort_values('Date', inplace=True)

        # Optional: Create additional features
        # df['Price Change'] = df['Last Price'].pct_change().fillna(0)

        # Normalize features
        scaler = MinMaxScaler()
        df[['Last Price', 'Volume', 'SMAVG (15)']] = scaler.fit_transform(df[['Last Price', 'Volume', 'SMAVG (15)']])

        # Extract prices and features
        prices = df['Last Price'].values
        features = df[['Last Price', 'Volume', 'SMAVG (15)']].values

        return prices, features

    def _get_observation(self):
        """
        Returns an observation window containing the signal features for the current step.
        """
        # Ensure the current step does not exceed the bounds of our data
        assert self.current_step >= self.window_size

        # Get the observation window from the signal features
        start = self.current_step - self.window_size
        end = self.current_step
        observation = self.signal_features[start:end]

        return observation

    def _take_action(self, action):
        """
        Take an action in the environment.
        - Buy or sell the stock and update the state accordingly.
        """
        current_price = self.prices[self.current_step]
        action_type = Actions(action)

        if action_type == Actions.Buy:
            # Buy the stock
            self._buy_stock(current_price)
        elif action_type == Actions.Sell:
            # Sell the stock
            self._sell_stock(current_price)

    def _update_portfolio(self, action):
        """
        Update the portfolio value based on the action.
        """
        current_price = self.prices[self.current_step]
        action_type = Actions(action)

        if action_type == Actions.Buy:
            self._buy_stock(current_price)
        elif action_type == Actions.Sell:
            self._sell_stock(current_price)

        # Update the portfolio value
        self.portfolio_value = self.balance + self.shares_held * current_price
        print(f"Updated portfolio. Portfolio value: {self.portfolio_value}")

    def _buy_stock(self, current_price):
        print("buying")
        """
        Buy stock and update the relevant state variables.
        """
        # Example implementation
        # Ensure that the buying logic aligns with your trading strategy
        if self.balance >= current_price * self.trade_amount:
            self.balance -= current_price * self.trade_amount
            self.shares_held += self.trade_amount

    def _sell_stock(self, current_price):
        print("selling")
        """
        Sell stock and update the relevant state variables.
        """
        # Example implementation
        # Ensure that the selling logic aligns with your trading strategy
        if self.shares_held >= self.trade_amount:
            self.balance += current_price * self.trade_amount
            self.shares_held -= self.trade_amount


    def step(self, action):
        # Validate action
        assert self.action_space.contains(action), f"{action} is an invalid action"

        # Perform the action and update state
        self._take_action(action)

        # Advance the current step
        self.current_step += 1

        # Calculate reward
        reward = self._calculate_reward(action)

        # Check if the end of the data is reached
        done = self.current_step >= len(self.prices) - 1

        # Get the next observation
        observation = self._get_observation()

        # Additional information, useful for debugging
        info = {'current_step': self.current_step, 'total_profit': self.total_profit}

        return observation, reward, done, info

    def render(self, mode='human'):
        # Simple text rendering
        if mode == 'human':
            print(f"Step: {self.current_step}, Total Profit: {self.total_profit}")

    def _calculate_reward(self, action):
        """
        Calculate the reward based on the action taken.
        Action can either be buying or selling a stock.
        The reward is the change in portfolio value as a result of the action.
        """
        # Assuming self.portfolio_value stores the current value of the portfolio
        previous_portfolio_value = self.portfolio_value

        # Update portfolio value based on the action
        self._update_portfolio(action)

        # New portfolio value
        current_portfolio_value = self.portfolio_value

        # Reward is the change in portfolio value
        reward = current_portfolio_value - previous_portfolio_value

        self._update_portfolio(action)  # Ensure this method updates the portfolio value
        current_portfolio_value = self.portfolio_value
        reward = current_portfolio_value - self.previous_portfolio_value
        self.previous_portfolio_value = current_portfolio_value
        return reward

    def _update_profit(self, action):
        """
        Update the total profit based on the action taken.
        - Calculate the impact of buying/selling on the profit.
        - Include transaction costs if applicable.
        """
        current_price = self.prices[self.current_step]

        if action == Actions.Buy.value:
            # Calculate the cost of buying
            cost = current_price * self.trade_amount  # Assuming trade_amount is defined
            # Subtract cost from total profit, include transaction fee if applicable
            self.total_profit -= cost

        elif action == Actions.Sell.value:
            # Calculate the revenue from selling
            revenue = current_price * self.trade_amount  # Assuming trade_amount is defined
            # Add revenue to total profit, subtract transaction fee if applicable
            self.total_profit += revenue



In [5]:
import pandas as pd
import stable_baselines3
import shimmy
# Load the dataset
df = pd.read_csv('XOM_30_minute_6_month_data.csv', parse_dates=['Date'])

# Initialize the environment
window_size = 60  # Example window size
env = CustomTradingEnv(df, window_size)
from stable_baselines3 import DQN  # Example using Stable Baselines3 library

model = DQN('MlpPolicy', env, verbose=1)

model.learn(total_timesteps=10000)

obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = env.step(action)
    env.render()


ModuleNotFoundError: No module named 'stable_baselines3'

In [None]:
"""from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy

# Adjust exploration settings
initial_eps = 1.0  # High initial exploration
final_eps = 0.1    # Lower final exploration
eps_decay = 0.995  # Decay rate

# Make a vectorized environment (if needed)
vec_env = make_vec_env(lambda: env, n_envs=1)

# Setup DQN model with custom exploration parameters
model = DQN('MlpPolicy', vec_env, verbose=1, exploration_initial_eps=initial_eps, 
            exploration_final_eps=final_eps, exploration_fraction=eps_decay)

# Train the model
model.learn(total_timesteps=10000)"""


"from stable_baselines3.common.env_util import make_vec_env\nfrom stable_baselines3 import DQN\nfrom stable_baselines3.common.evaluation import evaluate_policy\n\n# Adjust exploration settings\ninitial_eps = 1.0  # High initial exploration\nfinal_eps = 0.1    # Lower final exploration\neps_decay = 0.995  # Decay rate\n\n# Make a vectorized environment (if needed)\nvec_env = make_vec_env(lambda: env, n_envs=1)\n\n# Setup DQN model with custom exploration parameters\nmodel = DQN('MlpPolicy', vec_env, verbose=1, exploration_initial_eps=initial_eps, \n            exploration_final_eps=final_eps, exploration_fraction=eps_decay)\n\n# Train the model\nmodel.learn(total_timesteps=10000)"

In [None]:
from stable_baselines3 import DQN

model = DQN(
    "MlpPolicy", 
    env, 
    verbose=1, 
    exploration_fraction=0.2,  # Fraction of entire training period over which the exploration rate is reduced
    exploration_initial_eps=2.0,  # Initial value of epsilon in epsilon-greedy exploration
    exploration_final_eps=0.05   # Final value of epsilon after decay
)

model.learn(total_timesteps=10000)

obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, dones, info = env.step(action)
    env.render()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
buying
buying
Updated portfolio. Portfolio value: 9998.992664767993
buying
Updated portfolio. Portfolio value: 9998.992664767991
buying
buying
Updated portfolio. Portfolio value: 10006.685042903322
buying
Updated portfolio. Portfolio value: 10006.685042903322
selling
selling
Updated portfolio. Portfolio value: 10000.04578796509
selling
Updated portfolio. Portfolio value: 10000.04578796509
selling
selling
Updated portfolio. Portfolio value: 10000.41209168582
selling
Updated portfolio. Portfolio value: 10000.41209168582
buying
buying
Updated portfolio. Portfolio value: 9990.018223610105
buying
Updated portfolio. Portfolio value: 9990.018223610105
buying
buying
Updated portfolio. Portfolio value: 9993.86441267777
buying
Updated portfolio. Portfolio value: 9993.86441267777
buying
buying
Updated portfolio. Portfolio value: 10061.172721361918
buying
Updated portfolio. Portfolio value: 10061.17272136