# DDPG Main

## Data Preprocessing

In [2]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load data (the CSV is expected in the current working directory)
df = pd.read_csv('XOM_30_minute_6_month_data.csv', parse_dates=['Date'])
df.sort_values('Date', inplace=True)

# Chronological split point: the first 80% of rows are used for training
feature_cols = ['Last Price', 'Volume', 'SMAVG (15)']
train_size = int(len(df) * 0.8)

# Normalize
# Fit the scaler on the training rows only, so the minima/maxima of the test
# period do not leak into the training features, then apply that same scaling
# to every row. Test-period values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(df.iloc[:train_size][feature_cols])
df[feature_cols] = scaler.transform(df[feature_cols])

# Split into training and testing sets
train_df = df[:train_size]
test_df = df[train_size:]

print(train_df[10:])

                    Date  Last Price    Volume  SMAVG (15)
2331 2023-03-07 20:00:00    0.596608  0.049707    0.072420
2330 2023-03-07 20:30:00    0.606681  0.165330    0.086878
2329 2023-03-08 14:30:00    0.634154  0.134774    0.099523
2328 2023-03-08 15:00:00    0.608055  0.086473    0.077523
2327 2023-03-08 15:30:00    0.571654  0.099766    0.067301
...                  ...         ...       ...         ...
473  2023-09-29 20:30:00    0.881409  0.306496    0.180971
472  2023-10-02 14:30:00    0.794870  0.171930    0.194260
471  2023-10-02 15:00:00    0.755034  0.144825    0.184963
470  2023-10-02 15:30:00    0.762818  0.113712    0.166506
469  2023-10-02 16:00:00    0.764192  0.092697    0.157100

[1863 rows x 4 columns]


## Environment

In [103]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import gymnasium
from gymnasium import spaces
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


class DDPGTradingEnv(gymnasium.Env):
  """Single-asset trading environment driven by one continuous action.

  The action is a single number in [-1, 1]: a positive value is the number of
  shares to buy, a negative value the number of shares to sell, 0 means hold.
  An observation is ``[current price, shares held, cash balance]``.
  Rows of ``df`` are consumed in order, one per step.

  Args:
    df: DataFrame containing a 'Last Price' column.

  Raises:
    ValueError: if ``df`` has no rows.
  """

  def __init__(self, df):
    super().__init__()
    if len(df) == 0:
      raise ValueError("df must contain at least one row of price data")

    self.df = df

    # Extract prices and features
    self.prices = df['Last Price'].values
    # Only the price is kept as a feature for now.
    self.features = df[['Last Price']].values

    # Negatives mean quantity to sell, 0 is hold, positives mean buy
    self.action_space = spaces.Box(low=-1, high=1, shape=(1, ), dtype=np.float32)
    # Observation is [price, shares held, cash balance]; none of them has a
    # fixed bound, so the space is left unbounded.
    self.observation_space = spaces.Box(
        low=-np.inf, high=np.inf, shape=(3, ), dtype=np.float32)

    self.reset()

  def reset(self, seed=None, options=None):
    """Restart the episode at the first row with the initial cash balance.

    Args:
      seed: forwarded to ``gymnasium.Env.reset`` to seed the env's RNG.
      options: accepted for gymnasium API compatibility; unused.

    Returns:
      A tuple ``(observation, info)``.
    """
    super().reset(seed=seed)
    self.current_step = 0
    self.total_profit = 0
    self.initial_balance = 10000
    self.cash_balance = self.initial_balance
    self.shares_held = 0
    self.portfolio_value = self.cash_balance
    self.previous_portfolio_value = self.portfolio_value
    # For plotting
    self.portfolio_history = [self.portfolio_value]
    info = {
        'current_step': self.current_step,
        'portfolio_value': self.portfolio_value
    }
    return self.get_observation(), info

  def get_observation(self):
    """Return ``[current price, shares held, cash balance]`` as float32."""
    current_price = self.prices[self.current_step]
    return np.array(
        [current_price, self.shares_held, self.cash_balance], dtype=np.float32)

  def take_action(self, action_value):
    """Trade at the current step's price, then refresh the portfolio value.

    Args:
      action_value: a scalar or 1-element array-like. It is clipped to the
        bounds of ``action_space`` before being applied.
    """
    current_price = self.prices[self.current_step]
    # Agents hand over an array of shape (1,); reduce it to a plain float so
    # the balances stay scalars.
    amount = float(np.asarray(action_value, dtype=np.float64).reshape(-1)[0])
    lowest = float(self.action_space.low[0])
    highest = float(self.action_space.high[0])
    amount = min(max(amount, lowest), highest)
    if amount > 0:
      self.buy_stock(amount, current_price)
    elif amount < 0:
      # sell_stock expects a positive quantity
      self.sell_stock(-amount, current_price)
    # Update the portfolio value
    self._update_portfolio_value(current_price)

  def buy_stock(self, num_stocks, current_price):
    """Buy ``num_stocks`` (a positive quantity), capped by available cash."""
    if current_price > 0:
      affordable = max(self.cash_balance, 0.0) / current_price
      num_stocks = min(num_stocks, affordable)
    self.cash_balance -= (num_stocks * current_price)
    self.shares_held += num_stocks

  def sell_stock(self, num_stocks, current_price):
    """Sell ``num_stocks`` (a positive quantity), capped by shares held."""
    num_stocks = min(num_stocks, self.shares_held)
    self.cash_balance += (num_stocks * current_price)
    self.shares_held -= num_stocks

  def _update_portfolio_value(self, current_price):
    """Remember the old portfolio value and revalue at ``current_price``."""
    self.previous_portfolio_value = self.portfolio_value
    self.portfolio_value = self.cash_balance + (self.shares_held * current_price)
    self.total_profit = self.portfolio_value - self.initial_balance

  def step(self, action):
    """Apply ``action`` at the current price, then advance one row.

    Returns:
      A tuple ``(observation, reward, terminated, info)``.
      NOTE: gymnasium's own API returns five values (with ``truncated``);
      the four-value form is kept so existing unpacking keeps working.

    Raises:
      RuntimeError: if called when no further row is available.
    """
    if self.current_step >= len(self.prices) - 1:
      raise RuntimeError(
          "step() called after the episode terminated; call reset() first")
    # Trade at the price the agent has just observed, then move to the next
    # row and revalue, so the reward reflects the action that was taken.
    self.take_action(action)
    self.current_step += 1
    self._update_portfolio_value(self.prices[self.current_step])
    reward = self.calculate_reward(action)
    terminated = (self.current_step >= len(self.prices) - 1)
    observation = self.get_observation()
    info = {
        'current_step': self.current_step,
        'portfolio_value': self.portfolio_value
    }
    self.portfolio_history.append(self.portfolio_value)
    return observation, reward, terminated, info

  def render(self, mode='human'):
    """Print the current step and portfolio value when ``mode`` is 'human'."""
    if mode == 'human':
      print(f"Step: {self.current_step}, Portfolio Value: {self.portfolio_value}")

  # The reward is the change in portfolio value the next time_step
  def calculate_reward(self, action=None):
    """Return the change in portfolio value since the previous valuation.

    Args:
      action: accepted because ``step`` passes it; not used in the reward.
    """
    reward = self.portfolio_value - self.previous_portfolio_value
    return reward

tensor([], size=(0, 1), grad_fn=<AddmmBackward0>)




In [None]:
# Initialise environment
env = DDPGTradingEnv(df)

# NOTE: the Actor and Critic cells under "DDPG Classes" must be run before
# this cell, otherwise the two names below are undefined.

# Take the state size from an actual observation and the action size from the
# action space, so both match what the environment really produces.
state = torch.as_tensor(env.get_observation(), dtype=torch.float32)
state_dim = tuple(state.shape)
action_dim = env.action_space.shape

actor_model = Actor(input_dim=state_dim, output_dim=action_dim)
critic_model = Critic(input_dim=state_dim, output_dim=1)

# Example forward pass (calling the module runs forward() plus any hooks)
action = actor_model(state)
value_estimate = critic_model(state)

print(value_estimate)

## DDPG Classes

### Actor Class

In [None]:
class Actor(nn.Module):
  """Policy network mapping a state vector to an action in [-1, 1].

  Args:
    input_dim: size of the state vector, as an int or a shape tuple such as
      ``(3,)`` (only the first entry is used).
    output_dim: size of the action vector, as an int or a shape tuple.
    hidden_size: width of the hidden layer.
  """

  def __init__(self, input_dim, output_dim, hidden_size=128):
    super(Actor, self).__init__()

    input_dim = self._as_size(input_dim)
    output_dim = self._as_size(output_dim)

    # fc0 must take `input_dim` features; with in_features=1 a state vector
    # of length `input_dim` cannot be passed through the network.
    self.fc0 = nn.Linear(input_dim, input_dim)
    self.fc1 = nn.Linear(input_dim, hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, output_dim)
    self.tanh = nn.Tanh()

  @staticmethod
  def _as_size(dim):
    """Return ``dim`` as an int, taking the first entry of a shape tuple."""
    if isinstance(dim, (tuple, list)):
      return int(dim[0])
    return int(dim)

  def forward(self, state):
    """Compute the action for ``state`` of shape ``(..., input_dim)``.

    Returns:
      Tensor of shape ``(..., output_dim)``; tanh keeps every entry in [-1, 1].
    """
    x = self.relu(self.fc0(state))
    x = self.relu(self.fc1(x))
    x = self.tanh(self.fc2(x))
    return x

### Critic Class

In [None]:
class Critic(nn.Module):
  """Value network mapping a state vector to a value estimate.

  NOTE(review): this critic only receives the state. DDPG normally trains a
  critic on (state, action) pairs; confirm whether the action should be an
  input here as well.

  Args:
    input_dim: size of the state vector, as an int or a shape tuple such as
      ``(3,)`` (only the first entry is used).
    output_dim: number of output values, as an int or a shape tuple.
    hidden_size: width of the hidden layer.
  """

  def __init__(self, input_dim, output_dim, hidden_size=128):
    super(Critic, self).__init__()

    input_dim = self._as_size(input_dim)
    output_dim = self._as_size(output_dim)

    # fc0 must take `input_dim` features; with in_features=1 a state vector
    # of length `input_dim` cannot be passed through the network.
    self.fc0 = nn.Linear(input_dim, input_dim)
    self.fc1 = nn.Linear(input_dim, hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, output_dim)

  @staticmethod
  def _as_size(dim):
    """Return ``dim`` as an int, taking the first entry of a shape tuple."""
    if isinstance(dim, (tuple, list)):
      return int(dim[0])
    return int(dim)

  def forward(self, state):
    """Compute the value estimate for ``state`` of shape ``(..., input_dim)``.

    Returns:
      Tensor of shape ``(..., output_dim)``; no output activation is applied.
    """
    x = self.relu(self.fc0(state))
    x = self.relu(self.fc1(x))
    x = self.fc2(x)
    return x

## Training

In [None]:
import matplotlib.pyplot as plt

# Plotting the profit change
# portfolio_history holds absolute portfolio values, so subtract the starting
# value (its first entry) to show profit, as the title and axis label state.
history = env.portfolio_history
profit = [value - history[0] for value in history]
plt.plot(profit)
plt.title("Profit Change Over Time")
plt.xlabel("Steps")
plt.ylabel("Total Profit")
plt.show()