In [None]:
import gymnasium as gym
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise
from tensorflow.keras.models import load_model

# Load LSTM model and dataset
lstm_model = load_model('lstm_price_predictor.h5')
# Every backslash in the Windows path is escaped. The previous literal contained
# an unescaped "RST\Desktop", which only worked because Python leaves unknown
# escape sequences untouched (and emits a SyntaxWarning for them).
data = pd.read_csv("C:\\Users\\RST\\Desktop\\Minor project\\Model 3\\combined_final2.csv")

# Drop irrelevant columns
data = data.drop(columns=['% Change', '% Change vs Average', 'Date'])

# Fit the scaler on the four price columns; the fitted `scaler` is reused by the
# prediction helpers and the environment below.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[['Open', 'High', 'Low', 'Close']])

# Define the LSTM prediction function
def predict_future_prices(lstm_model, recent_data, scaler):
    """Predict one price from the most recent rows of market data.

    Args:
        lstm_model: Object exposing ``predict(batch)``; the value at ``[0][0]``
            of its result is treated as the scaled prediction.
        recent_data: 2-D table of rows to scale; only the last 60 rows are used.
        scaler: Fitted scaler exposing ``transform`` and ``inverse_transform``
            over four columns.

    Returns:
        The prediction mapped back through ``scaler.inverse_transform``, read
        from the fourth column of the un-scaled row.
    """
    # Keep at most the 60 latest rows, already scaled.
    window = scaler.transform(recent_data)[-60:]

    # Wrap the window in a batch of one: (1, timesteps, features).
    stacked = np.array([window])
    _, timesteps, n_features = stacked.shape
    model_input = stacked.reshape((1, timesteps, n_features))

    scaled_prediction = lstm_model.predict(model_input)[0][0]

    # The scaler expects four columns, so pad the first three with zeros and
    # read the result back from the fourth.
    padded_row = [0, 0, 0, scaled_prediction]
    return scaler.inverse_transform([padded_row])[0][3]

# Batch-based LSTM prediction function
def predict_future_prices_batch(lstm_model, recent_data_batch, scaler):
    """Predict one price per window with a single model call.

    Args:
        lstm_model: Object exposing ``predict(batch)``; element ``[0]`` of each
            returned row is treated as the scaled prediction for that window.
        recent_data_batch: Iterable of 2-D tables; each must provide 60 rows of
            4 columns after slicing, because the batch is reshaped to
            ``(n_windows, 60, 4)``.
        scaler: Fitted scaler exposing ``transform`` and ``inverse_transform``
            over four columns.

    Returns:
        A list with one un-scaled predicted price per input window.
    """
    # Scale every window and keep its last 60 rows.
    windows = []
    for frame in recent_data_batch:
        windows.append(scaler.transform(frame)[-60:])

    # Stack into the (batch, timesteps, features) layout the model is fed.
    model_input = np.array(windows).reshape((len(windows), 60, 4))

    # One forward pass for the whole batch.
    raw_predictions = lstm_model.predict(model_input)

    # Undo the scaling row by row: pad to four columns, read back the fourth.
    prices = []
    for row in raw_predictions:
        restored = scaler.inverse_transform([[0, 0, 0, row[0]]])
        prices.append(restored[0][3])
    return prices

# Custom Stock Trading Environment
class StockTradingEnv(gym.Env):
    """Gymnasium environment that walks through price data in sliding windows.

    Each observation is the element-wise mean of ``batch_size`` consecutive
    windows. A window is ``WINDOW_SIZE`` rows of ``data`` with one extra column
    holding the LSTM-predicted price for that window, flattened to 1-D.

    Args:
        data: pandas DataFrame of price rows (only ``.iloc``, ``.values`` and
            ``.shape`` are used here).
        lstm_model: Model forwarded to ``predict_future_prices_batch``.
        scaler: Fitted scaler forwarded to ``predict_future_prices_batch``.
        batch_size: Number of consecutive windows averaged into one observation.
        render_mode: Stored on the instance; no rendering is implemented.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or ``data`` has too few
            rows to build a single observation.
    """

    # Gymnasium reads the `render_modes` key; the legacy `render.modes` is ignored.
    metadata = {'render_modes': ['human', 'rgb_array']}

    # Rows per window; matches the 60-step input hard-coded in the LSTM helpers.
    WINDOW_SIZE = 60

    def __init__(self, data, lstm_model, scaler, batch_size, render_mode=None):
        super().__init__()
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        self.data = data
        self.lstm_model = lstm_model
        self.scaler = scaler
        self.batch_size = batch_size  # Batch size for processing data
        self.render_mode = render_mode
        self.current_step = 0

        # Last start index for which all `batch_size` windows are complete.
        self._last_step = len(data) - self.WINDOW_SIZE - batch_size + 1
        if self._last_step < 0:
            raise ValueError(
                "data needs at least WINDOW_SIZE + batch_size - 1 rows to build one observation"
            )

        # Continuous action space for DDPG
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        # One value per cell of a window plus the predicted-price column
        # (60 * (4 + 1) = 300 for four price columns). Observations hold
        # unscaled prices, so the bounds are left open instead of [0, 1].
        obs_size = self.WINDOW_SIZE * (data.shape[1] + 1)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_size,), dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        """Rewind to the first window and return ``(observation, info)``."""
        # Seeds `self.np_random` as the Gymnasium API expects.
        super().reset(seed=seed)
        if seed is not None:
            # Kept from the original behaviour: also seed NumPy's global RNG.
            np.random.seed(seed)
        self.current_step = 0
        return self._next_observation(), {}

    def step(self, action):
        """Apply ``action``, advance one row and return the Gymnasium 5-tuple.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` becomes True once the last complete window is reached;
            ``truncated`` is always False.
        """
        reward = float(self._take_action(action))

        # Clamp so a stray call after termination never slices past the data.
        self.current_step = min(self.current_step + 1, self._last_step)
        terminated = self.current_step >= self._last_step

        return self._next_observation(), reward, terminated, False, {}

    def _next_observation(self):
        """Build the averaged, flattened observation for ``current_step``."""
        start = self.current_step
        windows = [
            self.data.iloc[i:i + self.WINDOW_SIZE]
            for i in range(start, start + self.batch_size)
        ]

        predicted_prices = predict_future_prices_batch(self.lstm_model, windows, self.scaler)

        # Append the predicted price as an extra column, then flatten each window.
        obs_batch = [
            np.hstack((window.values, np.full((self.WINDOW_SIZE, 1), price))).flatten()
            for window, price in zip(windows, predicted_prices)
        ]

        # Mean over the batch; cast to the dtype declared by observation_space.
        return np.mean(obs_batch, axis=0).astype(np.float32)

    def _take_action(self, action_type):
        """Return the reward for ``action_type``.

        NOTE(review): still a placeholder that always returns 0, so the agent
        receives no learning signal until real trading logic is added here.
        """
        return 0

# Build the batched trading environment
batch_size = 10  # Adjust this based on your system's memory capacity
env = StockTradingEnv(data, lstm_model, scaler, batch_size=batch_size)

# Gaussian exploration noise, one entry per action dimension
n_actions = env.action_space.shape[0]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=np.full(n_actions, 0.1),
)

# DDPG agent with the default MLP policy
ddpg_model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1)

# Train, then persist the agent to disk
ddpg_model.learn(total_timesteps=10_000)
ddpg_model.save("ddpg_stock_trader_batched")

# Release the environment once training is done
env.close()


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [None]:
import gymnasium as gym
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise
from tensorflow.keras.models import load_model

# Load the saved LSTM price predictor
lstm_model = load_model('lstm_price_predictor.h5')

# Read the price history and discard the columns that are not used downstream
data = pd.read_csv(
    'C:\\Users\\RST\\Desktop\\Minor project\\combined_final1.csv'
).drop(columns=['% Change', '% Change vs Average', 'Date'])

# Fit a [0, 1] min-max scaler on the four price columns; the fitted `scaler`
# is reused by the prediction helper and the environment below.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data[['Open', 'High', 'Low', 'Close']])

# Define the LSTM prediction function
def predict_future_prices(lstm_model, recent_data, scaler):
    """Run the LSTM on the latest rows and return the un-scaled prediction.

    Args:
        lstm_model: Object exposing ``predict(batch)``; ``[0][0]`` of its output
            is taken as the scaled prediction.
        recent_data: 2-D table of rows; the last 60 (or fewer) are used.
        scaler: Fitted scaler with ``transform`` / ``inverse_transform`` over
            four columns.

    Returns:
        The fourth column of the inverse-transformed row built from the
        prediction.
    """
    scaled_rows = scaler.transform(recent_data)

    # Single-sample batch made of the trailing 60 time steps.
    batch = np.array([scaled_rows[-60:]])
    _, n_steps, n_cols = batch.shape
    batch = batch.reshape((1, n_steps, n_cols))

    model_output = lstm_model.predict(batch)

    # Zero-pad to the scaler's four columns; only the last column carries data.
    unscaled = scaler.inverse_transform([[0, 0, 0, model_output[0][0]]])
    return unscaled[0][3]

# Custom Stock Trading Environment
class StockTradingEnv(gym.Env):
    """Gymnasium environment that walks through price data one row at a time.

    Each observation is a window of ``WINDOW_SIZE`` rows of ``data`` with one
    extra column holding the LSTM-predicted price for that window, flattened
    to 1-D.

    Args:
        data: pandas DataFrame of price rows (only ``.iloc``, ``.values`` and
            ``.shape`` are used here).
        lstm_model: Model forwarded to ``predict_future_prices``.
        scaler: Fitted scaler forwarded to ``predict_future_prices``.
        render_mode: Stored on the instance; no rendering is implemented.

    Raises:
        ValueError: If ``data`` has fewer than ``WINDOW_SIZE`` rows.
    """

    # Gymnasium reads the `render_modes` key; the legacy `render.modes` is ignored.
    metadata = {'render_modes': ['human', 'rgb_array']}

    # Rows per window; matches the 60-step input hard-coded in the LSTM helper.
    WINDOW_SIZE = 60

    def __init__(self, data, lstm_model, scaler, render_mode=None):
        super().__init__()
        self.data = data
        self.lstm_model = lstm_model
        self.scaler = scaler
        self.render_mode = render_mode
        self.current_step = 0

        # Last start index for which a complete window still fits in `data`.
        self._last_step = len(data) - self.WINDOW_SIZE
        if self._last_step < 0:
            raise ValueError("data needs at least WINDOW_SIZE rows to build one observation")

        # Define a continuous action space: Buy, Sell, Hold now become continuous
        # [-1, 1] range can represent: -1 (sell full), 1 (buy full), 0 (hold)
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        # One value per cell of the window plus the predicted-price column
        # (60 * (4 + 1) = 300 for four price columns). Observations hold
        # unscaled prices, so the bounds are left open instead of [0, 1].
        obs_size = self.WINDOW_SIZE * (data.shape[1] + 1)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_size,), dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        """Rewind to the first window and return ``(observation, info)``."""
        # Seeds `self.np_random` as the Gymnasium API expects.
        super().reset(seed=seed)
        if seed is not None:
            # Kept from the original behaviour: also seed NumPy's global RNG.
            np.random.seed(seed)
        self.current_step = 0
        return self._next_observation(), {}

    def step(self, action):
        """Apply ``action``, advance one row and return the Gymnasium 5-tuple.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` becomes True once the last complete window is reached;
            ``truncated`` is always False.
        """
        reward = float(self._take_action(action))

        # Clamp so a stray call after termination never slices past the data.
        self.current_step = min(self.current_step + 1, self._last_step)
        terminated = self.current_step >= self._last_step

        return self._next_observation(), reward, terminated, False, {}

    def _next_observation(self):
        """Build the flattened observation for the window at ``current_step``."""
        start = self.current_step
        recent_data = self.data.iloc[start:start + self.WINDOW_SIZE]
        predicted_price = predict_future_prices(self.lstm_model, recent_data, self.scaler)

        # Append the predicted price as an extra column on every row.
        obs = np.hstack((recent_data.values, np.full((self.WINDOW_SIZE, 1), predicted_price)))

        # Cast to the dtype declared by observation_space.
        return obs.flatten().astype(np.float32)

    def _take_action(self, action_type):
        """Return the reward for ``action_type``.

        NOTE(review): still a placeholder that always returns 0, so the agent
        receives no learning signal until real trading logic is added here.
        """
        return 0


# Build the trading environment
env = StockTradingEnv(data, lstm_model, scaler)

# Gaussian exploration noise, one entry per action dimension
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=np.full(n_actions, 0.1),
)

# DDPG agent with the default MLP policy
ddpg_model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1)

# Train, then persist the agent to disk
ddpg_model.learn(total_timesteps=1_000)
ddpg_model.save("ddpg_stock_trader_new")

# Release the environment once training is done
env.close()


In [None]:
class StockTradingEnv(gym.Env):
    """Sliding-window stock environment exposing the Gymnasium ``Env`` API.

    An observation is ``WINDOW_SIZE`` consecutive rows of ``data`` plus one
    column filled with the LSTM-predicted price for that window, flattened
    to 1-D.

    Args:
        data: pandas DataFrame of price rows (only ``.iloc``, ``.values`` and
            ``.shape`` are used here).
        lstm_model: Model forwarded to ``predict_future_prices``.
        scaler: Fitted scaler forwarded to ``predict_future_prices``.
        render_mode: Stored on the instance; no rendering is implemented.

    Raises:
        ValueError: If ``data`` has fewer than ``WINDOW_SIZE`` rows.
    """

    # Gymnasium reads the `render_modes` key; the legacy `render.modes` is ignored.
    metadata = {'render_modes': ['human', 'rgb_array']}

    # Rows per window; matches the 60-step input hard-coded in the LSTM helper.
    WINDOW_SIZE = 60

    def __init__(self, data, lstm_model, scaler, render_mode=None):
        super().__init__()
        self.data = data
        self.lstm_model = lstm_model
        self.scaler = scaler
        self.render_mode = render_mode
        self.current_step = 0

        # Last start index for which a complete window still fits in `data`.
        self._last_step = len(data) - self.WINDOW_SIZE
        if self._last_step < 0:
            raise ValueError("data needs at least WINDOW_SIZE rows to build one observation")

        # Define a continuous action space: Buy, Sell, Hold now become continuous
        # [-1, 1] range can represent: -1 (sell full), 1 (buy full), 0 (hold)
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        # One value per cell of the window plus the predicted-price column
        # (60 * (4 + 1) = 300 for four price columns). Observations hold
        # unscaled prices, so the bounds are left open instead of [0, 1].
        obs_size = self.WINDOW_SIZE * (data.shape[1] + 1)
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_size,), dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        """Rewind to the first window and return ``(observation, info)``."""
        # Seeds `self.np_random` as the Gymnasium API expects.
        super().reset(seed=seed)
        if seed is not None:
            # Kept from the original behaviour: also seed NumPy's global RNG.
            np.random.seed(seed)
        self.current_step = 0
        return self._next_observation(), {}

    def step(self, action):
        """Apply ``action``, advance one row and return the Gymnasium 5-tuple.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
            ``terminated`` becomes True once the last complete window is reached;
            ``truncated`` is always False.
        """
        reward = float(self._take_action(action))

        # Clamp so a stray call after termination never slices past the data.
        self.current_step = min(self.current_step + 1, self._last_step)
        terminated = self.current_step >= self._last_step

        return self._next_observation(), reward, terminated, False, {}

    def _next_observation(self):
        """Build the flattened observation for the window at ``current_step``."""
        start = self.current_step
        recent_data = self.data.iloc[start:start + self.WINDOW_SIZE]
        predicted_price = predict_future_prices(self.lstm_model, recent_data, self.scaler)

        # Append the predicted price as an extra column on every row.
        obs = np.hstack((recent_data.values, np.full((self.WINDOW_SIZE, 1), predicted_price)))

        # Cast to the dtype declared by observation_space.
        return obs.flatten().astype(np.float32)

    def _take_action(self, action_type):
        """Return the reward for ``action_type``.

        NOTE(review): still a placeholder that always returns 0, so the agent
        receives no learning signal until real trading logic is added here.
        """
        return 0


In [None]:
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise

# Rebuild the environment so training uses the StockTradingEnv class as it is
# currently defined. The previous `env` object was created before the class was
# redefined and has already been closed.
env = StockTradingEnv(data, lstm_model, scaler)

# Add noise for exploration
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

# Create DDPG model
ddpg_model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1)

# Train the DDPG model
ddpg_model.learn(total_timesteps=10000)

# Save the trained model
ddpg_model.save("ddpg_stock_trader_continuous")

# Close the environment after training
env.close()
