In [10]:
import pandas as pd

# Load dataset
file_path = "train_data.csv"  # Replace with actual filename
df = pd.read_csv(file_path, parse_dates=["date"])

# List of 10 stocks to keep
selected_stocks = ["AAPL", "MSFT", "TSLA", "AMZN", "GOOGL", "NVDA", "META", "JPM", "V", "UNH"]

# Filter only required stocks
df = df[df["tic"].isin(selected_stocks)]

# Pivot table to have stock symbols as columns and dates as index
df_pivot = df.pivot(index="date", columns="tic", values="close")

# Sort by date
df_pivot = df_pivot.sort_index()

# Save to CSV
df_pivot.to_csv("stock_prices.csv")

print("Processed data saved as stock_prices.csv")

Processed data saved as stock_prices.csv


In [12]:
d = pd.read_csv("stock_prices.csv")
d.head()

Unnamed: 0,date,AAPL,JPM,MSFT,UNH,V
0,2009-01-02,2.758536,22.201174,15.200124,22.703112,12.079281
1,2009-01-05,2.874956,20.714014,15.342181,22.332815,12.165176
2,2009-01-06,2.827537,21.16016,15.52162,21.806171,13.021852
3,2009-01-07,2.766438,19.892542,14.587044,21.641588,12.739308
4,2009-01-08,2.81781,19.276423,15.043111,21.978975,12.603688


In [14]:
d.shape

(2893, 6)

#DDPG Stock Allocation

In [None]:
!pip install stable-baselines3

In [None]:
!pip install --upgrade stable-baselines3 gym

In [None]:
!pip install shimmy

In [1]:
import numpy as np
import gym
import gym.spaces
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.vec_env import DummyVecEnv

In [2]:
class AllocationEnv(gym.Env):
    def __init__(self, stock_data, initial_balance=10000):
        super(AllocationEnv, self).__init__()
        self.stock_data = stock_data  # DataFrame with historical prices
        self.initial_balance = initial_balance
        self.current_step = 0
        self.balance = initial_balance
        self.allocation = np.zeros(len(stock_data.columns))  # Fund allocation percentages

        # Action space: percentage allocation for each stock
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(len(stock_data.columns),), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(len(stock_data.columns) + 1,), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.allocation = np.zeros(len(self.stock_data.columns))
        return self._next_observation()

    def _next_observation(self):
        prices = self.stock_data.iloc[self.current_step].values
        return np.append(prices, self.balance)

    def step(self, action):
        """Executes allocation based on the action and calculates portfolio value."""
        action = np.clip(action, 0, 1)  # Ensure valid allocations
        action /= np.sum(action)  # Normalize to sum to 1

        prices = self.stock_data.iloc[self.current_step].values
        portfolio_value = self.balance * action
        self.allocation = portfolio_value / prices  # Shares allocated per stock

        self.current_step += 1
        done = self.current_step >= len(self.stock_data) - 1
        reward = np.sum(self.allocation * prices)  # Portfolio value as reward
        return self._next_observation(), reward, done, {}


In [6]:
import pandas as pd
stock_data = pd.read_csv("stock_prices.csv", index_col=0)  # Format: Columns = Stocks, Rows = Dates

# Create environment
env = DummyVecEnv([lambda: AllocationEnv(stock_data)])

# Set up DDPG with noise for exploration
action_noise = NormalActionNoise(mean=np.zeros(len(stock_data.columns)), sigma=0.1 * np.ones(len(stock_data.columns)))
model = DDPG("MlpPolicy", env, action_noise=action_noise, verbose=1)

# Train the model
model.learn(total_timesteps=100000)

# Save trained model
model.save("ddpg_allocation_model")



Using cpu device
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 36        |
|    time_elapsed    | 319       |
|    total_timesteps | 11568     |
| train/             |           |
|    actor_loss      | -4.31e+05 |
|    critic_loss     | 1.66e+06  |
|    learning_rate   | 0.001     |
|    n_updates       | 11467     |
----------------------------------
----------------------------------
| time/              |           |
|    episodes        | 8         |
|    fps             | 36        |
|    time_elapsed    | 637       |
|    total_timesteps | 23136     |
| train/             |           |
|    actor_loss      | -6.78e+05 |
|    critic_loss     | 8.34e+05  |
|    learning_rate   | 0.001     |
|    n_updates       | 23035     |
----------------------------------
----------------------------------
| time/              |           |
|    episodes        | 12        |
|    fps             | 36        |
|  

#PPO Stock Buy/Sell Signals

In [7]:
from stable_baselines3 import PPO

In [8]:
class TradingEnv(gym.Env):
    def __init__(self, stock_data, initial_balance=10000):
        super(TradingEnv, self).__init__()
        self.stock_data = stock_data  # DataFrame with 'Close' prices per stock
        self.initial_balance = initial_balance
        self.current_step = 0
        self.balance = initial_balance
        self.holdings = np.zeros(len(stock_data.columns))  # Stocks held

        # Define action & observation spaces
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(len(stock_data.columns),), dtype=np.float32)  # Buy (-1) or Sell (+1)
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(len(stock_data.columns) + 1,), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.holdings = np.zeros(len(self.stock_data.columns))
        return self._next_observation()

    def _next_observation(self):
        prices = self.stock_data.iloc[self.current_step].values
        return np.append(prices, self.balance)

    def step(self, action):
        """Executes trades based on the action and updates the portfolio."""
        prices = self.stock_data.iloc[self.current_step].values
        for i in range(len(self.holdings)):
            if action[i] > 0:  # Buy
                num_shares = (self.balance * action[i]) / prices[i]
                self.holdings[i] += num_shares
                self.balance -= num_shares * prices[i]
            elif action[i] < 0:  # Sell
                num_shares = min(abs(action[i]) * self.holdings[i], self.holdings[i])
                self.balance += num_shares * prices[i]
                self.holdings[i] -= num_shares

        self.current_step += 1
        done = self.current_step >= len(self.stock_data) - 1
        reward = self.balance + np.sum(self.holdings * prices)  # Portfolio value
        return self._next_observation(), reward, done, {}


In [9]:
stock_data = pd.read_csv("stock_prices.csv", index_col=0)  # Format: Columns = Stocks, Rows = Dates

# Create environment
env = DummyVecEnv([lambda: TradingEnv(stock_data)])

# Train PPO model
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=100000)

# Save trained model
model.save("ppo_trading_model")



Using cpu device
-----------------------------
| time/              |      |
|    fps             | 826  |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 720           |
|    iterations           | 2             |
|    time_elapsed         | 5             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 2.0800799e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -7.09         |
|    explained_variance   | 5.96e-08      |
|    learning_rate        | 0.0003        |
|    loss                 | 1.52e+11      |
|    n_updates            | 10            |
|    policy_gradient_loss | -9.05e-05     |
|    std                  | 1             |
|    value_loss          