1. Fetch Data Using Yahoo Finance
We will fetch historical stock prices for the assets in our portfolio.

In [1]:
import yfinance as yf
import pandas as pd

# List of tickers for the assets in the portfolio
tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA']

# Fetch historical data for the specified tickers
data = yf.download(tickers, start='2020-01-01', end='2023-01-01')['Adj Close']

# Fill missing data
data = data.fillna(method='ffill').fillna(method='bfill')

# Save the data to a CSV file for later use
data.to_csv('data.csv')


[*********************100%%**********************]  5 of 5 completed
  data = data.fillna(method='ffill').fillna(method='bfill')


2. Define the Custom Environment:

The custom environment will remain largely the same, but it will now use the Yahoo Finance data we fetched.

In [9]:
import gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
import torch
import torch.nn as nn
import torch.nn.functional as F

class PortfolioEnv(gym.Env):
    def __init__(self, data, initial_cash=10000, transaction_cost=0.0025):
        super(PortfolioEnv, self).__init__()
        self.data = data
        self.initial_cash = initial_cash
        self.transaction_cost = transaction_cost
        self.current_step = 0
        self.num_assets = data.shape[1]
        self.action_space = gym.spaces.Box(low=0, high=1, shape=(self.num_assets + 1,), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(self.num_assets + 1,), dtype=np.float32)
        self.reset()

    def reset(self):
        self.cash = self.initial_cash
        self.asset_holdings = np.zeros(self.num_assets)
        self.current_step = 0
        return self._get_observation()

    def step(self, action):
        action = action / np.sum(action)  # Normalize the action to sum to 1
        asset_prices = self.data.iloc[self.current_step].values
        portfolio_value = self.cash + np.sum(self.asset_holdings * asset_prices)
        self.cash = portfolio_value * action[0]
        self.asset_holdings = (portfolio_value * action[1:]) / asset_prices
        self.current_step += 1
        reward = self._get_reward()
        done = self.current_step >= len(self.data) - 1
        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        asset_prices = self.data.iloc[self.current_step].values
        return np.concatenate(([self.cash], asset_prices))

    def _get_reward(self):
        asset_prices = self.data.iloc[self.current_step].values
        portfolio_value = self.cash + np.sum(self.asset_holdings * asset_prices)
        return portfolio_value - self.initial_cash

# Prepare the data
data = pd.read_csv('data.csv', index_col=0, parse_dates=True)
env = DummyVecEnv([lambda: PortfolioEnv(data)])

# Define the policy network
class CustomPolicy(nn.Module):
    def __init__(self, observation_space, action_space):
        super(CustomPolicy, self).__init__()
        self.fc1 = nn.Linear(observation_space.shape[0], 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, action_space.shape[0])

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x), dim=-1)
        return x


# Train the RL agent
policy_kwargs = dict(
    features_extractor_class=CustomPolicy,
    features_extractor_kwargs=dict(observation_space=env.observation_space, action_space=env.action_space)
)

model = PPO('MlpPolicy', env, policy_kwargs=policy_kwargs, verbose=1)
eval_callback = EvalCallback(env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=500, deterministic=True, render=False)
model.learn(total_timesteps=10000, callback=eval_callback)

# Save the model
model.save("ppo_portfolio")

# Test the trained agent
obs = env.reset()
for _ in range(len(data) - 1):
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        break


Using cpu device




TypeError: CustomPolicy.__init__() got multiple values for argument 'observation_space'