In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install stable-baselines3 Shimmy #RL packages; Shimmy dependency for stable-baselines

Collecting stable-baselines3
  Downloading stable_baselines3-2.3.1-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.2/182.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Shimmy
  Downloading Shimmy-1.3.0-py3-none-any.whl (37 kB)
Collecting gymnasium<0.30,>=0.28.1 (from stable-baselines3)
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m29.8 MB/s[0m eta [36m0:00:00[0m
Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13->stable-baselines3)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-m

In [None]:
#Processing libraries
import numpy as np
import pandas as pd

#Gym stuff
import gym
from gym import spaces

# Stable baselines - rl stuff
from stable_baselines3 import A2C
from stable_baselines3.common.sb2_compat.rmsprop_tf_like import RMSpropTFLike

#Ignore the warnings
import warnings
warnings.filterwarnings('ignore')

#Set the seed for reproducibility
seed = 21
np.random.seed(seed)

class PortfolioEnv(gym.Env):
  def __init__(self, data, risk_free_rate=0):
    self.data = data
    self.current_step = 0
    self.risk_free_rate = risk_free_rate
    self.n_stocks = len(data.columns)
    self.action_space = spaces.Box(low=0, high=1, shape=(self.n_stocks,), dtype=np.float32)  # Weights between 0 and 1
    self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.n_stocks,), dtype=np.float32)

  def reset(self):
    self.current_step = 0
    return self.data.iloc[self.current_step,:]

  def step(self, action):
    # Normalize action weights to ensure they sum to 1
    action /= np.sum(action)

    # Get next day data
    next_day_data = self.data.iloc[self.current_step + 1,:]

    # Calculate daily returns
    daily_returns = (next_day_data - self.data.iloc[self.current_step,:]) / self.data.iloc[self.current_step,:]

    # Calculate portfolio return based on weights
    portfolio_return = np.dot(action, daily_returns)

    # Calculate excess return (return - risk-free rate)
    excess_return = portfolio_return - self.risk_free_rate

    # Calculate volatility (standard deviation of daily returns)
    window_size = 21  # Adjust window size as needed
    volatility = np.std(self.data.iloc[self.current_step-window_size+1:self.current_step+1,:].pct_change()) * np.sqrt(252)  # Annualize

    # Calculate Sharpe Ratio
    sharpe_ratio = excess_return / volatility

    # No reward as we're not buying/selling
    reward = 0.0

    # Update state (current day data)
    self.current_step += 1
    next_state = self.data.iloc[self.current_step,:]

    # Done flag (check if all data is consumed)
    done = self.current_step == len(self.data) - 1

    # Info (contains portfolio weights and Sharpe Ratio)
    info = {"weights": action, "sharpe_ratio": sharpe_ratio}

    return next_state, reward, done, info

  and should_run_async(code)


In [None]:
# Load historical price data
data = pd.read_csv("/content/drive/MyDrive/portfolio_optimization_data/sample_data_5_tic.csv", index_col="Date", parse_dates=True)
data.replace(np.nan, 0, inplace=True) ## ??
data.head()

# Create environment
env = PortfolioEnv(data)
env.seed(seed)

#Initialize our model and train
policy_kwargs = dict(optimizer_class=RMSpropTFLike, optimizer_kwargs=dict(eps=1e-5))
actor_critic = A2C('MlpPolicy', env, policy_kwargs=policy_kwargs, verbose=1)
actor_critic.learn(total_timesteps=100000)

  deprecation(


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  action /= np.sum(action)


------------------------------------
| time/                 |          |
|    fps                | 223      |
|    iterations         | 100      |
|    time_elapsed       | 2        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -7.09    |
|    explained_variance | -6.96    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.0107  |
|    std                | 1        |
|    value_loss         | 2.77e-06 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 165      |
|    iterations         | 200      |
|    time_elapsed       | 6        |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -7.09    |
|    explained_variance | -0.312   |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | -0.00763 |
|

<stable_baselines3.a2c.a2c.A2C at 0x7c432410f010>

In [None]:
# Use the trained RL agent to predict actions based on the environment's state
state = env.reset()
done = False
while not done:
    # Get action from the RL agent based on the current state
    action, _ = actor_critic.predict(state)

    # Check if the sum of action weights is not zero
    if np.sum(action) != 0:
        # Normalize the action weights to ensure they sum to 1
        action /= np.sum(action)
    else:
        # If the sum is zero, set all weights to equal distribution
        action = np.ones_like(action) / len(action)

    # Take a step in the environment based on the predicted action
    next_state, reward, done, info = env.step(action)

    # Update the state for the next iteration
    state = next_state

# Extract and print the optimal weights from the predicted actions
optimal_weights = action
print("Optimal Portfolio Weights:")
print(optimal_weights)

Optimal Portfolio Weights:
[0.32827896 0.13090199 0.32827896 0.         0.2125401 ]


In [None]:
# Extract the optimal weights and other information from the environment's info
tickers = data.columns.tolist()
returns = []
volatility = []
sharpe_ratio = []
optimal_weights = action.tolist()

for i, column in enumerate(data.columns):
    asset_returns = data[column].dropna()
    asset_mean_return = asset_returns.mean() * 252
    asset_volatility = asset_returns.std() * np.sqrt(252)
    asset_sharpe = asset_mean_return / asset_volatility
    returns.append(asset_mean_return)
    volatility.append(asset_volatility)
    sharpe_ratio.append(asset_sharpe)

# Create a DataFrame to display the asset information
asset_df = pd.DataFrame({
    'Tickers': tickers,
    'Returns': returns,
    'Volatility': volatility,
    'Sharpe Ratio': sharpe_ratio,
    'Optimal Weights': optimal_weights
})

# Set the "Tickers" column as the index of the DataFrame
asset_df.set_index('Tickers', inplace=True)

# Print the DataFrame
asset_df

Unnamed: 0_level_0,Returns,Volatility,Sharpe Ratio,Optimal Weights
Tickers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAPL,5118.924207,687.739461,7.443115,0.328279
AMZN,5574.315524,701.816994,7.942691,0.130902
GOOG,4936.936898,552.015684,8.943472,0.328279
MSFT,11054.123509,1280.225383,8.634514,0.0
TSLA,5835.042263,1059.243434,5.508689,0.21254


In [None]:
# Calculate portfolio return and risk
portfolio_return = np.dot(returns, optimal_weights)
portfolio_volatility = np.sqrt(np.dot(optimal_weights, np.dot(data.cov() * 252, optimal_weights)))

# Print portfolio return and risk
print("Portfolio Return:", portfolio_return)
print("Portfolio Risk:", portfolio_volatility)

Portfolio Return: 5270.99711630816
Portfolio Risk: 706.7548183892338
