# Portfolio Optimization with PPO
This notebook is used to train a PPO agent to optimize a portfolio using my custom environment `optiFolioEnv`.
We will:
- Load the dataset
- Initialize and wrap the environment
- Train a PPO agent
- Evaluate the trained agent


In [None]:
!python utils\dataHandler.py

In [None]:
import os
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from env.optiFolioEnv import optiFolioEnv

# --- Environment -------------------------------------------------------------
# Build the raw portfolio environment from the dataset CSV.
portfolio_env = optiFolioEnv("data/dataset/dataset.csv")

# One folder holds both the Monitor output and the TensorBoard runs.
log_dir = "ppo_tensorboard"
os.makedirs(log_dir, exist_ok=True)

# Monitor records training metrics into log_dir; DummyVecEnv then exposes the
# single environment through the vectorized interface Stable-Baselines3 expects.
monitored_env = Monitor(portfolio_env, log_dir)
env = DummyVecEnv([lambda: monitored_env])

# --- Model -------------------------------------------------------------------
# PPO with a multi-layer-perceptron policy, verbose console output,
# TensorBoard logging into log_dir, and the GPU requested as compute device.
model_ppo = PPO(
    policy="MlpPolicy",
    env=env,
    verbose=1,
    tensorboard_log=log_dir,
    device="cuda",
)

# --- Training ----------------------------------------------------------------
# Train for five million environment steps; the TensorBoard run is named
# "PPO_Portfolio".
model_ppo.learn(total_timesteps=5_000_000, tb_log_name="PPO_Portfolio")

# Evaluate the trained model on a single episode.
# `env` is a DummyVecEnv, so reset()/step() return batched values with one
# entry per wrapped environment (exactly one here).
obs = env.reset()  # Reset environment to initial state
done = False
while not done:
    # deterministic=True selects the policy's deterministic action instead of
    # sampling, so the reported statistics are not distorted by exploration
    # noise and do not vary between evaluation runs for that reason.
    action, _ = model_ppo.predict(obs, deterministic=True)

    # The vectorized step returns (obs, rewards, dones, infos). `dones` is an
    # array with one flag per environment, so read the single entry explicitly
    # rather than relying on the truthiness of a one-element array.
    obs, reward, dones, info = env.step(action)
    done = bool(dones[0])

# Print final portfolio statistics taken from the info dict of the last step.
print("Portfolio value:", info[0]["portfolio_value"])
print("Portfolio volatility:", info[0]["portfolio_vol"])
