### Download and compute data

Launch this next code line, if you didn't download data yet

In [59]:
!python utils/dataHandler.py
!python utils/datasetHandler.py


Annual Volatility :
USO     0.375213
URTH    0.169749
GLD     0.145473
dtype: float64
  return Index(sequences[0], name=names)
  return Index(sequences[0], name=names)


In [60]:
from env import OptiFolioEnv

from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Dirichlet

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor

import torch

In [100]:
dataset_path = "data/dataset/dataset.csv"
dataset_return_path = "data/returns/data_returns.csv"
df = pd.read_csv(dataset_path, index_col="Date")
return_df = pd.read_csv(dataset_return_path, index_col="Date")

train_df, test_df = train_test_split(df,test_size=0.2, shuffle=False)

train_df.to_csv("data/dataset/train/train.csv")
test_df.to_csv("data/dataset/train/test.csv")

In [101]:
class PPOActorCritic(nn.Module):
    def __init__(self, input_shape, action_dim):
        super().__init__()

        self.input_dim = input_shape[0] * input_shape[1]
        self.action_dim = action_dim

        self.shared = nn.Sequential(
            nn.Linear(self.input_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
        )

        self.actor_alpha = nn.Sequential(
            nn.Linear(256, action_dim),
            nn.Softplus()  
        )

        self.critic = nn.Linear(256, 1)

        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.orthogonal_(m.weight, gain=1.0)
                nn.init.constant_(m.bias, 0)

        nn.init.orthogonal_(self.actor_alpha[0].weight, gain=0.01)

    def forward(self, x):
        if x.dim() == 2:
            x = x.unsqueeze(0)

        x = x.reshape(x.size(0), -1)
        features = self.shared(x)

        alpha = self.actor_alpha(features) + 1e-6
        value = self.critic(features)

        return alpha, value

    def get_action(self, x):
        alpha, value = self.forward(x)
        dist = Dirichlet(alpha)

        action = dist.sample()
        log_prob = dist.log_prob(action)

        return action, log_prob, value

    def evaluate_actions(self, x, actions):
        alpha, value = self.forward(x)
        dist = Dirichlet(alpha)

        log_probs = dist.log_prob(actions)
        entropy = dist.entropy()

        return log_probs, entropy, value

In [None]:
train_env = OptiFolioEnv(dataframe=train_df, return_data=return_df, initial_amount=1000, lookback=62)
test_env = OptiFolioEnv(dataframe=test_df, return_data=return_df, initial_amount=1000, lookback=62)

m_train_env = Monitor(train_env)
m_test_env = Monitor(test_env)

model = PPO(
    policy="MlpPolicy", 
    env=m_train_env,
    learning_rate=1e-4,
    n_steps=2048,           
    batch_size=64,          
    gamma=0.99,             
    verbose=0,
    tensorboard_log="./ppo_trading_logs/"
)

#print("Début de l'apprentissage...")
#model.learn(total_timesteps=3_000_000, progress_bar=True)

#model.save("model/ppo_optifolio_v1")

print("Test du modèle...")
model.load("model/ppo_optifolio_v1")
obs, info = m_test_env.reset()
done = False
history_portfolio = []

while not done:
    action, _states = model.predict(obs, deterministic=True)
    weights = action / (np.sum(action) + 1e-8) 
    
    obs, reward, terminated, truncated, info = m_test_env.step(weights)
    history_portfolio.append(info['portfolio_value'])
    history_portfolio.append(info['weights'])
    done = terminated or truncated

print(f"Valeur finale du portefeuille : {info['portfolio_value']:.2f}")

Test du modèle...
Valeur finale du portefeuille : 1345.53
Poids Finaux : [0. 1. 0.]


In [127]:
import pandas as pd
import numpy as np

clean_weights = []
clean_values = []

for item in history_portfolio:
    if np.isscalar(item) or (isinstance(item, np.ndarray) and item.ndim == 0):
        clean_values.append(item)
    
    elif isinstance(item, (list, np.ndarray, tuple)):
        if hasattr(item, 'tolist'):
            clean_weights.append(item.tolist())
        else:
            clean_weights.append(item)

weights_df = pd.DataFrame(clean_weights)

weights_df.columns = ["USO","URTH","GLD"]

aligned_index_weights = test_df.index[-len(weights_df):]
weights_df.index = aligned_index_weights

weights_df.to_csv("model/results/Weights_evo.csv")

if len(clean_values) > 0:
    portfolio_df = pd.DataFrame(clean_values, columns=['Portfolio_Value'])
    
    aligned_index_vals = test_df.index[-len(portfolio_df):]
    portfolio_df.index = aligned_index_vals
    
    portfolio_df.to_csv("model/results/Portfolio_evo.csv")