In [None]:
import ppo
import numpy as np

def my_trained_model(observation):
    """Stand-in prediction model that always outputs zeros.

    Args:
        observation: array-like object exposing ``.shape`` with at least two
            axes. Only the size of axis 1 is read (presumably the number of
            assets — confirm against the environment's observation layout).

    Returns:
        A 1-D ``numpy`` array of zeros whose length equals
        ``observation.shape[1]``.
    """
    # The observation's contents are ignored; only its second dimension matters.
    return np.zeros(observation.shape[1])

# Experiment arguments, grouped by purpose and merged into one ppo.Args call.

# General experiment bookkeeping
experiment_settings = {
    "exp_name": "experiment_0",  # name of this experiment
    "seed": 1,                   # random seed
    # NOTE(review): the earlier comment said that True sets
    # torch.backends.cudnn.deterministic = False, which reads inverted —
    # confirm the actual behaviour in the ppo module.
    "torch_deterministic": True,
    "cuda": True,                # if True, CUDA will be enabled by default
    "track": False,              # if True, track experiment with Weights & Biases
}

# Environment specific arguments
environment_settings = {
    "n_assets": 3,                        # number of assets in the portfolio
    "window_size": 5,                     # size of the historical-price window (observation dimension)
    "action_step_size": 0.1,              # step size for actions
    "episode_length": 100,                # length of each episode
    "reward_method": "portfolio_value",   # reward method: "portfolio_value" or "sharpe_ratio"
    "g1": 0.5,                            # weight for Sharpe ratio in combined reward
    "g2": 0.5,                            # weight for portfolio return in combined reward
    # Array of historical closing prices for all assets.
    # NOTE(review): this is an empty placeholder — presumably real price data
    # must be supplied before training; confirm.
    "closing_prices": np.array([]),
    # Optional external model for price predictions.
    # NOTE(review): my_trained_model is defined in this cell but is not passed
    # here — confirm whether that is intentional.
    "prediction_model": None,
}

# Algorithm specific arguments
algorithm_settings = {
    "env_id": "PortfolioEnv-v0",  # gym environment ID
    "total_timesteps": 500000,    # total timesteps for training
    "learning_rate": 2.5e-4,      # learning rate for the optimizer
    "num_envs": 4,                # number of parallel environments
    "num_steps": 128,             # steps per environment rollout before update
    "anneal_lr": True,            # if True, linearly anneals learning rate
    "gamma": 0.99,                # discount factor for rewards
    "gae_lambda": 0.95,           # lambda parameter for GAE (advantage estimation)
    "num_minibatches": 4,         # number of minibatches per update epoch
    "update_epochs": 4,           # number of update epochs (K epochs in PPO)
    "norm_adv": True,             # if True, normalize advantages
    "clip_coef": 0.2,             # clipping coefficient for PPO surrogate objective
    "clip_vloss": True,           # if True, clip value function updates
    "ent_coef": 0.01,             # entropy bonus coefficient
    "vf_coef": 0.5,               # value loss coefficient
    "max_grad_norm": 0.5,         # max gradient norm (gradient clipping)
    "target_kl": None,            # KL divergence threshold for early stopping
}

# Placeholders for values computed at runtime
runtime_placeholders = {
    "batch_size": 0,      # batch size (computed internally from num_envs & num_steps)
    "minibatch_size": 0,  # minibatch size (computed internally)
    "num_iterations": 0,  # number of iterations (computed internally)
}

args = ppo.Args(
    **experiment_settings,
    **environment_settings,
    **algorithm_settings,
    **runtime_placeholders,
)


In [None]:
# Hand the configuration built in the previous cell to ppo.main.
# NOTE(review): presumably this launches the full PPO training run and may be
# long-running with the configured total_timesteps — confirm before re-running.
ppo.main(args)

# Evaluate model (TODO: no evaluation code has been added to this cell yet)

In [2]:
# Prints a fixed greeting to stdout.
# NOTE(review): looks like a leftover kernel smoke test unrelated to the
# experiment — consider removing this cell before sharing the notebook.
print("Hello")

Hello
