In [1]:
import numpy as np
import torch

from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback
from stable_baselines3.common.vec_env import VecNormalize

In [2]:
from stochastic_proc.midprice import BrownianMidprice
from stochastic_proc.arrivals import PoissonArrivals, HawkesArrivals
from stochastic_proc.dynamics import LimitOrderDynamics
from envs.trading import TradingEnv
from agents.Agents import AvellanedaStoikovAgent, GLFTAgent
from rewards.RewardFunctions import PnLReward, InventoryQuadraticPenalty, SpreadRegularizer, SumReward
from utils.plot_single import run_and_log, plot_single_episode
from utils.plot_batch import simulate_batch, plot_batch
from utils.plot_lite import plot_trajectory, generate_results_table_and_hist, compare_poisson_vs_hawkes
from utils.calibration import hawkes_params
from utils.calibration import hawkes_params, glft_half_spreads
from envs.SB3tradingenv import SB3TradingVecEnv

In [3]:
# Baseline experiment: Avellaneda-Stoikov agent (default mode) on Brownian
# mid-price with Poisson order arrivals.
# SEED: RNG seed, T: horizon, M: number of time steps, s0: initial mid-price,
# sigma: mid-price volatility, A: arrival intensity passed for both sides,
# k_fill: fill-decay parameter, N: number of trajectories for the single run.
SEED, T, M, s0, sigma, A, k_fill, N = 42, 1.0, 200, 100.0, 2.0, 140.0, 1.5, 1
dt = T / M  # step size so that M steps span the horizon T
mid = BrownianMidprice(s0, sigma, num_traj=N, dt=dt, T=T, seed=SEED)
arr = PoissonArrivals(A, A, num_traj=N, dt=dt, T=T, seed=SEED)
dyn = LimitOrderDynamics(mid, arr, fill_k=k_fill, max_depth=20.0)

# Single-trajectory plotting (RL-style)
env = TradingEnv(dynamics=dyn, T=T, M=M, seed=SEED, return_vectorized=False)
agent = AvellanedaStoikovAgent(env, gamma=0.1)
# Pass an explicit boolean: the original passed the horizon `T` (1.0), which
# only worked through truthiness and disagreed with every other cell.
plot_trajectory(env, agent, show_reservation=True)

# Batch stats & PnL hist (vectorized episode)
# Rebuild the whole stack with 512 trajectories so mid-price, arrivals and
# environment all share the same batch size.
mid = BrownianMidprice(s0, sigma, num_traj=512, dt=dt, T=T, seed=SEED)
arr = PoissonArrivals(A, A, num_traj=512, dt=dt, T=T, seed=SEED)
dyn = LimitOrderDynamics(mid, arr, fill_k=k_fill, max_depth=20.0)
env_vec = TradingEnv(dynamics=dyn, T=T, M=M, seed=SEED, return_vectorized=True)
agent = AvellanedaStoikovAgent(env_vec, gamma=0.1)
results, fig, totals = generate_results_table_and_hist(env_vec, agent)
print(results)

: 

In [None]:
# PPO training of a market-making policy on the vectorized trading environment.
# Shared simulation configuration for both the training and evaluation stacks.
SEED = 42
T, M = 1.0, 200            # horizon and number of steps per episode
s0, sigma = 100.0, 2.0     # initial mid-price and volatility
A, k_fill = 140.0, 1.5     # arrival intensity (both sides) and fill-decay parameter
max_depth = 20.0
dt = T / M

# ------------------ TRAIN ENV (parallel) ------------------
N_train = 256  # number of parallel trajectories inside one env step
mid_tr = BrownianMidprice(s0, sigma, num_traj=N_train, dt=dt, T=T, seed=SEED)
arr_tr = PoissonArrivals(A, A, num_traj=N_train, dt=dt, T=T, seed=SEED)
dyn_tr = LimitOrderDynamics(mid_tr, arr_tr, fill_k=k_fill, max_depth=max_depth)

env_train = TradingEnv(dynamics=dyn_tr, T=T, M=M, seed=SEED, return_vectorized=True)
vec_train = SB3TradingVecEnv(env_train)

# Optional but recommended: running normalization of obs (rewards left raw)
vec_train = VecNormalize(vec_train, norm_obs=True, norm_reward=False, clip_obs=10.0)

# SB3 callbacks count calls to the vec-env's step(), and each call advances
# `num_envs` timesteps. Frequencies expressed in timesteps must therefore be
# divided by this value (presumably equal to N_train -- confirm in SB3TradingVecEnv).
n_envs = vec_train.num_envs

# ------------------ EVAL ENV (smaller batch) ------------------
# Uses a different seed so evaluation paths are not the training paths.
N_eval = 64
mid_ev = BrownianMidprice(s0, sigma, num_traj=N_eval, dt=dt, T=T, seed=SEED + 1)
arr_ev = PoissonArrivals(A, A, num_traj=N_eval, dt=dt, T=T, seed=SEED + 1)
dyn_ev = LimitOrderDynamics(mid_ev, arr_ev, fill_k=k_fill, max_depth=max_depth)

env_eval = TradingEnv(dynamics=dyn_ev, T=T, M=M, seed=SEED + 1, return_vectorized=True)
vec_eval = SB3TradingVecEnv(env_eval)
# training=False freezes the running statistics: the eval wrapper shares the
# train wrapper's obs_rms object below, so without it every evaluation rollout
# would silently update the statistics used for training.
vec_eval = VecNormalize(
    vec_eval, training=False, norm_obs=True, norm_reward=False, clip_obs=10.0
)
# crucial: sync running stats from train to eval (so obs normalization is consistent)
vec_eval.obs_rms = vec_train.obs_rms

# ------------------ PPO ------------------
policy_kwargs = dict(net_arch=[128, 128])  # small MLP is usually enough here
model = PPO(
    policy="MlpPolicy",
    env=vec_train,
    learning_rate=3e-4,
    n_steps=M,                # collect exactly one full episode per update per "env"
    # NOTE(review): the rollout holds n_steps * n_envs samples; if n_envs == 256
    # that is 51_200, which is not a multiple of 4096 -- consider 5120 or 2048.
    batch_size=4096,          # adjust for your GPU/CPU RAM
    n_epochs=10,
    gamma=1.0,                # episodic PnL, no discount across time within episode
    gae_lambda=0.95,
    clip_range=0.2,
    vf_coef=0.5,
    ent_coef=0.0,
    seed=SEED,
    tensorboard_log="./tb_mm/",
    policy_kwargs=policy_kwargs,
    device="auto",
)

# Callbacks
# Frequencies are converted from timesteps to vec-env step() calls; left in
# raw timesteps they would never be reached within `total_timesteps` when
# many sub-environments advance per call.
eval_cb = EvalCallback(
    vec_eval,
    best_model_save_path="./ckpt_best/",
    log_path="./eval_logs/",
    eval_freq=max(10_000 // n_envs, 1),   # evaluate every ~10k timesteps
    n_eval_episodes=5,
    deterministic=True,
)
ckpt_cb = CheckpointCallback(
    save_freq=max(50_000 // n_envs, 1),   # checkpoint every ~50k timesteps
    save_path="./ckpt/",
    name_prefix="ppo_mm",
)

# Train
total_timesteps = 1_000_000
model.learn(total_timesteps=total_timesteps, callback=[eval_cb, ckpt_cb])

# Save policy + VecNormalize stats (both are needed to reload the agent for inference)
model.save("./ppo_mm_final")
vec_train.save("./ppo_mm_vecnorm.pkl")

Infinite horizon

In [None]:
# Avellaneda-Stoikov agent in mode="infinite" on Brownian mid-price with
# Poisson arrivals: one plotted trajectory, then batch statistics.

# --- Simulation parameters ---
SEED = 42
T = 1.0
M = 200
s0 = 100.0
sigma = 2.0
A = 140.0
k_fill = 1.5
N = 1
dt = T / M

# --- Single trajectory: build dynamics, environment and agent, then plot ---
mid = BrownianMidprice(s0, sigma, num_traj=N, dt=dt, T=T, seed=SEED)
arr = PoissonArrivals(A, A, num_traj=N, dt=dt, T=T, seed=SEED)
dyn = LimitOrderDynamics(mid, arr, fill_k=k_fill, max_depth=20.0)
env = TradingEnv(
    dynamics=dyn,
    T=T,
    M=M,
    seed=SEED,
    return_vectorized=False,
)
agent_inf = AvellanedaStoikovAgent(
    env,
    gamma=0.8,
    mode="infinite",
    q_max=100,
)
plot_trajectory(env, agent_inf, show_reservation=True)

# --- Batch of 512 trajectories: results table and PnL histogram ---
mid = BrownianMidprice(s0, sigma, num_traj=512, dt=dt, T=T, seed=SEED)
arr = PoissonArrivals(A, A, num_traj=512, dt=dt, T=T, seed=SEED)
dyn = LimitOrderDynamics(mid, arr, fill_k=k_fill, max_depth=20.0)
env_vec = TradingEnv(
    dynamics=dyn,
    T=T,
    M=M,
    seed=SEED,
    return_vectorized=True,
)
agent_inf = AvellanedaStoikovAgent(
    env_vec,
    gamma=0.8,
    mode="infinite",
    q_max=100,
)
results, fig, totals = generate_results_table_and_hist(env_vec, agent_inf)
print(results)

Hawkes arrival process

In [None]:
# Avellaneda-Stoikov agent on Brownian mid-price with Hawkes order arrivals:
# one plotted trajectory, then batch statistics over 512 trajectories.
SEED, T, M, s0, sigma, A, k_fill, N = 42, 1.0, 200, 100.0, 2.0, 140.0, 1.5, 1
dt = T / M
# Calibrate the Hawkes parameters for this step size.
# NOTE(review): `mu` is returned here but never passed on -- HawkesArrivals
# receives `A, A` as its first two arguments instead. Confirm whether the
# calibrated `mu` was meant to be used as the baseline intensity.
mu, kappa, jump = hawkes_params(dt=dt, p_target=0.30, eta=0.6, memory_steps=10)

mid = BrownianMidprice(s0, sigma, num_traj=N, dt=dt, T=T, seed=SEED)
arr = HawkesArrivals(A, A, num_traj=N, dt=dt, T=T, seed=SEED, kappa=kappa, jump=jump)
dyn = LimitOrderDynamics(mid, arr, fill_k=k_fill, max_depth=20.0)

# Single-trajectory plotting (RL-style)
env = TradingEnv(dynamics=dyn, T=T, M=M, seed=SEED, return_vectorized=False)
agent = AvellanedaStoikovAgent(env, gamma=0.1)
plot_trajectory(env, agent, show_reservation=True)

# Batch stats & PnL hist (vectorized episode)
mid = BrownianMidprice(s0, sigma, num_traj=512, dt=dt, T=T, seed=SEED)
# The arrivals must use the same batch size as the mid-price process; the
# original passed num_traj=N (i.e. 1) here, mismatching the 512 mid-price paths.
arr = HawkesArrivals(A, A, num_traj=512, dt=dt, T=T, kappa=kappa, jump=jump, seed=SEED)
dyn = LimitOrderDynamics(mid, arr, fill_k=k_fill, max_depth=20.0)
env_vec = TradingEnv(dynamics=dyn, T=T, M=M, seed=SEED, return_vectorized=True)
agent = AvellanedaStoikovAgent(env_vec, gamma=0.1)
results, fig, totals = generate_results_table_and_hist(env_vec, agent)
print(results)

GLFT (Guéant–Lehalle–Fernandez-Tapia) agent

In [None]:
# GLFT agent on Brownian mid-price with Poisson arrivals: one plotted
# trajectory, then batch statistics over 512 trajectories.
# `k` is the extra parameter handed to GLFTAgent (distinct from `k_fill`,
# which is passed to the dynamics).
SEED, T, M, s0, sigma, A, k_fill, N, k = 42, 1.0, 200, 100.0, 2.0, 140.0, 1.5, 1, 0.5
dt = T / M
mid = BrownianMidprice(s0, sigma, num_traj=N, dt=dt, T=T, seed=SEED)
arr = PoissonArrivals(A, A, num_traj=N, dt=dt, T=T, seed=SEED)
dyn = LimitOrderDynamics(mid, arr, fill_k=k_fill, max_depth=20.0)

# Single-trajectory plotting (RL-style)
env = TradingEnv(dynamics=dyn, T=T, M=M, seed=SEED, return_vectorized=False)
agent = GLFTAgent(env, gamma=0.1, A=A, k=k, xi=1.0, tick=1.0)
plot_trajectory(env, agent, show_reservation=True)

# Batch stats & PnL hist (vectorized episode)
mid = BrownianMidprice(s0, sigma, num_traj=512, dt=dt, T=T, seed=SEED)
arr = PoissonArrivals(A, A, num_traj=512, dt=dt, T=T, seed=SEED)
dyn = LimitOrderDynamics(mid, arr, fill_k=k_fill, max_depth=20.0)
env_vec = TradingEnv(dynamics=dyn, T=T, M=M, seed=SEED, return_vectorized=True)
# Bind the batch agent to the vectorized environment it is evaluated on; the
# original constructed it from the single-trajectory `env`.
GLFT_agent = GLFTAgent(env_vec, gamma=0.1, A=A, k=k, xi=1.0, tick=1.0)
results, fig, totals = generate_results_table_and_hist(env_vec, GLFT_agent)
print(results)

Comparing Poisson and Hawkes arrival processes

In [None]:
# Poisson vs. Hawkes comparison over 200 steps of size 0.01.
# No arrival classes are passed, so the helper resolves them itself
# (presumably via an auto-import -- confirm in utils.plot_lite).
compare_poisson_vs_hawkes(
    dt=0.01,
    steps=200,
    seed=42,
    lam_buy=30,
    lam_sell=30,
    mu=10,
    kappa=60,
    jump=40,
)

In [None]:
# Same comparison as before, but with the arrival classes supplied
# explicitly instead of relying on the helper to locate them.
from stochastic_proc.arrivals import PoissonArrivals, HawkesArrivals

compare_poisson_vs_hawkes(
    dt=0.01,
    steps=200,
    seed=42,
    lam_buy=30,
    lam_sell=30,
    mu=10,
    kappa=60,
    jump=40,
    poisson_cls=PoissonArrivals,
    hawkes_cls=HawkesArrivals,
)


In [None]:
import inspect
from stochastic_proc.arrivals import PoissonArrivals, HawkesArrivals

# Print the constructor signature of each arrival class, one per line, so the
# expected keyword arguments can be checked against the calls in this notebook.
print(
    "\n".join(
        f"{label} {inspect.signature(klass.__init__)}"
        for label, klass in (
            ("PoissonArrivals:", PoissonArrivals),
            ("HawkesArrivals :", HawkesArrivals),
        )
    )
)
