In [1]:
from stochastic_proc.midprice import BrownianMidprice
from stochastic_proc.arrivals import PoissonArrivals
from stochastic_proc.dynamics import LimitOrderDynamics
from envs.trading import TradingEnv
from agents.avellaneda_stoikov import AvellanedaStoikovAgent
from rewards.RewardFunctions import PnLReward, InventoryQuadraticPenalty, SpreadRegularizer, SumReward

In [2]:
# --- Experiment configuration -------------------------------------------
SEED = 7  # passed unchanged to the midprice model, the arrival model and the env

# Horizon and discretisation: M steps of length dt over [0, T].
T = 1.0
M = 200
dt = T / M

N = 256  # forwarded as `num_traj` to both stochastic models

# Midprice parameters (start value and volatility of the Brownian midprice).
s0 = 100.0
sigma = 2.0

# Order-flow parameters: A is used for both `lam_bid` and `lam_ask`;
# k_fill is forwarded as `fill_k` (presumably the fill-probability decay
# in quote depth -- TODO confirm against LimitOrderDynamics).
A = 140.0
k_fill = 1.5

gamma = 0.1  # risk-aversion: used in the inventory penalty and by the agent

# --- Market model ---------------------------------------------------------
# NOTE(review): both processes receive the same seed; if they build identical
# RNG streams from it their draws could be correlated -- confirm in the models.
mid = BrownianMidprice(
    s0=s0,
    sigma=sigma,
    num_traj=N,
    dt=dt,
    T=T,
    seed=SEED,
)
arr = PoissonArrivals(
    lam_bid=A,
    lam_ask=A,
    num_traj=N,
    dt=dt,
    T=T,
    seed=SEED,
)
dyn = LimitOrderDynamics(
    mid_model=mid,
    arr_model=arr,
    fill_k=k_fill,
    max_depth=20.0,
)

# --- Reward ---------------------------------------------------------------
# Risk-averse reward ≈ PnL  -  (0.5*gamma*sigma^2 * q^2 * dt)
inv_pen = InventoryQuadraticPenalty(
    lam=0.5 * gamma * sigma**2,
    weight=dt,
    use_next_q=True,
)
spread_reg = SpreadRegularizer(alpha=1e-3)
reward = SumReward(
    [PnLReward(), inv_pen, spread_reg],
    weights=[1.0, 1.0, 1.0],
)

# --- Environment and agent -------------------------------------------------
env = TradingEnv(
    dynamics=dyn,
    T=T,
    M=M,
    reward_fn=reward,
    seed=SEED,
    return_vectorized=False,
)
agent = AvellanedaStoikovAgent(env, gamma=gamma)

# Roll out a single episode with the Avellaneda-Stoikov agent and accumulate
# the per-step reward into `cum_r`.
obs, _ = env.reset()
cum_r = 0.0
done = False
while not done:
    action = agent.get_action(obs)
    # step() yields a 5-tuple. The 4th element used to be discarded; assuming
    # the Gymnasium convention (obs, reward, terminated, truncated, info) --
    # TODO confirm against TradingEnv.step -- an episode may also end through
    # truncation, so both flags must stop the loop.
    obs, r, terminated, truncated, info = env.step(action)
    done = bool(terminated or truncated)
    cum_r += r
print("Episode return:", cum_r)


Episode return: 87.09555505111348
