# Avellaneda-Stoikov

In [None]:
import sys
sys.path.append("../") # my version of this notebook is in the subfolder "notebooks" of the repo

import gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from copy import deepcopy

from stable_baselines3 import A2C, PPO, TD3
import stable_baselines3

from stochastic.processes.continuous import BrownianMotion, GeometricBrownianMotion, BesselProcess, BrownianBridge, BrownianMeander
from stochastic.processes.diffusion import ConstantElasticityVarianceProcess

from DRL4AMM.agents.Agent import Agent
from DRL4AMM.agents.AvellanedaStoikovAgent import AvellanedaStoikovAgent
from DRL4AMM.agents.BaselineAgents import RandomAgent, FixedSpreadAgent
from DRL4AMM.agents.SBAgent import SBAgent
from DRL4AMM.gym.ModelBasedEnvironment import ModelBasedEnvironment
from DRL4AMM.gym.models import *
from DRL4AMM.gym.AvellanedaStoikovEnvironment import AvellanedaStoikovEnvironment
from DRL4AMM.gym.helpers.generate_trajectory import generate_trajectory
from DRL4AMM.gym.helpers.plotting import *
from DRL4AMM.rewards.RewardFunctions import InventoryAdjustedPnL

## Random strategy

In [None]:
# Episode configuration shared by the cells below.
seed = 42            # reused for the environment and for numpy's global RNG
n_steps = 200        # passed to the environment as its `n_steps`
terminal_time = 1.0  # passed to the environment as its `terminal_time`

# n_steps + 1 evenly spaced points on [0, terminal_time]; used as the x-axis
# for observation series in the plots further down.
timestamps = np.linspace(start=0, stop=terminal_time, num=n_steps + 1)

as_env = AvellanedaStoikovEnvironment(
    terminal_time=terminal_time,
    n_steps=n_steps,
    seed=seed,
)

In [None]:
# Baseline agent built from the environment's action space and the shared seed.
# NOTE(review): the variable name `random` would shadow the stdlib `random`
# module if that module were ever imported in this notebook.
random = RandomAgent(as_env.action_space,seed=seed)

In [None]:
# Inspection cell: display the environment's action space.
#len(timestamps)
as_env.action_space

In [None]:
# Reset numpy's global RNG and rebuild the environment before generating
# trajectories.
np.random.seed(seed)
# Use the shared `seed` variable rather than a second hard-coded 42, so the
# environment seed cannot drift away from the one used everywhere else.
as_env = AvellanedaStoikovEnvironment(terminal_time=terminal_time, n_steps=n_steps, seed=seed)

In [None]:
# Seed numpy's global RNG, then roll out one trajectory of the random agent.
# generate_trajectory returns (observations, actions, rewards).
np.random.seed(seed)
observations, actions, rewards = generate_trajectory(as_env,random)

In [None]:
# Plot a trajectory of the random agent in the environment.
# plot_as_trajectory arrives via one of the star imports above
# (presumably DRL4AMM.gym.helpers.plotting - TODO confirm).
plot_as_trajectory(as_env, random, seed = seed)

## Fixed strategies

In [None]:
# Half-spread values to compare; one FixedSpreadAgent is built per value below.
half_spreads = [0.25,0.5,1,2,4]

In [None]:
# Inspection cell: display the environment's n_steps attribute.
as_env.n_steps

In [None]:
# Scratch cell: check how math.isclose (default tolerances) treats a value
# just below 1.
from math import isclose
isclose(0.999999999,1)

In [None]:
# Maps each half-spread to the "observations", "actions" and "cum_rewards"
# of one trajectory generated with a FixedSpreadAgent using that half-spread.
performance_dict = {}

for hs in half_spreads:
    # Re-seed numpy's global RNG before every run, using the shared `seed`
    # rather than a hard-coded 42 (presumably so all spreads face the same
    # random draws - depends on how the environment draws its randomness).
    np.random.seed(seed)
    agent = FixedSpreadAgent(half_spread=hs)
    performance_dict[hs] = {}
    performance_dict[hs]["observations"], performance_dict[hs]["actions"], rewards = generate_trajectory(as_env, agent)
    performance_dict[hs]["cum_rewards"] = np.cumsum(rewards)

In [None]:
# Inspection cell: length of one cumulative-reward series.
# `hs` is whatever value the most recently executed `for hs in ...` loop left behind.
len(performance_dict[hs]["cum_rewards"])

In [None]:
# Inspection cell: number of timestamps (n_steps + 1 by construction of the linspace).
len(timestamps)

In [None]:
# 2x2 comparison of the fixed-spread runs: one line per half-spread in each panel.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 10))
all_axes = (ax1, ax2, ax3, ax4)

for axis, title in zip(all_axes, ("cum_rewards", "asset_prices", "inventory", "cash_holdings")):
    axis.title.set_text(title)

for hs in half_spreads:
    run = performance_dict[hs]
    obs = run["observations"]
    # Cumulative rewards have one entry fewer than the observations, hence timestamps[1:].
    ax1.plot(timestamps[1:], run["cum_rewards"], label=hs)
    ax2.plot(timestamps, obs[:, 0], label=hs)
    ax3.plot(timestamps, obs[:, 2], label=hs)
    ax4.plot(timestamps, obs[:, 1], label=hs)

for axis in all_axes:
    axis.legend()
plt.show()

## Avellaneda-Stoikov Optimal Strategy

In [None]:
# Take the horizon from the environment the agent is run in instead of
# repeating the literal 200, so agent and environment cannot drift apart.
N_STEPS = as_env.n_steps
as_agent = AvellanedaStoikovAgent(n_steps=N_STEPS)

In [None]:
# Seed numpy's global RNG first, as the other trajectory cells in this
# notebook do, so that re-running this cell does not depend on RNG state
# left behind by earlier cells.
np.random.seed(seed)
observations, actions, rewards = generate_trajectory(as_env, as_agent)
cum_rewards = np.cumsum(rewards)
# Column 0 of the observations is the series plotted as "asset_prices".
midprices = observations[:, 0]
# Transposing gives one row per action component: bid first, then ask.
bid_half_spreads, ask_half_spreads = actions.T

In [None]:
# Inspection cell: number of actions in the trajectory.
len(actions)

In [None]:
# Inspection cell: number of midprice observations in the trajectory.
len(midprices)

In [None]:
# Quoted prices: midprice minus / plus the agent's half-spreads.
# The first midprice is dropped so both operands have the same length.
# NOTE(review): this pairs action i with midprices[i + 1]; confirm against
# generate_trajectory whether action i was instead chosen at midprices[i].
bid_prices = midprices[1:] - bid_half_spreads
ask_prices = midprices[1:] + ask_half_spreads

In [None]:
# 2x2 summary of the Avellaneda-Stoikov agent's trajectory.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 10))

ax1.title.set_text("cum_rewards")
ax2.title.set_text("asset_prices")
ax3.title.set_text("inventory")
ax4.title.set_text("cash_holdings")

# Plot against the shared time grid, as the fixed-spread figure does.
# Series with one entry fewer than the observations (cumulative rewards and
# the quotes built from midprices[1:]) use timestamps[1:]; without an
# explicit x they would be drawn one step to the left of the midprices
# they were computed from.
ax1.plot(timestamps[1:], cum_rewards)
ax2.plot(timestamps, midprices, label="midprice")
ax2.plot(timestamps[1:], bid_prices, label="quoted bid prices")
ax2.plot(timestamps[1:], ask_prices, label="quoted ask prices")
ax3.plot(timestamps, observations[:, 2])
ax4.plot(timestamps, observations[:, 1])

ax2.legend()

plt.show()

### Comparing the results to the Avellaneda Stoikov paper

In [None]:
# Build the results table and histogram figure for the AS agent with
# n_episodes=1000. The third return value is discarded here.
# Note: this rebinds `fig`, replacing the figure from the previous plotting cell.
results, fig, _ = generate_results_table_and_hist(agent=as_agent,env=as_env,n_episodes=1000)

In [None]:
# Display the results table returned by generate_results_table_and_hist.
results

These results look similar to Table 2 of Avellaneda and Stoikov. Interestingly, the agent **does** sometimes quote a negative spread. This could be interpreted as taking liquidity, but in that case the model should possibly be changed.

In [None]:
# Display the figure returned by generate_results_table_and_hist.
fig

## The effect of increasing risk aversion

In [None]:
# Risk-aversion values to compare; one AvellanedaStoikovAgent is built per value below.
risk_aversions = [0.01,0.1,0.5,1]

In [None]:
# Maps each risk-aversion value to the third return value of
# generate_results_table_and_hist (presumably the per-episode total rewards).
total_rewards_dict = {}
for risk_aversion in risk_aversions:
    agent = AvellanedaStoikovAgent(risk_aversion=risk_aversion)
    _, _, total_rewards_dict[risk_aversion] = generate_results_table_and_hist(
        agent=agent,
        env=as_env,
        n_episodes=1000,
    )

In [None]:
# Matplotlib single-letter colour codes, zipped with risk_aversions in the histogram cell.
colors = ["r", "g", "b", "c"]

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(20, 10))

# Overlay one density-normalised histogram per risk-aversion value on a shared axis.
for risk_aversion, color in zip(risk_aversions, colors):
    sns.histplot(
        total_rewards_dict[risk_aversion],
        label=f"risk-aversion {risk_aversion}",
        stat="density",
        bins=50,
        ax=ax,
        color=color,
    )

ax.legend()
plt.show()

**Note: it is hard to argue that the risk-averse agent outperforms the non-risk-averse agent in these cases.**

### Training a stable baselines agent on the Avellaneda-Stoikov gym environment

See separate notebook.