In [None]:
import gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

import tensorboard
import torch as th
from scipy import stats

from copy import deepcopy

from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.env_util import make_vec_env
import stable_baselines3

from stochastic.processes.continuous import BrownianMotion, GeometricBrownianMotion, BesselProcess, BrownianBridge, BrownianMeander
from stochastic.processes.diffusion import ConstantElasticityVarianceProcess

import sys
sys.path.append("../") # <-- Path to the main repo

from main.agents.Agent import Agent
from main.agents.AvellanedaStoikovAgent import AvellanedaStoikovAgent
from main.agents.BaselineAgents import RandomAgent, FixedSpreadAgent
from main.agents.SBAgent import SBAgent
from main.gym.ModelBasedEnvironment import ModelBasedEnvironment
from main.gym.models import *
from main.gym.wrappers import *
from main.gym.AvellanedaStoikovEnvironment import AvellanedaStoikovEnvironment
from main.gym.helpers.generate_trajectory import generate_trajectory
from main.rewards.RewardFunctions import InventoryAdjustedPnL
from main.gym.helpers.plotting import plot_stable_baselines_actions

In [None]:
# Load the TensorBoard notebook extension.
# `%load_ext` is an IPython line magic, so this cell only runs inside a Jupyter/IPython kernel.
%load_ext tensorboard

### Investigating the expected rewards of fixed strategies by sampling

In [None]:
# Episode horizon and time discretisation used to build the environment below.
terminal_time = 1.0
n_steps = 200
# n_steps + 1 equally spaced time points on [0, terminal_time], both endpoints included.
timestamps = np.linspace(0, terminal_time, n_steps + 1)

# Keyword arguments shared by the directly constructed environment and the gym registration.
env_params = {
    "terminal_time": terminal_time,
    "n_steps": n_steps,
    "max_half_spread": 1,
}
as_env = AvellanedaStoikovEnvironment(**env_params)
reduced_env = ReduceStateSizeWrapper(as_env)

# Register the environment under a gym id so it can also be created by name.
gym.envs.register(
    id="as-env-v0",
    entry_point="__main__:AvellanedaStoikovEnvironment",
    kwargs=env_params,
)

In [None]:
N_EPISODES = 100  # default number of sampled episodes per evaluation


def fixed_strat_neg_reward(half_spread:float, env:gym.Env=reduced_env, n_episodes:"int | None"=None):
    """Estimate the negated mean total reward of a fixed-spread strategy by sampling.

    For each episode a fresh ``FixedSpreadAgent(half_spread)`` is rolled out with
    ``generate_trajectory`` and the rewards of that episode are summed. The
    per-episode totals are averaged and the sign is flipped, so the value can be
    handed directly to a minimiser.

    Parameters
    ----------
    half_spread:
        Half spread passed to ``FixedSpreadAgent``.
    env:
        Environment to roll out in. The default is bound to the ``reduced_env``
        object that exists when this function is defined; re-run this cell after
        rebuilding ``reduced_env`` to pick up the new environment.
    n_episodes:
        Number of episodes to sample. ``None`` (the default) uses the current
        value of the module-level ``N_EPISODES``.

    Returns
    -------
    The negative of the average summed episode reward.

    Raises
    ------
    ValueError
        If the effective number of episodes is smaller than 1.
    """
    # Resolve the episode count at call time so later changes to N_EPISODES are honoured.
    episodes = N_EPISODES if n_episodes is None else n_episodes
    if episodes < 1:
        raise ValueError(f"n_episodes must be at least 1, got {episodes}")

    total_rewards = 0
    for _ in range(episodes):
        # generate_trajectory returns a 3-tuple; only its third element (the rewards) is used here.
        _, _, episode_rewards = generate_trajectory(env, FixedSpreadAgent(half_spread))
        total_rewards += sum(episode_rewards)
    return -total_rewards / episodes

In [None]:
# Sweep 200 evenly spaced half spreads on [0, 2]; the helper returns the negated
# mean reward, so the sign is flipped back to obtain the reward itself.
half_spread_grid = np.linspace(0, 2, 200)
fixed_spread_rewards = [
    -fixed_strat_neg_reward(half_spread) for half_spread in half_spread_grid
]

In [None]:
# Sampled mean total episode reward for each fixed half spread. Axes and title are
# labelled so the figure can be read without the surrounding cells.
fig, ax = plt.subplots()
ax.plot(np.linspace(0,2,200), fixed_spread_rewards)
ax.set(
    xlabel="Half spread",
    ylabel="Mean total episode reward",
    title="Sampled reward of fixed-spread strategies",
)
plt.show()

## Finding the best fixed strategy with CMA-ES

In [None]:
import cma

# The half spread is a single number, but the search is run in two dimensions and
# only the first coordinate is evaluated (see the lambda below).
x0 = [1]
sigma0 = 1
# `2 * x0` gives the flat start vector [1, 1]. The previous `2 * [x0]` built the
# nested list [[1], [1]], which is not a 1-D start point.
# NOTE(review): 'CMA_on': 0 presumably switches covariance-matrix adaptation off — confirm against the pycma options.
es = cma.CMAEvolutionStrategy(2 * x0, sigma0, {'CMA_on':0})
es.optimize(lambda x:fixed_strat_neg_reward(x[0])) # on the fly 2-D -> 1-D wrapper
es.logger.plot(xsemilog=True)

In [None]:
# Display the optimiser's result summary for the CMA-ES run above.
# NOTE(review): exact contents depend on the installed pycma version — see its docs.
es.result_pretty()

In [None]:
# Reference action: the Avellaneda-Stoikov agent with zero risk aversion, queried
# at an all-zero state. Only the first component of the returned action is kept.
as_agent = AvellanedaStoikovAgent(risk_aversion=0)
zero_state = [0,0,0,0]
as_action = as_agent.get_action(zero_state)[0]

# First coordinate of the best solution evaluated by CMA-ES.
# NOTE(review): the objective is sampled, so `xbest` may favour a lucky draw — consider checking the distribution mean too.
cma_action = es.result.xbest[0]

# Each value is shown twice, as a pair.
print(f"Optimal strategy is {as_action, as_action}\nCMA-ES strategy is {cma_action,cma_action}")

In [None]:
# Relative deviation of the CMA-ES action from the Avellaneda-Stoikov action, in percent.
relative_error_pct = abs(cma_action-as_action)/as_action*100
print(f"Error is {round(relative_error_pct, 2)}%")