In [None]:
from absl import logging
import tensorflow as tf
from tf_agents.environments import tf_py_environment
import copy
import numpy as np
from tf_agents.specs import BoundedArraySpec, BoundedTensorSpec
from tf_agents.trajectories.time_step import StepType
from tf_agents.trajectories.time_step import TimeStep

from adversaryEnv import *
from continualAgent import *
from allowNotQuoteEnv import *
from constants import *
from dynamics import *
from marketMakerEnv import *
from QuoteAgent import *
from strategies import *
from utils import *

In [None]:
def names(eta, zeta, experiment_number="Hawkes1", adversary="All"):
    """Build the run names and saved-policy names for one experiment setup.

    Every name has the form
    ``<prefix>_<experiment_number>_eta=<eta>_zeta=<zeta>_<adversary>``, where
    ``eta`` and ``zeta`` are rendered with ``str()``. The matching policy name
    is the run name followed by ``_saved_policy``.

    Args:
        eta: Value rendered into the ``eta=`` part of each name.
        zeta: Value rendered into the ``zeta=`` part of each name.
        experiment_number: String tag placed right after the prefix.
        adversary: String tag appended at the end of each name.

    Returns:
        An 8-tuple of (name, policy name) pairs, flattened, for the prefixes
        ``adversary``, ``MM``, ``2actionMM`` and ``4actionMM`` in that order.
    """
    # The part after the prefix is identical for all four agents.
    shared_suffix = (
        "_" + experiment_number
        + "_eta=" + str(eta)
        + "_zeta=" + str(zeta)
        + "_" + adversary
    )

    flattened = []
    for prefix in ("adversary", "MM", "2actionMM", "4actionMM"):
        run_name = prefix + shared_suffix
        flattened.append(run_name)
        flattened.append(run_name + "_saved_policy")
    return tuple(flattened)

In [None]:
# One seed for every RNG seeded in this cell. Underscore-prefixed so it does
# not overwrite a public name pulled in by the wildcard imports above.
_NOTEBOOK_SEED = 42

# Enable TF2 behaviour before anything else touches TensorFlow state; the
# TensorFlow docs describe this as a call for the very start of the program.
tf.compat.v1.enable_v2_behavior()

tf.random.set_seed(_NOTEBOOK_SEED)
# NOTE(review): the environments/dynamics modules are not visible here. If they
# draw from NumPy's global RNG, this makes their runs repeatable as well; if
# they do not, the call is harmless.
np.random.seed(_NOTEBOOK_SEED)

logging.set_verbosity(logging.INFO)

In [None]:
def train_adversary_agent(env_class, eta, zeta, num_iterations, experiment_number="Hawkes1", adversary="All", continue_training=False, continue_policy=None):
    """Train an adversary agent, then evaluate it against a random baseline.

    Args:
        env_class: Callable invoked as ``env_class(eta=eta, zeta=zeta)`` to
            build the Python environment.
        eta: Forwarded to ``env_class`` and used in the run name.
        zeta: Forwarded to ``env_class`` and used in the run name.
        num_iterations: Forwarded to ``Agent.train``.
        experiment_number: Tag used in the run name (see ``names``).
        adversary: Tag used in the run name (see ``names``).
        continue_training: When true and ``continue_policy`` is given, the
            agent is built with ``continue_saved_policy=continue_policy`` and
            ``initialize_replay_buffer=False``.
        continue_policy: Value passed to ``Agent`` as ``continue_saved_policy``
            when continuing; ignored otherwise.

    Returns:
        ``(adversary_results, adversary_validate_results)``: the return values
        of ``evaluation`` and ``validate_with_random_policy`` respectively.
    """
    # Only the adversary entries of the name tuple are needed here.
    adversary_name, adversary_policyname = names(
        experiment_number=experiment_number, eta=eta, zeta=zeta, adversary=adversary
    )[:2]

    adversary_env = env_class(eta=eta, zeta=zeta)
    adversary_tf_env = tf_py_environment.TFPyEnvironment(adversary_env)
    # The evaluation wrapper gets its own environment instance. Wrapping the
    # same Python environment twice would make both wrappers share one episode
    # state, so stepping or resetting one would disturb the other.
    adversary_eval_env = tf_py_environment.TFPyEnvironment(env_class(eta=eta, zeta=zeta))

    resume = continue_training and continue_policy is not None
    if continue_training and continue_policy is None:
        # Previously this case silently trained from scratch; keep that
        # behaviour but make it visible.
        logging.warning(
            "continue_training=True but no continue_policy was given; "
            "training '%s' from scratch.", adversary_name
        )

    if resume:
        adversary_agent = Agent(
            adversary_tf_env,
            adversary_eval_env,
            name=adversary_name,
            continue_saved_policy=continue_policy,
            initialize_replay_buffer=False,
        )
    else:
        adversary_agent = Agent(adversary_tf_env, adversary_eval_env, name=adversary_name)

    adversary_agent.train(num_iterations)

    # NOTE(review): presumably Agent.train exports the policy under
    # "<name>_saved_policy"; confirm against the Agent implementation.
    adversary_saved_policy = tf.saved_model.load(adversary_policyname)

    adversary_results = evaluation(
        policy=adversary_saved_policy,
        name=adversary_name,
        env=adversary_env,
        calculate_ratio=False,
        num_episodes=1000,
        num_times=100,
    )

    adversary_validate_results = validate_with_random_policy(
        name=adversary_name,
        env=adversary_env,
        num_episodes=1000,
        num_times=100,
    )

    return adversary_results, adversary_validate_results

In [None]:
# Environment parameters passed to train_adversary_agent in the cell below.
eta, zeta = 0.0, 0.0

In [None]:
# Run the "Hawkes2" experiment for the "All" adversary, continuing from the
# policy saved under the Hawkes1 run name.
adversary_resultsALL, adversary_validate_resultsALL = train_adversary_agent(
    env_class=AdversaryEnvironmentWithControllingAll,
    eta=eta,
    zeta=zeta,
    num_iterations=50000,
    experiment_number="Hawkes2",
    adversary="All",
    continue_training=True,
    continue_policy='adversary_Hawkes1_eta=0.0_zeta=0.0_All_saved_policy',
)