In [8]:
pip install torch

Collecting torch
[?25l  Downloading https://files.pythonhosted.org/packages/e0/17/ee73e3011b9f62919eb2991ed4c216b90285469c6d0b11c1cda6538819b1/torch-1.8.1-cp37-none-macosx_10_9_x86_64.whl (119.5MB)
[K     |████████████████████████████████| 119.5MB 16.4MB/s eta 0:00:01
[?25hCollecting typing-extensions (from torch)
  Downloading https://files.pythonhosted.org/packages/60/7a/e881b5abb54db0e6e671ab088d079c57ce54e8a01a3ca443f561ccadb37e/typing_extensions-3.7.4.3-py3-none-any.whl
Installing collected packages: typing-extensions, torch
Successfully installed torch-1.8.1 typing-extensions-3.7.4.3
Note: you may need to restart the kernel to use updated packages.


In [1]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [2]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [3]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,500000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,500000,Trader's Risk Aversion for Agent 1:,1e-06
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,0.0001
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [9]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

Episode [100/1300]	Average Shortfall for Agent1: $1,168,737.12
Episode [100/1300]	Average Shortfall for Agent2: $1,182,497.04
Episode [200/1300]	Average Shortfall for Agent1: $1,281,250.00
Episode [200/1300]	Average Shortfall for Agent2: $1,281,250.00
Episode [300/1300]	Average Shortfall for Agent1: $1,274,753.86
Episode [300/1300]	Average Shortfall for Agent2: $1,278,818.43
Episode [400/1300]	Average Shortfall for Agent1: $958,446.35
Episode [400/1300]	Average Shortfall for Agent2: $996,403.21
Episode [500/1300]	Average Shortfall for Agent1: $321,537.18
Episode [500/1300]	Average Shortfall for Agent2: $321,944.71
Episode [600/1300]	Average Shortfall for Agent1: $331,625.64
Episode [600/1300]	Average Shortfall for Agent2: $328,738.83
Episode [700/1300]	Average Shortfall for Agent1: $302,789.39
Episode [700/1300]	Average Shortfall for Agent2: $296,596.55
Episode [800/1300]	Average Shortfall for Agent1: $305,151.05
Episode [800/1300]	Average Shortfall for Agent2: $301,542.19
Episode [900

In [10]:
shortfall = np.array(shortfall_list)

In [11]:
np.save('1e-6_1e-6_cooporation_shorfall_list.npy',shortfall)

In [12]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.761694 0.656324]
[0.603648 0.454928]
[0.44365  0.334226]
[0.305346 0.25539 ]
[0.20247  0.202642]
[0.13316  0.148788]
[0.09197 0.10399]
[0.064072 0.074902]
[0.044238 0.052522]
[0.03257  0.036602]
[0.02397  0.024466]
[0.018556 0.01732 ]
[0.013314 0.011942]
[0.009696 0.008204]
[0.006774 0.005622]
[0.004728 0.003898]
[0.003236 0.002704]
[0.00228  0.001762]
[0.00165 0.00114]
[0.001234 0.000778]
[0.000932 0.000566]
[0.000674 0.000392]
[0.000506 0.00029 ]
[0.000376 0.000212]
[0.000294 0.00015 ]
[0.000224 0.000108]
[1.66e-04 7.40e-05]
[1.14e-04 5.40e-05]
[8.2e-05 4.2e-05]
[5.8e-05 3.4e-05]
[4.0e-05 2.8e-05]
[2.6e-05 2.2e-05]
[1.8e-05 1.8e-05]
[1.2e-05 1.4e-05]
[8.e-06 1.e-05]
[6.e-06 8.e-06]
[4.e-06 6.e-06]
[2.e-06 4.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-

In [13]:
np.save('1e-6_1e-6_competition_trajectory_1500.npy',trajectory)

In [1]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [2]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [3]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,1000000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,0.0001,Trader's Risk Aversion for Agent 1:,1e-06
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,0
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [4]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Episode [100/1300]	Average Shortfall for Agent1: $nan
Episode [100/1300]	Average Shortfall for Agent2: $nan
Episode [200/1300]	Average Shortfall for Agent1: $nan
Episode [200/1300]	Average Shortfall for Agent2: $nan
Episode [300/1300]	Average Shortfall for Agent1: $nan
Episode [300/1300]	Average Shortfall for Agent2: $nan
Episode [400/1300]	Average Shortfall for Agent1: $nan
Episode [400/1300]	Average Shortfall for Agent2: $nan
Episode [500/1300]	Average Shortfall for Agent1: $nan
Episode [500/1300]	Average Shortfall for Agent2: $nan
Episode [600/1300]	Average Shortfall for Agent1: $nan
Episode [600/1300]	Average Shortfall for Agent2: $nan
Episode [700/1300]	Average Shortfall for Agent1: $nan
Episode [700/1300]	Average Shortfall for Agent2: $nan
Episode [800/1300]	Average Shortfall for Agent1: $nan
Episode [800/1300]	Average Shortfall for Agent2: $nan
Episode [900/1300]	Average Shortfall for Agent1: $nan
Episode [900/1300]	Average Shortfall for Agent2: $nan
Episode [1000/1300]	Average 

In [5]:
shortfall = np.array(shortfall_list)

In [6]:
np.save('1e-6_shortfall_list.npy',shortfall)

In [7]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.778152 1.      ]
[0.627577 1.      ]
[0.4687 1.    ]
[0.327324 1.      ]
[0.219856 1.      ]
[0.146201 1.      ]
[0.101884 1.      ]
[0.071515 1.      ]
[0.049697 1.      ]
[0.036779 1.      ]
[0.027186 1.      ]
[0.021121 1.      ]
[0.015205 1.      ]
[0.011104 1.      ]
[0.007776 1.      ]
[0.00544 1.     ]
[0.003729 1.      ]
[0.002632 1.      ]
[0.001907 1.      ]
[0.001427 1.      ]
[0.001078 1.      ]
[7.81e-04 1.00e+00]
[5.87e-04 1.00e+00]
[4.36e-04 1.00e+00]
[3.4e-04 1.0e+00]
[2.6e-04 1.0e+00]
[1.92e-04 1.00e+00]
[1.33e-04 1.00e+00]
[9.7e-05 1.0e+00]
[6.8e-05 1.0e+00]
[4.6e-05 1.0e+00]
[3.e-05 1.e+00]
[2.e-05 1.e+00]
[1.3e-05 1.0e+00]
[9.e-06 1.e+00]
[6.e-06 1.e+00]
[4.e-06 1.e+00]
[3.e-06 1.e+00]
[2.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-0

In [1]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [2]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [2]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,300000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,700000,Trader's Risk Aversion for Agent 1:,1e-06
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,1e-06
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [4]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

Episode [100/1300]	Average Shortfall for Agent1: $705,887.29
Episode [100/1300]	Average Shortfall for Agent2: $1,665,016.69
Episode [200/1300]	Average Shortfall for Agent1: $768,639.85
Episode [200/1300]	Average Shortfall for Agent2: $1,793,241.57
Episode [300/1300]	Average Shortfall for Agent1: $768,750.00
Episode [300/1300]	Average Shortfall for Agent2: $1,793,750.00
Episode [400/1300]	Average Shortfall for Agent1: $768,750.00
Episode [400/1300]	Average Shortfall for Agent2: $1,793,750.00
Episode [500/1300]	Average Shortfall for Agent1: $768,750.00
Episode [500/1300]	Average Shortfall for Agent2: $1,793,750.00
Episode [600/1300]	Average Shortfall for Agent1: $768,750.00
Episode [600/1300]	Average Shortfall for Agent2: $1,793,750.00
Episode [700/1300]	Average Shortfall for Agent1: $768,750.00
Episode [700/1300]	Average Shortfall for Agent2: $1,793,750.00
Episode [800/1300]	Average Shortfall for Agent1: $749,953.03
Episode [800/1300]	Average Shortfall for Agent2: $1,771,843.57
Episode 

In [5]:
shortfall = np.array(shortfall_list)

In [6]:
np.save('1e-6_shortfall_list 0.3M.npy',shortfall)

In [7]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.         0.65632429]
[0.         0.45492857]
[0.         0.33422571]
[0.      0.25539]
[0.         0.20264286]
[0.         0.14878714]
[0.         0.10398857]
[0.     0.0749]
[0.         0.05252143]
[0.         0.03660143]
[0.         0.02446571]
[0.      0.01732]
[0.         0.01194286]
[0.         0.00820429]
[0.         0.00562286]
[0.         0.00389857]
[0.         0.00270286]
[0.      0.00176]
[0.         0.00113857]
[0.         0.00077714]
[0.         0.00056571]
[0.         0.00039143]
[0.         0.00028857]
[0.         0.00021143]
[0.      0.00015]
[0.         0.00010857]
[0.00000000e+00 7.57142857e-05]
[0.00000000e+00 5.42857143e-05]
[0.00000000e+00 4.14285714e-05]
[0.00000000e+00 3.28571429e-05]
[0.00000000e+00 2.57142857e-05]
[0.e+00 2.e-05]
[0.00000000e+00 1.57142857e-05]
[0.00000000e+00 1.28571429e-05]
[0.e+00 1.e-05]
[0.00000000e+00 7.14285714e-06]
[0.00000000e+00 5.71428571e-06]
[0.00000000e+00 4.28571429e-06]
[0.00000000e+00 2.85714286e-06]
[0.0

In [1]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [2]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [3]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,700000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,300000,Trader's Risk Aversion for Agent 1:,1e-06
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,1e-06
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [4]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

Episode [100/1300]	Average Shortfall for Agent1: $1,636,719.61
Episode [100/1300]	Average Shortfall for Agent2: $708,470.86
Episode [200/1300]	Average Shortfall for Agent1: $1,793,749.54
Episode [200/1300]	Average Shortfall for Agent2: $768,749.40
Episode [300/1300]	Average Shortfall for Agent1: $1,793,750.00
Episode [300/1300]	Average Shortfall for Agent2: $768,750.00
Episode [400/1300]	Average Shortfall for Agent1: $1,793,414.77
Episode [400/1300]	Average Shortfall for Agent2: $768,695.70
Episode [500/1300]	Average Shortfall for Agent1: $1,777,672.76
Episode [500/1300]	Average Shortfall for Agent2: $765,635.58
Episode [600/1300]	Average Shortfall for Agent1: $1,047,445.39
Episode [600/1300]	Average Shortfall for Agent2: $487,752.08
Episode [700/1300]	Average Shortfall for Agent1: $1,416,671.63
Episode [700/1300]	Average Shortfall for Agent2: $301,228.20
Episode [800/1300]	Average Shortfall for Agent1: $1,444,264.43
Episode [800/1300]	Average Shortfall for Agent2: $265,269.88
Episode 

In [5]:
shortfall = np.array(shortfall_list)

In [6]:
np.save('1e-6_shortfall_list 0.7M.npy',shortfall)

In [7]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.         0.65632333]
[0.      0.45493]
[0.         0.33422667]
[0.      0.25539]
[0.         0.20264333]
[0.         0.14878667]
[0.      0.10399]
[0.     0.0749]
[0.      0.05252]
[0.     0.0366]
[0.         0.02446333]
[0.         0.01731667]
[0.      0.01194]
[0.         0.00820333]
[0.         0.00562333]
[0.     0.0039]
[0.         0.00270333]
[0.      0.00176]
[0.      0.00114]
[0.         0.00077667]
[0.         0.00056333]
[0.      0.00039]
[0.         0.00028667]
[0.      0.00021]
[0.      0.00015]
[0.         0.00010667]
[0.00000000e+00 7.33333333e-05]
[0.00000000e+00 5.33333333e-05]
[0.e+00 4.e-05]
[0.e+00 3.e-05]
[0.00000000e+00 2.33333333e-05]
[0.00000000e+00 1.66666667e-05]
[0.00000000e+00 1.33333333e-05]
[0.e+00 1.e-05]
[0.00000000e+00 6.66666667e-06]
[0.00000000e+00 6.66666667e-06]
[0.00000000e+00 3.33333333e-06]
[0.00000000e+00 3.33333333e-06]
[0.00000000e+00 3.33333333e-06]
[0.00000000e+00 3.33333333e-06]
[0.00000000e+00 3.33333333e-06]
[0.00000

In [1]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [1]:
financial_params

NameError: name 'financial_params' is not defined

In [3]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,1000000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,0.0001,Trader's Risk Aversion for Agent 1:,0.0001
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,0
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [4]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Episode [100/1300]	Average Shortfall for Agent1: $nan
Episode [100/1300]	Average Shortfall for Agent2: $nan
Episode [200/1300]	Average Shortfall for Agent1: $nan
Episode [200/1300]	Average Shortfall for Agent2: $nan
Episode [300/1300]	Average Shortfall for Agent1: $nan
Episode [300/1300]	Average Shortfall for Agent2: $nan
Episode [400/1300]	Average Shortfall for Agent1: $nan
Episode [400/1300]	Average Shortfall for Agent2: $nan
Episode [500/1300]	Average Shortfall for Agent1: $nan
Episode [500/1300]	Average Shortfall for Agent2: $nan
Episode [600/1300]	Average Shortfall for Agent1: $nan
Episode [600/1300]	Average Shortfall for Agent2: $nan
Episode [700/1300]	Average Shortfall for Agent1: $nan
Episode [700/1300]	Average Shortfall for Agent2: $nan
Episode [800/1300]	Average Shortfall for Agent1: $nan
Episode [800/1300]	Average Shortfall for Agent2: $nan
Episode [900/1300]	Average Shortfall for Agent1: $nan
Episode [900/1300]	Average Shortfall for Agent2: $nan
Episode [1000/1300]	Average 

In [5]:
shortfall = np.array(shortfall_list)

In [6]:
np.save('1e-6_shortfall_optimal.npy',shortfall)

In [7]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.778152 1.      ]
[0.627577 1.      ]
[0.4687 1.    ]
[0.327324 1.      ]
[0.219856 1.      ]
[0.146201 1.      ]
[0.101884 1.      ]
[0.071515 1.      ]
[0.049697 1.      ]
[0.036779 1.      ]
[0.027186 1.      ]
[0.021121 1.      ]
[0.015205 1.      ]
[0.011104 1.      ]
[0.007776 1.      ]
[0.00544 1.     ]
[0.003729 1.      ]
[0.002632 1.      ]
[0.001907 1.      ]
[0.001427 1.      ]
[0.001078 1.      ]
[7.81e-04 1.00e+00]
[5.87e-04 1.00e+00]
[4.36e-04 1.00e+00]
[3.4e-04 1.0e+00]
[2.6e-04 1.0e+00]
[1.92e-04 1.00e+00]
[1.33e-04 1.00e+00]
[9.7e-05 1.0e+00]
[6.8e-05 1.0e+00]
[4.6e-05 1.0e+00]
[3.e-05 1.e+00]
[2.e-05 1.e+00]
[1.3e-05 1.0e+00]
[9.e-06 1.e+00]
[6.e-06 1.e+00]
[4.e-06 1.e+00]
[3.e-06 1.e+00]
[2.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-0

In [1]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [2]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [3]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,0.0001,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,1000000,Trader's Risk Aversion for Agent 1:,0
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,1e-09
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [4]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Episode [100/1300]	Average Shortfall for Agent1: $nan
Episode [100/1300]	Average Shortfall for Agent2: $nan
Episode [200/1300]	Average Shortfall for Agent1: $0.01
Episode [200/1300]	Average Shortfall for Agent2: $2,562,500.00
Episode [300/1300]	Average Shortfall for Agent1: $0.00
Episode [300/1300]	Average Shortfall for Agent2: $2,562,500.00
Episode [400/1300]	Average Shortfall for Agent1: $0.00
Episode [400/1300]	Average Shortfall for Agent2: $2,562,500.00
Episode [500/1300]	Average Shortfall for Agent1: $0.00
Episode [500/1300]	Average Shortfall for Agent2: $2,562,500.00
Episode [600/1300]	Average Shortfall for Agent1: $0.00
Episode [600/1300]	Average Shortfall for Agent2: $2,562,500.00
Episode [700/1300]	Average Shortfall for Agent1: $0.00
Episode [700/1300]	Average Shortfall for Agent2: $2,562,500.00
Episode [800/1300]	Average Shortfall for Agent1: $0.00
Episode [800/1300]	Average Shortfall for Agent2: $2,562,500.00
Episode [900/1300]	Average Shortfall for Agent1: $0.00
Episode [90

In [5]:
shortfall = np.array(shortfall_list)

In [6]:
np.save('1e-9_shortfall_optimal.npy',shortfall)

In [7]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[1.       0.635651]
[1.      0.42943]
[1.       0.309079]
[1.       0.232251]
[1.       0.181777]
[1.     0.1318]
[1.       0.091089]
[1.       0.065006]
[1.       0.045217]
[1.       0.031294]
[1.       0.020791]
[1.       0.014646]
[1.       0.010056]
[1.       0.006883]
[1.       0.004702]
[1.       0.003252]
[1.      0.00225]
[1.       0.001463]
[1.00e+00 9.45e-04]
[1.00e+00 6.44e-04]
[1.00e+00 4.68e-04]
[1.00e+00 3.24e-04]
[1.00e+00 2.39e-04]
[1.00e+00 1.75e-04]
[1.00e+00 1.24e-04]
[1.0e+00 8.9e-05]
[1.0e+00 6.2e-05]
[1.0e+00 4.5e-05]
[1.0e+00 3.4e-05]
[1.0e+00 2.7e-05]
[1.e+00 2.e-06]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
[1. 0.]
Episode [1300/1300]	Average Shortfall for Agent1: $0.00
Episode [1300/1300]	Average Shortfall for Agent2: $2,562,500.00


In [1]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [2]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [3]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,500000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,500000,Trader's Risk Aversion for Agent 1:,0.0001
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,1e-09
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [4]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

Episode [100/1300]	Average Shortfall for Agent1: $1,168,737.12
Episode [100/1300]	Average Shortfall for Agent2: $1,182,497.04
Episode [200/1300]	Average Shortfall for Agent1: $1,281,250.00
Episode [200/1300]	Average Shortfall for Agent2: $1,281,250.00
Episode [300/1300]	Average Shortfall for Agent1: $1,274,753.86
Episode [300/1300]	Average Shortfall for Agent2: $1,278,818.43
Episode [400/1300]	Average Shortfall for Agent1: $958,446.35
Episode [400/1300]	Average Shortfall for Agent2: $996,403.21
Episode [500/1300]	Average Shortfall for Agent1: $321,537.18
Episode [500/1300]	Average Shortfall for Agent2: $321,944.71
Episode [600/1300]	Average Shortfall for Agent1: $331,625.64
Episode [600/1300]	Average Shortfall for Agent2: $328,738.83
Episode [700/1300]	Average Shortfall for Agent1: $302,789.39
Episode [700/1300]	Average Shortfall for Agent2: $296,596.55
Episode [800/1300]	Average Shortfall for Agent1: $305,151.05
Episode [800/1300]	Average Shortfall for Agent2: $301,542.19
Episode [900

In [5]:
shortfall = np.array(shortfall_list)

In [6]:
np.save('1e-4_le-9_shortfall_optimal.npy',shortfall)

In [7]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.761694 0.656324]
[0.603648 0.454928]
[0.44365  0.334226]
[0.305346 0.25539 ]
[0.20247  0.202642]
[0.13316  0.148788]
[0.09197 0.10399]
[0.064072 0.074902]
[0.044238 0.052522]
[0.03257  0.036602]
[0.02397  0.024466]
[0.018556 0.01732 ]
[0.013314 0.011942]
[0.009696 0.008204]
[0.006774 0.005622]
[0.004728 0.003898]
[0.003236 0.002704]
[0.00228  0.001762]
[0.00165 0.00114]
[0.001234 0.000778]
[0.000932 0.000566]
[0.000674 0.000392]
[0.000506 0.00029 ]
[0.000376 0.000212]
[0.000294 0.00015 ]
[0.000224 0.000108]
[1.66e-04 7.40e-05]
[1.14e-04 5.40e-05]
[8.2e-05 4.2e-05]
[5.8e-05 3.4e-05]
[4.0e-05 2.8e-05]
[2.6e-05 2.2e-05]
[1.8e-05 1.8e-05]
[1.2e-05 1.4e-05]
[8.e-06 1.e-05]
[6.e-06 8.e-06]
[4.e-06 6.e-06]
[2.e-06 4.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-

In [8]:
np.save('1e-4_le-9_trajectory.npy',trajectory)

In [1]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [2]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [3]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,500000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,500000,Trader's Risk Aversion for Agent 1:,1e-06
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,1e-06
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [4]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

Episode [100/1300]	Average Shortfall for Agent1: $1,168,737.12
Episode [100/1300]	Average Shortfall for Agent2: $1,182,497.04
Episode [200/1300]	Average Shortfall for Agent1: $1,281,250.00
Episode [200/1300]	Average Shortfall for Agent2: $1,281,250.00
Episode [300/1300]	Average Shortfall for Agent1: $1,274,753.86
Episode [300/1300]	Average Shortfall for Agent2: $1,278,818.43
Episode [400/1300]	Average Shortfall for Agent1: $958,446.35
Episode [400/1300]	Average Shortfall for Agent2: $996,403.21
Episode [500/1300]	Average Shortfall for Agent1: $321,537.18
Episode [500/1300]	Average Shortfall for Agent2: $321,944.71
Episode [600/1300]	Average Shortfall for Agent1: $331,625.64
Episode [600/1300]	Average Shortfall for Agent2: $328,738.83
Episode [700/1300]	Average Shortfall for Agent1: $302,789.39
Episode [700/1300]	Average Shortfall for Agent2: $296,596.55
Episode [800/1300]	Average Shortfall for Agent1: $305,151.05
Episode [800/1300]	Average Shortfall for Agent2: $301,542.19
Episode [900

In [5]:
shortfall = np.array(shortfall_list)

In [6]:
np.save('1e-6_le-6_competition_shortfall_list.npy',shortfall)

In [7]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.761694 0.656324]
[0.603648 0.454928]
[0.44365  0.334226]
[0.305346 0.25539 ]
[0.20247  0.202642]
[0.13316  0.148788]
[0.09197 0.10399]
[0.064072 0.074902]
[0.044238 0.052522]
[0.03257  0.036602]
[0.02397  0.024466]
[0.018556 0.01732 ]
[0.013314 0.011942]
[0.009696 0.008204]
[0.006774 0.005622]
[0.004728 0.003898]
[0.003236 0.002704]
[0.00228  0.001762]
[0.00165 0.00114]
[0.001234 0.000778]
[0.000932 0.000566]
[0.000674 0.000392]
[0.000506 0.00029 ]
[0.000376 0.000212]
[0.000294 0.00015 ]
[0.000224 0.000108]
[1.66e-04 7.40e-05]
[1.14e-04 5.40e-05]
[8.2e-05 4.2e-05]
[5.8e-05 3.4e-05]
[4.0e-05 2.8e-05]
[2.6e-05 2.2e-05]
[1.8e-05 1.8e-05]
[1.2e-05 1.4e-05]
[8.e-06 1.e-05]
[6.e-06 8.e-06]
[4.e-06 6.e-06]
[2.e-06 4.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-

In [8]:
np.save('1e-6_trajectory_fixed-competitor.npy',trajectory)

In [9]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [10]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [11]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,500000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,500000,Trader's Risk Aversion for Agent 1:,1e-06
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,1e-06
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [12]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1500
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

Episode [100/1500]	Average Shortfall for Agent1: $1,168,737.12
Episode [100/1500]	Average Shortfall for Agent2: $1,182,497.04
Episode [200/1500]	Average Shortfall for Agent1: $1,281,250.00
Episode [200/1500]	Average Shortfall for Agent2: $1,281,250.00
Episode [300/1500]	Average Shortfall for Agent1: $1,274,753.86
Episode [300/1500]	Average Shortfall for Agent2: $1,278,818.43
Episode [400/1500]	Average Shortfall for Agent1: $958,446.35
Episode [400/1500]	Average Shortfall for Agent2: $996,403.21
Episode [500/1500]	Average Shortfall for Agent1: $321,537.18
Episode [500/1500]	Average Shortfall for Agent2: $321,944.71
Episode [600/1500]	Average Shortfall for Agent1: $331,625.64
Episode [600/1500]	Average Shortfall for Agent2: $328,738.83
Episode [700/1500]	Average Shortfall for Agent1: $302,789.39
Episode [700/1500]	Average Shortfall for Agent2: $296,596.55
Episode [800/1500]	Average Shortfall for Agent1: $305,151.05
Episode [800/1500]	Average Shortfall for Agent2: $301,542.19
Episode [900

In [13]:
shortfall = np.array(shortfall_list)

In [14]:
np.save('1e-6_le-6_competition_shortfall_list_1500.npy',shortfall)

In [15]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.769192 0.70972 ]
[0.591748 0.51279 ]
[0.450466 0.390248]
[0.326848 0.284012]
[0.24921  0.197246]
[0.190718 0.142906]
[0.140292 0.097372]
[0.105616 0.06362 ]
[0.07375  0.041506]
[0.048    0.026404]
[0.029694 0.016936]
[0.019016 0.011058]
[0.011524 0.0077  ]
[0.007654 0.00507 ]
[0.004818 0.003454]
[0.002948 0.002304]
[0.00176  0.001582]
[0.00102  0.001078]
[0.00064  0.000704]
[0.000388 0.000446]
[0.000252 0.000288]
[0.000156 0.000186]
[9.40e-05 1.28e-04]
[5.6e-05 9.0e-05]
[3.4e-05 6.6e-05]
[2.e-05 5.e-05]
[1.2e-05 3.6e-05]
[8.0e-06 2.8e-05]
[6.e-06 2.e-05]
[4.0e-06 1.4e-05]
[2.e-06 1.e-05]
[2.e-06 8.e-06]
[2.e-06 6.e-06]
[2.e-06 4.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06

In [16]:
np.save('1e-6_le-6_competition_trajectory_1500.npy.npy',trajectory)

In [2]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [3]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [4]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,500000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,500000,Trader's Risk Aversion for Agent 1:,1e-06
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,1e-06
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [5]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

Episode [100/1300]	Average Shortfall for Agent1: $1,177,354.62
Episode [100/1300]	Average Shortfall for Agent2: $1,183,580.64
Episode [200/1300]	Average Shortfall for Agent1: $1,281,250.00
Episode [200/1300]	Average Shortfall for Agent2: $1,281,250.00
Episode [300/1300]	Average Shortfall for Agent1: $1,281,250.00
Episode [300/1300]	Average Shortfall for Agent2: $1,281,250.00
Episode [400/1300]	Average Shortfall for Agent1: $1,257,995.97
Episode [400/1300]	Average Shortfall for Agent2: $1,272,524.00
Episode [500/1300]	Average Shortfall for Agent1: $695,096.68
Episode [500/1300]	Average Shortfall for Agent2: $801,598.95
Episode [600/1300]	Average Shortfall for Agent1: $329,916.66
Episode [600/1300]	Average Shortfall for Agent2: $337,108.06
Episode [700/1300]	Average Shortfall for Agent1: $302,789.39
Episode [700/1300]	Average Shortfall for Agent2: $296,596.55
Episode [800/1300]	Average Shortfall for Agent1: $305,151.05
Episode [800/1300]	Average Shortfall for Agent2: $301,542.19
Episode 

In [6]:
shortfall = np.array(shortfall_list)

In [7]:
np.save('1e-6_le-6_corporatition_shortfall_list.npy',shortfall)

In [8]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.761694 0.656324]
[0.603648 0.454928]
[0.44365  0.334226]
[0.305346 0.25539 ]
[0.20247  0.202642]
[0.13316  0.148788]
[0.09197 0.10399]
[0.064072 0.074902]
[0.044238 0.052522]
[0.03257  0.036602]
[0.02397  0.024466]
[0.018556 0.01732 ]
[0.013314 0.011942]
[0.009696 0.008204]
[0.006774 0.005622]
[0.004728 0.003898]
[0.003236 0.002704]
[0.00228  0.001762]
[0.00165 0.00114]
[0.001234 0.000778]
[0.000932 0.000566]
[0.000674 0.000392]
[0.000506 0.00029 ]
[0.000376 0.000212]
[0.000294 0.00015 ]
[0.000224 0.000108]
[1.66e-04 7.40e-05]
[1.14e-04 5.40e-05]
[8.2e-05 4.2e-05]
[5.8e-05 3.4e-05]
[4.0e-05 2.8e-05]
[2.6e-05 2.2e-05]
[1.8e-05 1.8e-05]
[1.2e-05 1.4e-05]
[8.e-06 1.e-05]
[6.e-06 8.e-06]
[4.e-06 6.e-06]
[2.e-06 4.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-06]
[2.e-06 2.e-

In [9]:
np.save('1e-6_trajectory_fixed-corporation.npy',trajectory)

In [1]:
import utils

# Get the default financial and AC Model parameters
financial_params, ac_params = utils.get_env_param()

In [2]:
financial_params

0,1,2,3
Annual Volatility:,12%,Bid-Ask Spread:,0.125
Daily Volatility:,0.8%,Daily Trading Volume:,5000000.0


In [3]:
ac_params

0,1,2,3
Total Number of Shares for Agent1 to Sell:,1000000,Fixed Cost of Selling per Share:,$0.062
Total Number of Shares for Agent2 to Sell:,0.0001,Trader's Risk Aversion for Agent 1:,1e-06
Starting Price per Share:,$50.00,Trader's Risk Aversion for Agent 2:,0
Price Impact for Each 1% of Daily Volume Traded:,$2.5e-06,Permanent Impact Constant:,2.5e-07
Number of Days to Sell All the Shares:,60,Single Step Variance:,0.144
Number of Trades:,60,Time Interval between trades:,1.0


In [4]:
import numpy as np

import syntheticChrissAlmgren as sca
from ddpg_agent import Agent

from collections import deque

# Create simulation environment
env = sca.MarketEnvironment()

# Initialize Feed-forward DNNs for Actor and Critic models. 
agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)
agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)
# Set the liquidation time
lqt = 60

# Set the number of trades
n_trades = 60

# Set trader's risk aversion
tr1 = 1e-6
tr2 = 1e-6

# Set the number of episodes to run the simulation
episodes = 1300
shortfall_list = []
shortfall_hist1 = np.array([])
shortfall_hist2 = np.array([])
shortfall_deque1 = deque(maxlen=100)
shortfall_deque2 = deque(maxlen=100)
for episode in range(episodes): 
    # Reset the enviroment
    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
    env.start_transactions()

    for i in range(n_trades + 1):
      
        # Predict the best action for the current state. 
        cur_state1 = np.delete(cur_state,8)
        cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
        action1 = agent1.act(cur_state1, add_noise = True)
        action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
        new_state1 = np.delete(new_state,8)
        new_state2 = np.delete(new_state,7)
        agent1.step(cur_state1, action1, reward1, new_state1, done1)
        agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
        cur_state = new_state

        if info.done1 and info.done2:
            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
            shortfall_deque1.append(info.implementation_shortfall1)
            
            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
            shortfall_deque2.append(info.implementation_shortfall2)
            break
        
    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
        print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))
        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])
print('\nAverage Implementation Shortfall for Agent1: ${:,.2f} \n'.format(np.mean(shortfall_hist1)))
print('\nAverage Implementation Shortfall for Agent2: ${:,.2f} \n'.format(np.mean(shortfall_hist2)))

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Episode [100/1300]	Average Shortfall for Agent1: $nan
Episode [100/1300]	Average Shortfall for Agent2: $nan
Episode [200/1300]	Average Shortfall for Agent1: $nan
Episode [200/1300]	Average Shortfall for Agent2: $nan
Episode [300/1300]	Average Shortfall for Agent1: $nan
Episode [300/1300]	Average Shortfall for Agent2: $nan
Episode [400/1300]	Average Shortfall for Agent1: $nan
Episode [400/1300]	Average Shortfall for Agent2: $nan
Episode [500/1300]	Average Shortfall for Agent1: $nan
Episode [500/1300]	Average Shortfall for Agent2: $nan
Episode [600/1300]	Average Shortfall for Agent1: $nan
Episode [600/1300]	Average Shortfall for Agent2: $nan
Episode [700/1300]	Average Shortfall for Agent1: $nan
Episode [700/1300]	Average Shortfall for Agent2: $nan
Episode [800/1300]	Average Shortfall for Agent1: $nan
Episode [800/1300]	Average Shortfall for Agent2: $nan
Episode [900/1300]	Average Shortfall for Agent1: $nan
Episode [900/1300]	Average Shortfall for Agent2: $nan
Episode [1000/1300]	Average 

In [5]:
shortfall = np.array(shortfall_list)

In [6]:
np.save('1e-6_shortfall_list.npy',shortfall)

In [7]:
print(tr1,tr2)
cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)

    # set the environment to make transactions
env.start_transactions()

trajectory = np.zeros([n_trades+1,2])
for i in range(n_trades + 1):
    trajectory[i] = cur_state[7:]
    
    print(cur_state[7:])
        # Predict the best action for the current state. 
    cur_state1 = np.delete(cur_state,8)
    cur_state2 = np.delete(cur_state,7)
        #print(cur_state[5:])
    action1 = agent1.act(cur_state1, add_noise = True)
    action2 = agent2.act(cur_state2, add_noise = True)
        #print(action1,action2)
        # Action is performed and new state, reward, info are received. 
    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)
        
        # current state, action, reward, new state are stored in the experience replay
    new_state1 = np.delete(new_state,8)
    new_state2 = np.delete(new_state,7)
    agent1.step(cur_state1, action1, reward1, new_state1, done1)
    agent2.step(cur_state2, action2, reward2, new_state2, done2)
        # roll over new state
    cur_state = new_state

    if info.done1 and info.done2:
        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)
        shortfall_deque1.append(info.implementation_shortfall1)
            
        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)
        shortfall_deque2.append(info.implementation_shortfall2)
        break
        
if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        
    print('\rEpisode [{}/{}]\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))

1e-06 1e-06
[1. 1.]
[0.778152 1.      ]
[0.627577 1.      ]
[0.4687 1.    ]
[0.327324 1.      ]
[0.219856 1.      ]
[0.146201 1.      ]
[0.101884 1.      ]
[0.071515 1.      ]
[0.049697 1.      ]
[0.036779 1.      ]
[0.027186 1.      ]
[0.021121 1.      ]
[0.015205 1.      ]
[0.011104 1.      ]
[0.007776 1.      ]
[0.00544 1.     ]
[0.003729 1.      ]
[0.002632 1.      ]
[0.001907 1.      ]
[0.001427 1.      ]
[0.001078 1.      ]
[7.81e-04 1.00e+00]
[5.87e-04 1.00e+00]
[4.36e-04 1.00e+00]
[3.4e-04 1.0e+00]
[2.6e-04 1.0e+00]
[1.92e-04 1.00e+00]
[1.33e-04 1.00e+00]
[9.7e-05 1.0e+00]
[6.8e-05 1.0e+00]
[4.6e-05 1.0e+00]
[3.e-05 1.e+00]
[2.e-05 1.e+00]
[1.3e-05 1.0e+00]
[9.e-06 1.e+00]
[6.e-06 1.e+00]
[4.e-06 1.e+00]
[3.e-06 1.e+00]
[2.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-06 1.e+00]
[1.e-0

In [8]:
np.save('1e-6_optimal.npy.npy',trajectory)

In [9]:
import utils
import ddpg_agent
import model 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.rc('xtick',labelsize=14)
plt.rc('ytick',labelsize=14)
plt.rc('legend',fontsize = 14)
