# Playback saved model

In [33]:
################################################################################
#                           1 Import packages                                  #
################################################################################

from amboworld.environment import Env

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.optim as optim

# Use a double ended queue (deque) for memory
# When memory is full, this will replace the oldest value with the new one
from collections import deque

# Suppress all warnings (e.g. deprecation warnings) for regular use
import warnings

warnings.filterwarnings("ignore")

In [41]:
# ---------------------------------------------------------------------------
# Saved models
# ---------------------------------------------------------------------------
# Directory that holds the saved policy files
MODEL_PATH = './models/'

# Policy file name for each model key
MODELS = {
    'ddqn': 'ddqn_policy.pt',
    '3dqn': '3dqn_policy.pt',
}

# ---------------------------------------------------------------------------
# Playback settings
# ---------------------------------------------------------------------------
# Number of replicate runs per model
TEST_EPISODES = 30

# Toggle on-screen display (turning it on slows the model down)
DISPLAY_ON_SCREEN = False

# ---------------------------------------------------------------------------
# Simulation parameters (each is handed to Env under the keyword noted)
# ---------------------------------------------------------------------------
RANDOM_SEED = 42                  # random_seed
SIM_DURATION = 5000               # duration_incidents
EPOCHS = 2                        # number_epochs
INCIDENT_INTERVAL = 60            # incident_interval

AMBOWORLD_SIZE = 200              # max_size
NUMBER_INCIDENT_POINTS = 1        # number_incident_points
INCIDENT_RADIUS = 2               # incident_range
# (sic) spelling kept because other cells refer to this exact name
NUMBER_DISPTACH_POINTS = 100      # number_dispatch_points

NUMBER_AMBULANCES = 3             # number_ambulances
AMBO_SPEED = 60                   # ambo_kph
AMBO_FREE_FROM_HOSPITAL = False   # ambo_free_from_hospital

In [42]:
# Start with an empty container for results
all_results = {}

## Random action model

In [43]:
# Baseline for comparison: pick a random action at every step and record the
# end-of-episode performance over TEST_EPISODES replicate runs.

# Seed Python's `random` module, which draws the actions below. RANDOM_SEED was
# previously only handed to Env, so the chosen actions (and the saved results)
# could differ on every kernel run. The seed is set before Env is built so that
# any seeding Env performs itself still takes effect afterwards.
random.seed(RANDOM_SEED)

sim = Env(
        random_seed = RANDOM_SEED,
        duration_incidents = SIM_DURATION,
        number_ambulances = NUMBER_AMBULANCES,
        number_incident_points = NUMBER_INCIDENT_POINTS,
        incident_interval = INCIDENT_INTERVAL,
        number_epochs = EPOCHS,
        number_dispatch_points = NUMBER_DISPTACH_POINTS,
        incident_range = INCIDENT_RADIUS,
        max_size = AMBOWORLD_SIZE,
        ambo_kph = AMBO_SPEED,
        ambo_free_from_hospital = AMBO_FREE_FROM_HOSPITAL
    )

# One list per metric; one value is appended per completed run. Kept under its
# own name so that `results` only ever refers to the final DataFrame.
run_metrics = {
    'call_to_arrival': [],
    'assign_to_arrival': [],
    'demand_met': [],
}

for run in range(TEST_EPISODES):

    # Reset game environment and get first state observations
    state = sim.reset()

    # Keep stepping with random actions until the environment flags the end of
    # the episode
    terminal = False
    while not terminal:
        action = random.randint(0, sim.action_number - 1)
        state_next, reward, terminal, info = sim.step(action)

    # `info` is the dictionary returned by the final step of the episode
    mean_assignment_to_arrival = np.mean(info['assignment_to_arrival'])
    mean_call_to_arrival = np.mean(info['call_to_arrival'])
    demand_met = info['fraction_demand_met']

    # One summary line per run
    print(f'Run: {run}, ', end='')
    print(f'Mean assignment to arrival: {mean_assignment_to_arrival:4.1f}, ', end='')
    print(f'Mean call to arrival: {mean_call_to_arrival:4.1f}, ', end='')
    print(f'Demand met {demand_met:0.3f}')

    run_metrics['call_to_arrival'].append(mean_call_to_arrival)
    run_metrics['assign_to_arrival'].append(mean_assignment_to_arrival)
    run_metrics['demand_met'].append(demand_met)

# One row per run; written without the index column
results = pd.DataFrame(run_metrics)
filename = './output/results_random_action.csv'
results.to_csv(filename, index=False)

In [44]:
# Build a fresh environment from the notebook-level simulation settings
sim = Env(
    # Run control
    random_seed=RANDOM_SEED,
    duration_incidents=SIM_DURATION,
    number_epochs=EPOCHS,
    incident_interval=INCIDENT_INTERVAL,
    # World layout
    max_size=AMBOWORLD_SIZE,
    number_incident_points=NUMBER_INCIDENT_POINTS,
    incident_range=INCIDENT_RADIUS,
    number_dispatch_points=NUMBER_DISPTACH_POINTS,
    # Ambulances
    number_ambulances=NUMBER_AMBULANCES,
    ambo_kph=AMBO_SPEED,
    ambo_free_from_hospital=AMBO_FREE_FROM_HOSPITAL,
)



In [55]:
# Take a single step with a randomly chosen action.
# NOTE(review): the reset call (`state = sim.reset()`) is left disabled here, so
# the step is applied to the environment in whatever state it is currently in.
action = random.randrange(sim.action_number)
state_next, reward, terminal, info = sim.step(action)

In [56]:
# Show the observation returned by the last step, then its length, one per line
print(state_next, len(state_next), sep='\n')

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         1.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         1.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.5        

# Debug output: plain-text print of `state`, which is assigned from
# `sim.reset()` in the random-action loop.
print(state)

# Same variable as a bare expression, so the notebook displays it as cell output
state

In [5]:
# Display the per-run results table.
# NOTE(review): this cell's execution count is lower than the cells above it,
# so it was run out of order — confirm the output with Restart & Run All.
results

Unnamed: 0,call_to_arrival,assign_to_arrival,demand_met
0,30.723799,24.914537,1.0
1,31.000709,24.746962,1.0
2,30.758675,24.92852,1.0
3,30.266405,24.891355,1.0
4,31.213184,24.735062,1.0
5,30.133407,24.639431,1.0
6,30.304308,24.851729,0.999
7,29.905955,24.85742,1.0
8,30.270451,25.173151,1.0
9,30.482036,24.922084,1.0


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for each results column
results.describe()

Unnamed: 0,call_to_arrival,assign_to_arrival,demand_met
count,30.0,30.0,30.0
mean,30.401779,24.82499,0.999967
std,0.365954,0.120878,0.000183
min,29.905955,24.582487,0.999
25%,30.086363,24.748674,1.0
50%,30.363205,24.829068,1.0
75%,30.636157,24.88064,1.0
max,31.213637,25.173151,1.0
