# Playback saved model

In [1]:
################################################################################
#                           1 Import packages                                  #
################################################################################

from amboworld.environment import Env

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random

import torch
import torch.nn as nn
import torch.optim as optim

# Use a double ended queue (deque) for memory
# When memory is full, this will replace the oldest value with the new one
from collections import deque

# Suppress all warnings (e.g. deprecation warnings) for regular use
import warnings

warnings.filterwarnings("ignore")

In [2]:
# ---------------------------------------------------------------------------
# Saved-model locations
# ---------------------------------------------------------------------------

# Directory holding the saved policy files
MODEL_PATH = './models/'

# Policy file name for each agent, keyed by agent name
MODELS = {agent: f'{agent}_policy.pt' for agent in ('ddqn', '3dqn')}

# ---------------------------------------------------------------------------
# Playback settings
# ---------------------------------------------------------------------------

# Number of replicate runs (test episodes) per model
TEST_EPISODES = 30

# Set whether to display on screen (slows model)
DISPLAY_ON_SCREEN = False

# ---------------------------------------------------------------------------
# Simulation parameters (passed to the amboworld `Env` constructor)
# ---------------------------------------------------------------------------

RANDOM_SEED = 42                 # Env `random_seed`
SIM_DURATION = 5000              # Env `duration_incidents`
EPOCHS = 2                       # Env `number_epochs`
INCIDENT_INTERVAL = 20           # Env `incident_interval`

AMBOWORLD_SIZE = 50              # Env `max_size`
NUMBER_INCIDENT_POINTS = 3       # Env `number_incident_points`
INCIDENT_RADIUS = 2              # Env `incident_range`
NUMBER_DISPTACH_POINTS = 25      # Env `number_dispatch_points`

NUMBER_AMBULANCES = 9            # Env `number_ambulances`
AMBO_SPEED = 60                  # Env `ambo_kph`
AMBO_FREE_FROM_HOSPITAL = False  # Env `ambo_free_from_hospital`

In [3]:
# Empty container for results across models
# (presumably filled by later cells - verify against the rest of the notebook)
all_results = {}

## Random action model

In [4]:
# Baseline: run TEST_EPISODES episodes in which every action is chosen
# uniformly at random, record summary metrics at the end of each episode,
# then save the per-episode metrics to CSV.

# Per-episode metrics; one value is appended to each list per episode
results = {
    'call_to_arrival': [],
    'assign_to_arrival': [],
    'demand_met': [],
}

# Build the simulation environment from the notebook-level SIM PARAMETERS
sim = Env(
    random_seed=RANDOM_SEED,
    duration_incidents=SIM_DURATION,
    number_ambulances=NUMBER_AMBULANCES,
    number_incident_points=NUMBER_INCIDENT_POINTS,
    incident_interval=INCIDENT_INTERVAL,
    number_epochs=EPOCHS,
    number_dispatch_points=NUMBER_DISPTACH_POINTS,
    incident_range=INCIDENT_RADIUS,
    max_size=AMBOWORLD_SIZE,
    ambo_kph=AMBO_SPEED,
    ambo_free_from_hospital=AMBO_FREE_FROM_HOSPITAL,
)

# Dedicated, seeded generator for action sampling. The global `random` module
# is never seeded in this notebook, so drawing actions from it makes the
# baseline differ on every Restart & Run All. A private generator also leaves
# the global random state untouched.
action_rng = random.Random(RANDOM_SEED)

for run in range(TEST_EPISODES):

    # Reset game environment and get first state observations
    state = sim.reset()

    # Continue loop until episode complete
    while True:
        # randint is inclusive at both ends, hence the -1
        action = action_rng.randint(0, sim.action_number - 1)
        state_next, reward, terminal, info = sim.step(action)

        # Actions to take if end of game episode
        if terminal:
            # Episode-level summaries are read from the final `info` dict
            mean_assignment_to_arrival = np.mean(info['assignment_to_arrival'])
            mean_call_to_arrival = np.mean(info['call_to_arrival'])
            demand_met = info['fraction_demand_met']

            print(
                f'Run: {run}, '
                f'Mean assignment to arrival: {mean_assignment_to_arrival:4.1f}, '
                f'Mean call to arrival: {mean_call_to_arrival:4.1f}, '
                f'Demand met {demand_met:0.3f}'
            )

            results['call_to_arrival'].append(mean_call_to_arrival)
            results['assign_to_arrival'].append(mean_assignment_to_arrival)
            results['demand_met'].append(demand_met)

            break

# Convert to a DataFrame (one row per episode) and save
results = pd.DataFrame(results)
filename = './output/results_random_action.csv'
results.to_csv(filename, index=False)

Run: 0, Mean assignment to arrival: 18.4, Mean call to arrival: 19.0, Demand met 1.000
Run: 1, Mean assignment to arrival: 18.4, Mean call to arrival: 18.9, Demand met 1.000
Run: 2, Mean assignment to arrival: 18.5, Mean call to arrival: 19.1, Demand met 1.000
Run: 3, Mean assignment to arrival: 18.6, Mean call to arrival: 19.1, Demand met 1.000
Run: 4, Mean assignment to arrival: 18.4, Mean call to arrival: 19.0, Demand met 1.000
Run: 5, Mean assignment to arrival: 18.3, Mean call to arrival: 18.9, Demand met 1.000
Run: 6, Mean assignment to arrival: 18.0, Mean call to arrival: 18.5, Demand met 1.000
Run: 7, Mean assignment to arrival: 18.2, Mean call to arrival: 18.7, Demand met 1.000
Run: 8, Mean assignment to arrival: 18.5, Mean call to arrival: 19.0, Demand met 1.000
Run: 9, Mean assignment to arrival: 18.6, Mean call to arrival: 19.1, Demand met 1.000
Run: 10, Mean assignment to arrival: 18.4, Mean call to arrival: 18.9, Demand met 1.000
Run: 11, Mean assignment to arrival: 18.5,

In [5]:
# Display the per-episode results table for the random-action runs
results

Unnamed: 0,call_to_arrival,assign_to_arrival,demand_met
0,18.956942,18.436138,1.0
1,18.850616,18.351715,1.0
2,19.062242,18.547764,1.0
3,19.086208,18.581995,1.0
4,18.958761,18.443757,1.0
5,18.857146,18.349258,1.0
6,18.472739,17.974449,1.0
7,18.726316,18.218529,1.0
8,19.010335,18.508884,1.0
9,19.082648,18.572006,1.0


In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) across episodes
results.describe()

Unnamed: 0,call_to_arrival,assign_to_arrival,demand_met
count,30.0,30.0,30.0
mean,18.97036,18.464876,0.999867
std,0.162865,0.162407,0.000346
min,18.472739,17.974449,0.999
25%,18.873491,18.370422,1.0
50%,18.972356,18.463838,1.0
75%,19.059722,18.555847,1.0
max,19.402174,18.898577,1.0
