# Epidemic Environment Tests 
This notebook tests simple policies (observant, random, regular_culls) and scripted policies (SP1, SP2, SP3) in the epidemic environments.  
Before use, please edit the path to the environment folder.  
Then, please follow the instructions in "Initialization".

In [None]:
# Path to environment folder
# NOTE: these are absolute, machine-specific paths; edit them to point at the
# local "Environments" checkout before running the notebook.
# Both entries are inserted at index 1, so the first sys.path entry stays in
# place and the "Variations" folder (inserted last) is searched before the
# main "Environments" folder.
import sys
sys.path.insert(1, '/home/jovyan/Masterarbeit/reinforce-one/Environments')
sys.path.insert(1, '/home/jovyan/Masterarbeit/reinforce-one/Environments/Variations')

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools
import os
import time

from absl import app
from absl import logging

import gin
from six.moves import range
import tensorflow as tf  # pylint: disable=g-explicit-tensorflow-version-import

import matplotlib
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
matplotlib.rcParams.update({'font.size': 17})
#plt.ioff() 

from tf_agents.agents.ddpg import actor_rnn_network
from tf_agents.agents.ddpg import critic_rnn_network
from tf_agents.agents.ddpg import ddpg_agent
from tf_agents.drivers import dynamic_episode_driver
from tf_agents.environments import suite_dm_control
from tf_agents.environments import tf_py_environment
from tf_agents.environments import py_environment
from tf_agents.environments import wrappers
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.utils import common

import numpy as np
from tf_agents.environments import utils
from tf_agents.trajectories.time_step import StepType
from tf_agents.trajectories import TimeStep
from tf_agents.policies import scripted_py_policy
from tf_agents.policies import random_py_policy
from tf_agents.policies import policy_saver
from tf_agents.metrics import py_metrics
from tf_agents.drivers import py_driver
from tf_agents.specs import tensor_spec
from tf_agents.networks import sequential

from EE1 import EE1
from EE0 import EE0
from EE0_5 import EE0_5
from EE0_A import EE0_A
from EE0_NT import EE0_NT
from EE1_A import EE1_A

## Initialization  
Initialize an environment with parameters used globally (set py_env and global params).  
Each section has comments that introduce them in detail.  

num_episodes determines the number of episodes that each policy is tested across in py_env.  
Executing all cells tests the random policy, SP1, SP2 and SP3 across num_episodes in py_env.  

Results of these can be viewed in "Testing" section.  
Afterwards, the cull threshold, test interval and test percentage are optimized for py_env.  
Then, the newly optimized scripted policy is tested across num_episodes in py_env.

In [None]:
# Global parameters
# These names are notebook-wide globals: plot_actions_and_states and
# PseudoAgent read num_herds (and PseudoAgent also reads
# weeks_until_testresults and average_episode_length) directly.
num_herds = 2
total_population = 300
weeks_until_testresults = 0.
average_episode_length = 200
# Optimize a "regularly cull 2 herds" - policy, takes a while
opt_regular_cull_policy = False
# If runtime too long, reduce "num episodes"
num_episodes = 1000
# Enables the plotting cell for the observant policy and is passed as the
# `plots` flag to PseudoAgent.
make_plots = False

# Environment under test; every cell below evaluates its policies on py_env.
# NOTE(review): the keyword semantics are defined by EE0, which is not part of
# this notebook -- confirm against the environment source.
py_env = EE0(num_herds = num_herds, 
             total_population = total_population, 
             fix_episode_length=True, 
             weeks_until_testresults = weeks_until_testresults,
             average_episode_length = average_episode_length)

## Plotting

First, define plot functions.  
plot_infectious: plots the number of positive tests as a percentage of all tests (for observant policy: number of infectious).  
plot_actions_and_states: plots test sizes and frequency, then plots positive tests and culls.

In [None]:
def plot_infectious(inf_list, num_herds):
    """Plot one curve per herd from a per-step list of herd values.

    Args:
        inf_list: Sequence with one entry per time step; each entry holds at
            least ``num_herds`` values (one per herd).
        num_herds: Number of herds (columns of ``inf_list``) to draw.

    Returns:
        None. The figure is shown and then closed.
    """
    colors = ['b', 'g', 'r', 'y', 'k']
    inf_array = np.asarray(inf_list)
    if inf_array.size == 0:
        # np.amax would raise on an empty array; there is nothing to draw.
        print('Nothing to plot: inf_list is empty.')
        return None

    # One integer x position per recorded step (0, 1, ..., len - 1).
    # np.linspace(0, len, num=len) would stretch the axis by len / (len - 1).
    t = np.arange(len(inf_array))

    plt.figure(figsize=(16,7))
    plt.title('Infectious Subjects over Time')
    plt.xlabel('Time Steps')
    plt.ylabel('Percentage of Herd')
    # Leave a small margin above the largest value, capped at 1.1.
    ymax = min(1.1, np.amax(inf_array)+0.01)
    plt.ylim(-0.01, ymax)
    for k in range(num_herds):
        plt.plot(t, inf_array[:, k],
                 color=colors[k % len(colors)],  # cycle colours for > 5 herds
                 label='Infectious Subjects Herd ' + str(k+1),
                 marker='', linestyle='-', alpha=0.7)
    plt.legend()
    plt.show()
    plt.close()
    return None

In [None]:
def plot_actions_and_states(action_list, inf_list, tests_list):
    """Plot the recorded actions and observations of one two-herd episode.

    Figure (A) shows the test actions of both herds over time. Figure (B)
    shows the recorded positive-test values of both herds and marks every
    step on which a herd was culled; the marker size grows with the recorded
    tested fraction, which is also written next to the marker when it is > 0.

    Reads the notebook-level global ``num_herds`` and only plots when it is 2.

    Args:
        action_list: Per-step actions; entries 0/1 are the test actions and
            entries 2/3 the cull flags (1 = cull) of herd 1/herd 2.
        inf_list: Per-step pairs of positive-test values (herd 1, herd 2).
        tests_list: Per-step pairs of tested fractions (herd 1, herd 2).

    Returns:
        None. Both figures are shown and then closed.
    """
    if num_herds != 2:
        print('Plotting only works for 2 herds.')
        return None
    n_steps = len(action_list)
    if n_steps == 0:
        # max() below would raise on empty lists; there is nothing to draw.
        print('Nothing to plot: action_list is empty.')
        return None

    # NOTE(review): 150 looks like total_population / num_herds from the
    # initialization cell (300 / 2) -- confirm that actions are test counts.
    herd_size = 150

    # Integer x positions so the curves line up with the cull markers, which
    # are drawn at x = j.
    t = np.arange(n_steps)

    n_tests_h1 = [action_list[i][0] / herd_size for i in range(n_steps)]
    n_tests_h2 = [action_list[i][1] / herd_size for i in range(n_steps)]
    replace_h1 = [action_list[i][2] for i in range(n_steps)]
    replace_h2 = [action_list[i][3] for i in range(n_steps)]
    inf_h1 = [inf_list[i][0] for i in range(n_steps)]
    inf_h2 = [inf_list[i][1] for i in range(n_steps)]
    tests_h1 = [tests_list[i][0] for i in range(n_steps)]
    tests_h2 = [tests_list[i][1] for i in range(n_steps)]

    # (A) test actions
    plt.figure(figsize=(20,10))
    plt.title('(A) Tests over Time')
    plt.xlabel('Time Step')
    plt.ylabel('Percentage of Herd')
    plt.ylim(-0.01, 1.01)
    plt.plot(t, n_tests_h1, color='fuchsia', label='Number of Tests Herd 1', marker='', linestyle='-', alpha=0.7)
    plt.plot(t, n_tests_h2, color='mediumblue', label='Number of Tests Herd 2', marker='', linestyle='-', alpha=0.7)
    plt.legend()
    plt.show()
    plt.close()

    # (B) positive tests with cull markers
    plt.figure(figsize=(20,10))
    plt.title('(B) Correlation of Testresults and Culls')
    plt.xlabel('Time Step')
    plt.ylabel('Percentage of Tests')
    ymax_p3 = min(1.1, max(max(inf_h1)+0.01, max(inf_h2)+0.01))
    plt.ylim(-0.01, ymax_p3+0.01)
    plt.plot(t, inf_h1, color='fuchsia', label='Positive Tests Herd 1', marker='', linestyle='-', alpha=0.7)
    plt.plot(t, inf_h2, color='mediumblue', label='Positive Tests Herd 2', marker='', linestyle='-', alpha=0.7)

    # (cull flags, positive tests, tested fraction, marker colour, legend label)
    herds = (
        (replace_h1, inf_h1, tests_h1, 'limegreen', 'Culls Herd 1'),
        (replace_h2, inf_h2, tests_h2, 'yellow', 'Culls Herd 2'),
    )
    labelled = set()
    for j in range(n_steps):
        for culled, positives, tested, colour, legend_label in herds:
            if culled[j] != 1:
                continue
            scatter_kwargs = dict(x=j, y=positives[j], s=20+tested[j]*300,
                                  c=colour, marker='o', edgecolors='black')
            # Only the first marker of each herd gets a legend entry.
            if legend_label not in labelled:
                scatter_kwargs['label'] = legend_label
                labelled.add(legend_label)
            plt.scatter(**scatter_kwargs)
            # Annotate with this herd's own tested fraction (the herd 2
            # branch previously checked herd 1's value).
            if tested[j] > 0.:
                plt.annotate(str(round(tested[j], 2)), (j+3, positives[j]))
    plt.legend()
    plt.show()
    plt.close()
    return None

## Simple Policies
make_scripted_policy: Creates scripted policies that cull both herds in regular intervals.

In [None]:
def make_scripted_policy(environment, num_zero_actions, max_episode_length):
    """Build a scripted policy that culls both herds at a fixed interval.

    The script alternates ``num_zero_actions`` steps of the all-zero action
    with a single step of ``[0,0,1,1]`` and repeats that pair
    ``int(1 + max_episode_length)`` times.

    Args:
        environment: Environment providing ``time_step_spec()`` and
            ``action_spec()``.
        num_zero_actions: Number of all-zero actions before each cull step.
        max_episode_length: Controls how often the pair is repeated.

    Returns:
        A ``ScriptedPyPolicy`` replaying that script.
    """
    idle_phase = (num_zero_actions, [0,0,0,0])
    cull_phase = (1, [0,0,1,1])
    repetitions = int(1+max_episode_length)
    script = [idle_phase, cull_phase] * repetitions

    return scripted_py_policy.ScriptedPyPolicy(
        time_step_spec=environment.time_step_spec(),
        action_spec=environment.action_spec(),
        action_script=script)

Create a random policy.

In [None]:
# Random baseline policy built from py_env's own time-step and action specs;
# evaluated with test_env in the "Test random policy" cell.
random_policy = random_py_policy.RandomPyPolicy(time_step_spec=py_env.time_step_spec(), 
                                                action_spec=py_env.action_spec())

## Testing

First, write a function that tests an environment with any policy.  
test_env: Returns the average number of culls per episode together with the mean, variance and standard deviation of the return across a set number of episodes.

In [None]:
def test_env(environment, policy, num_episodes = 50, num_herds = 2):
    """Run ``policy`` in ``environment`` and summarise the episode returns.

    An action entry at index ``num_herds .. 2*num_herds - 1`` counts as a
    cull when it is >= 0.5.

    Args:
        environment: Environment to roll out; must be a ``PyEnvironment``.
        policy: Python policy exposing ``get_initial_state`` and ``action``.
        num_episodes: Number of episodes to run.
        num_herds: Number of herds, used to locate the cull entries.

    Returns:
        ``(culls, average_return, variance, standard_deviation)`` where
        ``culls`` is the number of culls per episode, or ``None`` when
        ``environment`` is not a ``PyEnvironment``.
    """
    if not isinstance(environment, py_environment.PyEnvironment):
        return None

    episode_returns = []
    cull_count = 0
    cull_slots = range(num_herds, num_herds*2)
    # Scripted policies take no batch_size for their initial state.
    scripted = isinstance(policy, scripted_py_policy.ScriptedPyPolicy)

    for _ in range(num_episodes):
        time_step = environment.reset()
        if scripted:
            policy_state = policy.get_initial_state()
        else:
            policy_state = policy.get_initial_state(batch_size=1)

        collected = 0.0
        while not time_step.is_last():
            action_step = policy.action(time_step, policy_state)
            cull_count += sum(1 for slot in cull_slots
                              if action_step.action[slot] >= 0.5)
            policy_state = action_step.state
            time_step = environment.step(action_step.action)
            collected += time_step.reward
        episode_returns.append(collected)

    return (cull_count / num_episodes,
            np.average(episode_returns),
            np.var(episode_returns),
            np.std(episode_returns))

With the test function defined, run the observant policy (test both herds every step, never cull) for one episode and plot the result.

In [None]:
# Observant policy: test both herds every step and never cull, then plot the
# observed infectious values of one episode.
if num_herds != 2:
    print('Not for more than 2 herds.')
elif make_plots:
    step = py_env.reset()
    inf = []
    # Observation index k*4+2 is the value PseudoAgent compares against the
    # cull threshold, i.e. indices 2 and 6 for herds 1 and 2.
    inf_temp = np.array([step.observation[2], step.observation[6]], np.float32)
    inf.append(inf_temp)
    returns = 0
    while not step.is_last():
        # First two entries: test actions; last two entries: cull flags.
        step = py_env.step([1.,1.,0.,0.])
        inf_temp = np.array([step.observation[2], step.observation[6]], np.float32)
        inf.append(inf_temp)
        returns += step.reward
    print('Return: ', returns)
    plot_infectious(inf, num_herds = 2)
else:
    # Previously this case printed the misleading "more than 2 herds" message.
    print('Plotting disabled (make_plots = False).')

Then, optimize a scripted policy that regularly culls both herds.  
Warning: this can take a while (the reliability of the optimization and the runtime depend on the `num_episodes` argument passed to `test_env` and on the searched range of cull intervals).

In [None]:
# Search for the cull interval (steps without any action before both herds
# are culled) that gives the best average return on py_env.
if(num_herds == 2 and opt_regular_cull_policy):
    min_steps_before_cull = 20
    returns = []
    for i in range (min_steps_before_cull, 80):
        # The third argument is the episode length that sizes the script;
        # it must not depend on how many episodes are evaluated.
        scr_pol = make_scripted_policy(py_env, i, average_episode_length)
        culls, avg_return, var, std = test_env(py_env, scr_pol, num_episodes = 100)
        returns.append(avg_return)
    index = int(np.argmax(returns))
    maximum_avg_return = returns[index]
    steps_before_cull = min_steps_before_cull + index
    print('Maximum Average Return = {0}, Steps before Cull = {1}'.format(maximum_avg_return, steps_before_cull))
    # Re-test the winner on many more episodes to reduce sampling noise.
    scr_pol = make_scripted_policy(py_env, steps_before_cull, average_episode_length)
    culls, avg_return, var, std = test_env(py_env, scr_pol, num_episodes = 10000)
    print('Re-Tested Maximum Average Return = ', avg_return)
    print('Variance = ', var)
    print('Stddev = ', std)

Test random policy.

In [None]:
# Evaluate the random baseline on py_env across num_episodes episodes.
# test_env returns (culls per episode, mean return, variance, stddev).
culls, avg_return, var, std = test_env(py_env, random_policy , num_episodes = num_episodes)
print('average return = {0} culls = {1}'.format(avg_return, culls))
print('Variance = ', var)
print('Stddev = ', std)

Additionally, create a Pseudo-Agent that culls a herd if a threshold is breached,  
and tests a fixed percentage of each herd in a fixed interval (SP1, SP2, SP3). 
Works for n herds.

In [None]:
# Test j percent of all every k steps, cull if more than l percent of tests positive
def PseudoAgent(env, num_episodes, cull_threshhold = 0.026, test_percentage = 1., test_interval = 1, plots = False):
    """Run a threshold-based scripted agent and summarise its returns.

    Every ``test_interval`` steps the agent sets the test action of every herd
    to ``test_percentage``. A herd is culled when observation entry ``4*h + 2``
    reaches ``cull_threshhold``; while testing is enabled
    (``test_interval > 0``) entry ``4*h`` must additionally equal
    ``weeks_until_testresults / average_episode_length``.

    Reads the notebook-level globals ``num_herds``, ``weeks_until_testresults``
    and ``average_episode_length``.
    NOTE(review): assumes four observation entries per herd laid out as in
    EE0 -- confirm for other environments.

    Args:
        env: Environment exposing ``reset()`` and ``step(action)``.
        num_episodes: Number of episodes to run.
        cull_threshhold: Value of entry ``4*h + 2`` that triggers a cull.
        test_percentage: Test action applied on testing steps.
        test_interval: Test every this many steps; values <= 0 disable testing.
        plots: If true, record episode ``num_episodes/2`` and plot it.

    Returns:
        ``(culls, average_return, variance, standard_deviation)`` where
        ``culls`` is the total number of culls divided by ``num_episodes``.
    """
    assert num_episodes >= 0, "Please enter a positive integer for episode number."

    culls_per_herd = np.zeros((num_herds,), np.int32)
    episode_returns = []
    logged_actions, logged_infectious, logged_tested = [], [], []

    testresults_time = weeks_until_testresults / average_episode_length
    result_marker = np.float32(testresults_time)
    threshold = np.float32(cull_threshhold)
    testing_enabled = test_interval > 0
    plotted_episode = np.int32(num_episodes/2)

    for episode in range(num_episodes):
        time_step = env.reset()
        collected = 0.
        step_count = 0
        record = plots and episode == plotted_episode

        while not time_step.is_last():
            step_count += 1
            obs = time_step.observation
            act = np.zeros((num_herds*2,), np.float32)

            # Test actions occupy the first num_herds entries.
            if testing_enabled and step_count % test_interval == 0:
                act[:num_herds] = test_percentage

            # Cull flags occupy the last num_herds entries.
            for herd in range(num_herds):
                base = herd * 4
                if not (np.float32(obs[base + 2]) >= threshold):
                    continue
                if testing_enabled and not (np.float32(obs[base]) == result_marker):
                    continue
                act[herd + num_herds] = 1.
                culls_per_herd[herd] += 1

            if record:
                # Observations are logged instead of the true state.
                logged_actions.append(act)
                logged_infectious.append(
                    np.array([obs[(d*4)+2] for d in range(num_herds)], np.float32))
                logged_tested.append(
                    np.array([obs[(d*4)+1] for d in range(num_herds)], np.float32))

            time_step = env.step(act)
            collected += time_step.reward
        episode_returns.append(collected)

    if plots:
        plot_actions_and_states(action_list = logged_actions,
                                inf_list = logged_infectious,
                                tests_list = logged_tested)

    return (np.sum(culls_per_herd) / num_episodes,
            np.average(episode_returns),
            np.var(episode_returns),
            np.std(episode_returns))

Execute PseudoAgent with settings for SP1, SP2 and SP3.  
If make_plots is True, each run also plots the actions of one episode at the end.  
Runtime scales quickly with the number of episodes.

In [None]:
# SP3
# Positional arguments after num_episodes: cull_threshhold = 0.016,
# test_percentage = 1., test_interval = 1, plots = make_plots.
c , ar, var, std = PseudoAgent(py_env, num_episodes, 0.016, 1., 1, make_plots)    
print('Average Culls per Episode: ', c)
print('Average Return: ', ar)
print('Variance: ', var)
print('Standard Deviation: ', std)

In [None]:
# SP2
# Positional arguments after num_episodes: cull_threshhold = 0.016,
# test_percentage = 1., test_interval = 10, plots = make_plots.
c , ar, var, std = PseudoAgent(py_env, num_episodes, 0.016, 1., 10, make_plots) 
print('Average Culls per Episode: ', c)
print('Average Return: ', ar)
print('Variance: ', var)
print('Standard Deviation: ', std)

In [None]:
# SP1
# Positional arguments after num_episodes: cull_threshhold = 0.016,
# test_percentage = 0.8, test_interval = 10, plots = make_plots.
c , ar, var, std = PseudoAgent(py_env, num_episodes, 0.016, 0.8, 10, make_plots)
print('Average Culls per Episode: ', c)
print('Average Return: ', ar)
print('Variance: ', var)
print('Standard Deviation: ', std)

## Optimization  
Define functions that optimize the parameters of the PseudoAgent for an environment.

In [None]:
# Optimize PAs for cull threshhold
def iterate_cts(ct_start = 0.01, 
                ct_end = 0.1, 
                steps = 0.01, 
                environment = None, 
                num_episodes = 200, 
                fn = None):
    """Sweep the cull threshold over a grid and return the best setting.

    ``fn(environment, num_episodes, ct)`` is evaluated for
    ``ct = ct_start + k * steps`` (k = 0, 1, ...) up to and including the
    first grid point that reaches ``ct_end``. The sweep stops early once the
    current average return is more than 15 below the best one seen so far.

    Args:
        ct_start: First cull threshold to evaluate.
        ct_end: The sweep ends with the first grid point >= this value.
        steps: Grid spacing; must be positive.
        environment: Environment handed to ``fn``. ``None`` uses the
            notebook's current global ``py_env``.
        num_episodes: Episodes per evaluation.
        fn: Callable returning ``(culls, average_return, variance, stddev)``.
            ``None`` uses the notebook's global ``PseudoAgent``.

    Returns:
        ``(ar_max, ar_list, ct_max)``: the best average return, all average
        returns in evaluation order, and the threshold that achieved the best.

    Raises:
        ValueError: If ``steps`` is not positive.
    """
    if steps <= 0:
        raise ValueError('steps must be positive, got {0}.'.format(steps))
    # Resolve defaults at call time so a re-created py_env is picked up
    # without re-running this cell.
    if environment is None:
        environment = py_env
    if fn is None:
        fn = PseudoAgent

    max_return_drop = 15
    # Tolerates floating-point rounding when comparing against ct_end.
    tolerance = steps * 1e-9

    ar_list = []
    index = 0
    while True:
        # Computed from the index (not accumulated) so rounding errors do not
        # add up and push the sweep one step past ct_end.
        ct = ct_start + (steps * index)
        _, ar, _, _ = fn(environment, num_episodes, ct)
        ar_list.append(ar)
        if ct >= ct_end - tolerance:
            break
        if (np.amax(ar_list) - ar) > max_return_drop:
            break
        index += 1

    ar_max = np.amax(ar_list)
    max_index = int(np.argmax(ar_list))
    ct_max = ct_start + (steps * max_index)
    return ar_max, ar_list, ct_max

def optimize_cull_threshhold(ct_start = 0.01, 
                             ct_end = 0.1, 
                             steps = 0.01, 
                             environment = None, 
                             num_episodes = 1000, 
                             fn = None):
    """Find the best cull threshold with a coarse sweep and a fine sweep.

    A first ``iterate_cts`` pass over ``[ct_start, ct_end]`` with spacing
    ``steps`` locates the best threshold; a second pass with spacing 0.002
    then searches within +/- 0.03 of it (lower bound clipped at 0).

    Args:
        ct_start: First threshold of the coarse sweep.
        ct_end: End of the coarse sweep.
        steps: Spacing of the coarse sweep.
        environment: Environment to evaluate on. ``None`` uses the notebook's
            current global ``py_env``.
        num_episodes: Episodes per evaluation.
        fn: Agent function handed to ``iterate_cts``. ``None`` uses the
            notebook's global ``PseudoAgent``.

    Returns:
        ``(ar_max, ar_list, ct_max)`` of the fine sweep.
    """
    # Resolve defaults at call time so a re-created py_env is picked up
    # without re-running this cell.
    if environment is None:
        environment = py_env
    if fn is None:
        fn = PseudoAgent

    # NOTE(review): window and spacing of the fine sweep are fixed values
    # that match the default coarse spacing of 0.01.
    refine_radius = 3*0.01
    refine_step = 0.002

    # Pass the caller's environment on; the global py_env used to be
    # hard-wired here, silently ignoring the `environment` argument.
    ar_max, ar_list, ct_max = iterate_cts(ct_start, 
                                          ct_end, 
                                          steps, 
                                          environment, 
                                          num_episodes, 
                                          fn = fn)
    ct_start = max(0, ct_max - refine_radius)
    ct_end = ct_max + refine_radius

    ar_max, ar_list, ct_max = iterate_cts(ct_start, 
                                          ct_end, 
                                          refine_step, 
                                          environment, 
                                          num_episodes, 
                                          fn = fn)
    # Report the returns in a window around the optimum.
    max_index = int(np.argmax(ar_list))
    low = max(0, max_index - 5)
    high = min(len(ar_list), max_index + 5)
    print('Average Returns of PseudoAgent with different cull threshholds: ', ar_list[low:high])
    print('Maximum average return: {0}, and corresponding cull threshhold: {1}.'.format(ar_max, ct_max))
    return ar_max, ar_list, ct_max

def optimize_test_percentage(tp_start = 0.01, 
                             tp_end = 0.5, 
                             steps = 0.01, 
                             environment = None, 
                             num_episodes = 1000, 
                             fn = None, 
                             ct = 0.01,
                             ti = 1):
    """Sweep the test percentage over a grid and return the best setting.

    ``fn(environment, num_episodes*2, ct, tp, ti)`` is evaluated for
    ``tp = tp_start + k * steps`` (k = 0, 1, ...) up to and including the
    first grid point that reaches ``tp_end``. The sweep stops early once the
    current average return is more than 15 below the best one seen so far.

    Args:
        tp_start: First test percentage to evaluate.
        tp_end: The sweep ends with the first grid point >= this value.
        steps: Grid spacing; must be positive.
        environment: Environment handed to ``fn``. ``None`` uses the
            notebook's current global ``py_env``.
        num_episodes: Half the number of episodes per evaluation (``fn``
            receives ``num_episodes*2``).
        fn: Callable returning ``(culls, average_return, variance, stddev)``.
            ``None`` uses the notebook's global ``PseudoAgent``.
        ct: Fixed cull threshold passed to ``fn``.
        ti: Fixed test interval passed to ``fn``.

    Returns:
        ``(ar_max, ar_list, tp_max)``: the best average return, all average
        returns in evaluation order, and the test percentage that achieved
        the best.

    Raises:
        ValueError: If ``steps`` is not positive.
    """
    if steps <= 0:
        raise ValueError('steps must be positive, got {0}.'.format(steps))
    # Resolve defaults at call time so a re-created py_env is picked up
    # without re-running this cell.
    if environment is None:
        environment = py_env
    if fn is None:
        fn = PseudoAgent

    max_return_drop = 15
    # Tolerates floating-point rounding when comparing against tp_end.
    tolerance = steps * 1e-9

    ar_list = []
    index = 0
    while True:
        # Computed from the index (not accumulated) so rounding errors cannot
        # push the sweep past tp_end, e.g. to a test percentage above 1.0.
        tp = tp_start + (steps * index)
        _, ar, _, _ = fn(environment, num_episodes*2, ct, tp, ti)
        ar_list.append(ar)
        if tp >= tp_end - tolerance:
            break
        if (np.amax(ar_list) - ar) > max_return_drop:
            break
        index += 1

    ar_max = np.amax(ar_list)
    max_index = int(np.argmax(ar_list))
    tp_max = tp_start + (steps * max_index)
    return ar_max, ar_list, tp_max

def optimize_test_interval(ti_start = 1, 
                           ti_end = 50, 
                           steps = 1, 
                           environment = None, 
                           num_episodes = 1000, 
                           fn = None, 
                           ct = 0.01,
                           tp = 1.):
    """Sweep the test interval over a grid and return the best setting.

    ``fn(environment, num_episodes*2, ct, tp, ti)`` is evaluated for
    ``ti = ti_start + k * steps`` (k = 0, 1, ...) up to and including the
    first grid point that reaches ``ti_end``. The sweep stops early once the
    current average return is more than 15 below the best one seen so far.

    Args:
        ti_start: First test interval to evaluate.
        ti_end: The sweep ends with the first grid point >= this value.
        steps: Grid spacing; must be positive.
        environment: Environment handed to ``fn``. ``None`` uses the
            notebook's current global ``py_env``.
        num_episodes: Half the number of episodes per evaluation (``fn``
            receives ``num_episodes*2``).
        fn: Callable returning ``(culls, average_return, variance, stddev)``.
            ``None`` uses the notebook's global ``PseudoAgent``.
        ct: Fixed cull threshold passed to ``fn``.
        tp: Fixed test percentage passed to ``fn``.

    Returns:
        ``(ar_max, ar_list, ti_max)``: the best average return, all average
        returns in evaluation order, and the test interval that achieved the
        best.

    Raises:
        ValueError: If ``steps`` is not positive.
    """
    if steps <= 0:
        raise ValueError('steps must be positive, got {0}.'.format(steps))
    # Resolve defaults at call time so a re-created py_env is picked up
    # without re-running this cell.
    if environment is None:
        environment = py_env
    if fn is None:
        fn = PseudoAgent

    max_return_drop = 15
    # Only matters for non-integer grids; integer grids compare exactly.
    tolerance = steps * 1e-9

    ar_list = []
    index = 0
    while True:
        ti = ti_start + (steps * index)
        _, ar, _, _ = fn(environment, num_episodes*2, ct, tp, ti)
        ar_list.append(ar)
        if ti >= ti_end - tolerance:
            break
        if (np.amax(ar_list) - ar) > max_return_drop:
            break
        index += 1

    ar_max = np.amax(ar_list)
    max_index = int(np.argmax(ar_list))
    ti_max = ti_start + (steps * max_index)
    return ar_max, ar_list, ti_max

def opt_PA(environment = py_env, 
           num_episodes = 500, 
           fn = PseudoAgent,
           opt_ct = True,
           opt_tp = False,
           opt_ti = False):
    """Optimize the PseudoAgent parameters one after another.

    The stages are nested: the cull threshold is optimized first (``opt_ct``),
    the test interval only if ``opt_ti`` is also set, and the test percentage
    only if ``opt_tp`` is set on top of both. Each stage fixes the results of
    the previous stages and prints the returns around its optimum.

    Args:
        environment: Environment to optimize on.
        num_episodes: Episodes per evaluation, passed to each optimizer.
        fn: Agent function passed to each optimizer.
        opt_ct: Optimize the cull threshold.
        opt_tp: Optimize the test percentage (needs ``opt_ct`` and ``opt_ti``).
        opt_ti: Optimize the test interval (needs ``opt_ct``).

    Returns:
        ``(ar_max, ar_list, ct_max, ti_max, tp_max)``. ``ar_max`` and
        ``ar_list`` belong to the last stage that ran; every parameter whose
        stage did not run is ``-1`` (all five are ``-1`` without ``opt_ct``).
    """
    def _report(returns, best_return, best_value, list_header, best_template):
        # Print the returns in a window around the optimum, then the optimum.
        peak = returns.index(best_return)
        first = max(0, peak - 5)
        last = min(len(returns), peak + 5)
        print(list_header, returns[first:last])
        print(best_template.format(best_return, best_value))

    if not opt_ct:
        return -1, -1, -1, -1, -1

    ar_max, ar_list, ct_max = optimize_cull_threshhold(
        0.01, 0.2, 0.01, environment, num_episodes, fn)
    _report(ar_list, ar_max, ct_max,
            'Average Returns of PseudoAgent with different cull threshholds: ',
            'Maximum average return: {0}, and corresponding cull threshhold: {1}.')
    if not opt_ti:
        return ar_max, ar_list, ct_max, -1, -1

    ar_max, ar_list, ti_max = optimize_test_interval(
        1, 20, 1, environment, num_episodes, fn, ct_max)
    _report(ar_list, ar_max, ti_max,
            'Average Returns of PseudoAgent with different test interval: ',
            'Maximum average return: {0}, and corresponding test interval: {1}.')
    if not opt_tp:
        return ar_max, ar_list, ct_max, ti_max, -1

    ar_max, ar_list, tp_max = optimize_test_percentage(
        0.05, 1.0, 0.05, environment, num_episodes, fn, ct_max, ti_max)
    _report(ar_list, ar_max, tp_max,
            'Average Returns of PseudoAgent with different test percentages: ',
            'Maximum average return: {0}, and corresponding test percentage: {1}.')
    return ar_max, ar_list, ct_max, ti_max, tp_max

In [None]:
'''
ar_max = maximum average return.
ar_list = avg. returns with slightly different test percentages/test intervals/cull threshholds, depending on what is last optimized.
ct_max = cull threshhold that achieved the maximum average return across num_episodes episodes.
ti_max = test interval that achieved the maximum average return across num_episodes episodes (with ct_max fixed).
tp_max = test percentage that achieved the maximum average return across num_episodes episodes (with ct_max, ti_max fixed).
''' 
# All three flags are set, so opt_PA runs every stage and none of the returned
# parameters is the -1 placeholder it uses for skipped stages.
ar_max, ar_list, ct_max, ti_max, tp_max=  opt_PA(environment = py_env, 
                                                  num_episodes = num_episodes,
                                                  fn = PseudoAgent,
                                                  opt_ct = True,
                                                  opt_tp = True,
                                                  opt_ti = True)

# Re-evaluate the PseudoAgent with the optimized parameters. Positional order
# after num_episodes: cull_threshhold, test_percentage, test_interval, plots.
c , ar, var, std = PseudoAgent(py_env, num_episodes, ct_max, tp_max, ti_max, make_plots)  
print('Average Culls per Episode: ', c)
print('Average Return: ', ar)
print('Variance: ', var)
print('Standard Deviation: ', std)