In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import itertools
import time
from tqdm import tqdm
import sympy as sp
# Symbolic variable `x` for SymPy expressions; it is not referenced again in
# the cells visible here.
x = sp.var('x')
from sympy import Symbol
from sympy.solvers import solve
import math
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

from tqdm import tqdm

# Import Markov Decision Module
import MDP

# ODE integration, plus timestamp/filesystem helpers for logging
from scipy.integrate import odeint
from datetime import datetime
import os

In [24]:
# ---- Initial (rate) parameters -------------------------------------------
mu, gamma = 0.00015, 0.0027
beta, phi = 0.0096, 0.0044

# Cost ratios relative to the susceptible class
kappa = 0.8  # C^I / C^S
theta = 1.7  # C^Z / C^S

# ---- R values: mu and phi, each divided by (mu + gamma) -------------------
Rmu = mu/(mu + gamma)
Rphi = phi/(mu + gamma)

# ---- Adaptive-behaviour parameters ----------------------------------------
# b_*: coefficient of the linear term in the utility base (b*a - a**2)
b_s = b_i = b_z = 24
# a_*: constant offset subtracted from the utility
a_s = a_i = a_z = 0
# nu: exponent applied to the utility base
nu = 0.1375
# tau: not referenced in the cells visible here
tau = 3
# delta: handed to MDP.MDP as its last argument (presumably the discount
# factor -- confirm against the MDP module)
delta = 0.9986

# Immediate rewards:
def u_s(a):
    """Immediate reward of the susceptible state at contact level ``a``.

    Evaluates ``(b_s*a - a**2)**nu - a_s`` with the module-level constants
    ``b_s``, ``nu`` and ``a_s``.

    Parameters
    ----------
    a : int, float, NumPy scalar or ndarray
        Contact level(s).

    Returns
    -------
    NumPy float or ndarray
        The reward. For a non-integer ``nu`` the result is ``nan`` wherever
        ``b_s*a - a**2`` is negative, so callers can skip those actions with
        ``np.nanmax`` / ``np.nanargmax``.
    """
    # With built-in numbers, ``**`` turns a negative base and fractional
    # exponent into a *complex* value instead of nan. Promote them so every
    # real input follows the same NumPy nan path as the action grid does.
    if type(a) in (int, float):
        a = np.float64(a)
    # Out-of-domain actions are expected; silence only that warning, locally,
    # instead of depending on a global warnings filter.
    with np.errstate(invalid="ignore"):
        return (b_s*a - a**2)**nu - a_s
def u_i(a):
    """Immediate reward of the infected state: always 0; ``a`` is ignored."""
    infected_reward = 0
    return infected_reward
def u_z(a):
    """Immediate reward of the Z state at contact level ``a``.

    Evaluates ``(b_z*a - a**2)**nu - a_z`` with the module-level constants
    ``b_z``, ``nu`` and ``a_z``.

    Parameters
    ----------
    a : int, float, NumPy scalar or ndarray
        Contact level(s).

    Returns
    -------
    NumPy float or ndarray
        The reward. For a non-integer ``nu`` the result is ``nan`` wherever
        ``b_z*a - a**2`` is negative, so callers can skip those actions with
        ``np.nanmax`` / ``np.nanargmax``.
    """
    # With built-in numbers, ``**`` turns a negative base and fractional
    # exponent into a *complex* value instead of nan. Promote them so every
    # real input follows the same NumPy nan path as the action grid does.
    if type(a) in (int, float):
        a = np.float64(a)
    # Out-of-domain actions are expected; silence only that warning, locally,
    # instead of depending on a global warnings filter.
    with np.errstate(invalid="ignore"):
        return (b_z*a - a**2)**nu - a_z

### Simulation parameters
t_max, steps = 1000, 100
x00 = [9999, 1, 0]
max_contacts = 30

# Action grid: 100 evenly spaced contact levels on [0, max_contacts].
# The 100 is a literal here; it is not tied to `steps`.
actions = np.linspace(start=0, stop=max_contacts, num=100)

In [25]:
# For each state, pick the grid action with the largest immediate reward
# (nan rewards are skipped by nanargmax).
cs_max = actions[np.nanargmax(list(map(u_s, actions)))]
cz_max = actions[np.nanargmax(list(map(u_z, actions)))]
ci_max = actions[np.nanargmax(list(map(u_i, actions)))]
# If the infected argmax lands on action 0, fall back to the susceptible
# optimum.
ci_max = cs_max if ci_max == 0 else ci_max

In [26]:
# Planning horizon (number of decision stages)
horizon = 5

# Contact levels used in the mixing term below: the per-state maximisers of
# the immediate rewards.
cs_star = cs_max
ci_star = ci_max
cz_star = cz_max

# Population snapshot (S, I, Z) and the matching state indices
xt0 = [1000, 5000, 4000]
states = list(range(3))
S, I, Z = xt0

# Total contacts: each compartment size weighted by its contact level
phi_t = S*cs_star + I*ci_star + Z*cz_star

# Transition Probabilities:
def P_si(a):
    """Probability of moving from state S to state I under action ``a``.

    Returns ``1 - exp(-(beta*ci_star*I*a)/phi_t)`` using the module-level
    ``beta``, ``ci_star``, ``I`` and ``phi_t``. ``math.exp`` is used, so ``a``
    must be a scalar.
    """
    exposure = beta*ci_star*I*a/phi_t
    return 1 - math.exp(-exposure)

def P_ss(a):
    """Probability of staying in state S: the complement of ``P_si(a)``."""
    p_infection = P_si(a)
    return 1 - p_infection

def P_sz(a):
    """Probability of moving from state S to state Z: fixed at 0; ``a`` is ignored."""
    no_transition = 0
    return no_transition

def P_is(a):
    """Probability of moving from state I to state S: fixed at 0; ``a`` is ignored."""
    no_transition = 0
    return no_transition

def P_ii(a):
    """Probability of staying in state I: ``exp(-gamma)``, independent of ``a``."""
    return math.exp(-gamma)

def P_iz(a):
    """Probability of moving from state I to state Z: ``1 - exp(-gamma)``.

    Independent of ``a``; it is the complement of the value ``P_ii`` returns.
    """
    p_stay = math.exp(-gamma)
    return 1 - p_stay

def P_zs(a):
    """Probability of moving from state Z to state S: fixed at 0; ``a`` is ignored."""
    no_transition = 0
    return no_transition

def P_zi(a):
    """Probability of moving from state Z to state I under action ``a``.

    This is ``P_si(a)`` scaled by 0.97.
    """
    # NOTE(review): the 0.97 factor is a bare literal -- presumably a reduced
    # susceptibility for the Z state; confirm and consider naming it.
    scale = 0.97
    return P_si(a)*scale

def P_zz(a):
    """Probability of staying in state Z: the complement of ``P_zi(a)``."""
    p_leave = P_zi(a)
    return 1 - p_leave

# 3x3 table of transition-probability callables. Going by the function names,
# rows are the current state and columns the next state, both in S, I, Z order.
trans_prob_mat = np.array((
    (P_ss, P_si, P_sz),
    (P_is, P_ii, P_iz),
    (P_zs, P_zi, P_zz),
))

# Immediate-reward callables in the same S, I, Z order
reward_vector = np.array((u_s, u_i, u_z))

# The same table / vector object is reused at every stage of the horizon.
trans_probs = [trans_prob_mat for _ in range(horizon)]
rewards = [reward_vector for _ in range(horizon)]

# Starting values handed to the fit: the best immediate reward per state
# (nan entries skipped), with 0 for the infected state.
init_point_use = [
    np.nanmax(list(map(u_s, actions))),
    0,
    np.nanmax(list(map(u_z, actions))),
]

# Use a finite-horizon Markov Decision Process to obtain the optimal policy
# and decision.
MDP_adaptive = MDP.MDP(states, actions, rewards, trans_probs, horizon, delta)
MDP_adaptive.fit_optimal_values(init_point=init_point_use)

In [27]:
# Show the fitted policies; the saved output lists one {state: action}
# mapping per horizon stage.
MDP_adaptive.policies

[{0: 4.545454545454546, 1: 0.0, 2: 4.848484848484849},
 {0: 5.454545454545455, 1: 0.0, 2: 5.454545454545455},
 {0: 6.363636363636364, 1: 0.0, 2: 6.363636363636364},
 {0: 7.878787878787879, 1: 0.0, 2: 7.878787878787879},
 {0: 9.696969696969697, 1: 0.0, 2: 9.696969696969697}]

In [28]:
# Tabulate the susceptible reward and the S-row transition probabilities on
# the action grid.
u_s_vals = list(map(u_s, actions))
p_ss_vals = list(map(P_ss, actions))
p_si_vals = list(map(P_si, actions))

# Diagnostic plot of the three curves above, currently disabled:
# plt.plot(np.linspace(0, max(actions), len(actions)), u_s_vals, label="Utility u_s")
# plt.plot(np.linspace(0, max(actions), len(actions)), p_ss_vals, label="P_ss")
# plt.plot(np.linspace(0, max(actions), len(actions)), p_si_vals, label="P_si")
# plt.xlabel("Actions")
# plt.ylabel("Utility")
# plt.legend(loc = "upper right")
# plt.rcParams["figure.figsize"] = (10,6)
# plt.show()

In [29]:
# Mean of P_si over the action grid (nan entries are ignored)
np.nanmean(p_si_vals)

0.0686486666064388

In [29]:
# Re-tabulate the susceptible reward and the infection probability on the grid
u_s_vals = list(map(u_s, actions))
p_si_vals = list(map(P_si, actions))

# Finite-difference slopes along the grid (default unit spacing for both)
u_s_gradient = np.gradient(u_s_vals)
p_si_gradient = np.gradient(p_si_vals)

# Ratio of the reward slope to delta times the infection-probability slope,
# point by point
values = [
    d_reward/(delta*d_prob)
    for d_reward, d_prob in zip(u_s_gradient, p_si_gradient)
]

# Largest ratio, skipping nan entries
max_values = np.nanmax(values)
max_values

903.4862512790107

array([ 1.31136804e+00,  7.19975427e-01,  1.04207594e-01,  6.91424329e-02,
        5.23169852e-02,  4.21571819e-02,  3.52695624e-02,  3.02532487e-02,
        2.64150558e-02,  2.33698018e-02,  2.08852336e-02,  1.88124887e-02,
        1.70515037e-02,  1.55323951e-02,  1.42047975e-02,  1.30314409e-02,
        1.19841182e-02,  1.10410618e-02,  1.01851849e-02,  9.40287073e-03,
        8.68312032e-03,  8.01693994e-03,  7.39689281e-03,  6.81676566e-03,
        6.27131712e-03,  5.75608510e-03,  5.26723764e-03,  4.80145592e-03,
        4.35584162e-03,  3.92784264e-03,  3.51519304e-03,  3.11586391e-03,
        2.72802280e-03,  2.34999976e-03,  1.98025867e-03,  1.61737276e-03,
        1.26000321e-03,  9.06880367e-04,  5.56786839e-04,  2.08541974e-04,
       -1.39012666e-04, -4.87027273e-04, -8.36657532e-04, -1.18907851e-03,
       -1.54549906e-03, -1.90717706e-03, -2.27543607e-03, -2.65168367e-03,
       -3.03743224e-03, -3.43432281e-03, -3.84415282e-03, -4.26890889e-03,
       -4.71080591e-03, -