In [16]:
from cProfile import label
import numpy as np
import matplotlib.pyplot as plt
import nashpy as nash
import itertools
import cvxpy as cp
import pickle

from compute_solns import *

# Set up game

game_types = ["RHRHU", "HRHU", "RHU", "HU", "U_pos", "U_neg", "RU", "HRU", "RHRU", "NULL"]
abs_util = [0.1,0.2,0.5]
state_names = []
for g in game_types:
    for u in abs_util:
        state_names.append((g,u))

round = 2
risk_levels = np.round(np.linspace(0,5,51),round)
states = []
for name in state_names:
    for risk in risk_levels:
        states.append((name, risk))

human_actions = []
for r in abs_util:
    for a in [1,2]:
        human_actions.append((a,r))

robot_actions = [1,2]

gamma = 1
T = 10

def project_risk(r):
    # r = np.round(ra,round)
    # print(ra,r,end=",")
    return np.max([l for l in risk_levels if l <= r ]) # TODO : Change projection to take worst case

def tr(state, uH, uR, dr, check=False, initial="NULL"):
    (g,u) = state[0]
    g_next = g
    u_next = u
    if g == "RHRHU":
        if uR == 1:
            g_next = "HRHU"
        else:
            g_next = "NULL"
    elif g == "HRHU":
        if uH[0] == 1:
            g_next = "RHU"
        else:
            g_next = "NULL"
        u_next = uH[1]
    elif g == "RHU":
        if uR == 1:
            g_next = "HU"
        else:
            g_next = "NULL"
    elif g == "HU":
        if uH[0] == 1:
            g_next = "U_pos"
        else:
            g_next = "U_neg"
    elif g == "U_pos":
        g_next = "NULL"
    elif g == "NULL":
        g_next = initial
    elif g == "RHRU":
        if uR == 1:
            g_next = "HRU"
        else:
            g_next = "NULL"
    elif g == "HRU":
        if uH[0] == 1:
            g_next = "RU"
        else:
            g_next = "NULL"
        u_next = uH[1]
    elif g == "RU":
        if uR == 1:
            g_next = "U_pos"
        else:
            g_next = "U_neg"
    elif g == "U_neg":
        g_next = "NULL"

    if check and state[1] + dr < 0:
        print("RIP: Risking more than you've won")

    r = project_risk(state[1]+dr) # risk_levels[np.argmin([np.abs(state[1]+dr-l) for l in risk_levels])]
    return ((g_next,u_next),r)
        
def reward(state, uH, uR):
    (g,u) = state[0]
    if g == "U_pos":
        social = u
    elif g == "U_neg":
        social = -u
    else:
        social = 0
    return (social,social,social)

initial = "RHU"
game = {
    "states":states, "risk_levels":risk_levels,
    "human_actions":human_actions, "robot_actions":robot_actions,
    "transition":(lambda state, uH, uR, dr : tr(state, uH, uR, dr, check=False, initial=initial)), 
    "reward":reward,
    "gamma":gamma, "T":T,
    "misc": {"round":round}
}

In [13]:
v_funs_adv = compute_baseline(game)

9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 

In [14]:
states,human_actions,robot_actions,transition,reward,gamma,T = game["states"],game["human_actions"],game["robot_actions"],game["transition"],game["reward"],game["gamma"],game["T"]

# Initialize value functions and optimal control storage dictionaries
v_funs = {i:{s:None for s in states} for i in range(T+1)}
v_funs_adv_p = {i:{s:None for s in states} for i in range(T+1)}
uHs = {i:{s:None for s in states} for i in range(T)}
uRs_coop = {i:{s:None for s in states} for i in range(T)}
uRs_adv = {i:{s:None for s in states} for i in range(T)}

for s in states:
    v_funs[T][s] = 0
    v_funs_adv_p[T][s] = 0

In [17]:
# Compute ex post solution
t = T-1
while t >= 0:
    for s in states:
        m,n = len(human_actions), len(robot_actions)
        A_S = np.zeros(shape=(m,n))
        A_H = np.zeros(shape=(m,n))

        p_H = cp.Variable((m,1))
        constraints = []

        for i in range(m):
            for j in range(n):
                uH = human_actions[i]
                uR = robot_actions[j]

                dr = v_funs_adv[t+1][transition(s,uH,uR,0)] - v_funs_adv[t][s]
                s_next = transition(s,uH,uR,dr)
                q_coop = reward(s, uH, uR)[0] + gamma*v_funs[t+1][s_next]
                A_S[i,j] = q_coop

                q_adv = reward(s, uH, uR)[1] + gamma*v_funs_adv_p[t+1][s_next]
                A_H[i,j] = q_adv

        constraints.extend([p_H>=0, cp.sum(p_H)==1])
        for j in range(n):
            constraints.append(p_H.T @ A_H[:,j] >= v_funs_adv[t][s] - s[1])

        vals = []
        p_Hs = []
        for j in range(n):
            objective = cp.Maximize(p_H.T @ A_S[:,j])
            prob = cp.Problem(objective, constraints)
            val_j = prob.solve()
            vals.append(val_j)
            p_Hs.append(p_H.value)

        opt_val = np.max(vals)
        opt_p_H = p_Hs[np.argmax(vals)]
        v_funs[t][s] = opt_val
        v_funs_adv_p[t][s] = np.min(opt_p_H.T @ A_H)
        
        uR_adv = np.zeros((n,1))
        uR_coop = np.zeros((n,1))

        uR_adv[np.argmin(opt_p_H.T @ A_H),0] = 1
        uR_coop[np.argmax(vals),0] = 1

        uRs_adv[t][s] = uR_adv
        uRs_coop[t][s] = uR_coop
        uHs[t][s] = opt_p_H
    t = t - 1

0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.0 0.0,-1.8349909302677534e-11 -0.0,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.1 0.1,0.0999999999816501 0.1,0.2 0.2,0.1999999999816501 0.2,0.2 0.2,0.1999999999816501 0.2,0.2 0.2,0.1999999999816501 0.2,0.2 0.2,0.1999999999816501 0.2,0.2 0.2,0.1999999999816501 0.2,0.2 0.2,0.1999999999816501 0.

ValueError: zero-size array to reduction operation maximum which has no identity