In [23]:
import time
import random
import numpy as np
import pandas as pd
from scipy import stats
from datetime import datetime
import matplotlib.pyplot as plt
from collections import namedtuple

In [24]:
num_signals = [2,5]
max_value = 100
min_value = 10
step_size = 5
max_unc = 50
num_uncs = 5

In [25]:
State = namedtuple('State', ['num_signals', 'unc', 'mid', 'clock_price'])
Mid_unc_prob = namedtuple('Mid_unc_prob', ['num_signals', 'unc', 'mid', 'true_value'])
Opp_in = namedtuple('Opp_in', ['num_signals', 'unc', 'mid', 'clock_price', 'true_value'])
True_prob = namedtuple('True_prob', ['num_signals', 'unc', 'mid', 'clock_price', 'true_value'])

In [26]:
#true value is uniformly drawn from min_value and max_value
def prob_true(true_value):
  t_range = range(min_value, max_value + step_size, step_size)
  return 1/len(t_range) if true_value in t_range else 0

In [27]:
def possible_midpoint(true_value, unc):
    #Given an uncertainty, we can find a set of possible midpoints as the true value is in (min_value, max_value)
    possible_midpoint = [i for i in range(true_value - int(unc * 0.5), true_value + int(unc * 0.5) + step_size, step_size)]
    return possible_midpoint

In [28]:
min_mid = possible_midpoint(min_value, max_unc)[0]
max_mid = possible_midpoint(max_value, max_unc)[-1]

In [29]:
def prob_midpoint(true_value, mid, unc):
    #Return P(mid|num_signals, uncertainty, true_value)
    #Midpoint is uniformly drawn from the possible midpoint given an uncertainty
    if mid in possible_midpoint(true_value, unc):
        return 1/len(possible_midpoint(true_value, unc))
    else:
        return 0

In [30]:
def prob_unc(unc, num_signals):
    #Return P(unc|true_value, num_signals)
    #Uncertainty follows a Beta distribution
    uncs = np.linspace(min_value, max_unc + 2 * step_size, num_uncs + 1)
    unc_dist = stats.beta(num_signals, 2)
    unc_likes = unc_dist.pdf(1 - uncs/(max_unc + 2 * step_size))/(max_unc + 2 * step_size)
    unc_probs = unc_likes/np.sum(unc_likes)
    uncs_index = uncs.tolist().index(unc)
    return unc_probs[uncs_index]

In [31]:
def prob_mid_unc(true_value, mid, unc, num_signals):
    #P(mid, unc|num_signals, uncertainty, true_value)
    return prob_midpoint(true_value, mid, unc) * prob_unc(unc, num_signals)

In [32]:
mid_unc = []
for unc in range(min_value, max_unc + 2 * step_size, 2 * step_size):
    for mid in range(min_mid, max_mid + step_size, step_size):
      mid_unc.append([mid, unc])

In [33]:
#Return P(mid, unc|true) - given a true value, what is the probability of a pair of mid and unc?

# mid_unc_prob_table = {num_signals[0]: {}, num_signals[1]: {}}
# for signal in num_signals:
#     for unc in range(min_value, max_unc + 2 * step_size, 2 * step_size):
#         for mid in range(min_mid, max_mid + step_size, step_size):
#             for true in range(min_value, max_value + step_size, step_size):
#               mid_unc_prob_table[signal][Mid_unc_prob(signal, unc, mid, true)] = prob_mid_unc(true_value=true,mid=mid, unc=unc, num_signals=signal)

In [34]:
#Return P(mid, unc|true) - given a true value, what is the probability of a pair of mid and unc?

mid_unc_prob_table = {num_signals[0]: {}, num_signals[1]: {}}
for signal in num_signals:
  for true in range(min_value, max_value + step_size, step_size):
    for pair in mid_unc:
      mid_unc_prob_table[signal][Mid_unc_prob(signal, pair[1], pair[0], true)] = prob_mid_unc(true_value=true, mid=pair[0], unc=pair[1], num_signals=signal)

In [35]:
state_table = {num_signals[0]: [], num_signals[1]: []}
for signal in num_signals:
    for unc in range(min_value, max_unc + 2 * step_size, 2 * step_size):
        for mid in range(min_mid, max_mid + step_size, step_size):
            for price in range(min_value - step_size, max_value + step_size, step_size):
                state_table[signal].append(State(signal, unc, mid, price))

In [36]:
#value iteration - generate random value table and policy table at the beginning

init_value_table = {num_signals[0]: {}, num_signals[1]: {}}
for signal in num_signals:
    for unc in range(min_value, max_unc + 2 * step_size, 2 * step_size):
        for mid in range(min_mid, max_mid + step_size, step_size):
            for price in range(min_value - step_size, max_value + step_size, step_size):
                init_value_table[signal][State(signal, unc, mid, price)] = max(mid + 0.5 * unc - price,0) #initialy, a player drops when clock price approaches hid midpoint. Thus, his value is zero for all prices over his midpoint 

In [37]:
init_policy_table = {num_signals[0]: {}, num_signals[1]: {}}
for signal in num_signals:
    for unc in range(min_value, max_unc + 2 * step_size, 2 * step_size):
        for mid in range(min_mid, max_mid + step_size, step_size):
            for price in range(min_value - step_size, max_value + step_size, step_size):
                init_policy_table[signal][State(signal, unc, mid, price)] = 1 if (mid + int(0.5 * unc)) >= price else 0

In [38]:
def transition_matrix(state, policy_table, decision_table):
    #Return a matrix: [[In-In, Out-In], [In-Out, Out-Out]]
    #It's a subjective transition matrix. Given a state, I know my decision and need to guess his decision
    p1 = policy_table[state.num_signals][state]
    p2 = decision_table[state.num_signals][state]

    decision1 = np.array([p1, 1 - p1])
    decision2 = np.array([p2, 1 - p2]).reshape(2,1)
    trans_matrix = decision1 * decision2
    return trans_matrix

In [39]:
def expected_value(state, true_value_dist_table):
    total = 0
    for true in range(state.mid - int(0.5 * state.unc), state.mid + int(0.5 * state.unc) + step_size, step_size):
      if true in range(min_value, max_value + step_size, step_size):
        total += (true - state.clock_price) * true_value_dist_table[state.num_signals][True_prob(state.num_signals, state.unc, state.mid, state.clock_price, true)]
    return total

In [40]:
#Return P(Opp_in|true) - Given a true value, I need to guess the probability that he will stay in at a state.
#{'2': 'What probability I guess about his action based on my state and my range of true value'}
#I know the true value, his #signal and current price. I must scan all possible pairs of mid_unc
#Given the true value, I get probabilities of different pairs of mid_unc
#Given a pair of mid_unc and price, I look at his policy table
#policy_table = init_policy_table

def opp_in(state_table, policy_table, mid_unc):
  prop_opp_in_table = {num_signals[0]: {}, num_signals[1]: {}}
  for signal in num_signals:
    comp_signal = num_signals[~num_signals.index(signal)]
    for state in state_table[signal]:
      for true in range(state.mid - int(0.5 * state.unc), state.mid + int(0.5 * state.unc) + step_size, step_size):
        if true in range(min_value, max_value + step_size, step_size):
          p = 0
          for pair in mid_unc:
            mid_unc_prob = mid_unc_prob_table[comp_signal][Mid_unc_prob(num_signals=comp_signal, unc=pair[1], mid=pair[0], true_value=true)]
            indicator = policy_table[comp_signal][State(num_signals=comp_signal,unc=pair[1],mid=pair[0],clock_price=state.clock_price)]
            p += mid_unc_prob * indicator
          prop_opp_in_table[signal][Opp_in(num_signals=signal,unc=state.unc,mid=state.mid,clock_price=state.clock_price,true_value=true)] = p
  return prop_opp_in_table

In [41]:
#Return P(true|state)

def true_value_dist(state_table, prop_opp_in_table):
  true_value_dist_table = {num_signals[0]: {}, num_signals[1]: {}}
  for signal in num_signals:
    comp_signal = num_signals[~num_signals.index(signal)]
    for state in state_table[signal]:
      t = {}
      for true in range(state.mid - int(0.5 * state.unc), state.mid + int(0.5 * state.unc) + step_size, step_size):
        if true in range(min_value, max_value + step_size, step_size):
          if state.clock_price <= min_value:
            t[True_prob(signal,state.unc,state.mid,state.clock_price,true)] = mid_unc_prob_table[signal][Mid_unc_prob(signal,state.unc,state.mid,true)] * prob_true(true) * prop_opp_in_table[comp_signal][Opp_in(comp_signal,state.unc,state.mid,state.clock_price,true)]
          else:
            t[True_prob(signal,state.unc,state.mid,state.clock_price,true)] = mid_unc_prob_table[signal][Mid_unc_prob(signal,state.unc,state.mid,true)] * prob_true(true) * prop_opp_in_table[comp_signal][Opp_in(comp_signal,state.unc,state.mid,state.clock_price-step_size,true)]
      if not all(x == 0 for x in list(t.values())):
        t = {k: v/total for total in (sum(t.values()),) for k, v in t.items()}
      true_value_dist_table[signal].update(t)
  return true_value_dist_table

In [42]:
#Return P(decision|state) - decisions of competitors
#{'2': what a player with 2 signals think about the decision of the other player with 5 signals at a state}
#given a state, I know my decision based on my policy table, but I need to guess his decision
#I scan all possible true values with my true value distribution. Given these true values, I compute his probability of staying in given a true value and the state's price

def decision(state_table, prop_opp_in_table, true_value_dist_table):
  decision_table = {num_signals[0]: {}, num_signals[1]: {}}
  for signal in num_signals:
    comp_signal = num_signals[~num_signals.index(signal)]
    for state in state_table[signal]:
      d = 0
      for true in range(state.mid - int(0.5 * state.unc), state.mid + int(0.5 * state.unc) + step_size, step_size):
        if true in range(min_value, max_value + step_size, step_size):
          prob_dec = prop_opp_in_table[comp_signal][Opp_in(comp_signal,state.unc,state.mid,state.clock_price,true)]
          prob_tr = true_value_dist_table[signal][True_prob(state.num_signals,state.unc,state.mid,state.clock_price,true)]
          d += prob_dec * prob_tr
      decision_table[signal][state] = d
  return decision_table

In [43]:
start_time = datetime.now()
policy_table = init_policy_table
value_table = init_value_table

done = False
table = {}
# while not done:

for _ in range(10):  
  prop_opp_in_table = opp_in(state_table, init_policy_table, mid_unc)
  true_value_dist_table = true_value_dist(state_table, prop_opp_in_table)
  decision_table = decision(state_table,prop_opp_in_table,true_value_dist_table)

  for signal in num_signals:
    comp_signals = num_signals[~num_signals.index(signal)]
    delta = [0 for _ in range(len(state_table[signal]))]
    for i in range(len(state_table[signal])-1,-1,-1):
      state = state_table[signal][i]
      init_value = value_table[signal][state]

      exp_value = expected_value(state, true_value_dist_table)
      trans_matrix = transition_matrix(state,policy_table,decision_table)

      if state.clock_price < max_value:
        if (state.mid + 0.5 * state.unc) >= state.clock_price + step_size:
          val_out = trans_matrix[1][1] * exp_value * 0.5
          val_in = trans_matrix[1][0] * exp_value + trans_matrix[0][0] * value_table[signal][state_table[signal][i+1]]
          value_table[signal][state] = max(val_out, val_in)
          policy_table[signal][state] = 1 if val_in > val_out else 0
        else:
          val_out = val_in = exp_value * 0.5 * (decision_table[signal][state])
          value_table[signal][state] = val_in
          policy_table[signal][state] = 0
      else:
        value_table[signal][state] = 0
        policy_table[signal][state] = 0

      delta[i] = max(delta[i], abs(init_value - value_table[signal][state]))

    table[signal] = delta


  # theta = 0.5
  # if (all(x < theta for x in table[2])) and (all(y < theta for y in table[5])):
  #   done = True
  # else:
  #   done = False

end_time = datetime.now()
print(f'The execution time is {end_time - start_time}, from {start_time} to {end_time} ')

The execution time is 0:01:25.190409, from 2021-08-24 03:09:24.333496 to 2021-08-24 03:10:49.523905 


In [44]:
#Outcomes after 10 periods

unc = 20; mid = 60
for price in range(min_value - step_size, max_value + step_size, step_size):
  print(f'{State(2,unc,mid,price)} = {value_table[2][State(2,unc,mid,price)]}')

State(num_signals=2, unc=20, mid=60, clock_price=5) = 31.755564577059374
State(num_signals=2, unc=20, mid=60, clock_price=10) = 27.88149200656928
State(num_signals=2, unc=20, mid=60, clock_price=15) = 24.19507400766417
State(num_signals=2, unc=20, mid=60, clock_price=20) = 20.727586342274876
State(num_signals=2, unc=20, mid=60, clock_price=25) = 17.515517399320697
State(num_signals=2, unc=20, mid=60, clock_price=30) = 14.601436965874154
State(num_signals=2, unc=20, mid=60, clock_price=35) = 12.035009793519851
State(num_signals=2, unc=20, mid=60, clock_price=40) = 9.737483500219575
State(num_signals=2, unc=20, mid=60, clock_price=45) = 7.461380186150077
State(num_signals=2, unc=20, mid=60, clock_price=50) = 5.162409586819804
State(num_signals=2, unc=20, mid=60, clock_price=55) = 2.876742307182276
State(num_signals=2, unc=20, mid=60, clock_price=60) = 0.8666228187307912
State(num_signals=2, unc=20, mid=60, clock_price=65) = -0.0
State(num_signals=2, unc=20, mid=60, clock_price=70) = -1.1