In [1]:
from MADRLIV import plot_singlepref
from MADRLIV import unique_nontrival_winner

from MADRLIV import util_from_ranks
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

In [2]:
# --- Experiment configuration ---
# Agent variants to simulate; both lists are handed to plot_singlepref.
mems = [False]             # types of agent memory to record
agent_types = ['tabular']  # types of agent

# Number of preference profiles to generate and simulate.
N_pref = 2500

# Passed to plot_singlepref as `disablep`; on whenever more than one profile is run.
DP = N_pref > 1

# Dimensions of the preference profile.
C, V = 3, 7  # C must be 3
CV = (C, V)

# Settings forwarded to plot_singlepref.
alpha = 0.1   # agent_alpha
nt = 5        # N_tests
epslen = 500  # eps_len

In [3]:
# Accumulator for the recorded runs; the collection loop fills it using the
# keys of the dict returned by plot_singlepref.
vote_histories = {}

In [4]:
for tests in tqdm(range(N_pref)):
    # Draw one preference profile and simulate the agents on it.
    opt, vr, metric_results, plurality_results = unique_nontrival_winner(
        CV[0], CV[1], 'borda', restrict=True)
    pp = [opt, vr]

    results_mix = plot_singlepref(
        fold=None, mems=mems, agent_types=agent_types, pref_profile=pp,
        agent_alpha=alpha, N_tests=nt, percent=10, metric='borda_score',
        eps_len=epslen, updateinterval=2, disablep=DP)

    # Keep only element 1 of every result entry (the object carrying
    # vote_history / vote_winners).
    for k in list(results_mix):
        results_mix[k] = [entry[1] for entry in results_mix[k]]

    # On the very first profile, create one empty list per result key.
    if not vote_histories:
        vote_histories = {key: [] for key in results_mix}

    # Newest runs are placed in front of the ones already collected.
    for k, agents in results_mix.items():
        latest = [(v.vote_history, v.vote_winners, opt, vr) for v in agents]
        vote_histories[k] = latest + vote_histories[k]

HBox(children=(FloatProgress(value=0.0, max=2500.0), HTML(value='')))




In [2]:
#%store vote_histories
#%store -r vote_histories
#vote_histories

no stored variable or alias vote_histories


In [None]:
#leave alone

In [None]:
# Select the recorded runs stored under one result key
# (presumably (memory setting, agent type) -- confirm against plot_singlepref).
k = (False, 'tabular')
hist = vote_histories[k]

In [None]:
#“A-ratio,” which is the fraction of instances where action A was played out of all instances where it was available.
#WHERE IT WAS AVAILABLE
#for the 3 candidate situation:

#TRT - Truthful action - voting for candidate with utility 1.00
def TRT(utility):
    """Truthful-action indicator: 1.0 if the vote has utility 1.00, else 0.0."""
    return 1.0 if utility == 1.00 else 0.0

#CMP - Compromise action - voting for utility 0.5 when utility 1.00 (most preferred option) is ranked last in poll (is losing consistently over a large enough past interval??)


#LB - Leader bias - voting for the leader of the poll = (consistent winner over a large enough past interval) that is either 0.5 or 0.25 utility
def LB(utility,poll):
    """Leader-bias indicator.

    True when the vote is not for the top-utility option (utility below 1.00)
    and the option with that utility has the highest count in `poll`
    (ties with the maximum count as leading).
    """
    if not utility < 1.00:
        return False
    leading_score = max(poll.values())
    return bool(poll[utility] == leading_score)

#DOM - Dominated actions - another action yields a higher expected utility "under very weak assumptions" = voting for the worst possible candidate (i.e. the last ranked option)??
#need to investigate - does 'no rational motivation' ONLY mean voting for the last-ranked option? Or could voting for the 2nd option be DOMinated (probably not under 'very weak assumptions')

def DOM(utility,poll):
    """Dominated-action indicator.

    A vote counts as dominated when it goes to the worst option (utility 0.25),
    or to the second-best option (utility 0.5) while the best option's poll
    count is at least as high as the second-best option's.
    """
    # Voting for the worst possible option is always dominated.
    if utility == 0.25:
        return True
    # Voting for the 2nd-best option is dominated unless it is strictly ahead
    # of the most-preferred option in the poll.
    if utility == 0.5:
        second_best_score = poll[0.5]
        best_score = poll[1.00]
        return bool(best_score >= second_best_score)
    return False

def CMP(utility,poll):
    """Compromise-action indicator.

    True when the vote goes to the second-best option (utility 0.5) and that
    option's poll count is strictly higher than the best option's.
    """
    if utility != 0.5:
        return False
    second_best_score = poll[0.5]
    best_score = poll[1.00]
    return bool(second_best_score > best_score)


#note that DOM and LB can go together, as can CMP and Leader bias

#so we have DLB (dominated leader bias) and CLB (compromise leader bias) for if you're voting for the poll leader (consistent leader) as a dominated or a compromise action

#NEED - history of vote winners to estimate LB. 'poll' has no direct equivalent

#In the second game a voter directly
#observes the current votes of her peers, but does not know
#how they will vote eventually at the final round (or when
#will the final round arrive).

#suggests the A ratios are based on past votes
"""
Since only one voter may change her vote at each step
a_t and a_{t+1} differ by at most one entry
"""

"""
we computed and analyzed A-ratios in the same way as
we did for one-shot voting, except that instead of a poll we used the current voting profile
At. More specifically, we counted each step by player i as a separate decision, classifying it
into one of six scenarios as in Table 3 and checking the action classes from A to which it applies

"""

#we also want to determine move types

"""
Following [18], we denote a compromise move as a change
in vote to a less-preferred candidate, and an opportunity
move as a change in vote to a more-preferred candidate. We
denote a stay move as no change in voting compared to the
previous round.
"""
def determine_move_type_from_utility_of_vote_assigned(utility,utility_last,move):
    """Report whether this round's vote is a move of the requested kind.

    The move is classified by comparing the utility of the option voted for in
    this round with the utility of the option voted for in the previous round:
    equal -> "stay", higher -> "opportunity", lower -> "compromise".
    A first move (``utility_last`` is None) is counted as "stay".

    Parameters
    ----------
    utility : float
        Utility of the option voted for in the current round.
    utility_last : float or None
        Utility of the option voted for in the previous round; None on the
        first round.
    move : str
        One of "opportunity", "stay" or "compromise".

    Returns
    -------
    bool
        True when the classified move equals ``move``.

    Raises
    ------
    InterruptedError
        If ``utility`` is None, if the two utilities cannot be ordered
        (e.g. NaN), or if ``move`` is not one of the recognised names.
    """
    # The failure check has to come first: the previous ordering tested
    # `utility_last == None` before it, which made the failure branch unreachable.
    if utility is None:
        raise InterruptedError("utility of the current vote is None")

    if utility_last is None or utility == utility_last:
        # first move, or stay move
        out = 0
    elif utility > utility_last:
        # opportunity move
        out = 1
    elif utility < utility_last:
        # compromise move
        out = -1
    else:
        # Neither equal, greater nor smaller (e.g. NaN): refuse to classify
        # instead of failing later on an unbound `out`.
        raise InterruptedError(
            "utilities cannot be ordered: {!r} vs {!r}".format(utility, utility_last))

    if move == "opportunity":
        return out == 1
    elif move == "stay":
        return out == 0
    elif move == "compromise":
        return out == -1
    else:
        print(move)
        print("Invalid move/Atype")
        raise InterruptedError("Invalid move/Atype: {!r}".format(move))
    

def determine_A_type_from_utility_of_vote_assigned(utility,utility_last,winner_history, util_of_vote,A_type,cutoff=10):
    """Score one vote against one action type, using recent winners as the poll.

    The poll counts, for each utility level (1.00, 0.5, 0.25), how many of the
    last `cutoff` winners had that utility for this agent (looked up through
    `util_of_vote`). The vote, identified by its `utility`, is then tested
    with the classifier named by `A_type`:
    "TRT", "DOM", "CMP", "LB", "DLB" (DOM and LB) or "CLB" (LB and CMP).
    Any other `A_type` is passed on as a move name to
    determine_move_type_from_utility_of_vote_assigned.

    Returns False while `winner_history` holds `cutoff` entries or fewer;
    otherwise the classifier result as a float (or the move check's bool).
    """
    # Not enough past winners to form a poll yet.
    if not len(winner_history) > cutoff:
        return False

    # Utility, to this agent, of each winner in the most recent `cutoff` rounds.
    recent_rewards = [util_of_vote[w] for w in winner_history][int(-1*cutoff):]

    poll = dict.fromkeys([1.00, 0.5, 0.25], 0)
    for reward in recent_rewards:
        poll[reward] = poll.get(reward, 0) + 1

    # Lazily evaluated so that only the requested classifier runs.
    classifiers = {
        "TRT": lambda: TRT(utility),
        "DOM": lambda: DOM(utility, poll),
        "CMP": lambda: CMP(utility, poll),
        "LB": lambda: LB(utility, poll),
        "DLB": lambda: DOM(utility, poll) and LB(utility, poll),
        "CLB": lambda: LB(utility, poll) and CMP(utility, poll),
    }
    if A_type in classifiers:
        return float(classifiers[A_type]())

    # Anything else is treated as a move name (opportunity / stay / compromise).
    return determine_move_type_from_utility_of_vote_assigned(utility, utility_last, A_type)

def convert_to_numbers(winlist,options):
    """Translate each winner in `winlist` into its position within `options`.

    Parameters
    ----------
    winlist : iterable
        Winning options, one per round.
    options : sequence
        Candidate options of the profile the winners belong to; must provide
        ``.index``.

    Returns
    -------
    numpy.ndarray
        Index into `options` of every entry of `winlist`, in order.

    Raises
    ------
    ValueError
        If a winner does not occur in `options`.
    """
    # Look winners up in the profile that was passed in, not in the
    # notebook-global `opt`, which only holds the last generated profile.
    return np.array([options.index(winner) for winner in winlist])

In [None]:
AP = 50
def moving_average(a, n=1):
    """Return the means of every length-`n` window of consecutive entries of `a`.

    The output has ``len(a) - n + 1`` entries (none when `n` exceeds the
    length of `a`); entry ``j`` is the mean of ``a[j:j+n]``.
    """
    # Cumulative sums with a leading zero, so each window sum is a single difference.
    running = np.concatenate(([0.0], np.cumsum(a, dtype=float)))
    return (running[n:] - running[:-n]) / n

In [None]:
# Runs fed to the analysis cells; currently all of `hist`.
sample = hist

In [None]:
def analyse_action_type(history,A_type,poll_cutoff=10):
    """Average, per agent and per round, the value of one action type over recorded runs.

    Parameters
    ----------
    history : sequence
        Recorded runs. Each entry is indexed as
        ``(vote_history, vote_winners, options, ranks)``: the votes of every
        agent for each round, the winner of each round, the candidate options,
        and one preference ranking per agent.
    A_type : str
        "reward" records the utility each agent receives from the round's
        winner. Any other value is forwarded to
        determine_A_type_from_utility_of_vote_assigned.
    poll_cutoff : int
        Forwarded as `cutoff` to determine_A_type_from_utility_of_vote_assigned.

    Returns
    -------
    list of numpy.ndarray
        One array of length ``epslen + 1`` per agent; entry ``t`` is the value
        for round ``t`` summed over all runs and divided by ``len(history)``.

    Raises
    ------
    ValueError
        If `history` is empty (there would be nothing to average over).
    """
    if len(history) == 0:
        raise ValueError("history is empty: nothing to analyse")

    # Size the accumulators from the recorded data rather than from the
    # notebook-global `vr`, which is only defined after the generation cell has
    # run in this kernel and only describes the last generated profile.
    n_agents = len(history[0][3])
    final_results = [np.zeros(epslen+1) for _ in range(n_agents)]

    for history_comps in tqdm(history,desc=A_type):
        run,options_round,vr_round = history_comps[0],history_comps[2],history_comps[3]
        entire_winner_history = convert_to_numbers(history_comps[1],options=options_round)

        # Each agent's utility table depends only on the profile, so build it
        # once per run instead of once per round.
        # Assumes util_from_ranks is a pure function of its arguments -- TODO confirm.
        agent_utils = [
            util_from_ranks(rank_list=agents_prefs,options=options_round,use_exp=True)
            for agents_prefs in vr_round
        ]

        # Utility of the option each agent voted for, per round of this run.
        agent_vote_utilities = [np.zeros(epslen+1) for _ in range(len(vr_round))]

        for eps_number,step in enumerate(run):
            # NOTE(review): the slice ends at eps_number-1, so the winner of the
            # round immediately before this one is not part of the poll --
            # confirm that this lag is intended.
            if eps_number>1:
                winner_history = entire_winner_history[:eps_number-1]
            else:
                winner_history = []

            for agent_num,vote in enumerate(step):
                util_of_vote = agent_utils[agent_num]
                utility = util_of_vote[vote]

                # Remember this round's utility and fetch the previous round's.
                past_utilities = agent_vote_utilities[agent_num]
                utility_last = None if eps_number == 0 else past_utilities[eps_number-1]
                past_utilities[eps_number] = utility

                if A_type == "reward":
                    # Utility this agent gets from the option that won this round.
                    A = util_of_vote[entire_winner_history[eps_number]]
                else:
                    A = determine_A_type_from_utility_of_vote_assigned(utility,utility_last,winner_history, util_of_vote,A_type,poll_cutoff)

                final_results[agent_num][eps_number] += A

    return [a/len(history) for a in final_results]

In [None]:
"""
plt.figure(figsize=(15,15))
opp = analyse_action_type(sample,'opportunity')
stay = analyse_action_type(sample,'stay')
comp = analyse_action_type(sample,'compromise')

def ratio_for_agent(opp,stay,comp,number,start,end):
    opp,stay,comp = opp[number],stay[number],comp[number]
    start = int(start*len(opp))
    end = int(end*len(opp)) - 1
    opp = round(np.mean(opp[start:end]),4)
    stay = round(np.mean(stay[start:end]),4)
    comp = round(np.mean(comp[start:end]),4)

    print("{} - Opp: {}, stay: {}, comp: {}...  O/C: {}".format(number,opp,stay,comp,round(opp/comp,2)))

ags = len(opp)

s = 0.9
e = 1.0
print(s,e)

for agent in range(ags):
    ratio_for_agent(opp=opp,stay=stay,comp=comp,number=agent,start=s,end=e)

s = 0.0
e = 0.1

print(s,e)

for agent in range(ags):
    ratio_for_agent(opp=opp,stay=stay,comp=comp,number=agent,start=s,end=e)

#ARE THESE RATES JUST DETERMINED BY EPS??
"""

In [None]:
# Per-agent means of every action type over an early and a late window of rounds.
A_starts, A_ends = {}, {}
testfrac = 0.1  # window length as a fraction of the number of recorded rounds
A_list = ['reward','TRT','LB','DLB','CMP','CLB','DOM']
POLC = 20       # poll cutoff; also the first round of the early window

for A in A_list:
    A_result = analyse_action_type(sample, A, poll_cutoff=POLC)
    window = int(len(A_result[0]) * testfrac)
    interval = window + POLC  # exclusive end of the early window
    minterval = -window       # start of the late window, counted from the end
    A_starts[A] = tuple(np.mean(series[POLC:interval]) for series in A_result)
    A_ends[A] = tuple(np.mean(series[minterval:]) for series in A_result)


In [None]:
# Mean over agents of every action type, for the late (AE) and early (AS) windows.
# Comprehensions keep the notebook-level `k` (the vote_histories key) untouched.
AE = [np.mean(A_ends[a_type]) for a_type in A_list]
AS = [np.mean(A_starts[a_type]) for a_type in A_list]

x = np.arange(len(AS))

plt.figure(figsize=(10,10))
# The early window is the slice [POLC:interval]; `interval` already contains
# the POLC offset, so the label ends at `interval`, not `POLC+interval`.
plt.bar(x,AS,tick_label=A_list,width=0.2,label='RL/simultaneous, {}-{}'.format(POLC,interval))
plt.bar(x+0.2,AE,tick_label=A_list,width=0.2,label='RL/simultaneous, {}-{}'.format(minterval+epslen,epslen))
# Reference values for the human experiment, in the order of A_list.
paper_vals = [0,0.7,0.48,0.12,0.48,0.72,0.02]
plt.bar(x+0.4,paper_vals,width=0.2,label='Humans/sequential, 0-5/10')
plt.legend()
plt.show()

print("RL Agents")
print("Modelling current seq as poll with cutoff = {}".format(POLC))
print("candidates: {}\nvoters: {}\nnumber of iterations: {}\nnumber of preference profiles: {}\nnumber of repeats per profile: {}\ntotal number of runs: {}\ntotal rounds: {}\ntotal data points (runs * voters * eps length): {}".format(\
    CV[0],CV[1],interval,N_pref,nt,int(N_pref*nt),int(N_pref*nt*interval),int(N_pref*nt*V*interval)))

print("\n\nHumans - Meir 2020")
print("candidates: {}\nvoters: {}\nnumber of iterations: {}\nnumber of preference profiles: {}\nnumber of repeats per profile: {}".format(\
    3,7,"5-10",6,"?"))

#began with agents voting truthfully
#voting was sequential - 'Actions_now', not 'poll' estimate
#no tiebreak
#linear rewards
#terminated on convergence

# Subjects could play up to 6 games in a sequence, each time with a different preference profile
#2 with no cw, 2 with cw=pw, 2 with cw not plurality

In [None]:
# Show the poll cutoff and the end index of the early window, then display the
# per-agent early-window means for every action type.
print(POLC,interval)
A_starts

In [None]:
# Persist the recorded histories with IPython's %store magic so a later
# session can reload them with `%store -r vote_histories`.
%store vote_histories