# Imports

In [162]:
from itertools import permutations
from collections import Counter
from copy import deepcopy
import random

# Generating Profiles

In [163]:
# generate all possible partial preference profiles for one agent
def generate_preferences(house_amt):
    """Enumerate every strict ranking of the houses 0 .. house_amt - 1.

    Returns a list of tuples, one per permutation, in the order produced by
    itertools.permutations.
    """
    houses = range(house_amt)
    return [*permutations(houses)]

# generate partial preference profile
def generate_preference(house_amt):
    """Draw one random ranking of the houses 0 .. house_amt - 1.

    Returns a list containing each house index exactly once, in random order.
    """
    houses = range(house_amt)
    return random.sample(houses, k=house_amt)

# generate preference profile
def generate_profile(agent_amt, house_amt):
    """Build a random preference profile.

    Returns a list with agent_amt entries, each an independently drawn
    ranking of house_amt houses obtained from generate_preference.
    """
    return [generate_preference(house_amt) for _ in range(agent_amt)]

# generates a hash from some profile, useful for preventing non-halting best-response learning
def hash_profile(profile):
    """Return a hash identifying the given profile.

    Each agent's ranking is frozen into a tuple so that the whole profile
    becomes hashable; profiles with equal contents hash to the same value.
    """
    frozen = tuple(map(tuple, profile))
    return hash(frozen)

# Probabilistic Serial Rule

In [164]:
# calculate probability matrix using the probabilistic serial rule
def probability_matrix(profile):
    """Compute the assignment shares produced by the probabilistic serial rule.

    Every house starts with one unit of supply. In each round every agent
    "eats" the first house in its ranking that still has supply left, all at
    the same speed, until the first of the targeted houses runs out. Rounds
    repeat until no house has supply left.

    Args:
        profile: one ranking per agent, most preferred house first. Each
            ranking is expected to list every house index
            0 .. house_amt - 1, where house_amt is the length of the first
            agent's ranking.

    Returns:
        A list with one row per agent; entry [agent][house] is the share of
        that house the agent ate. An empty profile yields an empty list.

    Raises:
        ValueError: if supply remains but some agent's ranking contains no
            house that still has supply (i.e. the ranking is incomplete).
    """
    if not profile:
        return []

    # Supply at or below this threshold is treated as fully eaten, which
    # absorbs the rounding error left behind by the repeated subtractions.
    eps = 1e-9

    agent_amt = len(profile)
    house_amt = len(profile[0])
    # How much of each house is left, and how much each agent ate of each.
    remaining = [1] * house_amt
    eaten = [[0] * house_amt for _ in range(agent_amt)]

    # Supply is never negative after the clamp below, so any() is equivalent
    # to "the largest remaining supply is non-zero" and also copes with
    # house_amt == 0.
    while any(remaining):
        # For each agent, the best-ranked house that still has supply.
        targets = []
        for agent, preference in enumerate(profile):
            target = next((h for h in preference if remaining[h] != 0), None)
            if target is None:
                # Raised explicitly so that no StopIteration leaks out of
                # this function and silently ends a caller's iteration.
                raise ValueError(
                    f"agent {agent} ranks no house that still has supply left"
                )
            targets.append(target)

        # Number of agents eating each targeted house; a house eaten by k
        # agents is exhausted after remaining / k units of time.
        eaters = Counter(targets)
        step = min(remaining[h] / count for h, count in eaters.items())

        # Advance time until the first targeted house runs out.
        for agent, house in enumerate(targets):
            remaining[house] -= step
            eaten[agent][house] += step

        remaining = [left if left > eps else 0 for left in remaining]
    return eaten

# calculate expected utility for one player from applying Borda scores of a profile to a probability matrix
# Borda score starts at 0
def expected_utility(matrix, profile, agent):
    """Return one agent's expected Borda utility under a share matrix.

    The house ranked first by the agent is worth house_amt - 1, the next one
    house_amt - 2, and so on down to 0 for the last-ranked house. The result
    is the sum over houses of (agent's share of the house) * (house's value).

    Args:
        matrix: share matrix indexed as matrix[agent][house].
        profile: one ranking per agent; profile[agent] supplies the values.
        agent: index of the agent to evaluate.

    Returns:
        The expected utility of the agent.

    Raises:
        ValueError: if some house index in 0 .. house_amt - 1 does not occur
            in the agent's ranking.
    """
    preference = profile[agent]
    house_amt = len(preference)

    # Position of each house in the ranking, built once instead of scanning
    # the ranking for every house. setdefault keeps the first occurrence,
    # matching what a list.index lookup would report.
    rank = {}
    for position, house in enumerate(preference):
        rank.setdefault(house, position)

    utility = 0
    for house in range(house_amt):
        if house not in rank:
            raise ValueError(f"house {house} is not ranked by agent {agent}")
        house_value = house_amt - 1 - rank[house]
        utility += matrix[agent][house] * house_value
    return utility

# calculate expected utility for all players, see def expected_utility for detail
def expected_utilities(matrix, profile):
    """Return the expected utility of every agent, in agent order.

    Each entry is what expected_utility reports for that agent given the
    same share matrix and profile.
    """
    utilities = []
    for agent, _ in enumerate(profile):
        utilities.append(expected_utility(matrix, profile, agent))
    return utilities

# Best-Response Learning

In [165]:
# Given a certain true profile, and a certain player, find a best response for that player and its utility
def find_best_response(true_profile, profile, agent):
    """Find a report for one agent that maximizes its expected utility.

    Every possible ranking is tried as the agent's report while the other
    agents' reports stay fixed; outcomes are scored with the agent's ranking
    in true_profile. The agent's current report wins ties, and among the
    other rankings the first one reaching the best utility is kept.

    Args:
        true_profile: the agents' true rankings, used to score outcomes.
        profile: the currently reported rankings. profile[agent] may be None
            when the agent has no current report. The argument is not
            modified.
        agent: index of the agent whose report is optimized.

    Returns:
        [best_report, best_utility], with best_report as a list.
    """
    current = profile[agent]
    # Without a current report, take the number of houses from the agent's
    # true ranking instead of calling len() on None.
    reference = current if current is not None else true_profile[agent]
    house_amt = len(reference)

    # Only the agent's own slot is rebound below, so a shallow copy is
    # enough to leave the caller's profile untouched.
    trial = list(profile)

    # Prioritize the current action, if supplied
    best_eu = -1
    best_a = current
    if current is not None:
        matrix = probability_matrix(trial)
        best_eu = expected_utility(matrix, true_profile, agent)

    # Loop over the agent's action space
    for action in generate_preferences(house_amt):
        trial[agent] = action
        matrix = probability_matrix(trial)
        eu = expected_utility(matrix, true_profile, agent)
        if eu > best_eu:
            best_eu = eu
            best_a = action
    return [list(best_a), best_eu]

# Performs best-response learning, returning the PNE and the number of steps taken to reach it, or None and the number of steps taken before a loop was detected
def best_response_learning(true_profile, profile, visited=None, depth = 0):
    """Run best-response dynamics from a starting reported profile.

    At every step the agents are scanned in index order; the first agent
    whose best response strictly improves its expected utility (measured
    against true_profile) switches to that response and the scan restarts.
    The process stops when no agent can improve, or when a reported profile
    is encountered a second time.

    Args:
        true_profile: the agents' true rankings, used to score outcomes.
        profile: the reported rankings to start from. It is not modified;
            a deep copy is made before any report is changed.
        visited: optional list of profile hashes that count as already seen.
            Hashes of the profiles visited here are appended to it.
        depth: step count to start from.

    Returns:
        [profile, steps] with the profile at which no agent can improve, or
        [None, steps] if a previously visited profile was reached.
    """
    if visited is None:
        visited = []
    # Set mirror of `visited` for constant-time loop detection; the list is
    # still appended to so a caller-supplied list keeps being updated.
    seen = set(visited)
    agent_amt = len(profile)

    # Iterating instead of recursing keeps long learning paths from running
    # into the interpreter's recursion limit.
    while True:
        # Loop detected, no PNE found here
        h = hash_profile(profile)
        if h in seen:
            return [None, depth]
        seen.add(h)
        visited.append(h)

        matrix = probability_matrix(profile)
        eu = expected_utilities(matrix, true_profile)
        for agent in range(agent_amt):
            br, br_eu = find_best_response(true_profile, profile, agent)
            if br_eu > eu[agent]:
                # Copy before changing the report so neither the caller's
                # profile nor an earlier step's profile is mutated.
                profile = deepcopy(profile)
                profile[agent] = br
                depth += 1
                break
        else:
            # No agent has a strictly improving deviation.
            return [profile, depth]
        

# Experiments

In [None]:
# TODO