In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import math

# Part 1: Outcomes from No-regret Learning in Games

## Set Up and Algorithm Definition

In [49]:
class EWAlg:
    def __init__(self, epsilon, k, h, myBids, myValue):
        self.weights = np.ones(k)
        self.payoffs = np.zeros(k)
        self.h = h
        self.k = k
        self.epsilon = epsilon
        self.sumWeights = np.sum(self.weights)
        self.probs = self.weights/self.sumWeights
        self.myValue = myValue
        self.myBids = myBids
        self.myTotalValue = 0

    def getBids(self):
        return self.myBids

    def getValue(self):
        return self.myValue

    def getTotalValue(self):
        return self.myTotalValue

    def getAction(self):
        j = np.random.choice(self.k, 1, p = self.probs)
        return j.item()
    
    def update(self, payoffs):
        for j in range(len(payoffs)):
            curPayoff = payoffs[j]
            self.payoffs[j] = self.payoffs[j] + curPayoff
            newWeight = (1+self.epsilon)**(self.payoffs[j]/self.h)
            self.weights[j] = newWeight
        self.sumWeights = np.sum(self.weights)
        if(self.epsilon > 1):
            self.weights = self.weights/self.sumWeights
            self.sumWeights = np.sum(self.weights)
        self.probs = self.weights/self.sumWeights
        return

    def generatePayoffs(self, winningBid, meWin):
        if meWin:
            self.myTotalValue += self.myValue - winningBid
        payoffs = np.zeros(len(self.myBids))
        for count, bid in enumerate(self.myBids):
            if bid >= winningBid:
                payoffs[count] = bid - self.myValue
            else:
                payoffs[count] = 0
        return payoffs
    
    def reset(self):
        self.weights = np.ones(self.k)
        self.payoffs = np.zeros(self.k)
        self.sumWeights = np.sum(self.weights)
        self.probs = self.weights/self.sumWeights
        self.myTotalValue = 0


In [50]:
class FirstPriceReserve:
    # bidders have to be ordered the same way every time
    # reserve given as a raw value
    def __init__(self, numBidders, reserve = 0):
        self.numBidders = numBidders
        self.totalPayoffs = np.zeros(self.numBidders)
        self.reserve = reserve
        
    def generate(self, bids):
        winningBid = 0
        winner = -1
        tiedBidders = []
        tied = False
        # check bids of all bidders
        for count, bid in enumerate(bids):
            if bid > winningBid:
                winningBid = bid
                winner = count
            elif (bid == winningBid) and (winner != -1):
                tiedBidders.append(count)
                tied = True
        if tied : winner = random.choice(tiedBidders)
        # see if winning bid is greater than reserve price
        if (winningBid < self.reserve): 
            winningBid = self.reserve
            winner = -1
        return winningBid, winner

In [72]:
numPlayers = 2
epsilons = [2,5]
h = 5
k = 100
stepSize = 1/k * (np.log(h))
reserveFrac = 0 #between 0 and 1
reserveValue = reserveFrac * h

def setUpPlayers() :
    # set up our players
    players = []
    for count in range(numPlayers):
    # pick a distribution and numPlayers values from it
        playerValue = random.uniform(0,h)
        # create possible bids using geometric discretization
        playerBids = []
        for j in range(k):
            playerBids.append(playerValue - (1 + stepSize)**(j+1))
        # create player
        player = EWAlg(epsilons[count], k, h, playerBids, playerValue, )
        players.append(player)
    return players

def conductAuction(players, auction, n = 100):
    # do the action n times
    allBids = []
    allWinners = []
    for i in range(n):
        bids = []
        # generate bids
        for count, player in enumerate(players):
            bids.append(player.getAction())
        # conduct auction
        winningBid, winner = auction.generate(bids)
        # update payoffs
        for count, player in enumerate(players):
            if count == winner:
                payoffs = player.generatePayoffs(winningBid, True)
            else:
                payoffs = player.generatePayoffs(winningBid, False)
            player.update(payoffs)
        allBids.append(bids)
        allWinners.append(winner)
    return allBids, allWinners

In [75]:
stepSize = 1/k * (np.log(h))
print(stepSize)
playerBids = []
playerValue = random.uniform(0,h)
print(playerValue)
for j in range(k):
    playerBids.append(playerValue - (1 + stepSize)**(j + 1))
print(playerBids)

0.016094379124341
1.2991357539802573
[0.2830413748559162, 0.2666879666921771, 0.2500713605774756, 0.23318732050420432, 0.21603154228884325, 0.19859965247471112, 0.18088720721705642, 0.16288969115020602, 0.14460251623647946, 0.12602102059657794, 0.10714046732115068, 0.08795604326323136, 0.06846285781124184, 0.04865594164224696, 0.028530245455144154, 0.008080638683464647, -0.012698091812541668, -0.033811243074873376, -0.05526419739813071, -0.07706242370160354, -0.09921147892344284, -0.12171700943726815, -0.14458475249157754, -0.16782053767232097, -0.1914302883890151, -0.21542002338477473, -0.23979585827064875, -0.2645640070846482, -0.2897307838758685, -0.3153026043141043, -0.3412859873253724, -0.36768755675375697, -0.3945140430499996, -0.42177228498726804, -0.44946923140453765, -0.47761194297803344, -0.5062075940211801, -0.5352634743135225, -0.5647869909590812, -0.5947856702746175, -0.6252671597082875, -0.6562392297891775, -0.6877097761082152, -0.7196868213309615, -0.7521785172427988, -0

In [56]:
MCBound = 500
n = 100
# set up players
players = setUpPlayers()
# set up the auction 
auction = FirstPriceReserve(2, reserveValue)

values = []
totalWinnings = [[], []]
for player in players:
    values.append(player.getValue())

MCBids = []
MCWinners = []
for i in range(1):
    # conduct auction
    allBids, allWinners = conductAuction(players, auction, n)
    for count, player in enumerate(players):
        totalWinnings[count].append(player.getTotalValue())
    # reset players -> they keep same learning rate and values
    for player in players:
        player.reset()
    MCBids.append(allBids)
    MCWinners.append(allWinners)

    
avgBids = np.array(MCBids)
avgBids = np.mean(avgBids, 0)
totalWinnings = np.mean(np.array(totalWinnings), 1)

print(values)
print(totalWinnings)
# print(avgBids)

[-0.3199825126459085, -0.3199825126459085, -0.3199825126459085, -0.3199825126459085, -0.3199825126459085, -0.3199825126459085, -0.3199825126459085, -0.3199825126459085, -0.3199825126459085, -0.3199825126459085]
[-0.3336099346955608, -0.3336099346955608, -0.3336099346955608, -0.3336099346955608, -0.3336099346955608, -0.3336099346955608, -0.3336099346955608, -0.3336099346955608, -0.3336099346955608, -0.3336099346955608]
[0.6800174873540915, 0.6663900653044392]
[-303.95907317 -283.67966693]


In [None]:
print(values)
print(totalWinnings)
print(MCWinners)

In [3]:
def MonteCarlo(numTrials, payoffGenerator, epsilon, k, h, n):
    avgFinalPayoff = 0
    avgRegretPerRound = [[] for i in range(numTrials)]
    for trial in range(numTrials):
        alg = EWAlg(epsilon, k, h)
        finalPayoff = 0
        actionPayoffs = np.zeros(k)
        generator = payoffGenerator(k)
        regretPerRound = np.zeros(n)
        for i in range(n):
            payoffs = generator.generate()
            j = alg.getAction()
            myPayoff = payoffs[j]
            actionPayoffs += payoffs
            alg.update(payoffs)
            finalPayoff += myPayoff
            OPT = max(actionPayoffs)
            regret = (OPT - finalPayoff).item() / (i+1)
            regretPerRound[i] = regret
        avgFinalPayoff += finalPayoff
        avgRegretPerRound[trial] = regretPerRound
    return avgFinalPayoff/numTrials, np.mean(avgRegretPerRound, axis=0)

In [3]:
def MonteCarloTrackActions(numTrials, payoffGenerator, epsilon, k, h, n):
    avgFinalPayoff = 0
    avgRegretPerRound = [[] for i in range(numTrials)]
    actionTrial = []
    for trial in range(numTrials):
        alg = EWAlg(epsilon, k, h)
        finalPayoff = 0
        actionPayoffs = np.zeros(k)
        generator = payoffGenerator(k)
        regretPerRound = np.zeros(n)
        actions = np.zeros(n)
        for i in range(n):
            payoffs = generator.generate()
            j = alg.getAction()
            actions[i] = j
            myPayoff = payoffs[j]
            actionPayoffs += payoffs
            alg.update(payoffs)
            finalPayoff += myPayoff
            OPT = max(actionPayoffs)
            regret = (OPT - finalPayoff).item() / (i+1)
            regretPerRound[i] = regret
        actionTrial.append(actions)
        avgFinalPayoff += finalPayoff
        avgRegretPerRound[trial] = regretPerRound
    return avgFinalPayoff/numTrials, np.mean(avgRegretPerRound, axis=0), np.array(actionTrial)

## Adversarial Fair Payoffs

In each round i:

Draw a payoff x ~ U[0,1] (i.e., from the uniform distribution on interval [0,1])

Assign this payoff to the action j* that has the smallest total payoff so far, i.e., j* = argminj Vji-1 where Vji = Σir=1 vji. 
(All other actions get 0 payoff in round i.)