In [125]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import math
from scipy.stats import bernoulli, binom

# Part 1: Empirical Learning Rates for the Exponential Weights Algorithm

In [122]:
class EWAlg:
    def __init__(self, epsilon, k, h):
        self.weights = np.ones(k)
        self.payoffs = np.zeros(k)
        self.h = h
        self.k = k
        self.epsilon = epsilon
        self.sumWeights = np.sum(self.weights)
        self.probs = self.weights/self.sumWeights

    def getAction(self):
        j = np.random.choice(self.k, 1, p = self.probs)
        return j.item()
    
    def update(self, payoffs):
        for j in range(len(payoffs)):
            curPayoff = payoffs[j]
            self.payoffs[j] = self.payoffs[j] + curPayoff
            newWeight = (1+self.epsilon)**(self.payoffs[j]/self.h)
            self.weights[j] = newWeight
        self.sumWeights = np.sum(self.weights)
        self.probs = self.weights/self.sumWeights
        return

## Adversarial Fair Playoffs

In each round i:

Draw a payoff x ~ U[0,1] (i.e., from the uniform distribution on interval [0,1])

Assign this payoff to the action j* that has the smallest total payoff so far, i.e., j* = argminj Vji-1 where Vji = Σir=1 vji. 
(All other actions get 0 payoff in round i.)

In [124]:
h = 1 # fixed
# hyperparameters
k = 5
n = 100
epsilons = [0, 0.1, 0.1517, 0.2, 0.4, 0.6, 0.9, 1.5, 4, 100] # to be studied
monteCarloBound = 1000

avgPayoffs = []
avgRegrets = []
for epsilon in epsilons:
    sumPayoff = 0
    sumRegret = 0   
    for i in range(monteCarloBound):
        alg = EWAlg(epsilon, k, h)
        totalPayoffs = np.zeros(k)
        myPayoff = 0
        for i in range(n):
            # generate adversarial payoffs
            adversarialPayoffs = np.zeros(k)
            x = np.random.rand(1)
            least = np.argmin(totalPayoffs)
            adversarialPayoffs[least] = x
            totalPayoffs = totalPayoffs + adversarialPayoffs
            j = alg.getAction()
            myPayoff += adversarialPayoffs[j]
            alg.update(adversarialPayoffs)

        OPT = max(totalPayoffs)
        regret = (OPT - myPayoff).item() / n
        sumPayoff += myPayoff
        sumRegret += regret
    avgPayoffs.append(sumPayoff/n)
    avgRegrets.append(sumRegret/n)

print(epsilons)
print(avgPayoffs)
print(avgRegrets)

[0, 0.1, 0.1517, 0.2, 0.4, 0.6, 0.9, 1.5, 4, 100]
[99.78362498821083, 97.06588871794594, 96.69334793772201, 94.601482178476, 90.54497551040038, 87.97257700243796, 84.18906211784811, 76.36223572753096, 61.82921488266603, 21.370843055415985]
[0.03536252535241939, 0.06369790588813184, 0.06447168656262929, 0.08830487969886638, 0.12786208941140625, 0.15173943718852717, 0.19118807634729307, 0.2702902367503487, 0.4148605477995773, 0.8202424557385144]


In [143]:
h = 1 # fixed
# hyperparameters
k = 10
n = 100
epsilons = [0, 0.1, 0.1517, 0.2, 0.4, 0.6, 0.9, 1.5, 4, 100] # to be studied
monteCarloBound = 1000

avgPayoffs = []
avgRegrets = []
for epsilon in epsilons:
    sumPayoff = 0
    sumRegret = 0   
    for i in range(monteCarloBound):
        payoffProbs = np.random.uniform(0, 0.5, k)
        bernouliPayoffs = [bernoulli(p) for p in payoffProbs]
        alg = EWAlg(epsilon, k, h)
        totalPayoffs = np.zeros(k)
        myPayoff = 0
        for i in range(n):
            # generate bernouli payoffs
            iPayoffs = np.array([b.rvs(1) for b in bernouliPayoffs]).flatten()
            totalPayoffs = totalPayoffs + iPayoffs
            j = alg.getAction()
            myPayoff += iPayoffs[j]
            alg.update(iPayoffs)

        OPT = max(totalPayoffs)
        regret = (OPT - myPayoff).item() / n
        sumPayoff += myPayoff
        sumRegret += regret
    avgPayoffs.append(sumPayoff/n)
    avgRegrets.append(sumRegret/n)

print(epsilons)
print(avgPayoffs)
print(avgRegrets)

[0, 0.1, 0.1517, 0.2, 0.4, 0.6, 0.9, 1.5, 4, 100]
[247.03, 325.97, 352.0, 363.78, 392.33, 403.64, 408.36, 415.5, 420.89, 427.35]
[2.260699999999998, 1.4889000000000012, 1.226599999999999, 1.0959999999999988, 0.8125000000000004, 0.6921000000000005, 0.6348000000000009, 0.5774000000000002, 0.5265000000000001, 0.4776000000000004]
