In [4]:
import numpy
import pandas
from matplotlib import pyplot
from scipy.stats import norm, binom
import math
import json

In [17]:
class UpperConfidenceBoundPolicy():
    """Bandit policy that always plays the ad with the highest optimistic score.

    Every ad is tracked as a dict with the keys ``name``, ``n_suc``
    (accumulated reward), ``n`` (play counter) and ``p`` (``n_suc / n``).
    The score maximised by :meth:`get_action` is ``p + log10(total_n) / n``.

    NOTE(review): the textbook UCB1 bonus is ``sqrt(2 * ln(t) / n_i)``; this
    policy uses the milder ``log10(total_n) / n`` bonus -- confirm that this
    is intentional.

    Args:
        ad_names: Iterable with the names of the ads (arms) to choose from.
            Defaults to the three ads ``'A'``, ``'B'`` and ``'C'``.

    Raises:
        ValueError: If ``ad_names`` is empty.
    """

    def __init__(self, ad_names=('A', 'B', 'C')):
        names = list(ad_names)
        if not names:
            raise ValueError('ad_names must contain at least one ad')
        # Each ad gets its own dict so updating one never touches another.
        # Both counters start at 1, which keeps log10(total_n) and the
        # division by n in get_action() well-defined before the first update.
        self.ads = [
            {'name': name, 'n_suc': 0, 'n': 1, 'p': 0}
            for name in names
        ]
        self.total_n = 1

    def get_action(self):
        """Return the index of the ad with the highest score.

        Returns:
            The value of ``numpy.argmax`` over the per-ad scores
            ``p + log10(total_n) / n``; ties resolve to the lowest index.
        """
        bonus_numerator = numpy.log10(self.total_n)
        scores = [ad['p'] + (bonus_numerator / ad['n']) for ad in self.ads]
        return numpy.argmax(scores)

    def update(self, action, reward):
        """Record the outcome of playing one ad.

        Args:
            action: Index into ``self.ads`` of the ad that was played.
            reward: Reward obtained; it is added to that ad's ``n_suc``.
        """
        ad = self.ads[action]
        ad['n_suc'] += reward
        ad['n'] += 1
        ad['p'] = ad['n_suc'] / ad['n']
        self.total_n += 1

class Agent:
    """Thin wrapper that forwards every decision and update to its policy."""

    def __init__(self, policy):
        # The policy object is kept as a public attribute.
        self.policy = policy

    def get_action(self):
        """Ask the wrapped policy which action to take and return its answer."""
        chosen = self.policy.get_action()
        return chosen

    def update(self, action, reward):
        """Pass the observed (action, reward) pair on to the wrapped policy."""
        delegate = self.policy
        delegate.update(action, reward)

class Environment():
    """Simulated ad environment with a fixed success probability per action.

    Args:
        hidden_probs: Success probability for each action index.  Defaults
            to ``[8/1000, 4/1000, 1/10000]``.  The values are copied into a
            new list.
        rng: Optional random generator exposing a ``random()`` method that
            returns a float in ``[0, 1)`` (for example
            ``numpy.random.default_rng(seed)``), so a run can be made
            reproducible.  When omitted, draws come from NumPy's global
            random state through ``numpy.random.rand()``.
    """

    def __init__(self, hidden_probs=None, rng=None):
        if hidden_probs is None:
            hidden_probs = [8/1000, 4/1000, 1/10000]
        self.hidden_probs = list(hidden_probs)
        self.rng = rng

    def perform(self, action):
        """Play ``action`` once and return the reward.

        Args:
            action: Index into ``hidden_probs``.

        Returns:
            ``1`` when a uniform draw falls below the action's probability,
            otherwise ``0``.
        """
        draw = numpy.random.rand() if self.rng is None else self.rng.random()
        return int(draw < self.hidden_probs[action])

class Experiment:
    """Couples an agent with an environment and runs trials between them."""

    def __init__(self, environment, agent):
        self.agent = agent
        self.environment = environment

    def run_trails(self, n):
        """Run ``n`` rounds of: choose an action, play it, feed back the reward."""
        for _ in range(n):
            choice = self.agent.get_action()
            outcome = self.environment.perform(choice)
            self.agent.update(choice, outcome)

    def print_policy(self):
        """Print the policy's total counter followed by its ads as indented JSON."""
        current = self.agent.policy
        ads_as_json = json.dumps(current.ads, indent=2)
        print('total n', current.total_n)
        print(ads_as_json)

In [18]:
# Wire the simulation together: a UCB policy wrapped in an agent, playing
# against the simulated ad environment.
policy = UpperConfidenceBoundPolicy()
environment = Environment()
agent = Agent(policy)
experiment = Experiment(environment=environment, agent=agent)

In [19]:
# Run progressively larger batches of trials on the same experiment and print
# the policy state after each batch; the counters accumulate across batches.
for n_sub in (10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 100000):
    print('n_trails', n_sub)
    experiment.run_trails(n_sub)
    experiment.print_policy()
    # Wait for the user to press Enter before starting the next batch.
    input("Press Enter to continue...")

n_trails 10
total n 11
[
  {
    "name": "A",
    "n_suc": 0,
    "n": 5,
    "p": 0.0
  },
  {
    "name": "B",
    "n_suc": 0,
    "n": 4,
    "p": 0.0
  },
  {
    "name": "C",
    "n_suc": 0,
    "n": 4,
    "p": 0.0
  }
]


Press Enter to continue... 


n_trails 20
total n 31
[
  {
    "name": "A",
    "n_suc": 0,
    "n": 11,
    "p": 0.0
  },
  {
    "name": "B",
    "n_suc": 0,
    "n": 11,
    "p": 0.0
  },
  {
    "name": "C",
    "n_suc": 0,
    "n": 11,
    "p": 0.0
  }
]


Press Enter to continue... 


n_trails 50
total n 81
[
  {
    "name": "A",
    "n_suc": 0,
    "n": 28,
    "p": 0.0
  },
  {
    "name": "B",
    "n_suc": 0,
    "n": 28,
    "p": 0.0
  },
  {
    "name": "C",
    "n_suc": 0,
    "n": 27,
    "p": 0.0
  }
]


Press Enter to continue... 


n_trails 100
total n 181
[
  {
    "name": "A",
    "n_suc": 0,
    "n": 57,
    "p": 0.0
  },
  {
    "name": "B",
    "n_suc": 1,
    "n": 70,
    "p": 0.014285714285714285
  },
  {
    "name": "C",
    "n_suc": 0,
    "n": 56,
    "p": 0.0
  }
]


Press Enter to continue... 


n_trails 200
total n 381
[
  {
    "name": "A",
    "n_suc": 0,
    "n": 92,
    "p": 0.0
  },
  {
    "name": "B",
    "n_suc": 2,
    "n": 163,
    "p": 0.012269938650306749
  },
  {
    "name": "C",
    "n_suc": 1,
    "n": 128,
    "p": 0.0078125
  }
]


Press Enter to continue... 


n_trails 500
total n 881
[
  {
    "name": "A",
    "n_suc": 2,
    "n": 316,
    "p": 0.006329113924050633
  },
  {
    "name": "B",
    "n_suc": 2,
    "n": 315,
    "p": 0.006349206349206349
  },
  {
    "name": "C",
    "n_suc": 1,
    "n": 252,
    "p": 0.003968253968253968
  }
]


Press Enter to continue... 


n_trails 1000
total n 1881
[
  {
    "name": "A",
    "n_suc": 6,
    "n": 726,
    "p": 0.008264462809917356
  },
  {
    "name": "B",
    "n_suc": 5,
    "n": 765,
    "p": 0.006535947712418301
  },
  {
    "name": "C",
    "n_suc": 1,
    "n": 392,
    "p": 0.002551020408163265
  }
]


Press Enter to continue... 


n_trails 2000
total n 3881
[
  {
    "name": "A",
    "n_suc": 10,
    "n": 1660,
    "p": 0.006024096385542169
  },
  {
    "name": "B",
    "n_suc": 10,
    "n": 1662,
    "p": 0.006016847172081829
  },
  {
    "name": "C",
    "n_suc": 1,
    "n": 561,
    "p": 0.0017825311942959
  }
]


Press Enter to continue... 


n_trails 5000
total n 8881
[
  {
    "name": "A",
    "n_suc": 45,
    "n": 5298,
    "p": 0.008493771234428085
  },
  {
    "name": "B",
    "n_suc": 16,
    "n": 2885,
    "p": 0.005545927209705373
  },
  {
    "name": "C",
    "n_suc": 1,
    "n": 700,
    "p": 0.0014285714285714286
  }
]


Press Enter to continue... 


n_trails 10000
total n 18881
[
  {
    "name": "A",
    "n_suc": 125,
    "n": 15298,
    "p": 0.008171002745456922
  },
  {
    "name": "B",
    "n_suc": 16,
    "n": 2885,
    "p": 0.005545927209705373
  },
  {
    "name": "C",
    "n_suc": 1,
    "n": 700,
    "p": 0.0014285714285714286
  }
]


Press Enter to continue... 


n_trails 100000
total n 118881
[
  {
    "name": "A",
    "n_suc": 908,
    "n": 115223,
    "p": 0.00788037110646312
  },
  {
    "name": "B",
    "n_suc": 16,
    "n": 2885,
    "p": 0.005545927209705373
  },
  {
    "name": "C",
    "n_suc": 1,
    "n": 775,
    "p": 0.0012903225806451613
  }
]


Press Enter to continue... 
