In [60]:
import numpy as np
from scipy.optimize import minimize
from scipy.stats import beta

class Simulated:
    """Bernoulli outcome source whose success rate can be redrawn over time.

    The success probability ``p`` is sampled from ``Beta(a0, b0)`` when the
    object is built.  On every call to :meth:`turn` it is resampled with
    probability ``q`` before a single outcome is drawn.
    """

    def __init__(self, a0, b0, q):
        """Store the Beta parameters and switch rate, then sample ``p``."""
        self.a0, self.b0, self.q = a0, b0, q
        self.p = 0.0
        self.pickP()

    def pickP(self):
        """Replace ``self.p`` with a fresh sample from ``Beta(a0, b0)``."""
        self.p = np.random.beta(self.a0, self.b0)

    def draw(self):
        """Return ``True`` when a uniform [0, 1) sample falls below ``p``."""
        sample = np.random.uniform(0, 1)
        return bool(sample < self.p)

    def turn(self):
        """Resample ``p`` with probability ``q``, then return one outcome."""
        should_switch = np.random.uniform(0, 1) < self.q
        if should_switch:
            self.pickP()
        return self.draw()

# Seed NumPy's global RNG so the simulated outcome sequence is reproducible.
np.random.seed(42)
ag = Simulated(0.1, 0.5, 0.1)
n = 20  # number of outcomes to simulate
outcomes = []
prob = []  # never appended to in this cell
for i in range(n):
    outcomes.append(ag.turn())
    # NOTE(review): the agent is rebuilt after every turn, so each outcome is
    # drawn from a freshly sampled p rather than from one agent whose p
    # switches with probability q -- confirm this is intended.
    ag = Simulated(0.1, 0.5, 0.1)

class DBM:
    """Belief table over (failures, successes) counted since the last reset.

    ``tb[n, m]`` is the normalised weight given to having observed ``n``
    failures and ``m`` successes since the counts were last reset to the
    ``Beta(a0, b0)`` prior.  Before every observation a fraction ``gamma`` of
    the total weight is moved back to the reset state ``tb[0, 0]``.

    Parameters
    ----------
    a0, b0 : float
        Prior pseudo-counts.  In state ``(n, m)`` the predicted success
        probability is ``(a0 + m) / (a0 + b0 + n + m)``.
    gamma : float
        Weight moved to the reset state before each observation.
    outcomes : sequence
        Observations in order; truthy means success, falsy means failure.
    """

    def __init__(self, a0, b0, gamma, outcomes):
        self.a0 = a0
        self.b0 = b0
        self.gamma = gamma
        self.outcomes = outcomes
        self.num = len(outcomes)
        # One extra row/column so that `num` failures or successes still fit.
        self.tb = np.zeros((self.num + 1, self.num + 1))
        self.tb[0, 0] = 1

    def update(self):
        """Filter ``self.tb`` through every outcome in ``self.outcomes``.

        For each outcome the table is first mixed with the reset state
        (``(1 - gamma) * tb`` plus ``gamma`` on ``tb[0, 0]``), then shifted by
        one success (column) or one failure (row) while being weighted by the
        predicted probability of that outcome, and finally renormalised.
        ``self.z`` holds the normalising constant of the last outcome
        processed.  The table is modified in place; calling the method again
        replays all outcomes on top of the current table.
        """
        size = self.tb.shape[0]
        fails = np.arange(size)[:, None]  # n, varies along rows
        succs = np.arange(size)[None, :]  # m, varies along columns
        # Predicted success probability in every (n, m) state.
        p_success = (self.a0 + succs) / (self.a0 + self.b0 + fails + succs)
        p_failure = 1 - p_success

        for o in self.outcomes:
            # Mix with the reset state.  The (1 - gamma) factor also applies
            # to tb[0, 0] so the mixed table keeps the same total weight.
            mixed = self.tb * (1.0 - self.gamma)
            mixed[0, 0] += self.gamma

            shifted = np.zeros_like(mixed)
            if o:
                # Success: (n, m - 1) -> (n, m); column 0 is unreachable.
                shifted[:, 1:] = mixed[:, :-1] * p_success[:, :-1]
            else:
                # Failure: (n - 1, m) -> (n, m); row 0 is unreachable.
                shifted[1:, :] = mixed[:-1, :] * p_failure[:-1, :]

            self.z = np.sum(shifted)
            # Write in place so existing references to `tb` stay valid.
            self.tb[...] = shifted / self.z


# class DBM:
#     def __init__(self, a0, b0, gamma, outcomes):
#         self.a0 = a0
#         self.b0 = b0
#         self.gamma = gamma
#         self.outcomes = outcomes
#         self.num = len(outcomes)
#         self.tb = np.zeros((self.num + 1, self.num + 1))
#         self.tb[0, 0] = 1
        
#     def update(self):
#         for o in self.outcomes:
#             tb_new = self.tb.copy()
#             for n in range(self.tb.shape[0]):
#                 for m in range(self.tb.shape[1]):
#                     if n == 0 and m == 0:
#                         self.tb[n, m] = self.tb[0, 0] + self.gamma
#                     else:
#                         self.tb[n, m] = self.tb[n, m] * (1.0 - self.gamma)

#             for n in range(self.tb.shape[0]):
#                 for m in range(self.tb.shape[1]):
#                     # Updating each lever
#                     if o: 
#                         if m > 0:
#                             tb_new[n, m] = self.tb[n, m-1] * (self.a0 + m-1 / (self.a0 + m-1 + self.b0 + (n)))#beta.mean(self.a0 + m-1, self.b0 + (n))
#                         else:
#                             tb_new[n, m] = 0.0 
#                     else:
#                         if n > 0:
#                             tb_new[n, m] = self.tb[n-1, m] * (1-(self.a0+m /(self.a0 + m + self.b0 + n-1))) #(1- beta.mean(self.a0 + (m), self.b0 + n-1))
#                         else:
#                             tb_new[n, m] = 0.0

#             self.z = tb_new.sum()
#             for n in range(self.tb.shape[0]):
#                 for m in range(self.tb.shape[1]):
#                     self.tb[n, m] = tb_new[n, m] / self.z    
                           


In [61]:
# Show the simulated outcomes, then filter them with a DBM using a0 = b0 = 1
# and gamma = 0.1.
print(outcomes)
dbm = DBM(1, 1, 0.1, outcomes)
dbm.update()
# Display the resulting table (rows index failures, columns index successes).
dbm.tb


[False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, True, True, False, False]


array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.08432376, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.11909912, 0.06976601, 0.1401165 , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.07744503, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.       

In [None]:
''' if o: 
                        if m > 0:
                            a = self.a0 + m-1
                            b = self.b0 + n
                            tb_new[n, m] = self.tb[n, m-1] * beta.mean(a, b)
                        else:
                            tb_new[n, m] = 0.0 
                    else:
                        if n > 0:
                            a = self.a0 + m
                            b = self.b0 + n-1
                            tb_new[n, m] = self.tb[n-1, m] * (1 - beta.mean(a, b))
                        else:
                            tb_new[n, m] = 0.0
'''

In [54]:
import numpy as np
from scipy.stats import beta

class DBModel:
    """Square ``num_arms x num_arms`` weight table with per-update decay.

    Every call to :meth:`update` scales the whole table by ``1 - gamma``,
    adds ``gamma`` to column 0 of the chosen arm's row, weights the remaining
    columns of that row by a Beta-mean likelihood, and renormalises.

    The table starts at all zeros and columns above 0 are only ever
    multiplied, so they stay zero unless ``tb`` is assigned from outside.
    """

    def __init__(self, a0, b0, gamma, num_arms):
        """Store the parameters and allocate an all-zero table."""
        self.a0, self.b0 = a0, b0
        self.gamma = gamma
        self.num_arms = num_arms
        self.tb = np.zeros((num_arms, num_arms))

    def update(self, arm, outcome):
        """Decay the table, update row ``arm`` for ``outcome``, renormalise.

        ``outcome`` is used for its truthiness: truthy weights by the Beta
        mean, falsy by one minus the Beta mean.
        """
        keep = 1 - self.gamma
        for row in range(self.num_arms):
            if row != arm:
                # Rows of other arms are only decayed.
                self.tb[row, :] = self.tb[row, :] * keep
                continue

            # Column 0 of the chosen arm receives the gamma mass.
            self.tb[row, 0] = self.tb[row, 0] * keep + self.gamma
            for col in range(1, self.num_arms):
                weight = beta.mean(self.a0 + col - 1, self.b0 + row)
                if not outcome:
                    weight = 1 - weight
                self.tb[row, col] = self.tb[row, col] * keep * weight

        # Rescale so that all entries sum to one.
        total = np.sum(self.tb)
        self.tb = self.tb / total

    def pick_arm(self):
        """Sample an arm index with probability equal to its row's total."""
        row_mass = self.tb.sum(axis=1)
        return np.random.choice(self.num_arms, p=row_mass)


In [55]:
# Apply every outcome to every arm index of a DBModel in turn.
# NOTE(review): the model is rebuilt inside the loop, so after the loop `mdl`
# reflects only the last outcome -- confirm whether it should be created once
# before the loop instead.
for o in outcomes:
    mdl = DBModel(0.1,0.5,0.2, len(outcomes))
    for i in range(mdl.num_arms):
        mdl.update(i,o)

In [56]:
# Display the normalised table left over from the loop above.
(mdl.tb)

array([[0.01441152, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.00360288, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.0045036 , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.0056295 , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0

In [29]:
# Sample one arm index in proportion to the row sums of mdl.tb.
mdl.pick_arm()

12