# Simulated Agent

In [2]:
import numpy as np
from scipy.optimize import minimize
from scipy.stats import binom, beta

In [3]:
class Agent:
    def __init__(self, a0, b0, q):
        self.a0 = a0
        self.b0 = b0
        self.q = q
        self.p = 0.0
        self.pickP()
        self.notPickP()
        
    def pickP(self):#PICK P  FROM BETA DISTRIBUTION
        self.p = np.random.beta(self.a0, self.b0)
    def notPickP(self):
        self.p = 1 - np.random.beta(self.a0, self.b0)
            
    def draw(self):
        if np.random.uniform(0,1) < self.p:
            return True
        else:
            return False
        
    def turn(self):
        if np.random.uniform(0,1) < self.q:
            self.pickP()
        # else:
        #     self.notPickP()
        return self.draw()

In [4]:
np.random.seed(0)
ag = Agent(0.1,0.5, 0.1)
n = 8
outcomes = []
prob = []
for i in range(n):
    outcomes.append(ag.turn())
np.random.seed(0)

In [5]:
outcomes

[True, True, True, True, True, False, False, False]

In [19]:
class DBMAgent:
    def __init__(self, a0, b0, gamma):
        self.a0 = a0
        self.b0 = b0
        self.gamma = gamma
        self.p = 0.0
        self.outcomes = outcomes
        self.num = len(outcomes)
        self.tb = np.zeros((self.num + 1, self.num + 1))
        self.tb[0, 0] = 1
        

    # def mu(self, n, m):
    #     self.a0 = self.a0 + n
    #     self.b0 = self.b0 + m
    #     return self.a0 / (self.a0 + self.b0)
        

    def update(self, outcomes):
        # print(outcomes, '\n',self.tb)
        for o in outcomes:
           
            tb_new = self.tb.copy()
            
            for n in range(self.tb.shape[0]):
                for m in range(self.tb.shape[1]):
                    if n==0 and m==0:
                        self.tb[n, m] = self.tb[0,0] + self.gamma
                    else:
                        self.tb[n, m] = self.tb[n,m] * (1.0 - self.gamma)

            for n in range(self.tb.shape[0]):
                for m in range(self.tb.shape[1]):
                    #updating each lever
                    if o: 
                        # if n > 0:
                        #     tb_new[n, m] = self.tb[n-1, m] * beta.mean(self.a0 + n, self.b0 + (m-1))
                        if m > 0:
                            tb_new[n, m] = self.tb[n, m-1] * beta.mean(self.a0 + n, self.b0 + (m-1))
                        else:
                            tb_new[n, m] = 0.0
                    else:
                        # if m > 0:
                        #     tb_new[n, m] = self.tb[n, m-1] * self.beta.mean(self.a0 + n, self.b0 +  m)
                        if n > 0:
                            tb_new[n, m] = self.tb[n-1, m] * (1- beta.mean(self.a0 + (n-1), self.b0 + m) )
                        else:
                            tb_new[n, m] = 0.0
            z = 0.0
            for n in range(self.tb.shape[0]):
                for m in range(self.tb.shape[1]):
                    z += tb_new[n, m] 
                    
            for n in range(self.tb.shape[0]):
                for m in range(self.tb.shape[1]):
                    self.tb[n, m] = tb_new[n, m] / z    
                           
            # print(o, z,'\n', self.tb, '\n', tb_new[n,m])
            
#         for o in self.outcomes:

#             for n in range(self.tb.shape[0]):
#                 for m in range(self.tb.shape[1]):
#                     if n==0 and m==0:
#                         self.tb[n, m] = self.tb[0,0]+self.gamma
#                     else:
#                         self.tb[n, m] = self.tb[n,m]*(1.0 - self.gamma)

            
#             tb_new=self.tb.copy()
            
#             for n in range(self.tb.shape[0]):
#                 for m in range(self.tb.shape[1]):
                        
#                     if o:
#                         if m>0:
#                             tb_new[n,m]=self.tb[n,m-1]*beta.mean(self.a0 + n, self.b0+m)
#                         else:
#                             tb_new[n,m]=0.0
                            
#                     else:
#                         if n>0:
#                             tb_new[n,m]=self.tb[n-1,m]*beta.mean(self.a0 + n, self.b0+m)
#                         else:
#                             tb_new[n,m]=0.0

#             z=0.0                    
#             for n in range(self.tb.shape[0]):
#                 for m in range(self.tb.shape[1]):
#                     z+=tb_new[n,m]

#             for n in range(self.tb.shape[0]):
#                 for m in range(self.tb.shape[1]):
#                     self.tb[n,m]=tb_new[n,m]/z
                
#             print(o,'\n', self.tb)

            
agent = DBMAgent(0.1, 0.5, 0.2)
agent.update(outcomes)




In [20]:
class logL:
    def __init__(self, data):
        self.data = data
    def __call__(self, params): # matrix of params
        a0,b0,gm = params 
        mdl = DBMAgent(a0,b0,gm)
        mdl.update(self.data)
        likelihood = gm * beta.pdf(self.data, a0, b0) + (1 - gm) * beta.pdf(self.data, 1, 1)
        loglikelihood = -1 * np.sum(np.log(likelihood))
        return loglikelihood
    

In [21]:
loglikelihood = logL(outcomes)
p = np.array([0.1,0.5,0.2])
loglikelihood(p)


-inf

In [None]:
optimised_result = minimize(loglikelihood,(1,1,0.2), method='Nelder-Mead')
alpha_mle, beta_mle, gamma_mle = optimised_result.x

In [165]:
print(f'optimised alpha: {alpha_mle}\noptimised beta: {beta_mle}\noptimised gamma: {gamma_mle}')

optimised alpha: 1.0333333333333332
optimised beta: 0.95
optimised gamma: 0.20666666666666672
