In [None]:
import numpy as np
from numba import jit

params = np.empty(shape=(10,2))
params[:,0] = np.random.randn(10)
params[:,1] = 5
params

In [149]:
@jit(nopython=True)
def g_policy(array,k):
    mask = array==np.max(array)
    seq = np.arange(k)[mask]
    return np.random.choice(seq,1)

@jit(nopython=True)
def e_policy(array,k,e):

    flag = np.random.choice((True,False),p=(1-e,e))

    if flag:
        mask = array==np.max(array)
        seq = np.arange(k)[mask]
        return np.random.choice(seq,1)
    else:
        seq = np.arange(k)
        return np.random.choice(seq,1)
    

class kBandit():

    def __init__(self,params,n,k,policy,**kwargs):
        self.p_reward = params
        self.policy = policy
        self.steps = n
        self.k = k
        self.kwargs = kwargs
        self.value = np.zeros(self.k)
        self.visits = np.ones(self.k)

    def action(self):
        return self.policy(self.value,self.k,**self.kwargs)

    def reward(self,i):
        return self.p_reward[i,1]*np.random.randn()+self.p_reward[i,0]

    def update_step(self,reward,i):
        self.value[i] = self.value[i]+(reward-self.value[i])/self.visits[i]
        self.visits[i] += 1
    
    def simulation(self):
        sim = np.empty(shape=self.steps)
        act = np.empty(shape=self.steps)
        for j in range(self.steps):
            i = self.action()
            act[j] = i
            reward = self.reward(i)
            sim[j] = reward
            self.update_step(reward,i)
        value = self.value
        self.value = np.zeros(self.k)
        self.visits = np.ones(self.k)
        return act,sim,value

In [150]:
it = 3000
k = 1000
tenbandit = kBandit(params,k,10,g_policy)

sense_matrix = np.empty(shape=(it,k))
for p in range(it):
    act,_,value = tenbandit.simulation()
    sense_matrix[p,:] = (act==np.argmax(params[:,0]))

ts_greedy = sense_matrix.sum(axis=0)/it

In [140]:
it = 3000
k = 1000
tenbandit = kBandit(params,k,10,e_policy,0.05)

sense_matrix = np.empty(shape=(it,k))
for p in range(it):
    act,_,value = tenbandit.simulation()
    sense_matrix[p,:] = (act==np.argmax(params[:,0]))

ts_e01 = sense_matrix.sum(axis=0)/it

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
[1m[1m[1mNo implementation of function Function(<built-in method choice of numpy.random.mtrand.RandomState object at 0x0000027C37E3A640>) found for signature:
 
 >>> choice(Tuple(Literal[bool](True), Literal[bool](False)), p=UniTuple(float64 x 2))
 
There are 2 candidate implementations:
[1m  - Of which 2 did not match due to:
  Overload in function 'choice': File: numba\cpython\randomimpl.py: Line 1920.
    With argument(s): '(UniTuple(bool x 2), p=UniTuple(float64 x 2))':[0m
[1m   Rejected as the implementation raised a specific error:
     TypingError: [1mgot an unexpected keyword argument 'p'[0m[0m
  raised from c:\Users\rober\AppData\Local\Programs\Python\Python310\lib\site-packages\numba\core\typing\templates.py:784
[0m
[0m[1mDuring: resolving callee type: Function(<built-in method choice of numpy.random.mtrand.RandomState object at 0x0000027C37E3A640>)[0m
[0m[1mDuring: typing of call at C:\Users\rober\AppData\Local\Temp\ipykernel_27300\2251086269.py (10)
[0m
[1m
File "..\..\AppData\Local\Temp\ipykernel_27300\2251086269.py", line 10:[0m
[1m<source missing, REPL/exec in use?>[0m


In [146]:
tenbandit = kBandit(params,k,10,e_policy,0.05)

In [None]:
tenbandit