In [2]:
import numpy as np
import pulp as pl

In [2]:
class karmbandit:
    """Stochastic k-armed bandit environment.

    Attributes:
        d (int): number of arms
        distribution (str): reward distribution, 'bernoulli' or 'gaussian'
        mu (array): per-arm mean (the success probability for 'bernoulli')
        sigma (array): per-arm standard deviation, only set for 'gaussian'
    """

    # Reward distributions that ``pull`` knows how to sample from.
    _DISTRIBUTIONS = ('bernoulli', 'gaussian')

    def __init__(self, d, distribution, params):
        """Init the k arm bandit problem

        Args:
            d (int): number of arms
            distribution (str): distribution of rewards, 'bernoulli' or
                'gaussian'
            params (array-like): parameters of the distribution. For
                'bernoulli', entry i is the success probability of arm i.
                For 'gaussian', row i is ``(mean, standard deviation)`` of
                arm i.

        Raises:
            ValueError: if ``distribution`` is not a supported name.
        """
        # Fail at construction time: otherwise an unsupported name would
        # only show up later as ``pull`` silently returning None.
        if distribution not in self._DISTRIBUTIONS:
            raise ValueError(
                "unknown distribution %r, expected one of %r"
                % (distribution, self._DISTRIBUTIONS)
            )

        self.d = d
        self.distribution = distribution

        # Accept plain (nested) lists as well as arrays; the column
        # indexing below requires an ndarray.
        params = np.asarray(params, dtype=float)
        if distribution == 'bernoulli':
            self.mu = params
        else:
            self.mu = params[:, 0]
            self.sigma = params[:, 1]

    def pull(self, index):
        """Pull the arm index

        Args:
            index (int): index of the arm to pull

        Returns:
            reward: one random reward drawn from the distribution of the
                arm (0 or 1 for 'bernoulli', a float for 'gaussian')
        """
        if self.distribution == 'bernoulli':
            # A Bernoulli draw is a binomial draw with a single trial.
            return np.random.binomial(1, self.mu[index])
        if self.distribution == 'gaussian':
            return np.random.normal(self.mu[index], self.sigma[index])

class karmpolicy:
    """Arm-selection policy for the k-armed bandit problem.

    This class only stores the statistics and chooses an arm. Nothing in
    it modifies ``t``, ``w``, ``muhat`` or the prior parameters after
    construction, so the caller has to keep them up to date between calls
    to ``select``.

    Attributes:
        d (int): number of arms
        policy (str): policy to use ('epsilon-greedy', 'ucb',
            'thompson-sampling' or 'klucb')
        t (int): time step, 0 at construction
        w (array): number of times each arm was played, zeros at
            construction
        muhat (array): empirical mean reward of each arm, zeros at
            construction
        epsilon (float): exploration probability ('epsilon-greedy' only)
        c (float): scale of the exploration bonus ('ucb' only)
        alphas, betas (array): Beta prior parameters of each arm
            ('thompson-sampling' with a "beta" or "uniform" prior)
        mus, sigma (array): Gaussian prior mean and standard deviation of
            each arm ('thompson-sampling' with a "gaussian" prior)
    """

    def __init__(self, d, policy, params, distribution, prior = "uniform"):
        """Init the k arm bandit policy

        Args:
            d (int): number of arms
            policy (str): policy to use
            params (array-like): parameters of the policy.
                'epsilon-greedy': ``params[0]`` is epsilon.
                'ucb': ``params[0]`` is the bonus scale c.
                'thompson-sampling': row i holds the prior parameters of
                arm i, ``(alpha, beta)`` for a "beta" prior or
                ``(mean, standard deviation)`` for a "gaussian" prior;
                ignored for the "uniform" prior.
            distribution (str): distribution of the rewards, 'bernoulli'
                or 'gaussian'
            prior (str): prior used by 'thompson-sampling': "uniform" or
                "beta" (Bernoulli rewards only) or "gaussian"
        """
        self.d = d
        self.policy = policy

        self.t = 0
        self.w = np.zeros(d)
        # Initialised here so that ``select`` can always read it.
        self.muhat = np.zeros(d)

        if policy == 'epsilon-greedy':
            self.epsilon = params[0]
        elif policy == 'ucb':
            self.c = params[0]
        elif policy == 'thompson-sampling':
            if distribution == 'bernoulli' and prior == "uniform":
                # Beta(1, 1) is the uniform distribution on [0, 1].
                self.alphas = np.ones(d)
                self.betas = np.ones(d)
            elif distribution == 'bernoulli' and prior == "beta":
                table = np.asarray(params, dtype=float)
                # Column selection: one (alpha, beta) row per arm.
                self.alphas = table[:, 0]
                self.betas = table[:, 1]
            elif distribution in ('bernoulli', 'gaussian') and prior == "gaussian":
                table = np.asarray(params, dtype=float)
                # Column selection: one (mean, std) row per arm.
                self.mus = table[:, 0]
                self.sigma = table[:, 1]
            # Any other combination stores no prior; ``select`` reports it.
        # 'klucb' needs no parameters here; ``select`` does not support it yet.

    def select(self):
        """Select the arm to pull according to the algorithm policy

        Returns:
            index (int): index of the arm to pull

        Raises:
            NotImplementedError: if the policy is 'klucb'.
            ValueError: if the policy is unknown, or if
                'thompson-sampling' was configured with a
                distribution/prior combination that stored no prior.
        """
        if self.policy == 'ucb':
            counts = np.asarray(self.w)
            # Play every arm once first: the bonus divides by the play
            # count and is undefined for an arm that was never played.
            unplayed = np.flatnonzero(counts == 0)
            if unplayed.size > 0:
                return int(unplayed[0])
            # Clamp the time step so that log() never sees 0.
            bonus = self.c * np.sqrt(np.log(max(self.t, 1)) / (2 * counts))
            return int(np.argmax(self.muhat + bonus))

        if self.policy == 'epsilon-greedy':
            # Explore with probability epsilon, otherwise exploit.
            if np.random.rand() < self.epsilon:
                return int(np.random.randint(self.d))
            return int(np.argmax(self.muhat))

        if self.policy == 'thompson-sampling':
            # Draw one sample per arm from the stored distribution and
            # play the arm with the largest sample.
            if hasattr(self, 'alphas'):
                samples = np.random.beta(self.alphas, self.betas)
            elif hasattr(self, 'mus'):
                samples = np.random.normal(self.mus, self.sigma)
            else:
                raise ValueError(
                    "thompson-sampling has no prior configured for this "
                    "distribution/prior combination"
                )
            return int(np.argmax(samples))

        if self.policy == "klucb":
            raise NotImplementedError("the 'klucb' policy is not implemented")

        raise ValueError("unknown policy %r" % (self.policy,))







ModuleNotFoundError: No module named 'pandas'