In [3]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import random


CREATING THE BANDIT ENVIRONMENT

In [4]:
class Bandit_Environment(object):
    """A k-armed bandit whose arms pay a fixed reward with a fixed probability.

    Attributes:
        reward_probas: Indexable of per-arm probabilities. Arm ``i`` pays out
            when a uniform draw from ``[0, 1)`` is below ``reward_probas[i]``.
        rewards: Indexable of per-arm payouts returned on a successful pull.
        k_arms: Number of arms, taken from ``len(rewards)``.
    """

    def __init__(self, reward_probas, rewards):
        """Store the arm definitions.

        Args:
            reward_probas: Per-arm payout probabilities, indexed by arm.
            rewards: Per-arm payout values, indexed by arm.
        """
        # NOTE(review): the two sequences are not validated to have the same
        # length; the arm count follows ``rewards`` only.
        self.reward_probas = reward_probas
        self.rewards = rewards
        self.k_arms = len(rewards)

    def pull_arm(self, arm):
        """Pull one arm and return its payout for this draw.

        Args:
            arm: Zero-based arm index in ``[0, k_arms - 1]``.

        Returns:
            ``rewards[arm]`` when the draw succeeds, otherwise ``0.0``.

        Raises:
            ValueError: If ``arm`` is outside ``[0, k_arms - 1]``.
        """
        # Valid indices stop at k_arms - 1, so k_arms itself must be rejected.
        if arm < 0 or arm >= self.k_arms:
            raise ValueError(f"arm must be a value between 0 and {self.k_arms - 1}")

        if np.random.random() < self.reward_probas[arm]:
            return self.rewards[arm]
        return 0.0

CREATING THE RANDOM AGENT  

In [None]:
class RandomAgent(object):
    """Baseline agent that pulls a uniformly random arm on every iteration.

    Attributes:
        env: Environment exposing ``k_arms`` and ``pull_arm(arm)``.
        iterations: Number of pulls performed by ``execute``.
    """

    def __init__(self, env, max_iterations=2000):
        """Bind the agent to an environment.

        Args:
            env: Environment exposing ``k_arms`` and ``pull_arm(arm)``.
            max_iterations: Number of pulls to perform in ``execute``.
        """
        self.env = env
        self.iterations = max_iterations

    def execute(self):
        """Run the random policy and collect its history.

        Returns:
            dict with keys:
                ``"arms"``: array of pull counts per arm.
                ``"rewards"``: list of the reward from each pull, in order.
                ``"cum_rewards"``: list of the running average reward after
                    each pull.
                ``"arm_rewards"``: array of summed rewards per arm, each
                    reward rounded to 2 decimals before being added.
        """
        n_arms = self.env.k_arms
        arm_counts = np.zeros(n_arms)
        # Must start from zeros: this array is accumulated into with ``+=``,
        # so uninitialised storage would corrupt the per-arm totals.
        arm_count_reward = np.zeros(n_arms)
        rewards = []
        cum_rewards = []
        # Running sum avoids re-summing the whole history on every step.
        # Starts at integer 0, as the built-in ``sum`` does.
        running_total = 0

        for step in range(1, self.iterations + 1):
            # Draws an arm index uniformly from 0 .. n_arms - 1.
            arm = np.random.choice(n_arms)
            reward = self.env.pull_arm(arm)

            arm_counts[arm] += 1
            arm_count_reward[arm] += round(reward, 2)
            rewards.append(reward)

            running_total += reward
            cum_rewards.append(running_total / step)

        return {
            "arms": arm_counts,
            "rewards": rewards,
            "cum_rewards": cum_rewards,
            "arm_rewards": arm_count_reward,
        }

DEFINING THE PLOT HISTORY FUNCTION

In [None]:
def plot_history(history):
    """Draw summary charts for one agent run on a single figure.

    Three panels are placed on a 2x2 grid (the fourth cell is left empty):
    the running average reward per iteration, the number of pulls per arm,
    and the total reward collected per arm.

    Args:
        history: Mapping with the keys ``"cum_rewards"`` (sequence of running
            average rewards), ``"arms"`` (pull count per arm) and
            ``"arm_rewards"`` (total reward per arm). Only these keys are read.

    Returns:
        None. The figure is created through ``plt.figure`` and left to the
        active matplotlib backend to display.
    """
    cum_rewards = history["cum_rewards"]
    chosen_arms = history["arms"]
    chosen_arms_reward = history["arm_rewards"]

    fig = plt.figure(figsize=[30, 16])

    # Top-left: running average reward over the iterations.
    ax2 = fig.add_subplot(221)
    ax2.plot(cum_rewards, label="avg rewards")
    ax2.set_xlabel("Iteration")
    ax2.set_ylabel("Average reward")
    ax2.set_title("Cumulative Rewards")

    # Top-right: how often each arm was pulled.
    ax3 = fig.add_subplot(222)
    ax3.bar(range(len(chosen_arms)), chosen_arms, label="chosen arms")
    ax3.set_xlabel("Arms")
    ax3.set_ylabel("Number of pulls of each arms")
    ax3.set_title("Chosen Actions")

    # Bottom-left: total reward collected from each arm.
    ax4 = fig.add_subplot(223)
    ax4.bar(range(len(chosen_arms_reward)), chosen_arms_reward, label="chosen arms reward")
    ax4.set_xlabel("Arms")
    ax4.set_ylabel("Total rewards per arm")
    ax4.set_title("Chosen Arm Rewards")