# Analysis

Train the various algorithms and show your results. 

You must plot the reward obtained by your agent per step and the total regret accumulated so far.

This is an open-ended assignment, so feel free to play around. Extra credit for more beautiful plots (you can check out Seaborn). 

In [7]:
import copy

import numpy as numpy
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt 

# import seaborn as sns

from bandits import Bandit
from agents import *

In [8]:
def train_agent(agent, num_steps):
    """Run ``agent`` for ``num_steps`` steps and record its learning curves.

    Args:
        agent: object exposing an ``act()`` method, a ``rewards`` attribute
            and a ``bandit`` attribute that itself has a ``regret`` attribute.
        num_steps: number of times ``agent.act()`` is called.

    Returns:
        A ``(rewards_per_step, tot_regret)`` tuple of two lists, each of
        length ``num_steps``. Entry ``i`` of the first list is
        ``agent.rewards / (i + 1)`` read right after the (i+1)-th call to
        ``act()``; entry ``i`` of the second is ``agent.bandit.regret`` read
        at the same moment.
    """
    rewards_per_step = []
    tot_regret = []
    for step in range(1, num_steps + 1):
        # The value returned by act() is not needed: the curves are built
        # from the state the agent and its bandit keep.
        agent.act()
        # NOTE(review): presumably agent.rewards is the cumulative reward
        # since the agent was created, making this a running mean -- confirm.
        rewards_per_step.append(agent.rewards / step)
        tot_regret.append(agent.bandit.regret)
    return rewards_per_step, tot_regret

In [9]:
# One figure with two stacked panels sharing the x axis; plot_rewards draws
# on ax1 and ax2.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(8, 8))

In [10]:
def plot_rewards(rewards_per_step,tot_regret,agent_name):
    """Add one agent's curves to the module-level axes ``ax1`` and ``ax2``.

    ``rewards_per_step`` is drawn on ``ax1`` (y label 'Mean Reward') and
    ``tot_regret`` on ``ax2`` (y label 'Total Regret'). Both lines are
    labelled ``agent_name`` and each axis legend is refreshed.

    NOTE(review): both axes are switched to a log y-scale, which cannot
    represent zero or negative values -- confirm the plotted series stay
    positive.
    """
    panels = (
        (ax1, rewards_per_step, 'Mean Reward'),
        (ax2, tot_regret, 'Total Regret'),
    )
    for axis, series, y_label in panels:
        axis.plot(series, label=agent_name)
        axis.set_yscale('log')
        axis.set_xlabel('Time Steps')
        axis.set_ylabel(y_label)
        axis.legend()


In [11]:
# Number of act() calls each agent is trained for in the experiment loop.
num_steps = 1_000

In [12]:
# Train every agent on every bandit type and add its curves to the shared
# figure; the legend entry is "<AgentClass> (<bandit_type>)".
for bandit_type in Bandit.BANDIT_TYPES:
    bandit=Bandit(n=10,type=bandit_type)
    # Each agent gets its own deep copy of the bandit. train_agent reads the
    # regret from agent.bandit.regret, so handing the same instance to all
    # agents would let one agent's pulls leak into the next agent's regret
    # curve. Copying one template keeps the problem identical for everyone.
    agents=[
        GreedyAgent(copy.deepcopy(bandit),0.0),
        epsGreedyAgent(copy.deepcopy(bandit),0.1),
        UCBAAgent(copy.deepcopy(bandit),2),
        GradientBanditAgent(copy.deepcopy(bandit),0.1),
        ThompsonSamplerAgent(copy.deepcopy(bandit))
    ]
    for agent in agents:
        rewards_per_step,tot_regret=train_agent(agent,num_steps)
        plot_rewards(rewards_per_step,tot_regret, f"{type(agent).__name__} ({bandit_type})")
plt.show()