# Analysis

Train the various algorithms and show your results. 

You must plot the reward obtained by your agent per step and the total regret accumulated so far.

This is an open-ended assignment, so feel free to experiment. Extra credit is available for more polished plots (for example, using Seaborn).

In [23]:
# main.py

import numpy as np
import matplotlib.pyplot as plt
from bandits import Bandit
from agents import *

def train_agents(bandit, agents, num_steps):
    """Run every agent for ``num_steps`` steps and record regret after each action.

    Args:
        bandit: Accepted for interface compatibility; this function does not
            read it. Regret is taken from each agent's own ``bandit``
            attribute instead.
        agents: Sequence of agent objects exposing ``act()`` and a ``bandit``
            attribute with a ``get_regret()`` method. Agents are used as
            dictionary keys, so they must be hashable.
        num_steps: Number of rounds; in each round every agent acts once, in
            the order given.

    Returns:
        Dict mapping each agent to the list of ``get_regret()`` values read
        immediately after each of its actions (one entry per step).
    """
    history = {}
    for learner in agents:
        history[learner] = []

    for _ in range(num_steps):
        # Agents are interleaved within a step rather than run back-to-back.
        for learner in agents:
            learner.act()
            observed = learner.bandit.get_regret()
            history[learner].append(observed)

    return history

def plot_regret(regrets, num_steps):
    """Plot one regret curve per agent on a single figure and display it.

    Args:
        regrets: Dict mapping each agent to its sequence of regret values.
            Each curve is labelled with the agent's class name.
        num_steps: Length of the x-axis; values are plotted against
            ``range(num_steps)``, so each sequence is expected to have
            ``num_steps`` entries.
    """
    plt.figure(figsize=(12, 8))

    for learner, curve in regrets.items():
        algo_name = type(learner).__name__
        plt.plot(range(num_steps), curve, label=algo_name)

    # Figure decorations.
    plt.title('Cumulative Regret of Different Bandit Algorithms')
    plt.xlabel('Steps')
    plt.ylabel('Cumulative Regret')
    plt.legend()

    plt.show()

if __name__ == "__main__":
    # Script entry point: build one Bernoulli bandit problem, run each
    # algorithm on it for a fixed number of steps, and plot the regret curves.
    import copy

    num_arms = 10
    num_steps = 1000

    bandit = Bandit(num_arms, "Bernoulli")

    # train_agents reads regret through agent.bandit.get_regret(), i.e. from
    # the bandit object itself. If every agent held the same instance, each
    # curve would reflect the pulls of all algorithms combined. Each agent
    # therefore gets its own copy of the same problem.
    # NOTE(review): assumes Bandit instances are deep-copyable and that a copy
    # keeps the same arm parameters -- confirm against bandits.py.
    agents = [
        GreedyAgent(copy.deepcopy(bandit), initialQ=0),
        epsGreedyAgent(copy.deepcopy(bandit), epsilon=0.1),
        UCBAAgent(copy.deepcopy(bandit), c=2),
        GradientBanditAgent(copy.deepcopy(bandit), alpha=0.1),
        ThompsonSamplerAgent(copy.deepcopy(bandit)),
    ]

    regrets = train_agents(bandit, agents, num_steps)
    plot_regret(regrets, num_steps)


TypeError: '<=' not supported between instances of 'int' and 'NoneType'