# Multi-armed bandits
Aplique el algoritmo bandit ε-greedy con:
 
 - ε= 0 (greedy)
 - ε= 0.01
 - ε= 0.1 
 
a un problema k-armed bandit con k = 10 acciones.

Considere recompensas con medias aleatorias y desvío estándar constante σ.

Analice experimentalmente el efecto del desvío estándar σ evaluando tres casos:
- σ = 0 (determinístico)
- σ = 1
- σ = 10

¿Qué conclusiones puede sacar?

In [None]:
import itertools

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from armed_bandits import EpsilonGreedyBandit

# Global plot styling: seaborn's "notebook" context, and LaTeX-based text
# rendering in matplotlib (this needs a working LaTeX installation).
sns.set_context("notebook")
plt.rcParams.update({"text.usetex": True})

In [None]:
# Result containers filled by the simulation cell:
#   bandits           -> keyed by (sigma, epsilon), holds the bandit of that run
#   rewards_per_sigma -> keyed by sigma, holds one per-arm reward array
bandits, rewards_per_sigma = {}, {}

In [None]:
# Experiment configuration.
k, reps = 10, 20_000  # number of arms, number of pulls per run
sigma_list = [0, 1, 10]  # reward standard deviations to compare
epsilon_list = [0, 0.01, 0.1]  # exploration rates to compare (0 = greedy)

In [None]:
# Run one epsilon-greedy experiment for every (sigma, epsilon) combination.
#
# NOTE: call np.random.seed(...) once *before* this loop for reproducible
# runs; seeding inside the loop would replay the same draws for every run.
for sigma in sigma_list:
    # True mean reward of each arm. The means are drawn once per sigma so that
    # every epsilon is evaluated on the same problem, and with a scale that
    # does not depend on sigma so that sigma == 0 still yields distinct arms
    # (deterministic rewards) instead of k identical all-zero arms.
    reward_mean = np.random.normal(0, 1, k)

    # Kept for the per-sigma plot: the true mean reward of each arm.
    rewards_per_sigma[sigma] = reward_mean

    for epsilon in epsilon_list:
        bandit = EpsilonGreedyBandit(k, epsilon)

        for _ in range(reps):
            arm = bandit.select_arm()
            # Draw a fresh noisy reward on every pull; sigma is the reward
            # standard deviation (scale 0 simply returns the arm's mean).
            reward = np.random.normal(reward_mean[arm], sigma)
            bandit.update(arm, reward)

        bandits[(sigma, epsilon)] = bandit

In [None]:
# For every experiment, save two figures: how many times each arm was
# selected, and the distribution of the bandit's estimated values.
for (sigma, epsilon), bandit in bandits.items():
    # Bar chart of selection counts per arm.
    counts_fig, counts_ax = plt.subplots()
    sns.barplot(
        x=range(k),
        y=bandit.selected_arm_counts,
        hue=range(k),
        ax=counts_ax,
    )
    counts_ax.set_xlim(-1, k)
    counts_ax.set_xlabel("Brazo")
    counts_ax.set_ylabel("Cantidad de veces seleccionado")
    # An empty legend hides the one produced by the hue mapping.
    counts_ax.legend([], [], frameon=False)
    counts_fig.savefig(
        f"../img/arm_sigma_{sigma}_epsilon_{epsilon}.png",
        transparent=True,
        bbox_inches="tight",
    )
    plt.close(counts_fig)

    # Violin plot of the estimated values.
    values_fig, values_ax = plt.subplots()
    sns.violinplot(x=bandit.estimated_values, native_scale=True, ax=values_ax)
    values_ax.legend([], [], frameon=False)
    values_ax.set_xlabel("Recompensa")
    values_fig.savefig(
        f"../img/values_sigma_{sigma}_epsilon_{epsilon}.png",
        transparent=True,
        bbox_inches="tight",
    )
    plt.close(values_fig)

In [None]:
# Save one bar chart per sigma showing the stored per-arm reward values.
for sigma, arm_rewards in rewards_per_sigma.items():
    fig, ax = plt.subplots()
    sns.barplot(x=range(k), y=arm_rewards, hue=range(k), ax=ax)
    ax.set_xlabel("Brazo")
    ax.set_ylabel("Recompensa")
    # An empty legend hides the one produced by the hue mapping.
    ax.legend([], [], frameon=False)

    fig.savefig(
        f"../img/rewards_sigma_{sigma}.png",
        transparent=True,
        bbox_inches="tight",
    )
    plt.close(fig)