In [8]:
from Bandits.src.tools import create_k_armed_bandit

In [9]:
from pyecharts.globals import ThemeType
from pyecharts import options as opts
from pyecharts.charts import Boxplot, Line

# Build one k-armed bandit and draw a box plot of a reward sample for each arm.
k = 10
arms, q_values = create_k_armed_bandit(k = k)

c = Boxplot(init_opts=opts.InitOpts(theme=ThemeType.INFOGRAPHIC))
# Single category on the x axis; every arm becomes its own series within it.
c.add_xaxis(["Bras du bandit"])
for arm_number, arm in enumerate(arms, start=1):
    # Sample from the arm being iterated (previously arms[0] was sampled on
    # every iteration, so all boxes described the first arm).
    rewards = arm.generate_rewards(100).tolist()
    # prepare_data turns the raw sample into the box-plot summary values.
    c.add_yaxis(f"{arm_number}", c.prepare_data([rewards]))
c.set_global_opts(title_opts=opts.TitleOpts(title="Gains des K-bras"))

c.load_javascript()
c.render_notebook()

In [13]:
from tqdm import tqdm
import numpy as np
from Bandits.src.Agents import GreedyAgent, EpsilonGreedyAgent

# Experiment settings, defined once so the loop bound and the averaging
# divisor can never drift apart.
steps = 1000   # actions played by each agent in a single run
n_runs = 200   # number of independent runs averaged together
k = 10         # number of bandit arms

# Running sums of the rewards collected by each agent; divided by n_runs
# after the loop to obtain averages.
# NOTE(review): assumes `agent.rewards` holds one reward per step (length
# `steps`) after `n_steps` — confirm against the Agents module.
average_rewards = np.zeros(steps)
e_greedy_average_rewards = np.zeros(steps)
explorer_average_rewards = np.zeros(steps)

for run in tqdm(range(n_runs)):
    # Create a fresh bandit and fresh agents for this run
    arms, q_values = create_k_armed_bandit(k = k)
    greedy = GreedyAgent(nb_arms = k)
    egreedy = EpsilonGreedyAgent(nb_arms = k, epsilon=.1)
    explorer = EpsilonGreedyAgent(nb_arms = k, epsilon=.3)

    # Every agent plays `steps` actions against the same set of arms
    greedy.n_steps(arms, steps)
    egreedy.n_steps(arms, steps)
    explorer.n_steps(arms, steps)

    # Accumulate this run's rewards into the running sums
    average_rewards += greedy.rewards
    e_greedy_average_rewards += egreedy.rewards
    explorer_average_rewards += explorer.rewards

# Turn the sums into averages over all runs
average_rewards /= n_runs
e_greedy_average_rewards /= n_runs
explorer_average_rewards /= n_runs

100%|██████████| 200/200 [01:18<00:00,  2.56it/s]


In [14]:
# Line chart comparing the averaged reward curve of the three agents.
c = (
    Line(init_opts=opts.InitOpts(theme=ThemeType.INFOGRAPHIC))
    # All global options are set in ONE call: a later set_global_opts call
    # that passes only title_opts would re-apply the default tooltip and
    # legend options and discard the ones configured here.
    .set_global_opts(
        title_opts=opts.TitleOpts(title="Average rewards per agent"),
        tooltip_opts=opts.TooltipOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(type_="value", name = "Steps", name_location = "center"),
        # NOTE(review): pos_right=.1 is a bare number, not a percentage string
        # such as "10%" — confirm this is the intended legend offset.
        legend_opts=opts.LegendOpts(pos_right=.1),
        yaxis_opts=opts.AxisOpts(
            type_="value",
            name = "Average reward",
            name_location = "center",
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
        ),
    )
    # Steps are numbered from 1; data is converted to native Python lists
    # before being handed to pyecharts.
    .add_xaxis(xaxis_data=np.arange(1, len(average_rewards)+1).tolist())
    .add_yaxis(
        series_name="Greedy agent",
        y_axis=average_rewards.tolist(),
        symbol="emptyCircle",
        is_symbol_show=False,
        label_opts=opts.LabelOpts(is_show=False),
    )
    .add_yaxis(
        series_name="Epsilon-greedy agent (e = 0.1)",
        y_axis=e_greedy_average_rewards.tolist(),
        symbol="emptyCircle",
        is_symbol_show=False,
        label_opts=opts.LabelOpts(is_show=False),
    )
    .add_yaxis(
        series_name="'Explorer' agent (e : 0.3)",
        y_axis=explorer_average_rewards.tolist(),
        symbol="emptyCircle",
        is_symbol_show=False,
        label_opts=opts.LabelOpts(is_show=False),
    )
)

c.load_javascript()
c.render_notebook()