In [None]:
import numpy as np
import matplotlib.pyplot as plt

from NonStationaryEnvironment import NonStationaryEnvironment
from CUMSUM_UCB import CUMSUM_UCB
from SW_UCB import SW_UCB
from Solver import Solver

In [None]:
# Experiment size: `runs` independent repetitions, each simulating `T` rounds
# (T is also handed to the environment constructor).
runs, T = 20, 200

# Accumulators filled by the simulation loop: one per-round reward trace is
# appended to each list for every completed run.
ucb_cum_sum_results, ucb_sw_results, optimal_results = [], [], []

In [None]:
# Build an environment over T rounds and let the solver compute, for every
# phase, the optimal configuration together with its a-priori reward.
env = NonStationaryEnvironment(T)
solver = Solver(env)
optimal_configurations, optimal_rewards = solver.find_optimal()

# Report the solver's answer phase by phase (both results are indexed by phase).
for phase in range(env.n_phases):
    print(f"OPTIMAL CONFIGURATION Phase {phase}")
    print(optimal_configurations[phase])
    print(f"OPTIMAL A-PRIORI REWARD {phase}")
    print(optimal_rewards[phase])

In [None]:
# Run the experiment `runs` times. In every run both learners (CUMSUM_UCB and
# SW_UCB) and the per-phase optimal configuration are played for T rounds on
# the same environment instance, and the per-round rewards are recorded.

# Start from empty accumulators so this cell can be re-executed on its own:
# without the reset a second execution would append another `runs` traces to
# the old ones, and it would fail outright once the lists have been turned
# into numpy arrays (ndarray has no `append`).
ucb_cum_sum_results = []
ucb_sw_results = []
optimal_results = []

for r in range(runs):
    print("\n########## RUN", r+1, "\n")

    # A new environment and new learner instances are created for every run.
    env = NonStationaryEnvironment(T)
    ucb_cum_sum = CUMSUM_UCB(env)
    ucb_sw = SW_UCB(env)

    ucb_cum_sum_rounds = []
    ucb_sw_rounds = []
    optimal_rounds = []

    for i in range(T):
        # One seed per time step, passed to all three env.round calls below.
        # NOTE(review): presumably this makes the three policies face the same
        # random draw -- confirm in NonStationaryEnvironment.round.
        seed = np.random.randint(1, 2 ** 30)

        # CUMSUM_UCB: choose a configuration, play it, learn from the outcome.
        ucb_cum_sum_configuration = ucb_cum_sum.pull()
        ucb_cum_sum_round_data = env.round(ucb_cum_sum_configuration, seed)
        ucb_cum_sum.update(ucb_cum_sum_round_data)
        ucb_cum_sum_rounds.append(ucb_cum_sum_round_data)

        # SW_UCB: same protocol with the same seed.
        ucb_sw_configuration = ucb_sw.pull()
        ucb_sw_round_data = env.round(ucb_sw_configuration, seed)
        ucb_sw.update(ucb_sw_round_data)
        ucb_sw_rounds.append(ucb_sw_round_data)

        # Optimal baseline: play the solver's configuration for the phase the
        # environment clock is currently in. env.t is read after both learners
        # have played.
        # NOTE(review): only this last call passes new_round=True -- presumably
        # that is what advances env.t; confirm in NonStationaryEnvironment.round.
        current_phase = int(env.t / env.phases_size)
        optimal_round_data = env.round(optimal_configurations[current_phase], seed, new_round=True)
        optimal_rounds.append(optimal_round_data)

    # Extract the scalar reward of every round as numpy arrays of length T.
    ucb_cum_sum_rewards = np.array([round_data.reward for round_data in ucb_cum_sum_rounds])
    ucb_sw_rewards = np.array([round_data.reward for round_data in ucb_sw_rounds])
    # Deliberately NOT called `optimal_rewards`: that name holds the solver's
    # per-phase a-priori rewards and must not be overwritten by this per-round
    # trace.
    optimal_run_rewards = np.array([round_data.reward for round_data in optimal_rounds])

    print("--------------------CUMSUM------------------")
    print("AVERAGE REWARD:")
    print("\t", np.average(ucb_cum_sum_rewards))
    print("AVERAGE REGRET:")
    print("\t", np.average(optimal_run_rewards - ucb_cum_sum_rewards))

    print("--------------------SW----------------------")
    print("AVERAGE REWARD:")
    print("\t", np.average(ucb_sw_rewards))
    print("AVERAGE REGRET:")
    print("\t", np.average(optimal_run_rewards - ucb_sw_rewards))

    # Keep this run's traces for the aggregate plots.
    ucb_cum_sum_results.append(ucb_cum_sum_rewards)
    ucb_sw_results.append(ucb_sw_rewards)
    optimal_results.append(optimal_run_rewards)

In [None]:
# Stack the per-run reward traces into 2-D arrays (one row per run, one column
# per round) so they can be averaged and subtracted element-wise.
ucb_cum_sum_results, ucb_sw_results, optimal_results = (
    np.array(per_run_traces)
    for per_run_traces in (ucb_cum_sum_results, ucb_sw_results, optimal_results)
)

# x coordinates 0 .. T-1 used by the shaded bands in the plots.
rounds = [*range(T)]

In [None]:
# REWARDS
# One figure per learner: mean per-round reward across runs with a +/- one
# standard deviation band, drawn against the mean reward of the optimal policy.

optimal = np.average(optimal_results, axis=0)

for algo_results, algo_name, line_fmt, band_color in (
    (ucb_cum_sum_results, "CUMSUM_UCB", 'g', 'green'),
    (ucb_sw_results, "SW_UCB", 'r', 'red'),
):
    # Aggregate over runs (axis 0) to get one value per round.
    means = np.average(algo_results, axis=0)
    std = np.std(algo_results, axis=0)

    fig, ax = plt.subplots()
    ax.set_title("Rewards - " + algo_name)
    ax.set_xlabel("t")
    ax.set_ylabel("rewards")
    ax.set_ylim(0, 50)
    # Major ticks every 50 rounds (these get the dashed grid), minor ticks in between.
    ax.set_xticks([0, 50, 100, 150, 200])
    ax.set_xticks([25, 75, 125, 175], minor=True)
    ax.grid(which='major', axis='x', linestyle='--')
    ax.plot(means, line_fmt, label=algo_name)
    ax.fill_between(rounds, means - std, means + std, alpha=0.3, color=band_color)
    ax.plot(optimal, 'b--', label="Optimal")
    ax.legend()

plt.show()

In [None]:
# CUMULATIVE REWARDS
# One figure per learner: rewards are accumulated along the rounds of each run
# (axis 1) and then averaged across runs (axis 0); the band is +/- one standard
# deviation. The y-axis is left to autoscale.

optimal = np.average(np.cumsum(optimal_results, axis=1), axis=0)

for algo_results, algo_name, line_fmt, band_color in (
    (ucb_cum_sum_results, "CUMSUM_UCB", 'g', 'green'),
    (ucb_sw_results, "SW_UCB", 'r', 'red'),
):
    # Running total within each run, computed once and reused for both statistics.
    cumulative = np.cumsum(algo_results, axis=1)
    means = np.average(cumulative, axis=0)
    std = np.std(cumulative, axis=0)

    fig, ax = plt.subplots()
    ax.set_title("Cumulative Rewards - " + algo_name)
    ax.set_xlabel("t")
    ax.set_ylabel("cumulative rewards")
    # Major ticks every 50 rounds (these get the dashed grid), minor ticks in between.
    ax.set_xticks([0, 50, 100, 150, 200])
    ax.set_xticks([25, 75, 125, 175], minor=True)
    ax.grid(which='major', axis='x', linestyle='--')
    ax.plot(means, line_fmt, label=algo_name)
    ax.fill_between(rounds, means - std, means + std, alpha=0.3, color=band_color)
    ax.plot(optimal, 'b--', label="Optimal")
    ax.legend()

plt.show()

In [None]:
# CUMULATIVE REGRETS
# One figure per learner: the per-round regret (optimal reward minus learner
# reward) is accumulated along each run (axis 1), then averaged across runs
# (axis 0) with a +/- one standard deviation band.

for algo_results, algo_name, line_fmt, band_color in (
    (ucb_cum_sum_results, "CUMSUM_UCB", 'g', 'green'),
    (ucb_sw_results, "SW_UCB", 'r', 'red'),
):
    # Running regret within each run, computed once and reused for both statistics.
    cumulative_regret = np.cumsum(optimal_results - algo_results, axis=1)
    means = np.average(cumulative_regret, axis=0)
    std = np.std(cumulative_regret, axis=0)

    fig, ax = plt.subplots()
    ax.set_title("Cumulative Regrets - " + algo_name)
    ax.set_xlabel("t")
    ax.set_ylabel("cumulative regrets")
    ax.set_ylim(0, 650)
    # Major ticks every 50 rounds (these get the dashed grid), minor ticks in between.
    ax.set_xticks([0, 50, 100, 150, 200])
    ax.set_xticks([25, 75, 125, 175], minor=True)
    ax.grid(which='major', axis='x', linestyle='--')
    ax.plot(means, line_fmt)
    ax.fill_between(rounds, means - std, means + std, alpha=0.3, color=band_color)

plt.show()