In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import import_ipynb

import RL_attacker_polling_nn as ctrl_mod
import RL_attacker_polling_AMQ1 as exp_mod_AMQ1
import RL_attacker_polling_AMQ2 as exp_mod_AMQ2

plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams['font.size'] = 20

def norm_curve_from_history(vector_list, max_steps=10000):
    """Return the normalized distance-to-final-value curve of a history.

    Each record in ``vector_list`` is one snapshot of a parameter set. The
    curve is the Euclidean distance between every snapshot and the last
    snapshot that is kept, divided by the distance of the first snapshot,
    so it starts at 1 and ends at 0.

    Records may be scalars, 1-D vectors or higher-dimensional arrays; every
    record is flattened to a single row before distances are computed, so
    the norm is taken over all entries of a record.

    Args:
        vector_list: Sequence of snapshots, all with the same shape.
        max_steps: Only the first ``max_steps`` snapshots are used (plain
            slice semantics).

    Returns:
        ``(steps, norm_diffs)`` where ``steps`` is ``0..n-1`` and
        ``norm_diffs`` the normalized distances, or ``(None, None)`` when
        there is no snapshot to work with.
    """
    if not vector_list:
        return None, None

    history = vector_list[:max_steps]
    if not history:
        # The truncation (e.g. max_steps=0) left nothing; report "no curve"
        # the same way as for an empty history instead of failing on arr[-1].
        return None, None

    arr = np.asarray(history, dtype=float)
    if arr.ndim != 2:
        # One row per snapshot, so scalar and matrix-valued records are
        # handled exactly like plain vectors.
        arr = arr.reshape(arr.shape[0], -1)

    final_vec = arr[-1]
    diffs = np.linalg.norm(arr - final_vec, axis=1)
    if diffs[0] == 0:
        # First and last snapshot coincide: nothing to normalize by.
        norm_diffs = np.zeros_like(diffs)
    else:
        norm_diffs = diffs / diffs[0]
    steps = np.arange(len(norm_diffs))
    return steps, norm_diffs

def run_single_experiment(i, sim_time=2000.0, max_steps=10000, output_dir='outputs', fig_dir='figures'):
    """Run experiment ``i`` for the three controllers and save its curves.

    Arrival rates (three values drawn from [4, 5)) and the service rate
    (drawn from [23, 26)) come from a generator seeded with ``1000 + i``.
    The same system configuration is then simulated with the NNQ, AMQ1 and
    AMQ2 modules, each after re-seeding NumPy's global RNG. For AMQ1/AMQ2
    the last learned ``w`` is additionally run through the module's
    fixed-policy system; that run's results are not used further here.

    The normalized convergence curve of each training history is written to
    ``output_dir`` (file index ``i + 50``) and all curves are plotted into
    ``fig_dir/convergence_{i}.png``.

    Args:
        i: Experiment index; drives every seed and the file names.
        sim_time: Simulation time handed to every system.
        max_steps: Maximum number of history records per curve.
        output_dir: Directory for the ``.out`` curve files (created if needed).
        fig_dir: Directory for the figure (created if needed).

    Returns:
        ``(arrival_rates, service_rate, fig_path)``.
    """
    param_rng = np.random.default_rng(1000 + i)
    arrival_rates = param_rng.uniform(4.0, 5.0, size=3).tolist()
    service_rate = float(param_rng.uniform(23.0, 26.0))
    ini_jobs_list = [10, 10, 10]

    # Configuration shared by every system built for this experiment.
    system_kwargs = dict(
        queue_nums=3,
        arrival_rates=arrival_rates,
        ini_jobs_list=ini_jobs_list,
        service_rate=service_rate,
        switch_time=0.1,
        simulation_time=sim_time,
    )

    # Baseline controller.
    np.random.seed(2000 + i)
    ctrl_sys = ctrl_mod.PollingSystem(**system_kwargs)
    ctrl_sys.run_simulation()

    # AMQ variants: train, then run the fixed-policy system with the last w.
    trained = {}
    for tag, module, seed in (
        ('AMQ1', exp_mod_AMQ1, 3000 + i),
        ('AMQ2', exp_mod_AMQ2, 10000 + i),
    ):
        np.random.seed(seed)
        trainer = module.PollingSystem(**system_kwargs)
        trainer.run_simulation()
        optimal_wk = trainer.rl_history[-1]['w']

        fixed_sys = module.PollingSystemWithFixedPolicy(optimal_wk=optimal_wk, **system_kwargs)
        fixed_sys.run_simulation_with_fixed_policy()
        trained[tag] = trainer

    # (output file name, legend label, line colour, parameter history)
    series = [
        (f'NNQ_{i+50}.out', 'NNQ(Baseline)', 'red',
         [rec['params'] for rec in ctrl_sys.rl_history]),
        (f'AMQ1_{i+50}.out', 'AMQ1(Our method)', 'green',
         [rec['w'] for rec in trained['AMQ1'].rl_history]),
        (f'AMQ2_{i+50}.out', 'AMQ2(Our method)', 'purple',
         [rec['w'] for rec in trained['AMQ2'].rl_history]),
    ]

    curves = []
    for out_name, label, color, history in series:
        steps, curve = norm_curve_from_history(history, max_steps=max_steps)
        if curve is None:
            # An empty history is still saved, as an empty file.
            curve = np.array([])
        curves.append((out_name, label, color, steps, curve))

    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(fig_dir, exist_ok=True)

    for out_name, _, _, _, curve in curves:
        np.savetxt(os.path.join(output_dir, out_name), curve, fmt='%.8f')

    plt.figure(figsize=(10, 6))
    for _, label, color, steps, curve in curves:
        if steps is None:
            continue
        plt.plot(steps, curve, label=label, color=color, linewidth=2)

    plt.xlabel('Update step')
    plt.ylabel('Normalized distance to limit')
    plt.title(f'Experiment {i} (3 queues)')
    plt.ylim(0.0, 1.05)
    plt.grid(True)
    plt.legend()
    plt.tight_layout()

    fig_path = os.path.join(fig_dir, f'convergence_{i}.png')
    plt.savefig(fig_path, dpi=200)
    plt.close()

    return arrival_rates, service_rate, fig_path

# Batch settings shared by every experiment in this run.
n_experiments = 50
sim_time = 2000.0
max_steps = 10000
output_dir = 'outputs'
fig_dir = 'figures'

# Experiments are numbered 1..n_experiments; one summary line is printed per run.
for i in range(1, n_experiments + 1):
    arrival_rates, service_rate, fig_path = run_single_experiment(
        i,
        sim_time=sim_time,
        max_steps=max_steps,
        output_dir=output_dir,
        fig_dir=fig_dir,
    )
    print(f'Experiment {i}: arrival_rates={arrival_rates}, service_rate={service_rate:.3f}, figure={fig_path}')


Experiment 1: arrival_rates=[4.612594928569951, 4.015700467820332, 4.1876895768819296], service_rate=25.574, figure=figures_new\convergence_1.png
Experiment 2: arrival_rates=[4.380821159483116, 4.357189337670947, 4.747612317068191], service_rate=24.168, figure=figures_new\convergence_2.png
Experiment 3: arrival_rates=[4.187757368268634, 4.256705810218822, 4.494679327188852], service_rate=24.982, figure=figures_new\convergence_3.png
Experiment 4: arrival_rates=[4.000880452740715, 4.130151312127632, 4.138477365298591], service_rate=24.142, figure=figures_new\convergence_4.png
Experiment 5: arrival_rates=[4.082219173067894, 4.930562042388604, 4.28728402836777], service_rate=24.916, figure=figures_new\convergence_5.png
Experiment 6: arrival_rates=[4.336921433572824, 4.600856943799655, 4.297831299409175], service_rate=23.713, figure=figures_new\convergence_6.png
Experiment 7: arrival_rates=[4.071224937460886, 4.815980345177943, 4.642928512918657], service_rate=23.590, figure=figures_new\con