#### [+] Create Dataset

NOTE: This is a one-time call.

NOTE: The dataset should be constructed in the same manner as it is generated by the `Gene` class,
i.e., as a collection of Future MD specifications of fixed sequence length, with all input sequences stacked vertically.

For example, each item in the collection will be of shape (N, T), where
* N = number of input sequences, which is 4 in the case of S2S-offloading - Mt = (Dt, Ot, Ct, Zt)
* T = sequence length, i.e., the number of future MD specifications

The partial action sequence is constructed by the simulating environment.

In [None]:
import experiment
# One-time dataset creation: instantiate ExpA with `experiment.auto_device`
# and call create_ds with fixed application/path seeds.
# NOTE(review): the device is read from the `experiment` module here, while the
# loading cell reads `Exp.auto_device` from the class - confirm both exist.
experiment.ExpA(experiment.auto_device).create_ds(
    n_apps=60,
    n_paths=1000,
    app_seed=1206,
    path_seed=1990,
)

# Experiment

### Import experiment

In [None]:
from experiment import ExpA as Exp #<--- select experiment

# Instantiate the selected experiment with `Exp.auto_device`, bind the result
# of `load_ds` to `exp`, and print it.
exp = Exp( Exp.auto_device ).load_ds(split_ratio=0.9) #<--- select train-test split
print(exp)

### Create training and validation environments

In [None]:
# Create the training environment (`tenv`) and the validation environments
# (`venvs`) from the experiment, then print the number of training states and
# the number of validation environments.
# NOTE(review): seed/heed/heeds are all passed as None - presumably this selects
# the experiment's default behaviour; confirm in the experiment class.
tenv = exp.tEnv(seed=None, heed=None)
venvs = exp.vEnvs(heeds=None)
print(len(tenv.states), len(venvs))

# Train

### Create Policy

In [None]:
# Create the policy object and report its parameter count, both as the raw
# value returned by Count() and scaled to millions.
pie = exp.pie()
n_parameters = pie.Count()

print(f'Parameters = {n_parameters} \t ~ ({n_parameters*1e-6:.2f} million)')

In [None]:
# Echo the policy's `theta` attribute as the cell output.
# NOTE(review): presumably the underlying network - it is the object later
# passed as `former=` to the attention viewers.
pie.theta

### Training Loop

In [None]:
# The result of this call is also what the training cell passes as
# `min_memory_pies`.
exp.list_Huristic() #<---- list all available heuristic behaviour policies

In [None]:
# Run training on `tenv` with `venvs` for validation, updating `pie`.
# The return value is intentionally discarded.
# NOTE(review): results are presumably written under `save_at`; confirm in
# exp.train.
_ = exp.train(
    tenv,
    venvs,
    pie,
    learning_rate=1e-5,
    lref=0.05,
    weight_decay=0.0,
    epochs=500,
    batch_size=6,
    learn_times=50,
    tuf=5,
    epsilon_range=(1.0, 0.1),
    # Heuristic behaviour policies, referenced by name.
    explore_pies=[
        'pie_Huristic_Random',
        'pie_Huristic_Edge_Cloud',
        'pie_Huristic_Round_Robin_Edge_Cloud',
    ],
    explore_per_pie=True,
    min_memory_pies=exp.list_Huristic(),
    double=False,
    validation_interval=0.01,
    checkpoint_interval=0.1,
    gradient_clipping=1.0,
    save_at='__results__',
)

# Test Setup

In [None]:
import s2sdqn
import torch as tt
import torch.nn as nn
import os
import numpy as np
import matplotlib.pyplot as plt
from known import ktf 

### Create Policy

In [None]:
# Rebind `pie` to a newly created policy object (a saved policy is loaded into
# it in the "Load Trained Policy" cell); the cell output is its Count() value.
pie = exp.pie()
pie.Count()

In [None]:
# Call the policy's Show() method.
# NOTE(review): what it prints or returns is defined by the policy class.
pie.Show()

### Load Trained Policy

In [None]:
# Directory and file name of the saved policy.
load_from, load_name = '__results__', 'final.pie'

# Load the saved policy into `pie`, then call eval() on it.
checkpoint_path = os.path.join(load_from, load_name)
pie.load(checkpoint_path)
pie.eval()


In [None]:
# (optional) Load and plot the training results for this policy.
# The path is relative to the current working directory (a single-argument
# os.path.join returns its argument unchanged, so it is omitted here).
# NOTE(review): the policy itself is loaded from `load_from`; confirm whether
# 'results.npz' should be read from that directory as well.
results = s2sdqn.dqn.Eval.load_plot_training_result('results.npz')

# Validation

## Select validation environments

In [None]:
# Validate on the first validation environment only, one episode per environment.
episodes = 1
envs = venvs[:1]

## s2s-DQN Policy

In [None]:
# Validate the s2s-DQN policy `pie` on the selected environments and print the
# five values returned by validate_policy.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie=pie,
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")
# Keep the s2s-DQN actions under a separate name; the following cells rebind
# `validate_acts`.
v_dqn = validate_acts

## Fixed Policies

### Place all on Edge-0

In [None]:
# Fixed policy: action 0 at every one of the exp.T steps ("Edge-0").
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie=s2sdqn.pie.FixedPie(Aseq=[0 for _step in range(exp.T)]),
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### Place all on Edge-1

In [None]:
# Fixed policy: action 1 at every one of the exp.T steps ("Edge-1").
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie=s2sdqn.pie.FixedPie(Aseq=[1 for _step in range(exp.T)]),
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### Place all on Edge-2

In [None]:
# Fixed policy: action 2 at every one of the exp.T steps ("Edge-2").
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie=s2sdqn.pie.FixedPie(Aseq=[2 for _step in range(exp.T)]),
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### Place all on Cloud

In [None]:
# Fixed policy: action 3 at every one of the exp.T steps ("Cloud").
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie=s2sdqn.pie.FixedPie(Aseq=[3 for _step in range(exp.T)]),
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

## Random Policies

### Full Random

In [None]:
# Random policy over the range Alow=0 .. Ahigh=exp.A ("Full Random").
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie=s2sdqn.pie.RandomPie(Alow=0, Ahigh=exp.A),
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### Edge Random

In [None]:
# Random policy with the upper bound lowered to exp.A-1 ("Edge Random").
# NOTE(review): presumably this excludes the last (cloud) action; whether Ahigh
# is inclusive is defined by RandomPie.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie=s2sdqn.pie.RandomPie(Alow=0, Ahigh=exp.A-1),
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

## Heuristic Policies

In [None]:
# Echo the heuristic policies reported by the experiment; the cells below pass
# such policies to validate_policy by name.
exp.list_Huristic()

### pie_Huristic_Edge

In [None]:
# Heuristic policy selected by name: 'pie_Huristic_Edge'.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie='pie_Huristic_Edge',
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### pie_Huristic_Edge_Cloud

In [None]:
# Heuristic policy selected by name: 'pie_Huristic_Edge_Cloud'.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie='pie_Huristic_Edge_Cloud',
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### pie_Huristic_Edge_Cloud_Limited

In [None]:
# Heuristic policy selected by name: 'pie_Huristic_Edge_Cloud_Limited'.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie='pie_Huristic_Edge_Cloud_Limited',
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### pie_Huristic_Random

In [None]:
# Heuristic policy selected by name: 'pie_Huristic_Random'.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie='pie_Huristic_Random',
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### pie_Huristic_Random_Edge

In [None]:
# Heuristic policy selected by name: 'pie_Huristic_Random_Edge'.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie='pie_Huristic_Random_Edge',
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### pie_Huristic_Round_Robin_Edge

In [None]:
# Heuristic policy selected by name: 'pie_Huristic_Round_Robin_Edge'.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie='pie_Huristic_Round_Robin_Edge',
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### pie_Huristic_Round_Robin_Edge_Cloud

In [None]:
# Heuristic policy selected by name: 'pie_Huristic_Round_Robin_Edge_Cloud'.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie='pie_Huristic_Round_Robin_Edge_Cloud',
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

### pie_Huristic_Zonal_Edge

In [None]:
# Heuristic policy selected by name: 'pie_Huristic_Zonal_Edge'.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie='pie_Huristic_Zonal_Edge',
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

## Custom Action Sequence

In [None]:
# Fixed policy driven by a hand-written action sequence (cycling 0,1,2,3).
# NOTE(review): the sequence length is hard-coded; confirm it matches the
# number of steps the selected environments expect.
(
    mean_return,
    mean_steps,
    sum_return,
    sum_steps,
    validate_acts,
) = s2sdqn.dqn.Eval.validate_policy(
    pie=s2sdqn.pie.FixedPie(Aseq=[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2]),
    envs=envs,
    episodes=episodes,
    episodic_verbose=0,
    verbose_result=True,
)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")

## FDA - Genetic Algorithm

In [None]:
for fenv in envs:
    print(f'fenv: {fenv}')
    # Problem dimensions read from the environment.
    n_actions = fenv.A
    n_edges = fenv.sim.E
    n_steps = fenv.T

    # ---- callbacks handed to the optimizer ---------------------------------

    def randflowF():
        """Return a random flow via s2sdqn.fda.random_flow(0, n_actions+1, n_steps)."""
        return s2sdqn.fda.random_flow(0, n_actions+1, n_steps)

    def flow2sol(x):
        """Map each flow entry to an action index: int(abs(entry)) % n_actions."""
        return [int(abs(entry)) % n_actions for entry in x]

    def flowcost(x):
        """Cost of a flow: the negated total reward of its actions on `fenv`."""
        total_reward, _steps, _ = s2sdqn.dqn.Eval.test_solution(
            fenv, flow2sol(x), max_steps=None, verbose=0)
        return -total_reward

    # ---- optimization ------------------------------------------------------

    # Base flows: one constant-valued flow per index j in [n_edges, n_actions).
    top_flows = [np.full(n_steps, float(j)) for j in range(n_edges, n_actions)]

    Flow_X, Flow_fitness = s2sdqn.fda.optimize(
        MAXITER=500,
        randflowF=randflowF,
        costF=flowcost,
        beta=12,
        alpha=12,
        base_flows=top_flows,
        seed=None,
    )

    for i, (fx, fi) in enumerate(zip(Flow_X, Flow_fitness)):
        print(f'\n{i=}\n{flow2sol(fx)=}\n{fi=}\n')

    # Drop duplicate action sequences, keeping the first occurrence and its cost.
    selected_flows = []
    selected_fitness = []
    for flow, fcost in zip(Flow_X, Flow_fitness):
        sol = flow2sol(flow)
        if sol in selected_flows:
            continue
        selected_flows.append(sol)
        selected_fitness.append(fcost)
    selected_n = len(selected_flows)

    # Order the unique solutions by ascending cost as (actions, cost) pairs.
    sorted_cost = np.argsort(selected_fitness)
    top_flows = [
        (np.array(selected_flows[rank]), selected_fitness[rank])
        for rank in sorted_cost
    ]

    print(top_flows)
    # Replay the lowest-cost action sequence on this environment.
    (
        mean_return,
        mean_steps,
        sum_return,
        sum_steps,
        validate_acts,
    ) = s2sdqn.dqn.Eval.validate_policy(
        envs=[fenv],
        pie=s2sdqn.pie.FixedPie(Aseq=top_flows[0][0]),
        episodes=1,
        episodic_verbose=0,
        verbose_result=True,
    )
    print(f"""
    {mean_return=}
    {mean_steps=}
    {sum_return=}
    {sum_steps=}
    {validate_acts=}
    """)




# Visualize attention scores for s2s-DQN

### Perform forward Pass

In [None]:
# Run one validation pass with attention storing enabled so that the viewer
# cells below have attention scores to display.
pie.theta.do_store_attention(True)
try:
    mean_return, mean_steps, sum_return, sum_steps, validate_acts = \
    s2sdqn.dqn.Eval.validate_policy(
        pie=pie,
        envs=envs,
        episodes=episodes,
        episodic_verbose=0,
        verbose_result=True
    )
finally:
    # Always switch storing back off, even if validation raises or is
    # interrupted; otherwise later cells would keep storing attention.
    pie.theta.do_store_attention(False)
print(f"""
{mean_return=}
{mean_steps=}
{sum_return=}
{sum_steps=}
{validate_acts=}
""")
v_dqn = validate_acts

### View Self-Attention scores at the encoders

In [None]:
# Both encoder viewers receive the same arguments, so assemble them once:
# the network, no specific batch index, and the keyword set `ktf.Aview.dca`.
_encoder_view_args = dict(former=pie.theta, batch_index=None, **ktf.Aview.dca)

# Multi-head view first, then the second (non-"mh") encoder view.
ktf.Aview.view_mh_attention_encoders(**_encoder_view_args)
ktf.Aview.view_attention_encoders(**_encoder_view_args)

### View Self-Attention scores at the decoder

In [None]:
# Decoder self-attention: cross=False is passed to both viewers, along with
# the keyword set `ktf.Aview.dca`.
_decoder_self_view_args = dict(
    former=pie.theta,
    cross=False,
    batch_index=None,
    **ktf.Aview.dca,
)

# Multi-head view first, then the second (non-"mh") decoder view.
ktf.Aview.view_mh_attention_decoder(**_decoder_self_view_args)
ktf.Aview.view_attention_decoder(**_decoder_self_view_args)

### View Cross-Attention scores at the decoder

In [None]:
# Decoder cross-attention views. `cross=True` selects the cross-attention
# scores; this cell previously passed cross=False, which made it an exact
# duplicate of the self-attention cell.
ktf.Aview.view_mh_attention_decoder(
    former=pie.theta,
    cross=True,
    batch_index=None,
    **ktf.Aview.dca
)
ktf.Aview.view_attention_decoder(
    former=pie.theta,
    cross=True,
    batch_index=None,
    **ktf.Aview.dca
)

# End