In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
sys.path.append('../..')
from SBML import ZonoTorch as zt
from SBML import SBRL as sbrl

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Seed both random number generators so repeated runs draw the same noise.
seed = 0
seedtorch = torch.manual_seed(seed)  # the default torch Generator
seednp = np.random.seed(seed)        # np.random.seed returns None, so this is None

In [None]:
# Hopper-v3 wrapped by the project's GymEnvironment helper.
# NOTE(review): reset_noise_scale is presumably forwarded to the gym
# environment to make the initial state (almost) deterministic - confirm.
senv = sbrl.GymEnvironment(
    'Hopper-v3',
    options={"reset_noise_scale": 1e-6},
    device=DEVICE,
)

In [None]:
def mad_attack(actor,state,epsilon,alpha,attack_iter):
    """Craft a bounded observation perturbation that maximises action deviation.

    Starting from a uniformly random point within ``alpha`` of ``state``, the
    attack takes ``attack_iter`` signed-gradient steps of size ``alpha`` that
    increase the MSE between the actor's action on the perturbed state and its
    action on the clean state. After every step the result is clamped back
    into the element-wise interval ``[state - epsilon, state + epsilon]``.

    Args:
        actor: module mapping a state tensor to an action tensor; must
            provide ``zero_grad()``.
        state: clean observation tensor. It is left unmodified.
        epsilon: maximum absolute perturbation per state component.
        alpha: gradient step size, also the half-width of the random start.
        attack_iter: number of gradient steps; ``0`` returns the random start.

    Returns:
        A detached tensor shaped like ``state`` holding the perturbed state.
    """
    ori_state = state.clone().detach()
    # The clean action is a fixed target, so no graph is needed for it.
    with torch.no_grad():
        gt_action = actor(ori_state).detach()

    criterion = torch.nn.MSELoss()
    # Random start: at the clean state the MSE gradient is exactly zero, so
    # the signed-gradient steps would never move without this initial noise.
    noise = np.random.uniform(-alpha, alpha, size=state.shape)
    # Build a new tensor instead of `state += ...` so the caller's tensor is
    # not mutated, and follow the input's device/dtype rather than a global.
    adv_state = ori_state + torch.as_tensor(
        noise, dtype=ori_state.dtype, device=ori_state.device
    )

    for _ in range(attack_iter):
        adv_state = adv_state.clone().detach().requires_grad_(True)
        action = actor(adv_state)

        # Descending on the negated MSE is ascending on the action deviation.
        loss = -criterion(action, gt_action)
        actor.zero_grad()
        loss.backward()
        stepped = adv_state - alpha * torch.sign(adv_state.grad)
        adv_state = torch.clamp(stepped, ori_state - epsilon, ori_state + epsilon)

    return adv_state.detach()

In [None]:
def random_attack(actor,state,epsilon,alpha,attack_iter):
    """Perturb ``state`` with uniform noise drawn from ``[-epsilon, epsilon)``.

    ``actor``, ``alpha`` and ``attack_iter`` are unused; they are accepted so
    the function has the same signature as ``mad_attack``.

    Args:
        actor: ignored.
        state: clean observation tensor. It is left unmodified.
        epsilon: half-width of the uniform noise added to every component.
        alpha: ignored.
        attack_iter: ignored.

    Returns:
        A detached tensor shaped like ``state`` holding the noisy state.
    """
    noise = np.random.uniform(-epsilon, epsilon, size=state.shape)
    # Out-of-place add: `state += ...` would silently modify the caller's
    # tensor. The noise follows the input's device and dtype.
    perturbation = torch.as_tensor(noise, dtype=state.dtype, device=state.device)
    return (state + perturbation).detach()

In [None]:
from torch import nn

# Policy network that the saved state dicts (.pth files) are loaded into.
# It is an MLP with 11 inputs, hidden layers of 400 and 300 units, and
# 3 tanh-bounded outputs.
# NOTE(review): 11 / 3 presumably match the Hopper-v3 observation and action
# sizes - confirm against the environment.
actor_SAPC = nn.Sequential(
    nn.Linear(11, 400),
    nn.ReLU(),
    nn.Linear(400, 300),
    nn.ReLU(),
    nn.Linear(300, 3),
    nn.Tanh(),
)


## MAD Attack

In [None]:
# Sweep configuration for the robustness evaluation.
epsilon = [0, 0.01, 0.04, 0.07, 0.1, 0.3]  # perturbation budgets passed to the attack
n = 10    # NOTE(review): presumably the number of attack iterations - confirm
eps = 50  # episodes rolled out per evaluated checkpoint
agents = list(range(5))  # suffixes of the `agents_<type>_<id>` directories
types = ['PAPC', 'NAIVE', 'GRAD', 'MAD', 'SAPC']  # agent-type tags used in directory names

# Number of checkpoints evaluated from each agent directory.
equal_agent_num = 5


In [None]:

# Episode-return statistics, indexed as
# [checkpoint, position in `agents`, position in `epsilon`, position in `types`].
results_shape = (equal_agent_num, len(agents), len(epsilon), len(types))
mean_rewards = np.zeros(results_shape)
median_rewards = np.zeros(results_shape)
min_rewards = np.zeros(results_shape)
std_rewards = np.zeros(results_shape)

for t in range(len(types)):
    for e in range(len(epsilon)):
        # Index result arrays by position, not by agent id, so `agents` does
        # not have to be exactly 0..len(agents)-1.
        for a_idx, a in enumerate(agents):
            agent_dir = f'agents_{types[t]}_{a}'
            # Keep only checkpoint files, ordered oldest -> newest by ctime.
            agent_list = [x for x in os.listdir(agent_dir) if x.endswith('.pth')]
            agent_list.sort(key=lambda x: os.path.getctime(f'{agent_dir}/{x}'))
            for m in range(equal_agent_num):
                # NOTE(review): -(m+2) skips the most recent checkpoint and
                # needs at least equal_agent_num + 1 files - confirm intended.
                # map_location lets CUDA-saved checkpoints load on a CPU host.
                actor_SAPC.load_state_dict(
                    torch.load(f'{agent_dir}/{agent_list[-(m+2)]}', map_location=DEVICE)
                )
                actor_SAPC.to(DEVICE)

                rewards = np.zeros(eps)

                # The counter must not be called `n`: that would shadow the
                # attack-iteration setting `n` and make the attack strength
                # grow with the episode index.
                for episode in range(eps):
                    done = False
                    total_reward = 0
                    state = senv.reset()
                    while not done:
                        # Step size epsilon/7, `n` gradient steps per observation.
                        adv_state = mad_attack(actor_SAPC, state, epsilon[e], epsilon[e]/7, n)
                        # Acting needs no gradients; only the attack does.
                        with torch.no_grad():
                            action = actor_SAPC(adv_state)
                        state, reward, done, _ = senv.step(action)
                        total_reward += reward
                    rewards[episode] = total_reward

                print(f'Agent {m} in {types[t]} with epsilon {epsilon[e]} and attack {a} finished with {rewards.mean()},{rewards.min()}')

                mean_rewards[m,a_idx,e,t] = rewards.mean()
                std_rewards[m,a_idx,e,t] = rewards.std()
                median_rewards[m,a_idx,e,t] = np.median(rewards)
                min_rewards[m,a_idx,e,t] = rewards.min()

# Save the results into the directory the plotting cell loads them from,
# so a fresh Restart & Run All plots the numbers that were just computed.
os.makedirs('mad_results', exist_ok=True)
np.save('mad_results/mad_mean_rewards.npy', mean_rewards)
np.save('mad_results/mad_std_rewards.npy', std_rewards)
np.save('mad_results/mad_median_rewards.npy', median_rewards)
np.save('mad_results/mad_min_rewards.npy', min_rewards)


In [None]:
mean_rewards = np.load('mad_results/mad_mean_rewards.npy')
std_rewards = np.load('mad_results/mad_std_rewards.npy')
median_rewards = np.load('mad_results/mad_median_rewards.npy')
min_rewards = np.load('mad_results/mad_min_rewards.npy')


def _plot_reward_band(values, ylabel, colors=None):
    """Plot one statistic against epsilon for every agent type.

    For each entry of ``types`` the statistic is first averaged over axis 0
    of ``values`` and then summarised over the next axis as a mean line with
    a shaded band of +/- one standard error (population std, ddof=0, divided
    by the square root of the number of samples on that axis). This is a
    standard-error band, not a 95% confidence interval.

    Args:
        values: 4-D array whose last axis indexes ``types`` and whose third
            axis indexes ``epsilon``.
            NOTE(review): axes 0 and 1 are presumably checkpoint and training
            run, as written by the evaluation cell - confirm.
        ylabel: label for the y axis.
        colors: optional sequence with one colour per type; when omitted
            matplotlib's default colour cycle is used.

    Returns:
        ``(fig, stats)`` where ``stats`` holds one ``(mean, standard_error)``
        pair of arrays per type.
    """
    fig, ax = plt.subplots(figsize=(10, 5))
    stats = []
    for t in range(len(types)):
        per_run = np.mean(values[:, :, :, t], axis=0)
        center = np.mean(per_run, axis=0)
        # Divide by the number of samples the std was actually taken over.
        sem = np.std(per_run, axis=0) / np.sqrt(per_run.shape[0])
        # Only pass `color` when one was requested: an explicit color=None
        # would stop fill_between from drawing from the colour cycle.
        style = {} if colors is None else {'color': colors[t]}
        ax.plot(epsilon, center, label=types[t], **style)
        ax.fill_between(epsilon, center - sem, center + sem, alpha=0.2, **style)
        stats.append((center, sem))
    ax.set_xlabel('Epsilon')
    ax.set_ylabel(ylabel)
    ax.legend()
    return fig, stats


# Mean of the per-checkpoint minimum returns, with a +/- 1 standard error band
fig_min, _ = _plot_reward_band(min_rewards, 'Mean Min Rewards')
fig_min.savefig('mad_mean_min_rewards.png')

# Mean of the per-checkpoint median returns, with a +/- 1 standard error band
fig_median, _ = _plot_reward_band(median_rewards, 'Mean Median Rewards')
fig_median.savefig('mad_mean_median_rewards.png')

# Mean of the per-checkpoint mean returns, with a +/- 1 standard error band
colors = ['black', (0.03140, 0.56470, 0.00000), (0.03920, 0.36470, 0.00000), (0.07843, 0.80196, 0.00000), (0.69020, 0.82350, 1.00000)]
fig_mean, mean_stats = _plot_reward_band(mean_rewards, 'Value under MAD attack', colors=colors)
for t in range(len(types)):
    mean_mean_rewards, sem_mean_rewards = mean_stats[t]
    print(mean_mean_rewards,types[t])
    print(mean_mean_rewards+sem_mean_rewards,types[t])
    print(mean_mean_rewards-sem_mean_rewards,types[t])

# Only needed for the (currently disabled) TikZ export below.
import tikzplotlib

#tikzplotlib.clean_figure()
#tikzplotlib.save("mad_mean_mean_rewards.tex")
#plt.show()
#plt.savefig('mad_mean_mean_rewards.png')

