Skeleton for the meta learning projet. Defines the model, the dataset and the function to be optimized

# Imports and hyperparameters

In [1]:
#imports and hyperparameters
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, models
import numpy as np
from math import log


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Hyper-parameters
num_epochs = 10
data_percent=0.10

#bounds for learning rate, batch size
lr_lb=1e-5
lr_hb=1e-2
bs_lb=20
bs_hb=200

lr_norm_type='exp' #'lin' or 'exp' for linear or exp-like regularisation

  from .autonotebook import tqdm as notebook_tqdm


cuda


# Model and dataset definition

In [2]:
#resnet18 model
def get_model():
  return models.resnet18(pretrained=False).to(device)

In [3]:
# CIFAR-10 dataset

# Image preprocessing modules
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

cifar_train = torchvision.datasets.CIFAR10(root='data/',
                                             train=True, 
                                             transform=transform,
                                             download=True)

cifar_test = torchvision.datasets.CIFAR10(root='data/',
                                            train=False, 
                                            transform=transforms.ToTensor())


len_train = int(len(cifar_train)*data_percent)
len_test = int(len(cifar_test)*data_percent)
train_dataset = torch.utils.data.Subset(cifar_train, np.arange(len_train))
test_dataset = torch.utils.data.Subset(cifar_test, np.arange(len_test))

Files already downloaded and verified


# Objective function

In [4]:
# For updating learning rate
def update_lr(optimizer, lr):    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [5]:
#Normalisation
#normalized lr and accuracy are between -1 and 1
#Then linear transformation to get them between the bounds defined in the hyperparameter
if lr_norm_type == 'lin':
    a_lr,b_lr = np.polyfit([-1,1],[lr_lb,lr_hb],deg=1)
    a_bs,b_bs = np.polyfit([-1,1],[bs_lb,bs_hb],deg=1)
    
    def denormalize(normalized_learning_rate, normalized_batch_size):
        learning_rate= a_lr * normalized_learning_rate + b_lr
        batch_size= int(a_bs * normalized_batch_size + b_bs)
    
        return learning_rate, batch_size
    
    def denormalize_array(normalized_learning_rate, normalized_batch_size):
        learning_rate= a_lr * normalized_learning_rate + b_lr
        batch_size= (a_bs * normalized_batch_size + b_bs).astype(int)
    
        return learning_rate, batch_size
elif lr_norm_type == 'exp':
    a_lr,b_lr = np.polyfit([-1,1],[log(lr_lb,10),log(lr_hb,10)],deg=1)
    a_bs,b_bs = np.polyfit([-1,1],[bs_lb,bs_hb],deg=1)

    def denormalize(normalized_learning_rate, normalized_batch_size):
        learning_rate= 10**(a_lr * normalized_learning_rate + b_lr)
        batch_size= int(a_bs * normalized_batch_size + b_bs)
    
        return learning_rate, batch_size
    
    def denormalize_array(normalized_learning_rate, normalized_batch_size):
        learning_rate= np.power(10, a_lr * normalized_learning_rate + b_lr)
        batch_size= (a_bs * normalized_batch_size + b_bs).astype(int)
        
        return learning_rate, batch_size
else:
    print('ERROR: %s not a vadid normalisation'%lr_norm_type)

print(denormalize(-1,-1))
print(denormalize(1,1))
print(denormalize_array(np.array([-1,1]),np.array([-1,1])))

(1e-05, 20)
(0.01000000000000001, 200)
(array([1.e-05, 1.e-02]), array([ 20, 200]))


In [6]:
#objective function

def func(normalized_learning_rate, normalized_batch_size): 
  '''generates and train a ResNet18 model on CIFAR10. accuracy after num_epochs epochs'''

  #denormalise
  learning_rate, batch_size = denormalize(normalized_learning_rate, normalized_batch_size)
  #learning_rate, batch_size = normalized_learning_rate, normalized_batch_size
  
  # Data loader using batch size
  train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True)

  test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False)


  #model
  model = get_model()

  #train the model
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

  total_step = len(train_loader)
  curr_lr = learning_rate
  for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
      images = images.to(device)
      labels = labels.to(device)

      # Forward pass
      outputs = model(images)
      loss = criterion(outputs, labels)

      # Backward and optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

          
      # Decay learning rate
      if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

  #test the model
  model.eval()
  with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)
      _, predicted = torch.max(outputs.data, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
  
  #compute accuracy
  accuracy =  correct / total

  return accuracy


## Test of the function

In [7]:
#test the function to check
%time func(0, -1)



CPU times: user 58.4 s, sys: 11.9 s, total: 1min 10s
Wall time: 1min 10s


0.478

# Optimisation code here

## Env definition

In [8]:
import gym
from gym import spaces
import numpy as np
import random
from torch import nn

In [9]:
class ResNetEnv_1(gym.Env): #continuous variations env
    metadata = {"render.modes": ['print']}

    def __init__(self,render_mode='print'):

        #parameters to be learned
        self.lr = (random.random() - 0.5) * 2 # learning rate
        self.batch_size = (random.random() - 0.5) * 2 #batchsize
        self.accuracy = func(self.lr,self.batch_size)

        self.lr_history=[self.lr]
        self.batch_size_history=[self.batch_size]
        self.accuracy_history=[self.accuracy]
        
        self.numstep = 0

        #assert render_mode is None or render_mode in self.metadata["render.modes"]
        #self.render_mode=render_mode


        # Observation space: learning rate and batch size
        #self.observation_space = spaces.Dict(
        #    {
        #        "lr":spaces.Box(0,10,dtype=float),            #TODO: take the log of the learning rate?
        #        "batch":spaces.Discrete(1000)
        #    }
        #)

        # Action space: one continuous variationn of the lr and one discrete variation of the batch size
        ### Dict not supported by PPO  ###
        #self.action_space = spaces.Dict(
        #    {
        #        "d_lr":spaces.Box(-10,10,dtype=float),
        #        "d_batch":spaces.Discrete(100, start = -50)
        #    }
        #)

        #Instead, we have to take a 2 dim box and change batch variation to an int in the step
        #lr: from -10 to 10, continuous
        #batch size: from -50 to 50, to be converted to an int

        #self.observation_space = spaces.Box(low=np.array([0, 1]), high=np.array([10, 1000]), dtype=np.float32)
        #self.action_space = spaces.Box(low=np.array([-10, 10]), high=np.array([10, 50]), dtype=np.float32)

        #Learning rate and batch size are normalized and are between 0 and 1
        self.observation_space = spaces.Box(low=-1,high=1,shape=(2,), dtype=np.float32)
        self.action_space = spaces.Box(low=-1,high=1,shape=(2,), dtype=np.float32)

    
    def _get_obs(self):
      return np.array([self.lr,self.batch_size], dtype=np.float32)

    def _get_info(self):
        return {'Accuracy':self.accuracy}   #Additional info to be placed here

    def reset(self):
      self.lr=(random.random() - 0.5) * 2
      self.batch_size=(random.random() - 0.5) * 2
      self.accuracy = func(self.lr,self.batch_size)
      
      #break marker
      self.lr_history.append(np.nan)
      self.batch_size_history.append(np.nan)
      self.accuracy_history.append(np.nan)
    
      self.lr_history.append(self.lr)
      self.batch_size_history.append(self.batch_size)
      self.accuracy_history.append(self.accuracy)

      observation = self._get_obs()
      info = self._get_info()

      #Cuz im lost
      #print('env reset')
      self.numstep = 0
      
      return observation

    def step(self,action):

      #update learning rate, batch_size
      self.lr += action[0]
      self.batch_size += action[1]

      #make sure they are within the bonds
      self.lr = np.clip(self.lr,-1,1)
      self.batch_size = np.clip(self.batch_size,-1,1)

      #reward is difference in accuracy between this run and the previous run
      self.accuracy = func(self.lr, self.batch_size)
      reward = self.accuracy - self.accuracy_history[-1]

      #update history
      self.lr_history.append(self.lr)
      self.batch_size_history.append(self.batch_size)
      self.accuracy_history.append(self.accuracy)

      #print cuz i m loost
      #self.render(mode='print')
      self.numstep += 1
      lr,batch_size=denormalize(self.lr,self.batch_size)
      print('step # %i Learning rate: %f,    batch size: %i,   accuracy:%f'%(self.numstep,lr,batch_size,self.accuracy))

      #returns
      terminated = False #no terminal condition for now
      observation = self._get_obs()
      info = self._get_info()

      return observation, reward, terminated, info

    def render(self,mode):
      #assert mode is None or mode in self.metadata["render.modes"]
      if mode=='print':
        lr,batch_size=denormalize(self.lr,self.batch_size)
        print('Learning rate: %f,    batch size: %i,   accuracy:%f'%(lr,batch_size,self.accuracy))


In [10]:
class ResNetEnv_2(gym.Env):    #based from https://doi.org/10.1016/j.neucom.2021.12.086
    metadata = {"render.modes": []}

    def __init__(self):
        
        #initial context
        lambda0 = [(random.random() - 0.5) * 2, (random.random() - 0.5) * 2] #random first policy
        r0 = func(lambda0[0], lambda0[1])
        c0 = ([0,0], lambda0, [r0])
        
        self.qphi = lambda l : np.array([item for sublist in l for item in sublist], dtype=np.float32) #flattens the context to a list
        
        self.buffer = [c0] #contains all tested context
        
        self.c_rmax = c0  #context with the most accuracy
        
        #Observation space: space of z (here z = flattened c)
        self.observation_space = spaces.Box(low=-1,high=1,shape=(5,), dtype=np.float32)
        
        #Action space: sapce of (normalized) hyperparameter
        self.action_space = spaces.Box(low=-1,high=1,shape=(2,), dtype=np.float32)
        
        self.buffer_hist = []


    def reset(self):
        
        self.buffer_hist.append(self.buffer)
        
        lambda0 = [(random.random() - 0.5) * 2, (random.random() - 0.5) * 2] #random first policy
        r0 = func(lambda0[0], lambda0[1])
        c0 = ([0,0], lambda0, [r0])
        
        self.buffer = [c0]

        observation = self.qphi(c0)
        
        return observation

    def step(self,action):

        #new context
        previous_params = self.buffer[-1][1]
        current_params = action
        reward = func(current_params[0], current_params[1])
        
        c = (previous_params, current_params, [reward])
        
        self.buffer.append(c)
    
        #print cuz i m loost
        #self.render(mode='print')
        #self.numstep += 1
        #lr,batch_size=denormalize(self.lr,self.batch_size)
        #print('step # %i Learning rate: %f,    batch size: %i,   accuracy:%f'%(self.numstep,lr,batch_size,self.accuracy))
        
        if reward > self.c_rmax[2][0]:
            self.c_rmax = c
        
        #returns
        terminated = False
        observation = self.qphi(c)
        info = {}

        return observation, reward, terminated, info



## Run PPO

https://stable-baselines3.readthedocs.io/en/master/modules/ppo.html

In [11]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

#check env
#from stable_baselines3.common.env_checker import check_env
#check_env(ResNetEnv_1(), warn=True, skip_render_check=True)
#check_env(ResNetEnv_2(), warn=True, skip_render_check=True)

### Long 100 run

In [12]:
#continuous env
env = make_vec_env(ResNetEnv_1, n_envs=1)
model_continuous_long = PPO("MlpPolicy", env, verbose=2,n_steps=20,batch_size=4, n_epochs=20, gamma = 0.95)
%time model_continuous_long.learn(total_timesteps=100)



Using cuda device
step # 1 Learning rate: 0.001200,    batch size: 130,   accuracy:0.414000
step # 2 Learning rate: 0.000129,    batch size: 52,   accuracy:0.489000
step # 3 Learning rate: 0.004077,    batch size: 99,   accuracy:0.458000
step # 4 Learning rate: 0.000129,    batch size: 147,   accuracy:0.355000
step # 5 Learning rate: 0.000393,    batch size: 57,   accuracy:0.507000
step # 6 Learning rate: 0.000059,    batch size: 62,   accuracy:0.458000
step # 7 Learning rate: 0.001851,    batch size: 20,   accuracy:0.503000
step # 8 Learning rate: 0.000711,    batch size: 20,   accuracy:0.494000
step # 9 Learning rate: 0.010000,    batch size: 39,   accuracy:0.433000
step # 10 Learning rate: 0.010000,    batch size: 20,   accuracy:0.305000
step # 11 Learning rate: 0.010000,    batch size: 20,   accuracy:0.445000
step # 12 Learning rate: 0.008872,    batch size: 40,   accuracy:0.420000
step # 13 Learning rate: 0.010000,    batch size: 130,   accuracy:0.403000
step # 14 Learning rate: 0

step # 81 Learning rate: 0.000422,    batch size: 85,   accuracy:0.447000
step # 82 Learning rate: 0.000524,    batch size: 175,   accuracy:0.497000
step # 83 Learning rate: 0.000025,    batch size: 101,   accuracy:0.404000
step # 84 Learning rate: 0.000010,    batch size: 191,   accuracy:0.304000
step # 85 Learning rate: 0.000316,    batch size: 101,   accuracy:0.430000
step # 86 Learning rate: 0.000124,    batch size: 150,   accuracy:0.474000
step # 87 Learning rate: 0.000010,    batch size: 158,   accuracy:0.291000
step # 88 Learning rate: 0.000256,    batch size: 77,   accuracy:0.477000
step # 89 Learning rate: 0.000110,    batch size: 110,   accuracy:0.504000
step # 90 Learning rate: 0.000010,    batch size: 200,   accuracy:0.283000
step # 91 Learning rate: 0.000030,    batch size: 110,   accuracy:0.412000
step # 92 Learning rate: 0.000020,    batch size: 200,   accuracy:0.362000
step # 93 Learning rate: 0.000010,    batch size: 110,   accuracy:0.339000
step # 94 Learning rate: 0.

<stable_baselines3.ppo.ppo.PPO at 0x7fc3eb4f2a10>

In [13]:
#second env
env = make_vec_env(ResNetEnv_2, n_envs=1)
model_adv_long = PPO("MlpPolicy", env, verbose=2,n_steps=20,batch_size=4, n_epochs=20, gamma = 0.95)
%time model_adv_long.learn(total_timesteps=100)

Using cuda device
----------------------------
| time/              |     |
|    fps             | 0   |
|    iterations      | 1   |
|    time_elapsed    | 943 |
|    total_timesteps | 20  |
----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 2           |
|    time_elapsed         | 1744        |
|    total_timesteps      | 40          |
| train/                  |             |
|    approx_kl            | 0.013652066 |
|    clip_fraction        | 0.0575      |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.85       |
|    explained_variance   | -0.293      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.96        |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.0171     |
|    std                  | 1.01        |
|    value_loss           | 4.88        |
-----------------------

<stable_baselines3.ppo.ppo.PPO at 0x7fc50969f910>

# short run with 10 rep

In [14]:
n_runs = 10

#continuous env
trained_model_continuous_list = []

for i in range(n_runs):
    print('Run %i'%(i+1))
    env = make_vec_env(ResNetEnv_1, n_envs=1)
    
    model = PPO("MlpPolicy", env, verbose=2,n_steps=2,batch_size=2, n_epochs=20, gamma = 0.95)
    model.learn(total_timesteps=32)
    
    trained_model_continuous_list.append(model)
    
#second_env
trained_model_adv_list = []

for i in range(n_runs):
    print('Run %i'%(i+1))
    env = make_vec_env(ResNetEnv_2, n_envs=1)
    
    model = PPO("MlpPolicy", env, verbose=2,n_steps=2,batch_size=2, n_epochs=20, gamma = 0.95)
    model.learn(total_timesteps=32)
    
    trained_model_adv_list.append(model)

Using cuda device
step # 1 Learning rate: 0.010000,    batch size: 54,   accuracy:0.370000
step # 2 Learning rate: 0.010000,    batch size: 34,   accuracy:0.321000
----------------------------
| time/              |     |
|    fps             | 0   |
|    iterations      | 1   |
|    time_elapsed    | 117 |
|    total_timesteps | 2   |
----------------------------
step # 3 Learning rate: 0.000948,    batch size: 20,   accuracy:0.533000
step # 4 Learning rate: 0.010000,    batch size: 110,   accuracy:0.423000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 2           |
|    time_elapsed         | 210         |
|    total_timesteps      | 4           |
| train/                  |             |
|    approx_kl            | 0.013314217 |
|    clip_fraction        | 0.05        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.84       |
|    explained_variance 

step # 21 Learning rate: 0.000023,    batch size: 110,   accuracy:0.416000
step # 22 Learning rate: 0.000742,    batch size: 200,   accuracy:0.398000
------------------------------------------
| time/                   |              |
|    fps                  | 0            |
|    iterations           | 11           |
|    time_elapsed         | 1027         |
|    total_timesteps      | 22           |
| train/                  |              |
|    approx_kl            | 0.0020910203 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.81        |
|    explained_variance   | 0            |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0411      |
|    n_updates            | 200          |
|    policy_gradient_loss | -0.00846     |
|    std                  | 0.984        |
|    value_loss           | 0.00452      |
------------------------------------------
step # 23 Learning rate: 0.010000

step # 9 Learning rate: 0.000130,    batch size: 121,   accuracy:0.500000
step # 10 Learning rate: 0.001056,    batch size: 31,   accuracy:0.472000
----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 5          |
|    time_elapsed         | 538        |
|    total_timesteps      | 10         |
| train/                  |            |
|    approx_kl            | 0.01538685 |
|    clip_fraction        | 0.05       |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.85      |
|    explained_variance   | 0.204      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0958    |
|    n_updates            | 80         |
|    policy_gradient_loss | -0.0282    |
|    std                  | 1          |
|    value_loss           | 0.0139     |
----------------------------------------
step # 11 Learning rate: 0.007752,    batch size: 98,   accuracy:0.118000

step # 27 Learning rate: 0.001281,    batch size: 20,   accuracy:0.441000
step # 28 Learning rate: 0.001229,    batch size: 20,   accuracy:0.428000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 14          |
|    time_elapsed         | 1209        |
|    total_timesteps      | 28          |
| train/                  |             |
|    approx_kl            | 0.032470584 |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.81       |
|    explained_variance   | -1.38       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.125      |
|    n_updates            | 260         |
|    policy_gradient_loss | -0.0631     |
|    std                  | 0.985       |
|    value_loss           | 0.00162     |
-----------------------------------------
step # 29 Learning rate: 0.000039,    batch size: 110,

step # 15 Learning rate: 0.000316,    batch size: 200,   accuracy:0.453000
step # 16 Learning rate: 0.001427,    batch size: 200,   accuracy:0.534000
----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 8          |
|    time_elapsed         | 628        |
|    total_timesteps      | 16         |
| train/                  |            |
|    approx_kl            | 0.03223139 |
|    clip_fraction        | 0.375      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.82      |
|    explained_variance   | -12.1      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.101     |
|    n_updates            | 140        |
|    policy_gradient_loss | -0.0658    |
|    std                  | 0.988      |
|    value_loss           | 0.0127     |
----------------------------------------
step # 17 Learning rate: 0.002967,    batch size: 200,   accuracy:0.437

Using cuda device
step # 1 Learning rate: 0.010000,    batch size: 200,   accuracy:0.362000
step # 2 Learning rate: 0.004227,    batch size: 200,   accuracy:0.459000
---------------------------
| time/              |    |
|    fps             | 0  |
|    iterations      | 1  |
|    time_elapsed    | 73 |
|    total_timesteps | 2  |
---------------------------
step # 3 Learning rate: 0.010000,    batch size: 110,   accuracy:0.435000
step # 4 Learning rate: 0.000316,    batch size: 187,   accuracy:0.438000
------------------------------------------
| time/                   |              |
|    fps                  | 0            |
|    iterations           | 2            |
|    time_elapsed         | 126          |
|    total_timesteps      | 4            |
| train/                  |              |
|    approx_kl            | 0.0017106235 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.84        |
|    explained_va

step # 21 Learning rate: 0.010000,    batch size: 72,   accuracy:0.336000
step # 22 Learning rate: 0.010000,    batch size: 74,   accuracy:0.464000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 11          |
|    time_elapsed         | 1021        |
|    total_timesteps      | 22          |
| train/                  |             |
|    approx_kl            | 0.017191112 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.81       |
|    explained_variance   | -0.064      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0899     |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0462     |
|    std                  | 0.983       |
|    value_loss           | 0.00658     |
-----------------------------------------
step # 23 Learning rate: 0.010000,    batch size: 95, 

step # 9 Learning rate: 0.010000,    batch size: 137,   accuracy:0.406000
step # 10 Learning rate: 0.001564,    batch size: 120,   accuracy:0.415000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 5           |
|    time_elapsed         | 304         |
|    total_timesteps      | 10          |
| train/                  |             |
|    approx_kl            | 0.021800756 |
|    clip_fraction        | 0.1         |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.82       |
|    explained_variance   | -98.1       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.105      |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.0562     |
|    std                  | 0.991       |
|    value_loss           | 0.0422      |
-----------------------------------------
step # 11 Learning rate: 0.000188,    batch size: 62,

step # 27 Learning rate: 0.000538,    batch size: 200,   accuracy:0.492000
step # 28 Learning rate: 0.010000,    batch size: 200,   accuracy:0.464000
----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 14         |
|    time_elapsed         | 900        |
|    total_timesteps      | 28         |
| train/                  |            |
|    approx_kl            | 0.02050376 |
|    clip_fraction        | 0.05       |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.8       |
|    explained_variance   | 0          |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0907    |
|    n_updates            | 260        |
|    policy_gradient_loss | -0.00439   |
|    std                  | 0.98       |
|    value_loss           | 0.00269    |
----------------------------------------
step # 29 Learning rate: 0.010000,    batch size: 200,   accuracy:0.449

step # 15 Learning rate: 0.009820,    batch size: 49,   accuracy:0.212000
step # 16 Learning rate: 0.010000,    batch size: 20,   accuracy:0.441000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 8           |
|    time_elapsed         | 1025        |
|    total_timesteps      | 16          |
| train/                  |             |
|    approx_kl            | 0.007256508 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.8        |
|    explained_variance   | 0.493       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0797     |
|    n_updates            | 140         |
|    policy_gradient_loss | -0.0123     |
|    std                  | 0.982       |
|    value_loss           | 0.00207     |
-----------------------------------------
step # 17 Learning rate: 0.005559,    batch size: 20, 

Using cuda device
step # 1 Learning rate: 0.000137,    batch size: 200,   accuracy:0.437000
step # 2 Learning rate: 0.000456,    batch size: 190,   accuracy:0.460000
---------------------------
| time/              |    |
|    fps             | 0  |
|    iterations      | 1  |
|    time_elapsed    | 77 |
|    total_timesteps | 2  |
---------------------------
step # 3 Learning rate: 0.000014,    batch size: 100,   accuracy:0.381000
step # 4 Learning rate: 0.000285,    batch size: 89,   accuracy:0.498000
------------------------------------------
| time/                   |              |
|    fps                  | 0            |
|    iterations           | 2            |
|    time_elapsed         | 138          |
|    total_timesteps      | 4            |
| train/                  |              |
|    approx_kl            | 0.0027785897 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.84        |
|    explained_var

step # 21 Learning rate: 0.000903,    batch size: 171,   accuracy:0.440000
step # 22 Learning rate: 0.000179,    batch size: 81,   accuracy:0.508000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 11          |
|    time_elapsed         | 962         |
|    total_timesteps      | 22          |
| train/                  |             |
|    approx_kl            | 0.034762263 |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.82       |
|    explained_variance   | -0.309      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.12       |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0485     |
|    std                  | 0.994       |
|    value_loss           | 0.00181     |
-----------------------------------------
step # 23 Learning rate: 0.000010,    batch size: 20,

step # 9 Learning rate: 0.010000,    batch size: 200,   accuracy:0.457000
step # 10 Learning rate: 0.010000,    batch size: 143,   accuracy:0.443000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 5           |
|    time_elapsed         | 612         |
|    total_timesteps      | 10          |
| train/                  |             |
|    approx_kl            | 0.018057346 |
|    clip_fraction        | 0.15        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.85       |
|    explained_variance   | -0.313      |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0653     |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.0384     |
|    std                  | 1.01        |
|    value_loss           | 0.0226      |
-----------------------------------------
step # 11 Learning rate: 0.010000,    batch size: 200

step # 27 Learning rate: 0.000010,    batch size: 190,   accuracy:0.302000
step # 28 Learning rate: 0.000010,    batch size: 200,   accuracy:0.269000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 14          |
|    time_elapsed         | 1121        |
|    total_timesteps      | 28          |
| train/                  |             |
|    approx_kl            | 0.010736287 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.87       |
|    explained_variance   | -4.52       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.102      |
|    n_updates            | 260         |
|    policy_gradient_loss | -0.0447     |
|    std                  | 1.02        |
|    value_loss           | 0.0102      |
-----------------------------------------
step # 29 Learning rate: 0.000117,    batch size: 20

step # 15 Learning rate: 0.000339,    batch size: 200,   accuracy:0.499000
step # 16 Learning rate: 0.000024,    batch size: 200,   accuracy:0.362000
------------------------------------------
| time/                   |              |
|    fps                  | 0            |
|    iterations           | 8            |
|    time_elapsed         | 790          |
|    total_timesteps      | 16           |
| train/                  |              |
|    approx_kl            | 0.0006623268 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.8         |
|    explained_variance   | -9.22        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0239      |
|    n_updates            | 140          |
|    policy_gradient_loss | -0.00808     |
|    std                  | 0.978        |
|    value_loss           | 0.00405      |
------------------------------------------
step # 17 Learning rate: 0.000010

Using cuda device
step # 1 Learning rate: 0.000260,    batch size: 139,   accuracy:0.483000
step # 2 Learning rate: 0.000686,    batch size: 49,   accuracy:0.346000
----------------------------
| time/              |     |
|    fps             | 0   |
|    iterations      | 1   |
|    time_elapsed    | 115 |
|    total_timesteps | 2   |
----------------------------
step # 3 Learning rate: 0.000831,    batch size: 95,   accuracy:0.567000
step # 4 Learning rate: 0.000026,    batch size: 122,   accuracy:0.386000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 2           |
|    time_elapsed         | 175         |
|    total_timesteps      | 4           |
| train/                  |             |
|    approx_kl            | 0.006000161 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.84       |
|    explained_variance

step # 21 Learning rate: 0.000010,    batch size: 58,   accuracy:0.354000
step # 22 Learning rate: 0.000010,    batch size: 20,   accuracy:0.393000
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 11          |
|    time_elapsed         | 1157        |
|    total_timesteps      | 22          |
| train/                  |             |
|    approx_kl            | 0.019897401 |
|    clip_fraction        | 0.025       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.86       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | -0.138      |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0169     |
|    std                  | 1.01        |
|    value_loss           | 0.000928    |
-----------------------------------------
step # 23 Learning rate: 0.000010,    batch size: 65, 

-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 5           |
|    time_elapsed         | 449         |
|    total_timesteps      | 10          |
| train/                  |             |
|    approx_kl            | 0.008367062 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.83       |
|    explained_variance   | 0.447       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.228       |
|    n_updates            | 80          |
|    policy_gradient_loss | -0.0325     |
|    std                  | 0.996       |
|    value_loss           | 1           |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 6           |
|    time_elapsed         | 508   

-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 16          |
|    time_elapsed         | 1274        |
|    total_timesteps      | 32          |
| train/                  |             |
|    approx_kl            | 0.039639324 |
|    clip_fraction        | 0.275       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.83       |
|    explained_variance   | 0.383       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.141      |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.0819     |
|    std                  | 0.996       |
|    value_loss           | 0.00379     |
-----------------------------------------
Using cuda device
---------------------------
| time/              |    |
|    fps             | 0  |
|    iterations      | 1  |
|    time_elapsed    | 85 |
|    total_timesteps | 2  |
----------------

-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 11          |
|    time_elapsed         | 1011        |
|    total_timesteps      | 22          |
| train/                  |             |
|    approx_kl            | 0.015962005 |
|    clip_fraction        | 0.025       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.84       |
|    explained_variance   | -16.6       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.191       |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0358     |
|    std                  | 0.999       |
|    value_loss           | 1.22        |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 12         |
|    time_elapsed         | 1059      

----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 6          |
|    time_elapsed         | 427        |
|    total_timesteps      | 12         |
| train/                  |            |
|    approx_kl            | 0.01895997 |
|    clip_fraction        | 0.1        |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.83      |
|    explained_variance   | 0.721      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0726    |
|    n_updates            | 100        |
|    policy_gradient_loss | -0.0383    |
|    std                  | 0.992      |
|    value_loss           | 0.0374     |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 7           |
|    time_elapsed         | 480         |
|    total_

Using cuda device
----------------------------
| time/              |     |
|    fps             | 0   |
|    iterations      | 1   |
|    time_elapsed    | 155 |
|    total_timesteps | 2   |
----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 2           |
|    time_elapsed         | 282         |
|    total_timesteps      | 4           |
| train/                  |             |
|    approx_kl            | 0.007942975 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.84       |
|    explained_variance   | 0.113       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00437     |
|    n_updates            | 20          |
|    policy_gradient_loss | -0.0371     |
|    std                  | 1           |
|    value_loss           | 0.309       |
-----------------------

----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 12         |
|    time_elapsed         | 1277       |
|    total_timesteps      | 24         |
| train/                  |            |
|    approx_kl            | 0.05572629 |
|    clip_fraction        | 0.475      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.83      |
|    explained_variance   | -12.8      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.091     |
|    n_updates            | 220        |
|    policy_gradient_loss | -0.0992    |
|    std                  | 0.999      |
|    value_loss           | 0.393      |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 13          |
|    time_elapsed         | 1327        |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 0            |
|    iterations           | 8            |
|    time_elapsed         | 721          |
|    total_timesteps      | 16           |
| train/                  |              |
|    approx_kl            | 0.0023793578 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.82        |
|    explained_variance   | 0.688        |
|    learning_rate        | 0.0003       |
|    loss                 | -0.0361      |
|    n_updates            | 140          |
|    policy_gradient_loss | -0.0158      |
|    std                  | 0.993        |
|    value_loss           | 0.226        |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 9           |
|    time_elaps

------------------------------------------
| time/                   |              |
|    fps                  | 0            |
|    iterations           | 4            |
|    time_elapsed         | 362          |
|    total_timesteps      | 8            |
| train/                  |              |
|    approx_kl            | 0.0073192716 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.82        |
|    explained_variance   | -1.42        |
|    learning_rate        | 0.0003       |
|    loss                 | 0.131        |
|    n_updates            | 60           |
|    policy_gradient_loss | -0.0328      |
|    std                  | 0.99         |
|    value_loss           | 0.879        |
------------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 5          |
|    time_elapsed  

-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 15          |
|    time_elapsed         | 1255        |
|    total_timesteps      | 30          |
| train/                  |             |
|    approx_kl            | 0.030350745 |
|    clip_fraction        | 0.175       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.79       |
|    explained_variance   | -0.12       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.084      |
|    n_updates            | 280         |
|    policy_gradient_loss | -0.0418     |
|    std                  | 0.973       |
|    value_loss           | 0.152       |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 16         |
|    time_elapsed         | 1311      

-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 11          |
|    time_elapsed         | 863         |
|    total_timesteps      | 22          |
| train/                  |             |
|    approx_kl            | 0.009523928 |
|    clip_fraction        | 0.025       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.89       |
|    explained_variance   | -13.8       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.204       |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0312     |
|    std                  | 1.03        |
|    value_loss           | 1.45        |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 12          |
|    time_elapsed         | 916   

---------------------------------------
| time/                   |           |
|    fps                  | 0         |
|    iterations           | 6         |
|    time_elapsed         | 428       |
|    total_timesteps      | 12        |
| train/                  |           |
|    approx_kl            | 0.0163275 |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -2.83     |
|    explained_variance   | -0.324    |
|    learning_rate        | 0.0003    |
|    loss                 | -0.123    |
|    n_updates            | 100       |
|    policy_gradient_loss | -0.0557   |
|    std                  | 0.993     |
|    value_loss           | 0.107     |
---------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 7          |
|    time_elapsed         | 479        |
|    total_timesteps      | 14     

Using cuda device
---------------------------
| time/              |    |
|    fps             | 0  |
|    iterations      | 1  |
|    time_elapsed    | 85 |
|    total_timesteps | 2  |
---------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 0            |
|    iterations           | 2            |
|    time_elapsed         | 225          |
|    total_timesteps      | 4            |
| train/                  |              |
|    approx_kl            | 0.0014642775 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.84        |
|    explained_variance   | -0.318       |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0726       |
|    n_updates            | 20           |
|    policy_gradient_loss | -0.0175      |
|    std                  | 1.01         |
|    value_loss           | 0.381        |
------------

-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 12          |
|    time_elapsed         | 1052        |
|    total_timesteps      | 24          |
| train/                  |             |
|    approx_kl            | 0.024412543 |
|    clip_fraction        | 0.1         |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.83       |
|    explained_variance   | -1.93       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.126      |
|    n_updates            | 220         |
|    policy_gradient_loss | -0.0599     |
|    std                  | 0.995       |
|    value_loss           | 0.0765      |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 0          |
|    iterations           | 13         |
|    time_elapsed         | 1110      

-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 8           |
|    time_elapsed         | 789         |
|    total_timesteps      | 16          |
| train/                  |             |
|    approx_kl            | 0.020310849 |
|    clip_fraction        | 0.05        |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.87       |
|    explained_variance   | 0.689       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0859      |
|    n_updates            | 140         |
|    policy_gradient_loss | -0.0569     |
|    std                  | 1.02        |
|    value_loss           | 0.684       |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 0           |
|    iterations           | 9           |
|    time_elapsed         | 843   

 # Post processing

In [93]:
import matplotlib.pyplot as plt
def get_acc_hist_continuous(model):
    return np.array(model.env.get_attr('accuracy_history')[0])[2:] *100
def get_acc_hist_adv(env):
    buffer = model.env.get_attr('buffer')[0]
    
    acc_hist = np.array([c[2][0] * 100 for c in buffer])
    return acc_hist

In [94]:
#Extract history of long runs and save them for further analysis:
acc_long_run_continuous = get_acc_hist_continuous(model_continuous_long)
acc_long_run_adv = get_acc_hist_adv(model_adv_long)
print(acc_long_run_continuous)
print(acc_long_run_adv)
np.save('Accuracy_long_run_continuous_env',acc_long_run_continuous)
np.save('Accuracy_long_run_adv_env',acc_long_run_adv)

[28.3 41.4 48.9 45.8 35.5 50.7 45.8 50.3 49.4 43.3 30.5 44.5 42.  40.3
 46.6 49.2 48.1 29.5 34.1 43.2 46.1 33.8 43.9 34.8 47.6 36.5 31.2 44.1
 42.5 29.8 35.8 44.  40.4 46.5 39.9 45.6 44.2 41.3 44.2 50.9 40.2 36.2
 40.9 38.7 32.5 30.2 29.9 48.4 41.5 45.3 37.3 50.2 46.2 52.4 25.  47.3
 38.3 49.3 48.5 36.3 40.4 51.6 45.5 41.3 37.3 33.6 30.2 36.  40.2 51.2
 41.4 40.4 48.2 36.4 44.1 46.5 48.9 42.4 54.4 50.4 52.  44.7 49.7 40.4
 30.4 43.  47.4 29.1 47.7 50.4 28.3 41.2 36.2 33.9 37.9 48.9 46.3 41.7
 43.6 49.  43. ]
[41.2 31.5 35.  52.3 50.5 50.1 46.1 48.  50.9 29.6 42.4 39.6 36.2 31.5
 48.2 41.3 37.4 49.4 47.8 47.7 36.  45.8 50.8 32.1 32.1 52.2 31.7 46.1
 35.2 47.7 30.8 39.3 34.6]


In [101]:
#extract history of short runs and save them:
accs_short_runs_continuous = np.zeros((n_runs,33))
accs_short_runs_adv = np.zeros((n_runs,33))

for i, model in enumerate(trained_model_continuous_list):
    accs_short_runs_continuous[i] = get_acc_hist_continuous(model)
    
for i, model in enumerate(trained_model_adv_list):
    accs_short_runs_adv[i] = get_acc_hist_adv(model)

print(accs_short_runs_continuous)
print(accs_short_runs_adv)
np.save('Accuracy_short_runs_continuous_env',accs_short_runs_continuous)
np.save('Accuracy_short_runs_adv_env',accs_short_runs_adv)

[[47.8 37.  32.1 53.3 42.3 52.1 50.3 53.4 43.3 44.  32.1 43.1 38.7 40.3
  42.9 52.3 43.2 40.  36.6 28.9 46.6 41.6 39.8 42.5 41.4 34.3 44.1 37.8
  31.9 47.4 45.5 42.4 41.6]
 [47.1 50.  49.9 43.7 44.7 51.2 47.1 41.9 50.6 50.  47.2 11.8 48.1 45.7
  46.5 47.9 49.  38.1 37.6 45.4 43.6 42.5 39.1 37.7 39.9 34.9 48.9 44.1
  42.8 43.9 31.1 29.4 32. ]
 [25.7 45.2 48.1 46.4 47.5 44.5 55.1 30.1 48.8 49.6 34.7 49.  39.1 43.7
  38.4 45.3 53.4 43.7 42.  44.3 49.4 27.6 47.7 48.  54.1 42.5 44.6 45.6
  38.7 33.  34.2 37.4 37.2]
 [37.3 36.2 45.9 43.5 43.8 34.8 32.6 36.  34.7 43.6 46.8 38.  54.4 55.2
  43.5 47.7 49.5 38.5 37.1 43.4 42.6 33.6 46.4 46.3 41.7 37.2 42.7 29.5
  44.8 39.3 46.1 43.  37.9]
 [31.6 47.2 45.6 45.4 54.  42.1 42.3 48.  38.  40.6 41.5 52.8 41.2 41.2
  36.6 44.4 43.  25.5 42.2 41.5 27.7 38.8 45.1 31.  33.2 30.7 42.3 49.2
  46.4 44.9 47.9 29.4 32.2]
 [32.6 54.  47.7 27.5 47.2 41.  34.4 46.8 46.2 35.8 41.3 48.7 47.1 40.9
  46.  21.2 44.1 33.6 40.7 37.4 49.  51.5 25.5 41.6 42.5 42.2 46.9 3