In [1]:
%matplotlib inline
%matplotlib notebook
%load_ext autoreload
%autoreload 2

### Importing the dependencies

In [1]:
from env.surrogate.SurrogateModel import SurrogateModel
from env.vqvae.decoder import Decoder
from env.environment import VQVAE_Env, RenderCallback
from agent.RLTrainer import Trainer
import numpy as np

### Loading the trained environment weights

In [2]:
surrogate_model = 'env/models/surrogate_model.json'
codebook = 'env/models/codebook.pth'

### PPO config

In [6]:
decoder_config = {
    "out_dim": 22,           # Output dimension
    "embed_dim": 8,          # Embedding dimension
    "h_nodes": 512,          # Number of hidden nodes
    "dropout": 0.2,          # Dropout rate
    "scale": 2,              # Scale factor
    "num_layers": 5,         # Number of layers
    "load_path": 'env/models/decoder_model.pth', # Path to load model weights
}

env_config = {
    "embed_dim": decoder_config['embed_dim'],    # Embedding dimension
    "num_embeddings": 14,           # Number of embeddings
    "max_allowed_actions": 200,      # Maximum allowed actions
    "consider_previous_actions": False, # Consider previous actions
    "num_previous_actions": 6,       # Number of previous actions to consider  
    "render_mode": 'human',          # Render mode
    "render_data": 'env/render/architectures_trained_on.npy',  # Data for rendering
    "render_labels": 'env/render/labels.npy',   # Labels for rendering
    "render_log_dir": 'trainingLogs',                  # Directory for logging data
    "consider_max_params": True,   # Consider maximum parameters
    "max_params": 1e9,             # Maximum parameters
    "min_params" : 1e8,                # Minimum parameters
}

model_config = {                #TODO: Consider adding entropy coefficient as parameter and policy & value function structure parameters
    "model": "PPO",                # Model type ('PPO', 'A2C', 'DQN', etc.)
    "policy": 'MultiInputPolicy',          # Policy type
    "total_timesteps": 1000000,       # Total number of timesteps
    "verbose": 0,                  # Verbosity level
    "tensorboard_log": env_config['render_log_dir'],  # Tensorboard log directory
    "n_steps": 2048,               # Number of steps to run for each environment per update
    "progress_bar": False,          # Whether to display a progress bar
    "n_epochs": 12,                # Number of epochs
    "batch_size": 32,              # Batch size
}

log_config = {
    "project": 'Test',                          # Project name in wandb
    #"entity": 'trex-ai',                            # Entity name in wandb
    "sync_tensorboard": True,                           # Whether to sync TensorBoard
    "save_code": True,                                  # Whether to save code in wandb
    "model_save_path": env_config['render_log_dir'],    # Path to save the model
    "gradient_save_freq": 100,                          # Frequency to save gradients
    "verbose": 2,                                       # Verbosity level
}

custom_callback_function = RenderCallback()

### Init the trainer object

In [7]:
trainer = Trainer(surrogate_path=surrogate_model, 
                  codebook_path=codebook, 
                  decoder_config=decoder_config, 
                  env_config=env_config, 
                  model_config=model_config, 
                  log_config=log_config)

Surrogate model loaded from:  env/models/surrogate_model.json
Codebook loaded from:  env/models/codebook.pth
Decoder model loaded from:  env/models/decoder_model.pth
Environment check passed


### Run the below cell to train the PPO agent

In [8]:
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initializing WanderDB


[34m[1mwandb[0m: Currently logged in as: [33masaficontact[0m ([33mtrex-ai[0m). Use [1m`wandb login --relogin`[0m to force relogin


Setting up Model
Model Config: {'model': 'PPO', 'policy': 'MultiInputPolicy', 'total_timesteps': 1000000, 'verbose': 0, 'tensorboard_log': 'trainingLogs', 'n_steps': 2048, 'progress_bar': False, 'n_epochs': 12, 'batch_size': 32}
Resetting Environment
Training Model
Training Started


VBox(children=(Label(value='0.571 MB of 0.571 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
rollout/ep_len_mean,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
rollout/ep_rew_mean,▁█▇█▇▇███▇▇██████▇▇██▇████▇▇▇█▇▇▇▇█▇█▇▇▇
time/fps,▁▂▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇██████████
train/approx_kl,▅▂▃█▆▅▂▆▆▂▄▄▅▅▄▃▆▅▂▄▅▅▄▄▃▃▃▃▆▅▁▂▂▁▄▂▂▂▄▆
train/clip_fraction,█▃▄▇▄▄▂▄▅▂▅▃▂▄▆▃▄▄▃▂▃▆▇▄▂▂▂▂▄▄▂▂▂▁▄▂▃▃▅█
train/clip_range,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/entropy_loss,▁▆▆▅▆▇▇▆▇▇▇█▇▇▇▇▇▇▇█▇████▇▇▇█▇█████▇▇▇▇▇
train/explained_variance,▁▅▁▅▁▅▅▅▅▁▁▅▅▅█▁▅▅▅▁▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▅▆▅█▅
train/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
global_step,1001472.0
rollout/ep_len_mean,2.21
rollout/ep_rew_mean,0.33496
time/fps,366.0
train/approx_kl,0.01996
train/clip_fraction,0.37272
train/clip_range,0.2
train/entropy_loss,-2.44525
train/explained_variance,0.0
train/learning_rate,0.0003


In [7]:
save_path = "models/test"
trainer.save_model(save_path)

### Run the below cell to evaluate the agent

In [8]:
# Load the model
save_path = "models/test"
trainer.load_model(save_path)

In [9]:
x = trainer.evaluate_accuracy(num_episodes=100)

Episode 0
Episode 0,200: cum reward: 0.9141677618026733, max reward: 0.9141677618026733, action: [2], last action: [2]
Episode 0: Episode Accuracy: 0.9141677618026733, Max Accuracy till Episode: 0.9141677618026733
Episode 1
Episode 1,200: cum reward: 0.7513466477394104, max reward: 0.7513466477394104, action: [26], last action: [26]
Episode 1: Episode Accuracy: 0.7513466477394104, Max Accuracy till Episode: 0.9141677618026733
Episode 2
Episode 2,200: cum reward: 0.7723342180252075, max reward: 0.7725493907928467, action: [11], last action: [77]
Episode 2: Episode Accuracy: 0.7725493907928467, Max Accuracy till Episode: 0.9141677618026733
Episode 3
Episode 3,200: cum reward: 0.7889947295188904, max reward: 0.7889947295188904, action: [98], last action: [98]
Episode 3: Episode Accuracy: 0.7889947295188904, Max Accuracy till Episode: 0.9141677618026733
Episode 4
Episode 4,200: cum reward: 0.7760769128799438, max reward: 0.7760769128799438, action: [33], last action: [33]
Episode 4: Episod

In [10]:
x

tensor([[108.9913,   6.4508,  12.8702,  11.9179,  10.2512,   6.4805,   3.7706,
           3.9087,   4.6221,  10.6889,   4.0760,   3.8912,   9.1347,   3.6351,
           3.5720,   7.1175,   2.9104,   2.9669,   5.9397,   0.0000,   0.0000,
           3.2940]])

In [11]:
trainer.calculate_accuracy_for_decoded_state(x)

0.947152

## Check if the recommended model exists in the training dataset

In [29]:
min_values = torch.tensor(
            [8, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0],
            dtype=torch.float32,
        )
max_values = torch.tensor(
    [
        128,
        5,
        16,
        16,
        16,
        16,
        16,
        4,
        4,
        12,
        4,
        4,
        12,
        4,
        4,
        12,
        4,
        4,
        12,
        4,
        4,
        12,
    ],
    dtype=torch.float32,
)

def clip_values(X):
    # Shape of X = (batch_size, 22)

    # Round all values
    rounded_data = torch.round(X)

    # Now clamp each column individually
    clamped_data = torch.empty_like(rounded_data)
    for i in range(X.shape[1]):
        clamped_data[:, i] = torch.clamp(
            rounded_data[:, i], min_values[i], max_values[i]
        )

    return clamped_data

In [32]:
import pandas as pd
import torch
df = pd.read_csv("data/dataset_cifar10_v1.csv") # loading the dataset to pandas df
map = {"A":1.0,"B":2.0,"C":3.0,"D":4.0} # mapping the conv block type to numerical values
for column, dtype in df.dtypes.items(): # applying the mapping to the column and also converting to float32
    if dtype == 'object':
        df[column] = df[column].replace(map).astype('float32')
df = df.astype({col: 'float32' for col in df.select_dtypes('int64').columns})
df = df.iloc[:,:-3]
df.head()
df_tensor = torch.tensor(df[df.columns].values,dtype=torch.float32)
df_tensor[0]

  df[column] = df[column].replace(map).astype('float32')


tensor([117.,   1.,   9.,   0.,   0.,   0.,   0.,   2.,   4.,  11.,   0.,   0.,
          0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.])

In [30]:
x = clip_values(x)

In [31]:
x

tensor([[109.,   5.,  13.,  12.,  10.,   6.,   4.,   4.,   4.,  11.,   4.,   4.,
           9.,   4.,   4.,   7.,   3.,   3.,   6.,   1.,   0.,   3.]])

In [35]:
for t in df_tensor: 
    if torch.allclose(t, x, rtol=1e-05, atol=1e-08):
        print("True")
        break


In [36]:
for t in df_tensor: 
    if torch.all(t.eq(x)):
        print("True")
        break

In [4]:
decoder_config = {
    "out_dim": 22,           # Output dimension
    "embed_dim": 8,          # Embedding dimension
    "h_nodes": 512,          # Number of hidden nodes
    "dropout": 0.2,          # Dropout rate
    "scale": 2,              # Scale factor
    "num_layers": 5,         # Number of layers
    "load_path": 'env/models/decoder_model.pth', # Path to load model weights
}

env_config = {
    "embed_dim": decoder_config['embed_dim'],    # Embedding dimension
    "num_embeddings": 14,           # Number of embeddings
    "max_allowed_actions": 200,      # Maximum allowed actions
    "consider_previous_actions": False, # Consider previous actions
    "num_previous_actions": 6,       # Number of previous actions to consider  
    "render_mode": 'human',          # Render mode
    "render_data": 'env/render/architectures_trained_on.npy',  # Data for rendering
    "render_labels": 'env/render/labels.npy',   # Labels for rendering
    "render_log_dir": 'trainingLogs',                  # Directory for logging data
}

model_config = {
    "model": "PPO",                # Model type ('PPO', 'A2C', 'DQN', etc.)
    "policy": 'MlpPolicy',  # Policy type
    "total_timesteps": 512,       # Total number of timesteps
    "verbose": 0,                  # Verbosity level
    "tensorboard_log": env_config['render_log_dir'],  # Tensorboard log directory
    "n_steps": 512,               # Number of steps to run for each environment per update
    "progress_bar": False,          # Whether to display a progress bar
    "n_epochs": 12,                # Number of epochs
    "batch_size": 32,              # Batch size
}

log_config = {
    "project": 'PPO Training',                          # Project name in wandb
    #"entity": 'trex-ai',                            # Entity name in wandb
    "sync_tensorboard": True,                           # Whether to sync TensorBoard
    "save_code": True,                                  # Whether to save code in wandb
    "model_save_path": env_config['render_log_dir'],    # Path to save the model
    "gradient_save_freq": 100,                          # Frequency to save gradients
    "verbose": 2,                                       # Verbosity level
}

custom_callback_function = RenderCallback()

In [5]:
trainer = Trainer(surrogate_path=surrogate_model, 
                  codebook_path=codebook, 
                  decoder_config=decoder_config, 
                  env_config=env_config, 
                  model_config=model_config, 
                  log_config=log_config)

Surrogate model loaded from:  env/models/surrogate_model.json
Codebook loaded from:  env/models/codebook.pth
Decoder model loaded from:  env/models/decoder_model.pth
Environment check passed


In [6]:
trainer.train(custom_callback=custom_callback_function)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33masaficontact[0m ([33mtrex-ai[0m). Use [1m`wandb login --relogin`[0m to force relogin


Episode lengths:  [87, 59, 199, 106]
(512, 1, 8)
states.shape, ep_len_states.shape (512, 8) (4,)
Rendering the environment...
(1200, 8) (14, 8) (512, 8)


<IPython.core.display.Javascript object>

/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/trainingLogs/render/latent_space_20240509_085333.png


VBox(children=(Label(value='0.670 MB of 0.670 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
global_step,▁
rollout/ep_len_mean,▁
rollout/ep_rew_mean,▁
time/fps,▁

0,1
global_step,512.0
rollout/ep_len_mean,113.5
rollout/ep_rew_mean,0.77255
time/fps,249.0


In [6]:
# Load the model
save_path = "/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/models/ppo_mlpPolicy_1500000.zip"
trainer.load_model(save_path)

In [8]:
x = trainer.evaluate_accuracy(num_episodes=10)

Episode 0
Episode 0,200: cum reward: 0.7686756253242493, max reward: 0.7686756253242493, action: [71], last action: [71]
Episode 0: Accuracy: 0.7686756253242493 Max Accuracy till Episode: 0.7686756253242493
Episode 1
Episode 1,200: cum reward: 0.9201174378395081, max reward: 0.9238914251327515, action: [23], last action: [49]
Episode 1: Accuracy: 0.9238914251327515 Max Accuracy till Episode: 0.9238914251327515
Episode 2
Episode 2,4: cum reward: 0.7723342180252075, max reward: 0.7723342180252075, action: [103], last action: [112]
Episode 2: Accuracy: 0.7723342180252075 Max Accuracy till Episode: 0.9238914251327515
Episode 3
Episode 3,200: cum reward: 0.757368803024292, max reward: 0.7575494050979614, action: [8], last action: [96]
Episode 3: Accuracy: 0.7575494050979614 Max Accuracy till Episode: 0.9238914251327515
Episode 4
Episode 4,200: cum reward: 0.7889947295188904, max reward: 0.7889947295188904, action: [68], last action: [68]
Episode 4: Accuracy: 0.7889947295188904 Max Accuracy 

In [9]:
trainer.calculate_accuracy_for_decoded_state(x)

0.93187684