In [1]:
%matplotlib inline
%matplotlib notebook
%load_ext autoreload
%autoreload 2

### Importing the dependencies

In [2]:
from env.surrogate.SurrogateModel import SurrogateModel
from env.vqvae.decoder import Decoder
from env.environment import VQVAE_Env, RenderCallback
from agent.RLTrainer import Trainer
import numpy as np

### Loading the trained environment weights

In [3]:
# Pre-trained environment artifacts (paths are relative to the working directory).
surrogate_model, codebook = (
    'env/models/surrogate_model.json',  # surrogate model file
    'env/models/codebook.pth',          # codebook file
)

### PPO config

In [4]:
# Decoder network settings.
decoder_config = {
    "out_dim": 22,       # output dimension
    "embed_dim": 8,      # embedding dimension
    "h_nodes": 512,      # hidden nodes
    "dropout": 0.2,      # dropout rate
    "scale": 2,          # scale factor
    "num_layers": 5,     # number of layers
    "load_path": "env/models/decoder_model.pth",  # decoder weights to load
}

# Environment settings; the embedding size is taken from the decoder config.
env_config = {
    "embed_dim": decoder_config["embed_dim"],
    "num_embeddings": 14,
    "max_allowed_actions": 200,            # maximum allowed actions
    "consider_previous_actions": False,    # whether previous actions are considered
    "num_previous_actions": 6,             # how many previous actions to consider
    "render_mode": "human",
    "render_data": "env/render/architectures_trained_on.npy",  # data used for rendering
    "render_labels": "env/render/labels.npy",                  # labels used for rendering
    "render_log_dir": "trainingLogs",      # log directory (reused by the configs below)
    "consider_max_params": True,           # whether the maximum parameter count is considered
    "max_params": 1e9,
    # NOTE(review): min_params equals max_params here; 1e8 was the previously
    # used value — confirm which one is intended.
    "min_params": 1e9,
}

# RL model settings.
# TODO: Consider adding entropy coefficient as parameter and policy & value function structure parameters
model_config = {
    "model": "PPO",                  # model type ('PPO', 'A2C', 'DQN', etc.)
    "policy": "MultiInputPolicy",    # policy type
    "total_timesteps": 1_000_000,    # total number of timesteps
    "verbose": 0,
    "tensorboard_log": env_config["render_log_dir"],  # TensorBoard log directory
    "n_steps": 2048,                 # steps per environment per update
    "progress_bar": False,
    "n_epochs": 12,
    "batch_size": 32,
}

# Logging (wandb) settings. An "entity" key (e.g. 'trex-ai') is currently not set.
log_config = {
    "project": "Test",               # wandb project name
    "sync_tensorboard": True,        # sync TensorBoard to wandb
    "save_code": True,               # save code in wandb
    "model_save_path": env_config["render_log_dir"],  # where the model is saved
    "gradient_save_freq": 100,       # gradient saving frequency
    "verbose": 2,
}

# NOTE(review): this callback instance is not passed to Trainer in the cells
# visible here — confirm it is still needed.
custom_callback_function = RenderCallback()

### Init the trainer object

In [5]:
# Build the trainer from the artifact paths and the four config dictionaries.
trainer = Trainer(
    surrogate_path=surrogate_model,
    codebook_path=codebook,
    decoder_config=decoder_config,
    env_config=env_config,
    model_config=model_config,
    log_config=log_config,
)

Surrogate model loaded from:  env/models/surrogate_model.json
Codebook loaded from:  env/models/codebook.pth
Decoder model loaded from:  env/models/decoder_model.pth
Environment check passed


### Run the cell below to train the PPO agent

In [6]:
# Start training with the model configuration above; the log below shows a
# wandb run being initialised as part of this call.
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initializing WanderDB


[34m[1mwandb[0m: Currently logged in as: [33masaficontact[0m ([33mtrex-ai[0m). Use [1m`wandb login --relogin`[0m to force relogin


Setting up Model
Model Config: {'model': 'PPO', 'policy': 'MultiInputPolicy', 'total_timesteps': 1000000, 'verbose': 0, 'tensorboard_log': 'trainingLogs', 'n_steps': 2048, 'progress_bar': False, 'n_epochs': 12, 'batch_size': 32}
Resetting Environment
Training Model
Training Started


In [7]:
# Save the trained agent under a path relative to the working directory.
save_path = "models/test"
trainer.save_model(save_path)

### Run the cells below to evaluate the agent

In [41]:
# Decoder network settings.
decoder_config = {
    "out_dim": 22,       # output dimension
    "embed_dim": 8,      # embedding dimension
    "h_nodes": 512,      # hidden nodes
    "dropout": 0.2,      # dropout rate
    "scale": 2,          # scale factor
    "num_layers": 5,     # number of layers
    "load_path": "env/models/decoder_model.pth",  # decoder weights to load
}

# Environment settings for evaluation: previous actions are considered and the
# maximum-parameter option is switched off.
env_config = {
    "embed_dim": decoder_config["embed_dim"],
    "num_embeddings": 14,
    "max_allowed_actions": 200,            # maximum allowed actions
    "consider_previous_actions": True,     # whether previous actions are considered
    "num_previous_actions": 6,             # how many previous actions to consider
    "render_mode": "human",
    "render_data": "env/render/architectures_trained_on.npy",  # data used for rendering
    "render_labels": "env/render/labels.npy",                  # labels used for rendering
    "render_log_dir": "trainingLogs",      # log directory (reused by the configs below)
    "consider_max_params": False,          # whether the maximum parameter count is considered
    "max_params": 1e9,
    # NOTE(review): min_params equals max_params here; 1e8 was the previously
    # used value — confirm which one is intended.
    "min_params": 1e9,
}

# RL model settings.
# TODO: Consider adding entropy coefficient as parameter and policy & value function structure parameters
model_config = {
    "model": "PPO",                  # model type ('PPO', 'A2C', 'DQN', etc.)
    "policy": "MultiInputPolicy",    # policy type
    "total_timesteps": 1_000_000,    # total number of timesteps
    "verbose": 0,
    "tensorboard_log": env_config["render_log_dir"],  # TensorBoard log directory
    "n_steps": 2048,                 # steps per environment per update
    "progress_bar": False,
    "n_epochs": 12,
    "batch_size": 32,
}

# Logging (wandb) settings. An "entity" key (e.g. 'trex-ai') is currently not set.
log_config = {
    "project": "Test",               # wandb project name
    "sync_tensorboard": True,        # sync TensorBoard to wandb
    "save_code": True,               # save code in wandb
    "model_save_path": env_config["render_log_dir"],  # where the model is saved
    "gradient_save_freq": 100,       # gradient saving frequency
    "verbose": 2,
}

# NOTE(review): this callback instance is not passed to Trainer in the cells
# visible here — confirm it is still needed.
custom_callback_function = RenderCallback()

In [42]:
# Rebuild the trainer with the evaluation configuration defined above.
trainer = Trainer(
    surrogate_path=surrogate_model,
    codebook_path=codebook,
    decoder_config=decoder_config,
    env_config=env_config,
    model_config=model_config,
    log_config=log_config,
)

Surrogate model loaded from:  env/models/surrogate_model.json
Codebook loaded from:  env/models/codebook.pth
Decoder model loaded from:  env/models/decoder_model.pth
Environment check passed


In [43]:
# Load the model
save_path = "/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/models/ppo_MultiInputPolicy_6_1500000.zip"
trainer.load_model(save_path)

In [44]:
# Evaluate the loaded agent for 100 episodes; the returned value is kept in `x`
# and inspected / post-processed by the following cells.
x = trainer.evaluate_accuracy(num_episodes=100)

Episode 0
reward:  [0.7889947]
action:  [49]
reward:  [0.]
action:  [9]
reward:  [0.]
action:  [3]
reward:  [-0.01853627]
action:  [86]
reward:  [-0.00175923]
action:  [79]
reward:  [-0.00909311]
action:  [5]
reward:  [0.]
action:  [6]
reward:  [0.]
action:  [79]
reward:  [0.]
action:  [79]
reward:  [-0.0223639]
action:  [20]
reward:  [0.]
action:  [6]
reward:  [0.]
action:  [79]
reward:  [0.00041479]
action:  [92]
reward:  [0.]
action:  [3]
reward:  [0.]
action:  [45]
reward:  [0.]
action:  [79]
reward:  [-0.00041479]
action:  [78]
reward:  [0.]
action:  [20]
reward:  [0.]
action:  [45]
reward:  [0.]
action:  [79]
reward:  [0.]
action:  [60]
reward:  [0.]
action:  [20]
reward:  [0.]
action:  [45]
reward:  [0.]
action:  [79]
reward:  [0.]
action:  [108]
reward:  [0.03530717]
action:  [42]
reward:  [0.]
action:  [45]
reward:  [0.]
action:  [92]
reward:  [0.]
action:  [18]
reward:  [0.]
action:  [60]
reward:  [0.]
action:  [73]
reward:  [0.]
action:  [79]
reward:  [0.]
action:  [60]
rewa

In [45]:
# Display the value returned by evaluate_accuracy (a 1 x 22 tensor in the output below).
x

tensor([[118.4816,   7.0945,  13.9395,  13.0070,  11.2699,   7.1419,   4.1525,
           4.2256,   5.0592,  11.5852,   4.4791,   4.2973,   9.9597,   4.0309,
           3.9689,   7.7955,   3.2666,   3.3099,   6.5729,   0.0000,   0.0000,
           3.6494]])

In [52]:
# Score the decoded state `x`; per the output below the call prints the state
# before/after conversion and returns a (accuracy array, None) tuple.
trainer.calculate_accuracy_for_decoded_state(x)

Beforee X:  tensor([[118.4816,   7.0945,  13.9395,  13.0070,  11.2699,   7.1419,   4.1525,
           4.2256,   5.0592,  11.5852,   4.4791,   4.2973,   9.9597,   4.0309,
           3.9689,   7.7955,   3.2666,   3.3099,   6.5729,   0.0000,   0.0000,
           3.6494]])
Afterr X:     out_channel0  M    R1    R2    R3   R4   R5 convblock1  widenfact1    B1  \
0         118.0  2  14.0  13.0  11.0  7.0  4.0          D         4.0  12.0   

   ...    B2  convblock3  widenfact3   B3  convblock4  widenfact4   B4  \
0  ...  10.0           D         4.0  8.0           C         3.0  7.0   

   convblock5  widenfact5   B5  
0           A           5  4.0  

[1 rows x 22 columns]


(array([0.9558083], dtype=float32), None)

## Check if the recommended model exists in the training dataset

In [12]:
import torch
import pandas as pd 

# Numeric convblock codes and the letters they stand for.
value_mapping = dict(zip((1.0, 2.0, 3.0, 4.0), "ABCD"))

# Column layout of one architecture row: two global fields, R1..R5, then a
# (convblock, widenfact, B) triple for each of the five stages.
column_names = (
    ["out_channel0", "M"]
    + [f"R{stage}" for stage in range(1, 6)]
    + [
        f"{field}{stage}"
        for stage in range(1, 6)
        for field in ("convblock", "widenfact", "B")
    ]
)

# Per-column lower bounds, in the same order as column_names.
min_values = torch.tensor(
    [8, 1]                # out_channel0, M
    + [1, 0, 0, 0, 0]     # R1..R5
    + [1, 1, 1]           # convblock1, widenfact1, B1
    + [1, 0, 0] * 4,      # convblock/widenfact/B for stages 2..5
    dtype=torch.float32,
)

# Per-column upper bounds, in the same order as column_names.
max_values = torch.tensor(
    [128, 5]              # out_channel0, M
    + [16] * 5            # R1..R5
    + [4, 4, 12] * 5,     # convblock/widenfact/B for stages 1..5
    dtype=torch.float32,
)

def correct_data_format(data):
    """Convert a tensor of architecture encodings into a labelled DataFrame.

    Args:
        data: torch tensor with one row per architecture and one column per
            entry of ``column_names``.

    Returns:
        pandas.DataFrame whose columns are ``column_names``. In the
        ``convblock1`` .. ``convblock5`` columns the numeric codes are replaced
        through ``value_mapping`` and the column is stored as ``category``.
    """
    # detach()/cpu() keep the conversion working for tensors that track
    # gradients or live on another device; plain CPU tensors are unaffected.
    array = data.detach().cpu().numpy()

    # Create a pandas DataFrame with the numpy array and set the column names
    df = pd.DataFrame(array, columns=column_names)

    # Same code -> letter conversion for every convblock column.
    for col in ("convblock1", "convblock2", "convblock3", "convblock4", "convblock5"):
        df[col] = df[col].replace(value_mapping).astype("category")

    return df


def clip_values(X):
    """Round a batch of decoded architectures and bound every column.

    ``X`` is indexed as ``(batch_size, n_columns)``. All entries are rounded,
    then column ``j`` is clamped to ``[min_values[j], max_values[j]]``. The
    bounded tensor is passed to ``correct_data_format`` and that result is
    returned.
    """
    rounded = torch.round(X)
    bounded = torch.empty_like(rounded)

    # Bounds differ per column, so each column is clamped on its own.
    n_columns = X.shape[1]
    for col in range(n_columns):
        lower, upper = min_values[col], max_values[col]
        bounded[:, col] = rounded[:, col].clamp(lower, upper)

    return correct_data_format(bounded)



In [47]:
# Clip the decoded state, then force widenfact5 to 1.
# NOTE(review): the reason for the override is not visible here — presumably it
# avoids a zero widen factor in the last stage; confirm.
X = clip_values(x).assign(widenfact5=1)

In [11]:

def parse_model_representation(df):
    """Turn one architecture row into a plain ``{name: value}`` dictionary.

    Args:
        df: object exposing ``.items()`` that yields ``(name, value)`` pairs,
            e.g. a single DataFrame row (``pandas.Series``) or a dict.

    Returns:
        dict with every name mapped to its value. Floating-point values
        (Python ``float`` or any NumPy floating scalar) are converted with
        ``int()``; all other values are kept unchanged.
    """
    config = {}
    for key, val in df.items():
        # isinstance covers every float width (np.float32, np.float64, Python
        # float), so range()-based consumers always receive integers.
        if isinstance(val, (float, np.floating)):
            val = int(val)
        config[key] = val
    return config

def calc_conv_params(in_channels, out_channels, kernel_size):
    """Weight count of a square convolution (no bias term): k^2 * in * out."""
    kernel_area = kernel_size ** 2
    return kernel_area * in_channels * out_channels

def calc_bn_params(num_features):
    """Parameter count of a BN layer: two values per feature."""
    params_per_feature = 2
    return params_per_feature * num_features

def calc_fc_params(in_features, out_features):
    """Parameter count of a fully connected layer: weights plus one bias per output."""
    weights = in_features * out_features
    biases = out_features
    return weights + biases

def calc_residual_branch_params(in_channels, out_channels, filter_size):
    """Parameter count of one residual branch.

    Counts two convolutions with the same filter size (in -> out channels,
    then out -> out channels), each paired with a BN layer over out_channels.
    """
    first_stage = (
        calc_conv_params(in_channels, out_channels, filter_size)
        + calc_bn_params(out_channels)
    )
    second_stage = (
        calc_conv_params(out_channels, out_channels, filter_size)
        + calc_bn_params(out_channels)
    )
    return first_stage + second_stage

def calc_skip_connection_params(in_channels, out_channels):
    """Parameter count of the skip path.

    Counts two 1x1 convolutions, each producing ``out_channels // 2`` channels,
    plus one BN layer over ``out_channels``.
    """
    half_width = out_channels // 2
    one_by_one = calc_conv_params(in_channels, half_width, 1)
    return one_by_one + one_by_one + calc_bn_params(out_channels)

def calc_basic_block_params(in_channels, out_channels, filter_size, res_branches, use_skip):
    """Parameter count of one basic block.

    Adds up ``res_branches`` residual branches and, when ``use_skip`` is truthy,
    one skip connection.
    """
    total = 0
    for _ in range(res_branches):
        total += calc_residual_branch_params(in_channels, out_channels, filter_size)
    if use_skip:
        total += calc_skip_connection_params(in_channels, out_channels)
    return total

def calc_residual_group_params(in_channels, out_channels, n_blocks, filter_size, res_branches, use_skip):
    """Parameter count of a group of ``n_blocks`` basic blocks.

    Only the first block takes ``in_channels`` as input and may carry the skip
    connection; every later block runs out_channels -> out_channels without it.
    """
    total = 0
    for block_idx in range(n_blocks):
        is_first = block_idx == 0
        block_in = in_channels if is_first else out_channels
        total += calc_basic_block_params(
            block_in, out_channels, filter_size, res_branches, use_skip and is_first
        )
    return total

def calc_total_params(config, input_dim=(3, 32, 32), classes=10):
    """Estimate the parameter count of the architecture described by ``config``.

    Args:
        config: mapping with ``out_channel0``, ``M`` and, for every stage
            ``i`` in ``1..M``, the keys ``R{i}``, ``widenfact{i}`` and ``B{i}``.
            ``M`` and the ``R``/``B`` values must be integers (they drive
            ``range()``).
        input_dim: input shape; only its first entry is used, as the number of
            input channels of the first convolution (assumes channels-first —
            TODO confirm).
        classes: number of outputs of the final fully connected layer.

    Returns:
        Sum of the initial 7x7 conv + BN, the ``M`` residual groups (3x3
        filters) and the final fully connected layer.
    """
    out_channel0 = config["out_channel0"]
    M = config["M"]
    R = [config[f"R{i+1}"] for i in range(M)]
    widen_factors = [config[f"widenfact{i+1}"] for i in range(M)]
    B = [config[f"B{i+1}"] for i in range(M)]

    # Initial Conv and BN layer; input channels come from input_dim instead of
    # a hard-coded 3 (identical for the default input_dim).
    total_params = calc_conv_params(input_dim[0], out_channel0, 7) + calc_bn_params(out_channel0)

    in_channels = out_channel0
    for i in range(M):
        out_channels = in_channels * widen_factors[i]
        # A skip connection is only counted when the stage changes the width.
        changes_width = in_channels != out_channels
        total_params += calc_residual_group_params(in_channels, out_channels, R[i], 3, B[i], changes_width)
        in_channels = out_channels

    # The classifier input is counted as a 21x21 map per channel, independent
    # of M. NOTE(review): 21x21 is not derived from input_dim — confirm.
    fc_len = in_channels * 21 * 21

    # Fully connected layer
    total_params += calc_fc_params(fc_len, classes)

    return total_params



In [37]:
# Convert the first row of the clipped architecture table into a config dict,
# show it, and compute its estimated parameter count.
config = parse_model_representation(X.iloc[0])
print(config)
total_params = calc_total_params(config)
total_params

{'out_channel0': 118, 'M': 5, 'R1': 14, 'R2': 13, 'R3': 11, 'R4': 7, 'R5': 4, 'convblock1': 'D', 'widenfact1': 4, 'B1': 12, 'convblock2': 'D', 'widenfact2': 4, 'B2': 10, 'convblock3': 'D', 'widenfact3': 4, 'B3': 8, 'convblock4': 'C', 'widenfact4': 3, 'B4': 7, 'convblock5': 'A', 'widenfact5': 0, 'B5': 4}


527378142584

# Train the RL agent using the maximum parameter count in the training set as the threshold

In [15]:
# Decoder network settings.
decoder_config = {
    "out_dim": 22,       # output dimension
    "embed_dim": 8,      # embedding dimension
    "h_nodes": 512,      # hidden nodes
    "dropout": 0.2,      # dropout rate
    "scale": 2,          # scale factor
    "num_layers": 5,     # number of layers
    "load_path": "env/models/decoder_model.pth",  # decoder weights to load
}

# Environment settings for this run: previous actions are considered and the
# maximum-parameter option is switched on.
env_config = {
    "embed_dim": decoder_config["embed_dim"],
    "num_embeddings": 14,
    "max_allowed_actions": 200,            # maximum allowed actions
    "consider_previous_actions": True,     # whether previous actions are considered
    "num_previous_actions": 6,             # how many previous actions to consider
    "render_mode": "human",
    "render_data": "env/render/architectures_trained_on.npy",  # data used for rendering
    "render_labels": "env/render/labels.npy",                  # labels used for rendering
    "render_log_dir": "trainingLogs",      # log directory (reused by the configs below)
    "consider_max_params": True,           # whether the maximum parameter count is considered
    "max_params": 1e9,
    # NOTE(review): min_params equals max_params here; 1e8 was the previously
    # used value — confirm which one is intended.
    "min_params": 1e9,
}

# RL model settings (shorter run: 100k timesteps).
# TODO: Consider adding entropy coefficient as parameter and policy & value function structure parameters
model_config = {
    "model": "PPO",                  # model type ('PPO', 'A2C', 'DQN', etc.)
    "policy": "MultiInputPolicy",    # policy type
    "total_timesteps": 100_000,      # total number of timesteps
    "verbose": 0,
    "tensorboard_log": env_config["render_log_dir"],  # TensorBoard log directory
    "n_steps": 2048,                 # steps per environment per update
    "progress_bar": False,
    "n_epochs": 12,
    "batch_size": 32,
}

# Logging (wandb) settings. An "entity" key (e.g. 'trex-ai') is currently not set.
log_config = {
    "project": "Test",               # wandb project name
    "sync_tensorboard": True,        # sync TensorBoard to wandb
    "save_code": True,               # save code in wandb
    "model_save_path": env_config["render_log_dir"],  # where the model is saved
    "gradient_save_freq": 100,       # gradient saving frequency
    "verbose": 2,
}

# NOTE(review): this callback instance is not passed to Trainer in the cells
# visible here — confirm it is still needed.
custom_callback_function = RenderCallback()

In [16]:
# Rebuild the trainer with the parameter-threshold configuration defined above.
trainer = Trainer(
    surrogate_path=surrogate_model,
    codebook_path=codebook,
    decoder_config=decoder_config,
    env_config=env_config,
    model_config=model_config,
    log_config=log_config,
)

Surrogate model loaded from:  env/models/surrogate_model.json
Codebook loaded from:  env/models/codebook.pth
Decoder model loaded from:  env/models/decoder_model.pth
Environment check passed


In [17]:
# Train with the 100k-timestep configuration above; the wandb run summary is
# printed below once training finishes.
trainer.train()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Initializing WanderDB


[34m[1mwandb[0m: Currently logged in as: [33masaficontact[0m ([33mtrex-ai[0m). Use [1m`wandb login --relogin`[0m to force relogin


Setting up Model
Model Config: {'model': 'PPO', 'policy': 'MultiInputPolicy', 'total_timesteps': 100000, 'verbose': 0, 'tensorboard_log': 'trainingLogs', 'n_steps': 2048, 'progress_bar': False, 'n_epochs': 12, 'batch_size': 32}
Resetting Environment
Training Model
Training Started


VBox(children=(Label(value='0.321 MB of 0.321 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
rollout/ep_len_mean,█▇▇▆▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
rollout/ep_rew_mean,▂▁▂▁▃▄▆▅▆▆▇▇▇███████▇████████▇██▇█▇▇▇███
time/fps,▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇███████████
train/approx_kl,▄▆▅▆▆▆▇▇▇█▇▆▆▇▇▂▁▂▅▃▂▂▁▃▅▇▄▂▃▄▄▃▃▆▅▅▃▇▆▂
train/clip_fraction,▅▆▅▅▄▆▆▇▆█▇▇██▆▂▂▂▃▃▂▃▂▃▂▄▃▁▂▃▃▂▂▃▃▃▁▄▅▃
train/clip_range,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/entropy_loss,▁▁▁▁▁▂▂▂▂▃▃▄▄▅▆▆▆▆▇▇▆▇▇▆▇█▇▇█▇▇█▇█▇██▇█▇
train/explained_variance,▅▅▅▁▅▅▅▆▆▁▅▁▁▅▅▅▅▅▅▅▅▅▆▆▁▅▅▅▅▅▁▅▅▅▅▅▅▅█▅
train/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
global_step,100352.0
rollout/ep_len_mean,3.21
rollout/ep_rew_mean,0.2218
time/fps,317.0
train/approx_kl,0.0131
train/clip_fraction,0.24638
train/clip_range,0.2
train/entropy_loss,-3.23418
train/explained_variance,0.0
train/learning_rate,0.0003


In [18]:
# Save the agent trained with the parameter threshold (path relative to the working directory).
save_path = "models/ppo_MultiInputPolicy_6_max_param_1e9_100000"
trainer.save_model(save_path)

In [34]:
# Reload the agent from the same path it was saved to in the previous cell.
save_path = "models/ppo_MultiInputPolicy_6_max_param_1e9_100000"
trainer.load_model(save_path)

In [40]:
# Evaluate the reloaded agent for 1000 episodes; the returned value is kept in
# `x` for the cells below.
x = trainer.evaluate_accuracy(num_episodes=1000)

Episode 0
reward:  [0.]
action:  [112]
Episode 0,1: cum reward: 0.0, max reward: 0.0, max action: [112], last action: [112]
Episode 0: Episode Accuracy: [0.93187684], Max Accuracy till Episode: [0.93187684]
Episode 1
reward:  [0.]
action:  [112]
Episode 1,1: cum reward: 0.0, max reward: 0.0, max action: [112], last action: [112]
Episode 1: Episode Accuracy: [0.7889947], Max Accuracy till Episode: [0.93187684]
Episode 2
reward:  [0.]
action:  [112]
Episode 2,1: cum reward: 0.0, max reward: 0.0, max action: [112], last action: [112]
Episode 2: Episode Accuracy: [0.7889947], Max Accuracy till Episode: [0.93187684]
Episode 3
reward:  [0.]
action:  [112]
Episode 3,1: cum reward: 0.0, max reward: 0.0, max action: [112], last action: [112]
Episode 3: Episode Accuracy: [0.75357157], Max Accuracy till Episode: [0.93187684]
Episode 4
reward:  [0.]
action:  [112]
Episode 4,1: cum reward: 0.0, max reward: 0.0, max action: [112], last action: [112]
Episode 4: Episode Accuracy: [0.7889947], Max Accu

In [36]:
# Score the decoded state `x`; the output below is a (accuracy array, None) tuple.
trainer.calculate_accuracy_for_decoded_state(x)

(array([0.9552871], dtype=float32), None)

In [37]:
# Round and clamp the decoded state into a labelled DataFrame.
# NOTE(review): unlike the earlier evaluation section, widenfact5 is not
# overridden here — confirm that is intended.
X = clip_values(x)

In [38]:
# Convert the first row of the clipped architecture table into a config dict,
# show it, and compute its estimated parameter count.
config = parse_model_representation(X.iloc[0])
print(config)
total_params = calc_total_params(config)
total_params

{'out_channel0': 118, 'M': 5, 'R1': 14, 'R2': 13, 'R3': 11, 'R4': 7, 'R5': 4, 'convblock1': 'D', 'widenfact1': 4, 'B1': 12, 'convblock2': 'D', 'widenfact2': 4, 'B2': 10, 'convblock3': 'D', 'widenfact3': 4, 'B3': 8, 'convblock4': 'C', 'widenfact4': 3, 'B4': 7, 'convblock5': 'A', 'widenfact5': 0, 'B5': 4}


527378142584