In [1]:
# Matplotlib backend selection.
# NOTE(review): two backends are requested back to back (inline, then notebook);
# presumably only the later one stays active — confirm which one is intended.
%matplotlib inline
%matplotlib notebook
# autoreload mode 2 picks up edits to imported project modules without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os

from env.SurrogateModel import SurrogateModel
from env.Decoder import Decoder
from env.VQVAE_environment import VQVAE_Env, RenderCallback
from env.RLTrainer import Trainer

In [3]:
# Root of the local Analog_NAS checkout. Set the ANALOG_NAS_ROOT environment variable to
# run this notebook on another machine; the default reproduces the original hard-coded
# location, so the two paths below are unchanged when the variable is not set.
PROJECT_ROOT = os.environ.get(
    'ANALOG_NAS_ROOT',
    '/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS',
)

# Artifact files handed to Trainer as surrogate_path / codebook_path.
surrogate_model = f'{PROJECT_ROOT}/env/models/surrogate_model.json'
codebook = f'{PROJECT_ROOT}/env/models/codebook.pth'


In [8]:
# Decoder settings and the checkpoint its weights are loaded from.
decoder_config = dict(
    out_dim=22,       # output dimension
    embed_dim=8,      # embedding dimension
    h_nodes=512,      # number of hidden nodes
    dropout=0.2,      # dropout rate
    scale=2,          # scale factor
    num_layers=5,     # number of layers
    # path to load model weights from
    load_path='/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/models/decoder_model.pth',
)

# Environment settings; embed_dim is taken from the decoder config so the two stay in sync.
env_config = dict(
    embed_dim=decoder_config['embed_dim'],   # embedding dimension
    num_embeddings=14,                       # number of embeddings
    max_allowed_actions=200,                 # maximum allowed actions
    consider_previous_actions=False,         # consider previous actions
    num_previous_actions=12,                 # number of previous actions to consider
    render_mode='human',                     # render mode
    # data for rendering
    render_data='/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/render/architectures_trained_on.npy',
    # labels for rendering
    render_labels='/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/render/labels.npy',
    render_log_dir='trainingLogs',           # directory for logging data
)

# RL model settings.
model_config = dict(
    model="PPO",                                   # model type ('PPO', 'A2C', 'DQN', etc.)
    # Policy type.
    # NOTE(review): if Trainer forwards this string to stable-baselines3, the registered
    # alias there is 'MlpPolicy' — confirm that 'MLPPolicy' is accepted.
    policy='MLPPolicy',
    total_timesteps=1_500_000,                     # total number of timesteps
    verbose=0,                                     # verbosity level
    tensorboard_log=env_config['render_log_dir'],  # tensorboard log directory
    n_steps=2048,                                  # steps to run for each environment per update
    progress_bar=False,                            # whether to display a progress bar
    n_epochs=10,                                   # number of epochs
    batch_size=64,                                 # batch size
)

# Experiment-logging (wandb) settings.
log_config = dict(
    project='PPO Training',                        # project name in wandb
    # entity='trex-ai',                            # entity name in wandb (disabled)
    sync_tensorboard=True,                         # whether to sync TensorBoard
    save_code=True,                                # whether to save code in wandb
    model_save_path=env_config['render_log_dir'],  # path to save the model
    gradient_save_freq=100,                        # frequency to save gradients
    verbose=2,                                     # verbosity level
)

# Example of initializing the Trainer class with these configurations
# trainer = Trainer(
#     surrogate_path="path_to_surrogate_model.pt",
#     codebook_path="path_to_codebook.pt",
#     decoder_config=decoder_config,
#     env_config=env_config,
#     model_config=model_config,
#     log_config=log_config
# )


In [9]:
# Build the trainer from the two artifact paths and the four configuration dicts.
trainer = Trainer(
    surrogate_path=surrogate_model,
    codebook_path=codebook,
    decoder_config=decoder_config,
    env_config=env_config,
    model_config=model_config,
    log_config=log_config,
)

Surrogate model loaded from:  /Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/models/surrogate_model.json
Codebook loaded from:  /Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/models/codebook.pth
Decoder model loaded from:  /Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/models/decoder_model.pth
Environment check passed


In [10]:
# Load a previously saved checkpoint into the trainer. The file name suggests PPO with the
# MLP policy after 1,500,000 timesteps — presumably matching model_config; confirm.
trainer.load_model('/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/models/ppo_mlpPolicy_1500000.zip')

In [12]:
# Evaluate the loaded policy over 1000 episodes with num_steps=500. With find_max=True the
# call returns a value that is kept as max_state — presumably the best-scoring state found;
# confirm against Trainer.evaluate_accuracy.
max_state = trainer.evaluate_accuracy(num_episodes=1000, num_steps=500, find_max=True)

Accuracy: 0.7744229435920715 for episode 0
Accuracy: 0.9243929982185364 for episode 1
Accuracy: 0.9238914251327515 for episode 2
Accuracy: 0.7723342180252075 for episode 3
Accuracy: 0.7889947295188904 for episode 4
Accuracy: 0.7889947295188904 for episode 5
Accuracy: 0.7535715699195862 for episode 6
Accuracy: 0.9272683262825012 for episode 7
Accuracy: 0.7889947295188904 for episode 8
Accuracy: 0.7348170280456543 for episode 9
Accuracy: 0.9238914251327515 for episode 10
Accuracy: 0.7513466477394104 for episode 11
Accuracy: 0.9369305968284607 for episode 12
Accuracy: 0.7348170280456543 for episode 13
Accuracy: 0.7376570105552673 for episode 14
Accuracy: 0.7348170280456543 for episode 15
Accuracy: 0.925092875957489 for episode 16
Accuracy: 0.770458459854126 for episode 17
Accuracy: 0.7686992287635803 for episode 18
Accuracy: 0.770458459854126 for episode 19
Accuracy: 0.7348170280456543 for episode 20
Accuracy: 0.7692904472351074 for episode 21
Accuracy: 0.9369305968284607 for episode 22
A

In [14]:
# Inspect the shape of the value returned by evaluate_accuracy(find_max=True).
max_state.shape

torch.Size([1, 22])

In [52]:
# Second experiment configuration. Compared with the first configuration cell only
# env_config['consider_previous_actions'] (True) and model_config['policy']
# ('MultiInputPolicy') differ.

# Decoder settings and the checkpoint its weights are loaded from.
decoder_config = dict(
    out_dim=22,       # output dimension
    embed_dim=8,      # embedding dimension
    h_nodes=512,      # number of hidden nodes
    dropout=0.2,      # dropout rate
    scale=2,          # scale factor
    num_layers=5,     # number of layers
    # path to load model weights from
    load_path='/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/models/decoder_model.pth',
)

# Environment settings; embed_dim is taken from the decoder config so the two stay in sync.
env_config = dict(
    embed_dim=decoder_config['embed_dim'],   # embedding dimension
    num_embeddings=14,                       # number of embeddings
    max_allowed_actions=200,                 # maximum allowed actions
    consider_previous_actions=True,          # consider previous actions
    num_previous_actions=12,                 # number of previous actions to consider
    render_mode='human',                     # render mode
    # data for rendering
    render_data='/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/render/architectures_trained_on.npy',
    # labels for rendering
    render_labels='/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/render/labels.npy',
    render_log_dir='trainingLogs',           # directory for logging data
)

# RL model settings.
model_config = dict(
    model="PPO",                                   # model type ('PPO', 'A2C', 'DQN', etc.)
    policy='MultiInputPolicy',                     # policy type
    total_timesteps=1_500_000,                     # total number of timesteps
    verbose=0,                                     # verbosity level
    tensorboard_log=env_config['render_log_dir'],  # tensorboard log directory
    n_steps=2048,                                  # steps to run for each environment per update
    progress_bar=False,                            # whether to display a progress bar
    n_epochs=10,                                   # number of epochs
    batch_size=64,                                 # batch size
)

# Experiment-logging (wandb) settings.
log_config = dict(
    project='PPO Training',                        # project name in wandb
    # entity='trex-ai',                            # entity name in wandb (disabled)
    sync_tensorboard=True,                         # whether to sync TensorBoard
    save_code=True,                                # whether to save code in wandb
    model_save_path=env_config['render_log_dir'],  # path to save the model
    gradient_save_freq=100,                        # frequency to save gradients
    verbose=2,                                     # verbosity level
)

# Rebuild the trainer with the configuration dicts defined in this cell.
trainer = Trainer(
    surrogate_path=surrogate_model,
    codebook_path=codebook,
    decoder_config=decoder_config,
    env_config=env_config,
    model_config=model_config,
    log_config=log_config,
)


Surrogate model loaded from:  /Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/models/surrogate_model.json
Codebook loaded from:  /Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/models/codebook.pth
Decoder model loaded from:  /Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/env/models/decoder_model.pth
Predictions:  [0.7889947]
Predictions:  [0.7404414]
Predictions:  [0.92011744]
Predictions:  [0.9441887]
Predictions:  [0.9441887]
Predictions:  [0.9441887]
Predictions:  [0.9238914]
Predictions:  [0.9238914]
Predictions:  [0.9238914]
Predictions:  [0.9238914]
Predictions:  [0.9238914]
Environment check passed


In [53]:
# Load a previously saved checkpoint into the trainer. The file name suggests PPO with
# MultiInputPolicy, 12 previous actions and 1,500,000 timesteps — presumably matching the
# configuration above; confirm.
trainer.load_model('/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/models/ppo_multiInputPolicy_12_1500000.zip')

In [57]:
# Evaluate the loaded policy over 5000 episodes with num_steps=500 and find_max=True.
# NOTE(review): the saved output of this cell ends in KeyboardInterrupt, so the run did not
# complete and max_state was presumably not reassigned by it.
max_state = trainer.evaluate_accuracy(num_episodes=5000, num_steps=500, find_max=True)

Episode 0
Predictions:  [0.7723342]
reward: [0.7723342], action: [110], cum_reward: [0.7723342], max_reward: 0
Predictions:  [0.7760769]
reward: [0.00374269], action: [69], cum_reward: [0.7760769], max_reward: [0.7760769]
Predictions:  [0.91561663]
reward: [0.13953972], action: [39], cum_reward: [0.91561663], max_reward: [0.91561663]
Predictions:  [0.91561663]
reward: [0.], action: [57], cum_reward: [0.91561663], max_reward: [0.91561663]
Predictions:  [0.91561663]
reward: [0.], action: [39], cum_reward: [0.91561663], max_reward: [0.91561663]
Predictions:  [0.7626149]
reward: [-0.15300173], action: [11], cum_reward: [0.7626149], max_reward: [0.7626149]
Predictions:  [0.77330315]
reward: [0.01068825], action: [84], cum_reward: [0.77330315], max_reward: [0.77330315]
Predictions:  [0.7725494]
reward: [-0.00075376], action: [59], cum_reward: [0.7725494], max_reward: [0.7725494]
Predictions:  [0.7725494]
reward: [0.], action: [12], cum_reward: [0.7725494], max_reward: [0.7725494]
Predictions

KeyboardInterrupt: 

In [21]:
# Display the value returned by evaluate_accuracy(find_max=True).
# NOTE(review): this cell's execution count (21) is lower than that of the evaluation cell
# above it (57), so the saved output may come from an earlier run.
max_state

tensor([[117.6796,   7.0402,  13.8492,  12.9151,  11.1857,   7.0861,   4.1173,
           4.1981,   5.0224,  11.5101,   4.4452,   4.2604,   9.8896,   3.9975,
           3.9347,   7.7382,   3.2340,   3.2807,   6.5205,   0.0000,   0.0000,
           3.6231]])

In [21]:
import torch

In [37]:
def run_steps(env, model, num_steps):
    """Roll the policy forward from a fresh reset and return the last observation.

    Args:
        env: environment whose ``reset()`` returns an observation and whose ``step(action)``
            returns a 4-tuple ``(obs, reward, done, info)``.
        model: policy exposing ``predict(obs, deterministic=True)`` that returns a pair whose
            first item is the action.
        num_steps: maximum number of steps to take.

    Returns:
        The observation produced by the final ``step`` call, or the reset observation when
        no step is taken.
    """
    observation = env.reset()
    for _step in range(num_steps):
        # Deterministic action; the second item returned by predict is not used here.
        action, _unused_state = model.predict(observation, deterministic=True)
        observation, _reward, finished, _info = env.step(action)
        if not finished:
            continue
        # The environment signalled termination before num_steps was reached.
        print('Early stopping')
        break
    return observation

def evaluate_model(trainer, num_episodes, num_steps):
    """Roll out the trainer's policy and score each final observation with the surrogate.

    For every episode the policy is run through ``run_steps``; the returned observation is
    converted to a float32 tensor, passed through ``trainer.decoder_model``, and the decoder
    output is scored with ``trainer.surrogate_model.evaluate``. Each score is printed.

    Args:
        trainer: object exposing ``model``, ``env``, ``surrogate_model`` and ``decoder_model``.
        num_episodes: number of rollouts to evaluate.
        num_steps: maximum number of policy steps per rollout.

    Returns:
        The mean of the per-episode accuracies, or ``None`` when no episode was run
        (``num_episodes <= 0``).
    """
    model = trainer.model
    env = trainer.env
    surrogate_model = trainer.surrogate_model
    decoder_model = trainer.decoder_model

    accuracy = []
    for i in range(num_episodes):
        rl_output = run_steps(env, model, num_steps)
        # torch.tensor only converts the observation and adds no batch dimension, so
        # run_steps is assumed to return an already-batched array — TODO confirm.
        rl_output_tensor = torch.tensor(rl_output, dtype=torch.float32)
        # Pure inference: no gradients are needed for the decoder forward pass.
        with torch.no_grad():
            decoder_output = decoder_model(rl_output_tensor)
        calculated_accuracy = surrogate_model.evaluate(decoder_output)
        print(f"Accuracy: {calculated_accuracy} for episode {i}")
        accuracy.append(calculated_accuracy)

    # Without any episode there is nothing to average; avoid dividing by zero.
    if not accuracy:
        print("No episodes evaluated")
        return None

    average_accuracy = sum(accuracy) / len(accuracy)
    print(f"Average accuracy: {average_accuracy}")
    return average_accuracy

In [47]:
# Load the model.zip stored under trainingLogs/models/rklmkht6 — presumably the directory of
# one logged training run; confirm.
trainer.load_model('/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/trainingLogs/models/rklmkht6/model.zip')

In [22]:
# Evaluate the currently loaded policy over 100 episodes with num_steps=500.
# NOTE(review): this cell's execution count (22) is lower than that of the load cell above
# it (47), so the saved output may not belong to that checkpoint.
trainer.evaluate_accuracy(num_episodes=100, num_steps=500)

Accuracy: 0.798057496547699 for episode 0
Accuracy: 0.7889947295188904 for episode 1
Accuracy: 0.9471520185470581 for episode 2
Accuracy: 0.7839134335517883 for episode 3
Accuracy: 0.9190993905067444 for episode 4
Accuracy: 0.7772524356842041 for episode 5
Accuracy: 0.7889947295188904 for episode 6
Accuracy: 0.7889947295188904 for episode 7
Accuracy: 0.9243929982185364 for episode 8
Accuracy: 0.7725493907928467 for episode 9
Accuracy: 0.9243929982185364 for episode 10
Accuracy: 0.7349642515182495 for episode 11
Accuracy: 0.7551933526992798 for episode 12
Accuracy: 0.819848895072937 for episode 13
Accuracy: 0.9325972199440002 for episode 14
Accuracy: 0.7681959271430969 for episode 15
Accuracy: 0.759606122970581 for episode 16
Accuracy: 0.9228904843330383 for episode 17
Accuracy: 0.736478328704834 for episode 18
Accuracy: 0.7753796577453613 for episode 19
Accuracy: 0.9325860738754272 for episode 20
Accuracy: 0.7889947295188904 for episode 21
Accuracy: 0.7725493907928467 for episode 22
Ac

In [49]:
# Load the model.zip stored under trainingLogs/models/uenl6yuv — presumably the directory of
# another logged training run; confirm.
trainer.load_model('/Users/tawab/Desktop/columbia/Courses/Spring2024/HPML/Project/Analog_NAS/trainingLogs/models/uenl6yuv/model.zip')

In [50]:
# Evaluate the checkpoint loaded in the previous cell over 100 episodes with num_steps=500.
trainer.evaluate_accuracy(num_episodes=100, num_steps=500)

Accuracy: 0.9238914251327515 for episode 0
Accuracy: 0.7889947295188904 for episode 1
Accuracy: 0.7889947295188904 for episode 2
Accuracy: 0.7818193435668945 for episode 3
Accuracy: 0.7889947295188904 for episode 4
Accuracy: 0.9326378107070923 for episode 5
Accuracy: 0.7889947295188904 for episode 6
Accuracy: 0.9325860738754272 for episode 7
Accuracy: 0.7889947295188904 for episode 8
Accuracy: 0.9238914251327515 for episode 9
Accuracy: 0.759606122970581 for episode 10
Accuracy: 0.819848895072937 for episode 11
Accuracy: 0.7575494050979614 for episode 12
Accuracy: 0.9322266578674316 for episode 13
Accuracy: 0.9290140271186829 for episode 14
Accuracy: 0.7889947295188904 for episode 15
Accuracy: 0.7818193435668945 for episode 16
Accuracy: 0.7348170280456543 for episode 17
Accuracy: 0.7734160423278809 for episode 18
Accuracy: 0.7889947295188904 for episode 19
Accuracy: 0.7704378962516785 for episode 20
Accuracy: 0.7744229435920715 for episode 21
Accuracy: 0.7889947295188904 for episode 22
