<!-- # Imports -->

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from abc_py.interface import ABC
import functools
import os
from torch.distributions import Categorical

<!-- # Definitions -->

In [2]:
# Catalogue of synthesis transformations the agent can pick from. An entry's
# position in this list is the action index sampled from the actor, so the
# ordering of the flag combinations below must not change.
possible_actions = [
    # resub variants
    *[
        functools.partial(ABC.resub, zero_cost=zero_cost, preserve_levels=preserve_levels)
        for zero_cost, preserve_levels in (
            (False, False),
            (False, True),
            (True, False),
            (True, True),
        )
    ],
    # rewrite variants (always with verbose=False)
    *[
        functools.partial(ABC.rewrite, zero_cost=zero_cost, preserve_levels=preserve_levels, verbose=False)
        for zero_cost, preserve_levels in (
            (False, True),
            (True, True),
            (False, False),
            (True, False),
        )
    ],
    # refactor variants
    *[
        functools.partial(ABC.refactor, zero_cost=zero_cost, preserve_levels=preserve_levels)
        for zero_cost, preserve_levels in (
            (False, True),
            (False, False),
            (True, True),
            (True, False),
        )
    ],
    # balance is used without any keyword options
    functools.partial(ABC.balance),
]

num_actions = len(possible_actions)  # width of the actor's output layer
num_features = 4  # length of the state vector fed to both networks
learning_rate = 0.01  # step size shared by the actor and critic optimisers
discount_factor = 0.99  # gamma used for TD targets and reward discounting

In [3]:
class ActorNetwork(nn.Module):
    """Policy network mapping a state vector to action probabilities.

    Three fully connected layers (num_features -> 20 -> 20 -> num_actions)
    with ReLU after the first two and a softmax over the last dimension of
    the output.
    """

    def __init__(self, num_features, num_actions):
        """Create the three linear layers.

        Args:
            num_features: size of the input state vector.
            num_actions: number of outputs (one probability per action).
        """
        super().__init__()
        hidden_width = 20
        self.fc1 = nn.Linear(num_features, hidden_width)
        self.fc2 = nn.Linear(hidden_width, hidden_width)
        self.fc3 = nn.Linear(hidden_width, num_actions)

    def forward(self, x):
        """Return the softmax-normalised action probabilities for ``x``."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        logits = self.fc3(hidden)
        return logits.softmax(dim=-1)

In [4]:
class CriticNetwork(nn.Module):
    """Value network mapping a state vector to a single scalar estimate.

    Two fully connected layers (num_features -> 10 -> 1) with a ReLU between
    them and no activation on the output.
    """

    def __init__(self, num_features):
        """Create the two linear layers.

        Args:
            num_features: size of the input state vector.
        """
        super().__init__()
        hidden_width = 10
        self.fc1 = nn.Linear(num_features, hidden_width)
        self.fc2 = nn.Linear(hidden_width, 1)

    def forward(self, x):
        """Return the unbounded value estimate for ``x``."""
        hidden = self.fc1(x).relu()
        return self.fc2(hidden)

In [5]:
def select_action(actor: ActorNetwork, state: torch.Tensor):
    """Sample one action from the actor's policy for ``state``.

    Args:
        actor: callable returning action probabilities for a state.
        state: state tensor passed straight to ``actor``.

    Returns:
        A tuple ``(action, log_prob, probabilities)``: the sampled action as a
        Python number, the log-probability of that action reshaped to a
        one-element tensor, and the probabilities returned by ``actor``.
    """
    probabilities = actor(state)
    policy = Categorical(probabilities)
    sampled = policy.sample()
    # reshape(1) only works for a single sampled action, i.e. an unbatched state.
    log_probability = policy.log_prob(sampled).reshape(1)
    return sampled.item(), log_probability, probabilities

In [6]:
def calculate_losses(critic: CriticNetwork, episode_design_states, optimisation_sequence, episode_rewards):
    """Accumulate actor and critic losses over the transitions of one rollout.

    Consecutive entries of ``episode_design_states`` form (state, next_state)
    pairs, so a list of N states contributes N - 1 transitions; the last entry
    is only ever used as a ``next_state``.

    Args:
        critic: callable returning a value estimate for a state.
        episode_design_states: sequence of state tensors in visiting order.
        optimisation_sequence: sequence of ``(action, action_log_prob)`` pairs,
            indexed in step with the states.
        episode_rewards: per-step rewards, indexed in step with the states.

    Returns:
        ``(actor_loss, critic_loss)`` as one-element tensors. The actor loss is
        the sum of ``-log_prob * td_error`` with the TD error detached; the
        critic loss is the sum of squared TD errors.
    """
    actor_loss = torch.zeros(1, dtype=torch.float)
    critic_loss = torch.zeros(1, dtype=torch.float)

    transitions = zip(episode_design_states, episode_design_states[1:])
    for step, (state, next_state) in enumerate(transitions):
        _, action_log_prob = optimisation_sequence[step]
        reward = episode_rewards[step]

        value = critic(state)
        next_value = critic(next_state)
        # One-step TD error using the module-level discount_factor.
        td_error = reward + discount_factor * next_value - value

        # The TD error is detached so the actor loss does not backpropagate
        # into the critic.
        actor_loss = actor_loss - action_log_prob * td_error.detach()
        critic_loss = critic_loss + td_error.pow(2)

    return actor_loss, critic_loss

In [7]:
def calculate_reward(area, delay, new_area, new_delay):
    """Score a synthesis step from how delay and area changed.

    Args:
        area: area before the step.
        delay: delay before the step.
        new_area: area after the step.
        new_delay: delay after the step.

    Returns:
        An integer in [-3, 3]. Lower delay and lower area are rewarded, higher
        values are penalised, and 0 is returned when neither changed.
    """

    def trend(before, after):
        # -1 when the metric decreased, +1 when it increased, 0 otherwise.
        return int(after > before) - int(after < before)

    # Keyed by (delay trend, area trend).
    reward_table = {
        (-1, -1): 3,
        (-1, 0): 2,
        (-1, 1): 1,
        (1, -1): -1,
        (1, 0): -2,
        (1, 1): -3,
        (0, -1): 3,
        (0, 0): 0,
        (0, 1): -2,
    }
    return reward_table[(trend(delay, new_delay), trend(area, new_area))]

In [8]:
def discount_rewards(synth_rewards: list, discount_factor: float):
    """Compute the discounted return-to-go for every step of a rollout.

    For step ``t`` the result is
    ``r[t] + discount_factor * r[t+1] + discount_factor**2 * r[t+2] + ...``,
    i.e. the reward at that step plus the discounted rewards that follow it.

    Args:
        synth_rewards: per-step rewards in the order they were collected.
        discount_factor: multiplier applied once per step of look-ahead.

    Returns:
        A float tensor with one discounted return per input reward (empty when
        ``synth_rewards`` is empty).
    """
    rewards = torch.tensor(synth_rewards, dtype=torch.float)
    discounted_rewards = torch.zeros_like(rewards)
    running_return = 0.0
    # Walk backwards so each step accumulates the rewards that come AFTER it;
    # iterating forwards would instead sum up rewards from the past.
    for i in reversed(range(len(rewards))):
        running_return = rewards[i] + discount_factor * running_return
        discounted_rewards[i] = running_return
    return discounted_rewards

In [9]:
# def train(actor: ActorNetwork, critic: CriticNetwork, actor_optimizer, critic_optimizer, episodes=50, iterations=50):
#     for episode in range(episodes):
#         total_actor_loss = 0
#         total_critic_loss = 0

#         for dir in os.listdir("./benchmarks"):
#             if dir != "arithmetic":
#                 continue
            
#             for filename in os.listdir(f"./benchmarks/{dir}"):
#                 if not filename.endswith(".aig"):
#                     continue

#                 abc = ABC()
#                 abc.read_aiger(f"./benchmarks/{dir}/{filename}")
#                 init_stats = abc.read_libraries("libraries/asap7sc7p5t_SIMPLE_RVT_FF_nldm_201020.lib", "libraries/asap7sc7p5t_INVBUF_RVT_FF_nldm_201020.lib")
#                 assert len(init_stats) - 4 == num_features

#                 state = torch.tensor([1] * num_features, dtype=torch.float)
#                 area, delay = init_stats[6], init_stats[7]
#                 init_stats = torch.tensor(init_stats[2:6], dtype=torch.float)
                
#                 for i in range(iterations):
#                     # select action from actor model
#                     action, action_log_prob, _ = select_action(actor, state)

#                     # take action and observe next state
#                     action_to_be_taken = possible_actions[action]
#                     new_stats = action_to_be_taken(abc)
#                     next_state = torch.tensor(new_stats[2:6], dtype=torch.float) / init_stats # take ratio with respect to initial stats

#                     if init_stats[0] == 0:
#                         next_state[0] = 0

#                     # calculate reward and update actor and critic models
#                     reward = calculate_reward(area, delay, new_stats[6], new_stats[7])
#                     actor_loss, critic_loss = calculate_losses(critic, action_log_prob, reward, state, next_state)
#                     total_actor_loss += actor_loss.detach().item()
#                     total_critic_loss += critic_loss.detach().item()

#                     actor_optimizer.zero_grad()
#                     actor_loss.backward()
#                     actor_optimizer.step()
#                     critic_optimizer.zero_grad()
#                     critic_loss.backward()
#                     critic_optimizer.step()

#                     # update state
#                     state = next_state
#                     area = new_stats[6]
#                     delay = new_stats[7]

#         total_actor_loss /= iterations
#         total_critic_loss /= iterations
#         print(f"Episode {episode + 1}: Actor Loss: {total_actor_loss}, Critic Loss: {total_critic_loss}")
        
#         if abc.quit() != 0:
#             print("Error in quitting abc")
#             return

In [10]:
def train(actor: ActorNetwork, critic: CriticNetwork, actor_optimizer, critic_optimizer, episodes=50, iterations=50):
    """Train the actor and critic on every ``.aig`` file in ``./benchmarks/arithmetic``.

    In each episode, every benchmark is loaded into a fresh ``ABC`` instance,
    ``iterations`` actions are sampled from the actor and applied, and one
    actor update plus one critic update is made from the collected rollout.

    Args:
        actor: policy network used to sample actions.
        critic: value network used for the TD error.
        actor_optimizer: optimiser stepping the actor's parameters.
        critic_optimizer: optimiser stepping the critic's parameters.
        episodes: number of passes over the benchmark directory.
        iterations: number of actions applied to each benchmark per pass.

    Raises:
        FileNotFoundError: if the benchmark directory has no ``.aig`` file.

    Training stops early (after printing a message) if an ``ABC`` instance
    reports a non-zero status from ``quit()``.
    """
    benchmark_dir = "./benchmarks/arithmetic"
    # Filter once up front so the loss average below is taken over the files
    # that are actually processed.
    aig_files = [name for name in os.listdir(benchmark_dir) if name.endswith(".aig")]
    if not aig_files:
        raise FileNotFoundError(f"No .aig benchmarks found in {benchmark_dir}")

    for episode in range(episodes):
        total_actor_loss = 0
        total_critic_loss = 0

        for filename in aig_files:
            episode_design_states = []
            optimisation_sequence = []
            synth_rewards = []

            abc = ABC()
            try:
                abc.read_aiger(f"{benchmark_dir}/{filename}")
                init_stats = abc.read_libraries("libraries/asap7sc7p5t_SIMPLE_RVT_FF_nldm_201020.lib", "libraries/asap7sc7p5t_INVBUF_RVT_FF_nldm_201020.lib")
                assert len(init_stats) - 4 == num_features

                # The state is the ratio of current stats to the initial ones,
                # so every rollout starts from all ones.
                state = torch.tensor([1] * num_features, dtype=torch.float)
                area, delay = init_stats[6], init_stats[7]
                init_stats = torch.tensor(init_stats[2:6], dtype=torch.float)

                for _ in range(iterations):
                    # select action from actor model
                    action, action_log_prob, _ = select_action(actor, state)

                    # take action and observe next state
                    action_to_be_taken = possible_actions[action]
                    new_stats = action_to_be_taken(abc)
                    next_state = torch.tensor(new_stats[2:6], dtype=torch.float) / init_stats  # take ratio with respect to initial stats

                    # avoid the 0/0 ratio when the first initial stat is zero
                    if init_stats[0] == 0:
                        next_state[0] = 0

                    # record the transition for the end-of-rollout update
                    reward = calculate_reward(area, delay, new_stats[6], new_stats[7])
                    episode_design_states.append(state)
                    optimisation_sequence.append((action, action_log_prob))
                    synth_rewards.append(reward)

                    # update state
                    state = next_state
                    area = new_stats[6]
                    delay = new_stats[7]

                # Record the state reached after the last action so that the
                # final transition also has a next_state and contributes to
                # the losses.
                episode_design_states.append(state)
            finally:
                # Every ABC instance is shut down, including when the rollout
                # raises, instead of only the last one of the episode.
                quit_status = abc.quit()

            if quit_status != 0:
                print("Error in quitting abc")
                return

            episode_rewards = discount_rewards(synth_rewards, discount_factor)
            actor_loss, critic_loss = calculate_losses(critic, episode_design_states, optimisation_sequence, episode_rewards)
            print(actor_loss.detach(), critic_loss.detach())
            total_actor_loss += actor_loss.detach()
            total_critic_loss += critic_loss.detach()

            # propagate loss and step optimisers
            actor_optimizer.zero_grad()
            actor_loss.backward()
            actor_optimizer.step()
            critic_optimizer.zero_grad()
            critic_loss.backward()
            critic_optimizer.step()

        transitions_seen = len(aig_files) * iterations
        print(f"Episode {episode + 1}: Actor Loss: {total_actor_loss / transitions_seen}, Critic Loss: {total_critic_loss / transitions_seen}")

In [11]:
# Build the policy (actor) and value (critic) networks, then give each its own
# Adam optimiser using the shared learning_rate.
# NOTE(review): no random seed is set in the cells shown, so the initial
# weights (and sampled actions) will differ between runs — confirm this is intended.
actor = ActorNetwork(num_features, num_actions)
critic = CriticNetwork(num_features)
actor_optimizer = optim.Adam(actor.parameters(), lr=learning_rate)
critic_optimizer = optim.Adam(critic.parameters(), lr=learning_rate)

In [12]:
# Run training with the default episodes=50 and iterations=50.
train(actor, critic, actor_optimizer, critic_optimizer)

tensor([523.9854]) tensor([1653.1078])


KeyboardInterrupt: 

In [None]:
# def generate_strategy(actor: ActorNetwork, file):
#     abc = ABC()
#     abc.read_aiger(file)
#     assert len(init_stats) == num_features

#     state = torch.tensor(init_stats[:2] + [1] * (num_features - 2), dtype=torch.float)
#     init_stats = torch.tensor(init_stats, dtype=torch.float)

#     for i in range(50):
#         action, _, prob = select_action(actor, state)
#         print(prob)
#         action_to_be_taken = possible_actions[action]
#         print(f"Taking action {action}")
#         new_stats = action_to_be_taken(abc)
#         next_state = torch.tensor(new_stats, dtype=torch.float) / init_stats
#         next_state[0] = state[0]
#         next_state[1] = state[1]

#         if init_stats[2] == 0:
#             next_state[2] = 0

#         state = next_state

#     print(state)
#     if abc.quit() != 0:
#         print("Error in quitting abc")

In [None]:
# generate_strategy(actor, "./benchmarks/arithmetic/adder.aig")

In [None]:
# abc = ABC()
# abc.read_aiger("i10.aig")
# init_stats = abc.read_libraries("libraries/asap7sc7p5t_SIMPLE_RVT_FF_nldm_201020.lib", "libraries/asap7sc7p5t_INVBUF_RVT_FF_nldm_201020.lib")
# assert len(init_stats) - 4 == num_features

# state = torch.tensor([1] * num_features, dtype=torch.float)
# area, delay = init_stats[6], init_stats[7]
# init_stats = torch.tensor(init_stats[2:6], dtype=torch.float)

# total_actor_loss = 0
# total_critic_loss = 0

# action_probs = actor(state)
# action_distribution = Categorical(action_probs)
# action = action_distribution.sample()
# action_log_prob = action_distribution.log_prob(action).reshape(1)

# action_to_be_taken = possible_actions[action]
# new_stats = action_to_be_taken(abc)
# next_state = torch.tensor(new_stats[2:6], dtype=torch.float) / init_stats

# reward = calculate_reward(area, delay, new_stats[6], new_stats[7])
# print(reward)

# abc.quit()