In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical
from torch.optim import Adam
from abc_py.interface import ABC
import functools
import os

In [2]:
delay_constraint_table = {
    "adder.aig": 2400,
    "bar.aig": 120,
    "div.aig": 64000,
    "hyp.aig": 94000,
    "log2.aig": 4000,
    "max.aig": 2345,
    "multiplier.aig": 2700,
    "sin.aig": 2000,
    "sqrt.aig": 34000,
    "square.aig": 2300
}

In [3]:
class AdvantageActorCritic(nn.Module):
    def __init__(self, input_dim, n_actions, gamma):
        super(AdvantageActorCritic, self).__init__()
        self.gamma = gamma

        self.pi1 = nn.Linear(input_dim, 20)
        self.pi2 = nn.Linear(20, 20)
        self.pi = nn.Linear(20, n_actions)

        self.v1 = nn.Linear(input_dim, 20)
        self.v = nn.Linear(20, 1)

        self.rewards = []
        self.actions = []
        self.states = []

    def remember(self, state, action, reward):
        self.states.append(state)
        self.actions.append(action)
        self.rewards.append(reward)

    def clear_memory(self):
        self.states = []
        self.actions = []
        self.rewards = []

    def forward(self, state):
        pi1 = F.relu(self.pi1(state))
        pi2 = F.relu(self.pi2(pi1))
        pi = self.pi(pi2)

        v1 = F.relu(self.v1(state))
        v = F.leaky_relu(self.v(v1), 0.2)

        return pi, v
    
    def calc_R(self):
        states = torch.stack(self.states)
        _, v = self.forward(states)

        R = v[-1]
        batch_return = []
        for reward in self.rewards[::-1]:
            R = reward + self.gamma * R
            batch_return.append(R)

        batch_return.reverse()
        return torch.tensor(batch_return, dtype=torch.float)
    
    def calc_loss(self, final_state: torch.Tensor):
        states = torch.stack(self.states)
        actions = torch.tensor(self.actions, dtype=torch.float)
        returns = self.calc_R()

        pi, values = self.forward(states)
        values = values.squeeze()
        final_state = final_state.unsqueeze(0)
        final_value = self.forward(final_state)[1].squeeze(0)
        next_values = torch.cat((values[1:], final_value), dim=0)
        td_error = returns + (self.gamma * next_values) - values

        critic_loss = td_error ** 2
        probs = F.softmax(pi, dim=1)
        dist = Categorical(probs)
        log_probs = dist.log_prob(actions)
        actor_loss = -log_probs * td_error.detach()

        total_loss = (actor_loss + critic_loss).mean()
        return total_loss

    def select_action(self, observation):
        state = observation.unsqueeze(0)
        pi, _ = self.forward(state)
        probs = F.softmax(pi, dim=1)
        dist = Categorical(probs)
        action = dist.sample().item()

        return action

In [4]:
def perform_action(abc: ABC, area, delay, action, possible_actions, filename):
    new_stats = possible_actions[action](abc)
    observation_ = torch.tensor(new_stats[:6], dtype=torch.float)
    new_area, new_delay = new_stats[6], new_stats[7]

    if new_delay <= delay_constraint_table[filename]:
        if new_area < area:
            reward = 3
        elif new_area > area:
            reward = -1
        else:
            reward = 0
    elif new_delay < delay:
        if new_area < area:
            reward = 3
        elif new_area > area:
            reward = 0.5
        else:
            reward = 2
    elif new_delay > delay:
        if new_area < area:
            reward = 0
        elif new_area > area:
            reward = -3
        else:
            reward = -2
    else:
        if new_area < area:
            reward = 3
        elif new_area > area:
            reward = -2
        else:
            reward = 0

    return observation_, new_area, new_delay, reward

In [5]:
def train(actor_critic: AdvantageActorCritic, optimizer, episodes, iterations, abc: ABC, input_dim, possible_actions):
    filelist = os.listdir('benchmarks/arithmetic')

    for episode in range(episodes):
        avg_loss = torch.tensor(0., dtype=torch.float)
        avg_score = torch.tensor(0., dtype=torch.float)
        avg_area = torch.tensor(0., dtype=torch.float)
        avg_delay = torch.tensor(0., dtype=torch.float)
        constraints_met = 0

        for filename in filelist:
            if not filename.endswith('.aig') or filename == "hyp.aig":
                continue

            abc.read_aiger(f"benchmarks/arithmetic/{filename}")
            init_stats = abc.read_libraries("libraries/asap7sc7p5t_INVBUF_RVT_FF_nldm_201020.lib", "libraries/asap7sc7p5t_SIMPLE_RVT_FF_nldm_201020.lib")
            observation, area, delay = torch.tensor([1] * input_dim, dtype=torch.float), init_stats[6], init_stats[7]
            observation[0] = init_stats[0] / 512
            observation[1] = init_stats[1] / 130

            init_area = area
            init_delay = delay
            init_stats = torch.tensor(init_stats[:6], dtype=torch.float)

            score = 0
            actor_critic.clear_memory()

            for _ in range(iterations):
                action = actor_critic.select_action(observation)
                observation_, new_area, new_delay, reward = perform_action(abc, area, delay, action, possible_actions, filename)
                observation_ = observation_ / init_stats

                if init_stats[2] == 0:
                    observation_[2] = 0.0
                observation_[0] = init_stats[0] / 512
                observation_[1] = init_stats[1] / 130

                score += reward
                actor_critic.remember(observation, action, reward)
                observation = observation_
                area = new_area
                delay = new_delay

            loss = actor_critic.calc_loss(observation_)
            avg_loss += loss.detach().item()
            avg_area += area / init_area
            avg_delay += delay / init_delay
            avg_score += score
            if delay <= delay_constraint_table[filename]:
                constraints_met += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            actor_critic.clear_memory()

        avg_loss = (avg_loss * 4) / len(filelist)
        avg_area = (avg_area * 4) / len(filelist)
        avg_delay = (avg_delay * 4) / len(filelist)
        avg_score = (avg_score * 4) / len(filelist)
        print(f"Episode {episode + 1}: loss {avg_loss} area {avg_area} delay {avg_delay} constraints met {constraints_met}/{len(filelist) / 4}")

In [6]:
possible_actions = [
    functools.partial(ABC.resub, zero_cost=False, preserve_levels=True),
    functools.partial(ABC.resub, zero_cost=True, preserve_levels=True),
    functools.partial(ABC.rewrite, zero_cost=False, preserve_levels=True, verbose=False),
    functools.partial(ABC.rewrite, zero_cost=True, preserve_levels=True, verbose=False),
    functools.partial(ABC.refactor, zero_cost=False, preserve_levels=True),
    functools.partial(ABC.refactor, zero_cost=True, preserve_levels=True),
    functools.partial(ABC.balance),
    functools.partial(ABC.balance),
]

n_actions = len(possible_actions)
input_dim = 6
learning_rate = 3e-4
gamma = 0.9
epochs = 20
iterations = 15

In [7]:
actor_critic = AdvantageActorCritic(input_dim, n_actions, gamma)
optimiser = Adam(actor_critic.parameters(), lr=learning_rate)
abc = ABC()

In [8]:
train(actor_critic, optimiser, epochs, iterations, abc, input_dim, possible_actions)

Episode 1: loss 58.20940399169922 area 0.9750029444694519 delay 0.7612615823745728 constraints met 7/10.0
Episode 2: loss 41.511497497558594 area 1.011614441871643 delay 0.7720639705657959 constraints met 7/10.0
Episode 3: loss 59.573081970214844 area 1.0103243589401245 delay 0.7631706595420837 constraints met 7/10.0
Episode 4: loss 46.49177169799805 area 1.0292069911956787 delay 0.7630993127822876 constraints met 7/10.0
Episode 5: loss 35.73936462402344 area 0.9701582789421082 delay 0.7575303316116333 constraints met 7/10.0
Episode 6: loss 42.56972122192383 area 0.9893997311592102 delay 0.7810378670692444 constraints met 6/10.0
Episode 7: loss 42.03363800048828 area 0.992631733417511 delay 0.7678967118263245 constraints met 7/10.0
Episode 8: loss 30.94564437866211 area 0.989105224609375 delay 0.7630956172943115 constraints met 7/10.0
Episode 9: loss 53.030250549316406 area 0.9652625322341919 delay 0.7563589811325073 constraints met 7/10.0
Episode 10: loss 38.91315460205078 area 0.9705

In [9]:
abc.quit()

0

In [10]:
torch.save(actor_critic, "actor_critic4.pth")

In [11]:
actor_critic = torch.load("actor_critic4.pth")

  actor_critic = torch.load("actor_critic4.pth")


In [12]:
def generate_actions(actor_critic: AdvantageActorCritic, iterations, input_dim, possible_actions, filename):
    abc = ABC()
    abc.read_aiger(f"benchmarks/arithmetic/{filename}")
    init_stats = abc.read_libraries("libraries/asap7sc7p5t_INVBUF_RVT_FF_nldm_201020.lib", "libraries/asap7sc7p5t_SIMPLE_RVT_FF_nldm_201020.lib")
    observation, area, delay = torch.tensor([1] * input_dim, dtype=torch.float), init_stats[6], init_stats[7]
    iarea, idelay = area, delay
    observation[0] = init_stats[0] / 512
    observation[1] = init_stats[1] / 130

    init_stats = torch.tensor(init_stats[:6], dtype=torch.float)

    with torch.no_grad():
        actor_critic.clear_memory()

        for _ in range(iterations):
            action = actor_critic.select_action(observation)
            observation_, new_area, new_delay, reward = perform_action(abc, area, delay, action, possible_actions, filename)
            observation_ = observation_ / init_stats

            if init_stats[2] == 0:
                observation_[2] = 0.0
            observation_[0] = init_stats[0] / 512
            observation_[1] = init_stats[1] / 130

            actor_critic.remember(observation, action, reward)
            observation = observation_
            area = new_area
            delay = new_delay

        actions = actor_critic.actions
        actor_critic.clear_memory()
        return actions, iarea, idelay, area, delay, abc.quit()
    
for filename in os.listdir("benchmarks/arithmetic/"):
    if not filename.endswith('.aig'):
        continue

    _, iarea, idelay, area, delay, _ = generate_actions(actor_critic, iterations, input_dim, possible_actions, filename)
    print(f"Filename: {filename}, initial area: {iarea}, initial delay: {idelay}, final area: {area}, final delay: {delay}")

Filename: square.aig, initial area: 1096.52, initial delay: 2401.29, final area: 1089.62, final delay: 2377.67
Filename: hyp.aig, initial area: 13041.75, initial delay: 166839.98, final area: 14727.64, final delay: 135089.5
Filename: adder.aig, initial area: 58.68, initial delay: 2445.81, final area: 73.28, final delay: 2265.86
Filename: sin.aig, initial area: 295.81, initial delay: 1990.54, final area: 329.95, final delay: 1739.05
Filename: bar.aig, initial area: 155.64, initial delay: 151.33, final area: 174.07, final delay: 173.1
Filename: max.aig, initial area: 167.8, initial delay: 2626.79, final area: 186.78, final delay: 2292.29
Filename: sqrt.aig, initial area: 1139.18, initial delay: 65898.94, final area: 1413.68, final delay: 35446.91
Filename: div.aig, initial area: 2982.86, initial delay: 64616.92, final area: 2753.44, final delay: 39649.43
Filename: multiplier.aig, initial area: 1469.97, initial delay: 2563.76, final area: 1383.48, final delay: 2530.95
Filename: log2.aig, 