In [1]:
from re import I
import pandas
import configparser
import os
import numpy as np
import bitfusion.src.benchmarks.benchmarks as benchmarks
from bitfusion.src.simulator.stats import Stats
from bitfusion.src.simulator.simulator import Simulator
from bitfusion.src.sweep.sweep import SimulatorSweep, check_pandas_or_run
from bitfusion.src.utils.utils import *
from bitfusion.src.optimizer.optimizer import optimize_for_order, get_stats_fast

def df_to_stats(df):
    stats = Stats()
    stats.total_cycles = float(df['Cycles'].iloc[0])
    stats.mem_stall_cycles = float(df['Memory wait cycles'].iloc[0])
    stats.reads['act'] = float(df['IBUF Read'].iloc[0])
    stats.reads['out'] = float(df['OBUF Read'].iloc[0])
    stats.reads['wgt'] = float(df['WBUF Read'].iloc[0])
    stats.reads['dram'] = float(df['DRAM Read'].iloc[0])
    stats.writes['act'] = float(df['IBUF Write'].iloc[0])
    stats.writes['out'] = float(df['OBUF Write'].iloc[0])
    stats.writes['wgt'] = float(df['WBUF Write'].iloc[0])
    stats.writes['dram'] = float(df['DRAM Write'].iloc[0])
    return stats

sim_sweep_columns = ['N', 'M',
        'Max Precision (bits)', 'Min Precision (bits)',
        'Network', 'Layer',
        'Cycles', 'Memory wait cycles',
        'WBUF Read', 'WBUF Write',
        'OBUF Read', 'OBUF Write',
        'IBUF Read', 'IBUF Write',
        'DRAM Read', 'DRAM Write',
        'Bandwidth (bits/cycle)',
        'WBUF Size (bits)', 'OBUF Size (bits)', 'IBUF Size (bits)',
        'Batch size']

batch_size = 64

list_bench = [
    'llama7b',
    'llama13b',
    'llama30b',
    'llama65b',
    'opt6b',
    'opt13b',
    'opt30b',
    'opt66b',
]

results_dir = './results'
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

In [2]:
# ANT configuration file
config_file = 'conf_ant.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
bf_e_sim_sweep_csv = os.path.join(results_dir, 'ant_os.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
# TODO: use list bench to only call OPT and LLAMA
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv, 
                                   batch_size=batch_size, bench_type='ant', list_bench=list_bench)
bf_e_results = bf_e_results.groupby('Network',as_index=False).agg(np.sum)
bf_e_cycles_ant = []
bf_e_energy_ant = []
for name in list_bench:
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_ant.append(bf_e_stats.total_cycles)
    bf_e_energy_ant.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost()))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simul

In [3]:
# OLAceel configuration file
config_file = 'conf_olaccel.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
bf_e_sim_sweep_csv = os.path.join(results_dir, 'olaceel.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv, 
                                    batch_size=batch_size, bench_type='ola', list_bench=list_bench)
bf_e_results = bf_e_results.groupby('Network',as_index=False).agg(np.sum)
# area_stats = bf_e_sim.get_area()
bf_e_cycles_ola = []
bf_e_energy_ola = []
for name in list_bench:
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_ola.append(bf_e_stats.total_cycles)
    bf_e_energy_ola.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost()))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simul

In [4]:
# Oltron configuration file
config_file = 'conf_oltron.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
bf_e_sim_sweep_csv = os.path.join(results_dir, 'oltron.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
# TODO: use list bench to only call OPT and LLAMA
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv, 
                                   batch_size=batch_size, bench_type='oltron', list_bench=list_bench)
bf_e_results = bf_e_results.groupby('Network',as_index=False).agg(np.sum)
bf_e_cycles_oltron = []
bf_e_energy_oltron = []
for name in list_bench:
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_oltron.append(bf_e_stats.total_cycles)
    bf_e_energy_oltron.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost(), act_cost_alpha=1+1/64))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simul

In [16]:
import numpy  as np
from copy import deepcopy

all_cycles = {
    'ant': bf_e_cycles_ant,
    'olaccel': bf_e_cycles_ola,
    'oltron': bf_e_cycles_oltron, 
}

def get_speedup(cycles, baseline='olaccel'):
    speedup = {k: [] for k in cycles.keys()}
    
    for i in range(len(list_bench)):
        baseline_cycle = cycles[baseline][i]
        for k in cycles.keys():
            speedup[k].append(cycles[k][i] / baseline_cycle)
    return speedup

all_speedup = get_speedup(all_cycles)
all_speedup_geomean = {k: np.mean(v) for k, v in all_speedup.items()}

In [22]:
all_energy = {
    'ant': bf_e_energy_ant,
    'olaccel': bf_e_energy_ola,
    'oltron': bf_e_energy_oltron,
}

def get_relative_energy(energy, baseline='olaccel'):
    relative_energy = {k: [] for k in energy.keys()}
    relative_energy_breakdown = [{k: [] for k in energy.keys()} for _ in range(4)]

    for model in range(len(list_bench)):
        baseline_energy = energy[baseline][model]
        total_baseline_energy = np.sum(baseline_energy)
        for arch in energy:
            target_energy = deepcopy(energy[arch][model])
            for i in range(len(target_energy)):
                target_energy[i] /= total_baseline_energy
                relative_energy_breakdown[i][arch].append(target_energy[i])
            relative_energy[arch].append(target_energy)
    
    # average breakdown
    for i in range(4):
        for arch in relative_energy_breakdown[i]:
            relative_energy_breakdown[i][arch] = np.mean(relative_energy_breakdown[i][arch])

    return relative_energy, relative_energy_breakdown

relative_energy, relative_energy_geomean = get_relative_energy(all_energy)

In [23]:
all_speedup

{'ant': [0.27782270832170214,
  0.2758620684556964,
  0.2724511883493292,
  0.2785243873767876,
  0.2807016991903762,
  0.27745663804850884,
  0.2774566426282647,
  0.27745664451293794],
 'olaccel': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
 'oltron': [0.27782270832170214,
  0.2758620684556964,
  0.2724511883493292,
  0.2785243873767876,
  0.2807016991903762,
  0.27745663804850884,
  0.2774566426282647,
  0.27745664451293794]}

In [24]:
all_speedup_geomean

{'ant': 0.2772164971104504, 'olaccel': 1.0, 'oltron': 0.2772164971104504}

In [25]:
relative_energy

{'ant': [[0.16040413053744842,
   0.044699824872393476,
   0.19043593680161822,
   0.1166871056533474],
  [0.15991073222252117,
   0.054540914727922225,
   0.1904980798578481,
   0.11632818097927933],
  [0.15821535945175913,
   0.05364578123233511,
   0.1884746067383375,
   0.11509487067597411],
  [0.16994652865154142,
   0.057410556622194645,
   0.20244684151596284,
   0.12362879207857531],
  [0.16331830047558846,
   0.0467846802479295,
   0.19398769997720394,
   0.11880703892520654],
  [0.16187572368959396,
   0.05503186225948917,
   0.19283677256025236,
   0.11775762776010637],
  [0.161933211174512,
   0.05470050414856955,
   0.1929010327510276,
   0.1177994480298286],
  [0.16946275726105733,
   0.05703994993758301,
   0.20186806079756056,
   0.12327686929556524]],
 'olaccel': [[0.5773614817393183,
   0.09930646189256873,
   0.20520548743586398,
   0.11812656893224883],
  [0.5796764053779395,
   0.09915201129321878,
   0.2025713675238646,
   0.11860021580497697],
  [0.58071084369395

In [26]:
relative_energy_geomean

[{'ant': 0.16313334293300272,
  'olaccel': 0.5884464019179909,
  'oltron': 0.16313334293300272},
 {'ant': 0.052981759256052086,
  'olaccel': 0.0868598566466188,
  'oltron': 0.052981759256052086},
 {'ant': 0.1941811288749764,
  'olaccel': 0.20429921418506353,
  'oltron': 0.19570920715172416},
 {'ant': 0.11867249167473537,
  'olaccel': 0.1203945272503268,
  'oltron': 0.2630047199877846}]

In [None]:
# print the results to results/oltron_res.csv

with open('./results/oltron_res.csv')