In [1]:
from re import I
import pandas
import configparser
import os
import numpy as np
import bitfusion.src.benchmarks.benchmarks as benchmarks
from bitfusion.src.simulator.stats import Stats
from bitfusion.src.simulator.simulator import Simulator
from bitfusion.src.sweep.sweep import SimulatorSweep, check_pandas_or_run
from bitfusion.src.utils.utils import *
from bitfusion.src.optimizer.optimizer import optimize_for_order, get_stats_fast

def df_to_stats(df):
    """Build a ``Stats`` object from the first row of a results frame.

    Each value is read from row 0 of the named column (``.iloc[0]``) and
    converted to ``float`` before it is stored.

    Args:
        df: Frame providing the columns 'Cycles', 'Memory wait cycles' and
            the IBUF/OBUF/WBUF/DRAM 'Read' and 'Write' columns. Only its
            first row is used.

    Returns:
        A new ``Stats`` whose ``total_cycles``, ``mem_stall_cycles`` and the
        'act' / 'out' / 'wgt' / 'dram' entries of ``reads`` and ``writes``
        have been filled in.
    """
    def first_value(column):
        # Row 0 of the requested column, as a float.
        return float(df[column].iloc[0])

    # Stats key -> source column, listed in the order the values are copied.
    read_columns = (
        ('act', 'IBUF Read'),
        ('out', 'OBUF Read'),
        ('wgt', 'WBUF Read'),
        ('dram', 'DRAM Read'),
    )
    write_columns = (
        ('act', 'IBUF Write'),
        ('out', 'OBUF Write'),
        ('wgt', 'WBUF Write'),
        ('dram', 'DRAM Write'),
    )

    stats = Stats()
    stats.total_cycles = first_value('Cycles')
    stats.mem_stall_cycles = first_value('Memory wait cycles')
    for key, column in read_columns:
        stats.reads[key] = first_value(column)
    for key, column in write_columns:
        stats.writes[key] = first_value(column)
    return stats

# Column layout used for the empty sweep frames created in the cells below.
# df_to_stats reads 'Cycles', 'Memory wait cycles' and the '* Read' /
# '* Write' columns by name, so these labels must stay in sync with it.
sim_sweep_columns = ['N', 'M',
        'Max Precision (bits)', 'Min Precision (bits)',
        'Network', 'Layer',
        'Cycles', 'Memory wait cycles',
        'WBUF Read', 'WBUF Write',
        'OBUF Read', 'OBUF Write',
        'IBUF Read', 'IBUF Write',
        'DRAM Read', 'DRAM Write',
        'Bandwidth (bits/cycle)',
        'WBUF Size (bits)', 'OBUF Size (bits)', 'IBUF Size (bits)',
        'Batch size']

# Batch size passed to the check_pandas_or_run calls below.
batch_size = 64

# Benchmarks to evaluate; the per-accelerator result lists built in the
# following cells are appended in exactly this order.
list_bench = [
    'llama7b',
    'llama13b',
    'llama30b',
    'llama65b',
    'opt6b',
    'opt13b',
    'opt30b',
    'opt66b',
]

# Directory under which the per-accelerator sweep CSV paths are built
# (relative to the current working directory).
results_dir = './results'
# exist_ok=True keeps the cell idempotent on re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(results_dir, exist_ok=True)

In [2]:
# ANT configuration file
config_file = 'conf_ant.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
# CSV path and empty, correctly-columned frame handed to check_pandas_or_run.
bf_e_sim_sweep_csv = os.path.join(results_dir, 'ant_os.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
# NOTE(review): the earlier TODO asked for list_bench to restrict the run to
# the OPT and LLaMA benchmarks; list_bench is passed below, so this looks
# done -- confirm check_pandas_or_run honours it.
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv,
                                   batch_size=batch_size, bench_type='ant', list_bench=list_bench)
# Sum all rows of each network into a single row. The string 'sum' selects
# the same groupby reduction np.sum was mapped to, without the FutureWarning
# that pandas >= 2.1 emits when a NumPy callable is passed to agg().
bf_e_results = bf_e_results.groupby('Network', as_index=False).agg('sum')
# Per-benchmark results for ANT, appended in list_bench order.
bf_e_cycles_ant = []
bf_e_energy_ant = []
for name in list_bench:
    # Select the aggregated row of this network and convert it to a Stats object.
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_ant.append(bf_e_stats.total_cycles)
    bf_e_energy_ant.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost()))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simul

In [3]:
# OLAccel configuration file
config_file = 'conf_olaccel.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
# CSV path and empty, correctly-columned frame handed to check_pandas_or_run.
# The file name's spelling ('olaceel') is kept as-is so that an existing
# results file under that name is still the one referenced.
bf_e_sim_sweep_csv = os.path.join(results_dir, 'olaceel.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv,
                                   batch_size=batch_size, bench_type='ola', list_bench=list_bench)
# Sum all rows of each network into a single row. The string 'sum' selects
# the same groupby reduction np.sum was mapped to, without the FutureWarning
# that pandas >= 2.1 emits when a NumPy callable is passed to agg().
bf_e_results = bf_e_results.groupby('Network', as_index=False).agg('sum')
# area_stats = bf_e_sim.get_area()
# Per-benchmark results for OLAccel, appended in list_bench order.
bf_e_cycles_ola = []
bf_e_energy_ola = []
for name in list_bench:
    # Select the aggregated row of this network and convert it to a Stats object.
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_ola.append(bf_e_stats.total_cycles)
    bf_e_energy_ola.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost()))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simul

In [4]:
# Oltron configuration file
config_file = 'conf_oltron.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
# CSV path and empty, correctly-columned frame handed to check_pandas_or_run.
bf_e_sim_sweep_csv = os.path.join(results_dir, 'oltron.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
# NOTE(review): the earlier TODO asked for list_bench to restrict the run to
# the OPT and LLaMA benchmarks; list_bench is passed below, so this looks
# done -- confirm check_pandas_or_run honours it.
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv,
                                   batch_size=batch_size, bench_type='oltron', list_bench=list_bench)
# Sum all rows of each network into a single row. The string 'sum' selects
# the same groupby reduction np.sum was mapped to, without the FutureWarning
# that pandas >= 2.1 emits when a NumPy callable is passed to agg().
bf_e_results = bf_e_results.groupby('Network', as_index=False).agg('sum')
# Per-benchmark results for Oltron, appended in list_bench order.
bf_e_cycles_oltron = []
bf_e_energy_oltron = []
for name in list_bench:
    # Select the aggregated row of this network and convert it to a Stats object.
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_oltron.append(bf_e_stats.total_cycles)
    bf_e_energy_oltron.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost()))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simul

In [5]:
# Total cycles per benchmark for the ANT configuration, in list_bench order.
bf_e_cycles_ant

[6476005552.0,
 10150215808.0,
 17121149056.0,
 25904021632.0,
 6442451120.0,
 10066329728.0,
 19730006144.0,
 32614908032.0]

In [6]:
# Total cycles per benchmark for the OLAccel configuration, in list_bench order.
bf_e_cycles_ola

[23309849620.0,
 36794532372.0,
 62841161236.0,
 93004500884.0,
 22951236628.0,
 36280731284.0,
 71110231700.0,
 117549565588.0]

In [7]:
# Total cycles per benchmark for the Oltron configuration, in list_bench order.
# NOTE(review): in the recorded output these values are identical to
# bf_e_cycles_ant for every benchmark -- confirm this is expected and not a
# stale or shared results file.
bf_e_cycles_oltron

[6476005552.0,
 10150215808.0,
 17121149056.0,
 25904021632.0,
 6442451120.0,
 10066329728.0,
 19730006144.0,
 32614908032.0]

In [8]:
# Energy breakdown per benchmark for the ANT configuration, in list_bench
# order; each entry is the value returned by Stats.get_energy_breakdown
# (the meaning of its components is defined there, not in this notebook).
bf_e_energy_ant

[[6276738861.16496, 1749139046.006784, 7451906887.185859, 4566057546.007118],
 [9837893667.587841, 3355420315.877376, 11719662761.054047, 7156644579.836952],
 [16594331299.54688, 5626608375.3000965, 19768055874.00262, 12071662457.38591],
 [25106954886.38336, 8481516313.1166725, 29908370339.55492, 18264230108.16059],
 [6244216899.0376005,
  1788738250.211328,
  7416812879.3634405,
  4542399216.873308],
 [9756588762.26944, 3316885550.53056, 11622675996.318106, 7097498757.002428],
 [19122913854.94912,
  6459657170.190336,
  22779946158.501392,
  13911097547.539612],
 [31611347311.855362,
  10640153018.105854,
  37656187614.20243,
  22995895934.922523]]

In [9]:
# Energy breakdown per benchmark for the OLAccel configuration, in list_bench
# order; each entry is the value returned by Stats.get_energy_breakdown
# (the meaning of its components is defined there, not in this notebook).
bf_e_energy_ola

[[22592605547.1926, 3885939385.9584, 8029850934.619668, 4622384867.9509325],
 [35662364610.91356,
  6099946704.470016,
  12462425417.604137,
  7296422796.8059435],
 [60907538704.76828,
  10356705883.324417,
  21158690928.404724,
  12461516644.461063],
 [90142752391.79932, 8693570800.18944, 30455153203.200653, 18442961522.414055],
 [22245027076.95644, 3579082784.636928, 7858037186.447279, 4551271257.527235],
 [35164373182.39132, 5630821411.06176, 12282364041.992601, 7194534918.114342],
 [68922169870.591, 10994971456.831488, 24072934728.02341, 14101288379.615902],
 [113932565454.85724, 10947974047.727615, 38347778243.6905, 23310292994.95131]]

In [10]:
# Energy breakdown per benchmark for the Oltron configuration, in list_bench
# order; each entry is the value returned by Stats.get_energy_breakdown
# (the meaning of its components is defined there, not in this notebook).
# NOTE(review): in the recorded output the first three components of every
# entry equal those of bf_e_energy_ant; only the last differs -- confirm
# this is expected.
bf_e_energy_oltron

[[6276738861.16496, 1749139046.006784, 7451906887.185859, 10119402295.79604],
 [9837893667.587841,
  3355420315.877376,
  11719662761.054047,
  15860721193.543793],
 [16594331299.54688,
  5626608375.3000965,
  19768055874.00262,
  26753497471.611042],
 [25106954886.38336, 8481516313.1166725, 29908370339.55492, 40477609099.93404],
 [6244216899.0376005,
  1788738250.211328,
  7416812879.3634405,
  10066970159.680191],
 [9756588762.26944, 3316885550.53056, 11622675996.318106, 15729640853.254175],
 [19122913854.94912,
  6459657170.190336,
  22779946158.501392,
  30830096054.618156],
 [31611347311.855362,
  10640153018.105854,
  37656187614.20243,
  50964036323.10347]]