In [1]:
from re import I
import pandas
import configparser
import os
import numpy as np
import bitfusion.src.benchmarks.benchmarks as benchmarks
from bitfusion.src.simulator.stats import Stats
from bitfusion.src.simulator.simulator import Simulator
from bitfusion.src.sweep.sweep import SimulatorSweep, check_pandas_or_run
from bitfusion.src.utils.utils import *
from bitfusion.src.optimizer.optimizer import optimize_for_order, get_stats_fast

def df_to_stats(df):
    """Build a ``Stats`` object from the first row of a results DataFrame.

    Only row 0 of ``df`` is consulted; every value is converted to ``float``
    before being stored.

    Args:
        df: DataFrame containing the columns 'Cycles', 'Memory wait cycles'
            and the '<IBUF|OBUF|WBUF|DRAM> Read' / '... Write' counters.

    Returns:
        A new ``Stats`` instance with ``total_cycles``, ``mem_stall_cycles``
        and the ``reads`` / ``writes`` entries for the keys 'act', 'out',
        'wgt' and 'dram' filled in.
    """
    def first_row_value(column):
        # Positional access: the caller's frame may carry any index labels.
        return float(df[column].iloc[0])

    # (Stats key, DataFrame column) pairs, in the order they are assigned.
    read_columns = (
        ('act', 'IBUF Read'),
        ('out', 'OBUF Read'),
        ('wgt', 'WBUF Read'),
        ('dram', 'DRAM Read'),
    )
    write_columns = (
        ('act', 'IBUF Write'),
        ('out', 'OBUF Write'),
        ('wgt', 'WBUF Write'),
        ('dram', 'DRAM Write'),
    )

    result = Stats()
    result.total_cycles = first_row_value('Cycles')
    result.mem_stall_cycles = first_row_value('Memory wait cycles')
    for key, column in read_columns:
        result.reads[key] = first_row_value(column)
    for key, column in write_columns:
        result.writes[key] = first_row_value(column)
    return result

# Column names used to create the (initially empty) sweep results DataFrame.
sim_sweep_columns = [
    'N', 'M',
    'Max Precision (bits)', 'Min Precision (bits)',
    'Network', 'Layer',
    'Cycles', 'Memory wait cycles',
    'WBUF Read', 'WBUF Write',
    'OBUF Read', 'OBUF Write',
    'IBUF Read', 'IBUF Write',
    'DRAM Read', 'DRAM Write',
    'Bandwidth (bits/cycle)',
    'WBUF Size (bits)', 'OBUF Size (bits)', 'IBUF Size (bits)',
    'Batch size',
]

# Batch size handed to the simulator sweep.
batch_size = 64

# Benchmark (network) names handed to the simulator sweep.
list_bench = [
    'llama7b',
    'llama13b',
    'llama30b',
    'llama65b',
    'opt6b',
    'opt13b',
    'opt30b',
    'opt66b',
]

# Directory that receives the sweep CSV files. exist_ok=True makes the cell
# safe to re-run and avoids the check-then-create race of a separate
# os.path.exists() test.
results_dir = './results'
os.makedirs(results_dir, exist_ok=True)

In [3]:
# ANT configuration file
config_file = 'conf_ant.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
bf_e_sim_sweep_csv = os.path.join(results_dir, 'ant_os.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
# Obtain per-layer results for the networks in list_bench.
# NOTE(review): presumably check_pandas_or_run reuses rows cached in the CSV
# and only simulates what is missing -- confirm in bitfusion/src/sweep/sweep.py.
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv,
                                   batch_size=batch_size, bench_type='ant', list_bench=list_bench)
# Collapse the per-layer rows into one row per network. The string alias
# 'sum' is used instead of the np.sum callable, which pandas >= 2.1 deprecates
# inside agg(); the result is the same.
# NOTE(review): the sum is applied to every column, so configuration columns
# (N, M, precisions, buffer sizes, batch size) come back multiplied by the
# number of layers and 'Layer' is concatenated. Only the cycle and
# read/write counters consumed by df_to_stats are meaningful afterwards.
bf_e_results = bf_e_results.groupby('Network', as_index=False).agg('sum')

# Fail with a clear message instead of an IndexError inside df_to_stats when
# a requested network has no rows in the results.
missing_networks = [name for name in list_bench
                    if name not in set(bf_e_results['Network'])]
if missing_networks:
    raise ValueError('No simulation results for: ' + ', '.join(missing_networks))

# Per-network totals, in list_bench order.
bf_e_cycles_ant = []
bf_e_energy_ant = []
for name in list_bench:
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_ant.append(bf_e_stats.total_cycles)
    bf_e_energy_ant.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost()))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simul

In [4]:
# Display the per-network totals (one row per 'Network' after the group-sum).
bf_e_results

Unnamed: 0,Network,N,M,Max Precision (bits),Min Precision (bits),Layer,Cycles,Memory wait cycles,WBUF Read,WBUF Write,...,OBUF Write,IBUF Read,IBUF Write,DRAM Read,DRAM Write,Bandwidth (bits/cycle),WBUF Size (bits),OBUF Size (bits),IBUF Size (bits),Batch size
0,llama13b,128,128,32,16,fc0/MatMulfc1/MatMulfc2/MatMulfc3/MatMul,649613803648,128,10643272556871680,166301133701120,...,665204534804480,10643272556871680,166301133701120,332602267402240,7146825580544,4096,524288,524288,1048576,256
1,llama30b,128,128,32,16,fc0/MatMulfc1/MatMulfc2/MatMulfc3/MatMul,1095753531520,128,17952825858326528,280512904036352,...,1122051616145408,17952825858326528,280512904036352,561025808072704,9277129359360,4096,524288,524288,1048576,256
2,llama65b,128,128,32,16,fc0/MatMulfc1/MatMulfc2/MatMulfc3/MatMul,1657857376384,128,27162335252578304,424411488321536,...,1697645953286144,27162335252578304,424411488321536,848822976643072,11407433138176,4096,524288,524288,1048576,256
3,llama7b,128,128,32,16,fc0/MatMulfc1/MatMulfc2/MatMulfc3/MatMul,414464344240,176,6790583813144576,64871186038784,...,424411488321536,6790583813144576,106102872080384,170974058119168,5703716569088,4096,524288,524288,1048576,256
4,opt13b,128,128,32,16,fc0/MatMulfc1/MatMulfc2/MatMulfc3/MatMul,644245094528,128,10555311626649600,164926744166400,...,659706976665600,10555311626649600,164926744166400,329853488332800,6184752906240,4096,524288,524288,1048576,256
5,opt30b,128,128,32,16,fc0/MatMulfc1/MatMulfc2/MatMulfc3/MatMul,1262720385152,128,20688410788233216,323256418566144,...,1293025674264576,20688410788233216,323256418566144,646512837132288,8658654068736,4096,524288,524288,1048576,256
6,opt66b,128,128,32,16,fc0/MatMulfc1/MatMulfc2/MatMulfc3/MatMul,2087354105984,128,34199209670344704,534362651099136,...,2137450604396544,34199209670344704,534362651099136,1068725302198272,11132555231232,4096,524288,524288,1048576,256
7,opt6b,128,128,32,16,fc0/MatMulfc1/MatMulfc2/MatMulfc3/MatMul,412316860592,176,6755399441055744,70368744177664,...,422212465065984,6755399441055744,105553116266496,175921860444160,4947802324992,4096,524288,524288,1048576,256
