In [1]:
from re import I
import pandas
import configparser
import os
import numpy as np
import bitfusion.src.benchmarks.benchmarks as benchmarks
from bitfusion.src.simulator.stats import Stats
from bitfusion.src.simulator.simulator import Simulator
from bitfusion.src.sweep.sweep import SimulatorSweep, check_pandas_or_run
from bitfusion.src.utils.utils import *
from bitfusion.src.optimizer.optimizer import optimize_for_order, get_stats_fast

def df_to_stats(df):
    stats = Stats()
    stats.total_cycles = float(df['Cycles'].iloc[0])
    stats.mem_stall_cycles = float(df['Memory wait cycles'].iloc[0])
    stats.reads['act'] = float(df['IBUF Read'].iloc[0])
    stats.reads['out'] = float(df['OBUF Read'].iloc[0])
    stats.reads['wgt'] = float(df['WBUF Read'].iloc[0])
    stats.reads['dram'] = float(df['DRAM Read'].iloc[0])
    stats.writes['act'] = float(df['IBUF Write'].iloc[0])
    stats.writes['out'] = float(df['OBUF Write'].iloc[0])
    stats.writes['wgt'] = float(df['WBUF Write'].iloc[0])
    stats.writes['dram'] = float(df['DRAM Write'].iloc[0])
    return stats

sim_sweep_columns = ['N', 'M',
        'Max Precision (bits)', 'Min Precision (bits)',
        'Network', 'Layer',
        'Cycles', 'Memory wait cycles',
        'WBUF Read', 'WBUF Write',
        'OBUF Read', 'OBUF Write',
        'IBUF Read', 'IBUF Write',
        'DRAM Read', 'DRAM Write',
        'Bandwidth (bits/cycle)',
        'WBUF Size (bits)', 'OBUF Size (bits)', 'IBUF Size (bits)',
        'Batch size']

batch_size = 64

list_bench = [
    'llama7b',
    'llama13b',
    'llama30b',
    'llama65b',
    'opt6b',
    'opt13b',
    'opt30b',
    'opt66b',
]

results_dir = './results'
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

In [2]:
# ANT configuration file
config_file = 'conf_ant.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
bf_e_sim_sweep_csv = os.path.join(results_dir, 'ant_os.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
# TODO: use list bench to only call OPT and LLAMA
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv, 
                                   batch_size=batch_size, bench_type='ant', list_bench=list_bench)
bf_e_results = bf_e_results.groupby('Network',as_index=False).agg(np.sum)
bf_e_cycles_ant = []
bf_e_energy_ant = []
for name in list_bench:
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_ant.append(bf_e_stats.total_cycles)
    bf_e_energy_ant.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost()))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 32 x 32
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simul

In [3]:
# OLAceel configuration file
config_file = 'conf_olaccel.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
bf_e_sim_sweep_csv = os.path.join(results_dir, 'olaceel.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv, 
                                    batch_size=batch_size, bench_type='ola', list_bench=list_bench)
bf_e_results = bf_e_results.groupby('Network',as_index=False).agg(np.sum)
# area_stats = bf_e_sim.get_area()
bf_e_cycles_ola = []
bf_e_energy_ola = []
for name in list_bench:
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_ola.append(bf_e_stats.total_cycles)
    bf_e_energy_ola.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost()))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 16 x 18
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 8
INFO:bitfusion.src.sweep.sweep.Simul

In [4]:
# Oltron configuration file
config_file = 'conf_oltron.ini'
# Create simulator object
bf_e_sim = Simulator(config_file, False)
bf_e_sim_sweep_csv = os.path.join(results_dir, 'oltron.csv')
bf_e_sim_sweep_df = pandas.DataFrame(columns=sim_sweep_columns)
# TODO: use list bench to only call OPT and LLAMA
bf_e_results = check_pandas_or_run(bf_e_sim, bf_e_sim_sweep_df, bf_e_sim_sweep_csv, 
                                   batch_size=batch_size, bench_type='oltron', list_bench=list_bench)
bf_e_results = bf_e_results.groupby('Network',as_index=False).agg(np.sum)
bf_e_cycles_oltron = []
bf_e_energy_oltron = []
for name in list_bench:
    bf_e_stats = df_to_stats(bf_e_results.loc[bf_e_results['Network'] == name])
    bf_e_cycles_oltron.append(bf_e_stats.total_cycles)
    bf_e_energy_oltron.append(bf_e_stats.get_energy_breakdown(bf_e_sim.get_energy_cost(), act_cost_alpha=1+1/64))

INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama7b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024


No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')
No entry found in /home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti_sweep.csv, running cacti
('/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/cacti/cacti', '-infile', '/home/xuechenhao/ANT-Quantization/ant_simulator/bitfusion/sram/sweep.cfg')


INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama13b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama30b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Min Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simulator:Batch size: 64
INFO:bitfusion.src.sweep.sweep.Simulator:Bandwidth (bits/cycle): 1024
INFO:bitfusion.src.sweep.sweep.Simulator:Simulating Benchmark: llama65b
INFO:bitfusion.src.sweep.sweep.Simulator:N x M = 64 x 64
INFO:bitfusion.src.sweep.sweep.Simulator:Max Precision (bits): 4
INFO:bitfusion.src.sweep.sweep.Simul

In [5]:
import numpy  as np
from copy import deepcopy

all_cycles = {
    'ant': bf_e_cycles_ant,
    'olaccel': bf_e_cycles_ola,
    'oltron': bf_e_cycles_oltron, 
}

def get_speedup(cycles, baseline='olaccel'):
    speedup = {k: [] for k in cycles.keys()}
    
    for i in range(len(list_bench)):
        baseline_cycle = cycles[baseline][i]
        for k in cycles.keys():
            speedup[k].append(cycles[k][i] / baseline_cycle)
    return speedup

all_speedup = get_speedup(all_cycles)
all_speedup_geomean = {k: np.mean(v) for k, v in all_speedup.items()}

In [6]:
all_energy = {
    'ant': bf_e_energy_ant,
    'olaccel': bf_e_energy_ola,
    'oltron': bf_e_energy_oltron,
}

def get_relative_energy(energy, baseline='olaccel'):
    relative_energy = {k: [] for k in energy.keys()}
    relative_energy_breakdown = [{k: [] for k in energy.keys()} for _ in range(4)]

    for model in range(len(list_bench)):
        baseline_energy = energy[baseline][model]
        total_baseline_energy = np.sum(baseline_energy)
        for arch in energy:
            target_energy = deepcopy(energy[arch][model])
            for i in range(len(target_energy)):
                target_energy[i] /= total_baseline_energy
                relative_energy_breakdown[i][arch].append(target_energy[i])
            relative_energy[arch].append(target_energy)
    
    # average breakdown
    for i in range(4):
        for arch in relative_energy_breakdown[i]:
            relative_energy_breakdown[i][arch] = np.mean(relative_energy_breakdown[i][arch])

    return relative_energy, relative_energy_breakdown

relative_energy, relative_energy_geomean = get_relative_energy(all_energy)

In [7]:
all_speedup

{'ant': [0.27782270832170214,
  0.2758620684556964,
  0.2724511883493292,
  0.2785243873767876,
  0.2807016991903762,
  0.27745663804850884,
  0.2774566426282647,
  0.27745664451293794],
 'olaccel': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
 'oltron': [0.27782270832170214,
  0.2758620684556964,
  0.2724511883493292,
  0.2785243873767876,
  0.2807016991903762,
  0.27745663804850884,
  0.2774566426282647,
  0.27745664451293794]}

In [8]:
all_speedup_geomean

{'ant': 0.2772164971104504, 'olaccel': 1.0, 'oltron': 0.2772164971104504}

In [9]:
relative_energy

{'ant': [[0.10649320409748289,
   0.02967646504680379,
   0.1264314891228778,
   0.4094680398830818],
  [0.10602275485580102,
   0.036161287935089895,
   0.12630253730040114,
   0.40765916042460537],
  [0.10483565869173352,
   0.03554642754668378,
   0.12488584934197648,
   0.4030947588288875],
  [0.1107141330361522,
   0.037400940483944135,
   0.1318869336266601,
   0.4256975862237078],
  [0.10814715489187542,
   0.030980178256851072,
   0.12845601365836612,
   0.41582750657428325],
  [0.10709206285125891,
   0.036407408829356745,
   0.1275749525398663,
   0.4117706664517548],
  [0.10711722048216937,
   0.036183843455396124,
   0.12760212872059884,
   0.41186740020788454],
  [0.11036086291878536,
   0.03714667575164356,
   0.1314644807238728,
   0.4243392574371035]],
 'olaccel': [[0.38331353380289596,
   0.0659301183771901,
   0.13623707682720215,
   0.4145192709927118],
  [0.3843324870625638,
   0.06573898599984042,
   0.13430727344741852,
   0.4156212534901772],
  [0.384786938632531

In [10]:
relative_energy_geomean

[{'ant': 0.10759788147815733,
  'olaccel': 0.3881268343003825,
  'oltron': 0.10759788147815733},
 {'ant': 0.03493790341322114,
  'olaccel': 0.057379782820029,
  'oltron': 0.03493790341322114},
 {'ant': 0.12807554812932745,
  'olaccel': 0.13476890460818047,
  'oltron': 0.1290834229288509},
 {'ant': 0.41371554700391355,
  'olaccel': 0.419724478271408,
  'oltron': 0.17347005939811094}]

In [11]:
# print the results to results/oltron_res.csv
arch_list = ['oltron', 'ant', 'olaccel']

with open('./results/oltron_res.csv', 'w') as f:
    # performance title
    f.write(', ')
    for model in list_bench:
        f.write(model + ', ' * len(arch_list)) 
    f.write('Geomean'+ ', ' * len(arch_list))
    f.write('\n')

    # arch names
    f.write('Arch, ')
    for m in range(len(list_bench) + 1):
        for arch in arch_list:
            f.write(arch + ', ')
    f.write('\n')

    # performance
    f.write('Time, ')
    for m in range(len(list_bench)):
        for arch in arch_list:
            f.write(f"{all_speedup[arch][m]:.4f}, ")
    for arch in arch_list:
        f.write(f"{all_speedup_geomean[arch]:.4f}, ")
    f.write('\n')

    # performance title
    f.write(', ')
    for model in list_bench:
        f.write(model + ', ' * len(arch_list)) 
    f.write('Geomean'+ ', ' * len(arch_list))
    f.write('\n')

    # arch names
    f.write('Arch, ')
    for m in range(len(list_bench) + 1):
        for arch in arch_list:
            f.write(arch + ', ')
    f.write('\n')

    energy_titles = [
        "Static",
        "Dram",
        "Buffer",
        'Core',
    ]
    for t in range(len(energy_titles)):
        f.write(energy_titles[t] + ', ')
        for m in range(len(list_bench)):
            for arch in arch_list:
                f.write(f"{relative_energy[arch][m][t]:.4f}, ")
        for arch in arch_list:
            f.write(f"{relative_energy_geomean[t][arch]:.4f}, ")
        f.write('\n')
    f.write('\n')


    

