In [23]:
import polars as pl
import numpy as np
from tqdm.auto import tqdm
import glob
import altair as alt
import vegafusion as vf
# Turn on the VegaFusion integration with a row limit of 100000
# (presumably so Altair can chart the larger aggregated frames — confirm).
vf.enable(row_limit=100000)
# Set polars' string formatting length to 100 for displayed frames.
pl.Config.set_fmt_str_lengths(100)

polars.config.Config

In [24]:
# Directory whose immediate children are globbed (f'{BASE_DIR}/*') and passed one
# by one to build_dataset() by the analysis loops in this notebook.
# NOTE(review): absolute, user-specific path — consider making it configurable.
BASE_DIR = '/home/as8319/hpc_experiments/spec2006_test/spec2006_test/xalancbmk'

In [25]:
def _read_simpoint_column(file_path, convert):
    """Return the first field of every non-blank line of ``file_path``.

    Each line is expected to look like ``<value> <id>``; only ``<value>`` is
    kept and passed through ``convert`` (e.g. ``float`` or ``int``).
    """
    with open(file_path, 'r') as handle:
        return [convert(line.split()[0]) for line in handle if line.strip()]


def build_dataset(path):
    """Load every simpoint trace of one benchmark run into a single DataFrame.

    Reads ``{path}/input/m5out/simpoint_weights.txt`` and
    ``{path}/input/m5out/simpoints.txt``, then loads one
    ``trace_<k>.parquet`` per simpoint from the same directory. Trace ``k`` is
    tagged with ``checkpoint = k`` and with the weight of the simpoint that has
    the k-th smallest interval.

    Weights and intervals are paired by line position; the id column of both
    files is ignored (assumes both files list simpoints in the same order —
    TODO confirm against the SimPoint output).

    Args:
        path: Benchmark run directory containing ``input/m5out``.

    Returns:
        The concatenation of all traces with two extra columns,
        ``simpoint_weight`` and ``checkpoint``.

    Raises:
        ValueError: If the two simpoint files list a different number of
            entries, or list none at all.
    """
    m5out = f'{path}/input/m5out'

    weights = np.array(
        _read_simpoint_column(f'{m5out}/simpoint_weights.txt', float),
        dtype=float,
    )
    intervals = np.array(
        _read_simpoint_column(f'{m5out}/simpoints.txt', int),
        dtype=np.int64,
    )

    # Fail early: a mismatch would otherwise pair traces with the wrong
    # (or uninitialised) weights.
    if len(weights) != len(intervals):
        raise ValueError(
            f'{m5out}: simpoint_weights.txt has {len(weights)} entries but '
            f'simpoints.txt has {len(intervals)}'
        )
    if len(intervals) == 0:
        raise ValueError(f'{m5out}: no simpoints listed')

    # Traces are saved and labelled in order of their occurrence, i.e. trace_0
    # corresponds to the lowest interval, so trace k belongs to the k-th entry
    # of the interval sort order.
    sort_idxs = np.argsort(intervals)

    dfs = []
    for df_idx, simpoint_idx in enumerate(sort_idxs):
        trace = pl.read_parquet(f'{m5out}/trace_{df_idx}.parquet')
        dfs.append(trace.with_columns([
            pl.lit(weights[simpoint_idx]).alias('simpoint_weight'),
            pl.lit(df_idx).alias('checkpoint'),
        ]))

    return pl.concat(dfs)

In [26]:
# Allocation-context analysis.
# For every benchmark directory: aggregate branch outcomes per
# (inst_addr, alloc_context, virtual, return) with SimPoint weighting, keep the
# groups whose misprediction rate exceeds min(taken rate, 1 - taken rate) and
# whose alloc_context is non-zero, and sum that excess — scaled by the group's
# share of all weighted executions — as a percentage.
# The mean over all benchmarks is printed at the end.
benches = glob.glob(f'{BASE_DIR}/*')

# Loop-invariant expressions shared by every per-benchmark query.
simpoint_weight = pl.col('simpoint_weight')
taken_rate = pl.col('taken rate')
mispred_rate = pl.col('misprediction rate')
static_floor = pl.min(taken_rate, 1 - taken_rate)

op = []

for bench_dir in tqdm(benches):
    df = build_dataset(bench_dir)

    # Kept as one chain so nothing references `df` once it is deleted below.
    summary = (
        df.lazy()
        # Raw counts per group inside each checkpoint.
        .groupby([
            'inst_addr',
            'alloc_context',
            'checkpoint',
            'simpoint_weight',
            'virtual',
            'return',
        ])
        .agg([
            pl.col('taken').cast(pl.UInt32).sum(),
            pl.col('mispredicted').cast(pl.UInt32).sum(),
            pl.count(),
        ])
        # Combine checkpoints, weighting each by its simpoint weight.
        .groupby(['inst_addr', 'alloc_context', 'virtual', 'return'])
        .agg([
            (pl.col('taken') * simpoint_weight).sum(),
            (pl.col('mispredicted') * simpoint_weight).sum(),
            (pl.col('count') * simpoint_weight).sum(),
        ])
        .with_columns([
            (pl.col('taken') / pl.col('count')).alias('taken rate'),
            (pl.col('mispredicted') / pl.col('count')).alias('misprediction rate'),
            pl.col('count').sum().alias('total count'),
        ])
        # Disabled extra condition: & pl.col('virtual') & ~pl.col('return')
        .filter((static_floor < mispred_rate) & (pl.col('alloc_context') != 0))
        .select([
            100 * ((mispred_rate - static_floor) * pl.col('count') / pl.col('total count')).sum(),
        ])
        .collect(streaming=True)
    )
    # The single output column is named 'literal' because the expression
    # starts with the literal 100.
    op.append(summary['literal'][0])

    del df

print(np.mean(op))


  0%|          | 0/1 [00:00<?, ?it/s]

0.7969748162573852


In [4]:
# Per-benchmark summary over every directory under BASE_DIR:
#   mp – SimPoint-weighted global misprediction rate
#   op – summed excess of the misprediction rate over
#        min(taken rate, 1 - taken rate) per (branch, first ras_rel entry)
#        pair, scaled by the pair's share of all weighted executions, in percent
# The means of both lists are printed at the end (mp scaled to percent).
benches = glob.glob(f'{BASE_DIR}/*')

# Loop-invariant expressions shared by the per-benchmark queries.
simpoint_weight = pl.col('simpoint_weight')
taken_rate = pl.col('taken rate')
mispred_rate = pl.col('misprediction rate')
static_floor = pl.min(taken_rate, 1 - taken_rate)

mp = []
op = []

for bench_dir in tqdm(benches):
    df = build_dataset(bench_dir)

    # Both queries stay single chains so nothing references `df` after `del df`.
    global_rate = (
        df.lazy()
        .groupby(['checkpoint', 'simpoint_weight'])
        .agg([
            pl.col('mispredicted').cast(pl.UInt32).sum(),
            pl.count(),
        ])
        .select([
            pl.col('mispredicted') * simpoint_weight,
            pl.col('count') * simpoint_weight,
        ])
        .select(pl.col('mispredicted').sum() / pl.col('count').sum())
        .collect(streaming=True)
    )
    mp.append(global_rate['mispredicted'][0])

    ras_summary = (
        df.lazy()
        .with_columns([
            pl.col('ras_rel').list.get(0).alias('ras top'),
        ])
        # Raw counts per (branch, ras top) inside each checkpoint.
        .groupby(['inst_rel_addr', 'ras top', 'checkpoint', 'simpoint_weight'])
        .agg([
            pl.col('taken').cast(pl.UInt32).sum(),
            pl.col('mispredicted').cast(pl.UInt32).sum(),
            pl.count(),
        ])
        # Combine checkpoints, weighting each by its simpoint weight.
        .groupby(['inst_rel_addr', 'ras top'])
        .agg([
            (pl.col('taken') * simpoint_weight).sum(),
            (pl.col('mispredicted') * simpoint_weight).sum(),
            (pl.col('count') * simpoint_weight).sum(),
        ])
        .with_columns([
            (pl.col('taken') / pl.col('count')).alias('taken rate'),
            (pl.col('mispredicted') / pl.col('count')).alias('misprediction rate'),
            pl.col('count').sum().alias('total count'),
        ])
        .filter(static_floor < mispred_rate)
        .select([
            100 * ((mispred_rate - static_floor) * pl.col('count') / pl.col('total count')).sum(),
        ])
        .collect(streaming=True)
    )
    # The single output column is named 'literal' because the expression
    # starts with the literal 100.
    op.append(ras_summary['literal'][0])

    del df
print(np.mean(mp)*100)
print(np.mean(op))


  0%|          | 0/80 [00:00<?, ?it/s]

5.956669724522444
2.7914588879880133


In [None]:
# Row count of the currently loaded trace frame, shown through the notebook's
# rich display instead of print().
# NOTE(review): the benchmark loops in this notebook end every iteration with
# `del df`, so `df` has to be built with build_dataset(...) before this cell
# can run on a fresh kernel.
df.select(pl.count())

In [5]:
# Preview the first three rows of the loaded trace frame.
# NOTE(review): the benchmark loops in this notebook end every iteration with
# `del df`, so this cell depends on a `df` created outside the visible cells —
# confirm which benchmark it is meant to hold.
df.head(3)

tick,disassembly,inst_addr,inst_rel_addr,pred_addr,pred_rel_addr,jump_addr,jump_rel_addr,pred_taken,mispredicted,ras,regs,virtual,return,call,taken,ras_rel,alloc_context,simpoint_weight,checkpoint
u64,str,u64,str,u64,str,u64,str,bool,bool,list[u64],struct[15],bool,bool,bool,bool,list[str],u64,f64,i32
421886216000,""" tbnz w8, #0x1, <propagate_block+808>""",6867204,"""propagate_block+31c""",6867216,"""propagate_block+328""",6867216,"""propagate_block+328""",True,False,[6859904],"{274874435168,274875166192,0,274875421312,274875421312,4,7,6,0,17179869184,0,8,48,11,0}",False,False,False,True,"[""update_life_info+43c""]",0,0.0683995,0
421886216000,""" b <propagate_block+812>""",6867216,"""propagate_block+328""",6867220,"""propagate_block+32c""",6867220,"""propagate_block+32c""",False,False,[6859904],"{274874435168,274875166192,0,274875421312,274875421312,4,7,6,1,17179869184,0,8,48,11,0}",False,False,False,False,"[""update_life_info+43c""]",0,0.0683995,0
421886217000,""" b <propagate_block+616>""",6867228,"""propagate_block+334""",6867024,"""propagate_block+268""",6867024,"""propagate_block+268""",True,False,[6859904],"{274874435168,274875166192,0,274875421312,274875421312,4,7,6,1,17179869184,0,8,48,11,0}",False,False,False,True,"[""update_life_info+43c""]",0,0.0683995,0


In [4]:
# Global misprediction rate of `df`: per-checkpoint misprediction and branch
# counts are scaled by the checkpoint's simpoint weight, then the weighted
# mispredictions are divided by the weighted branch count.
simpoint_weight = pl.col('simpoint_weight')

(
    df.lazy()
    .groupby(['checkpoint', 'simpoint_weight'])
    .agg([
        pl.col('mispredicted').cast(pl.UInt32).sum(),
        pl.count(),
    ])
    .select([
        pl.col('mispredicted') * simpoint_weight,
        pl.col('count') * simpoint_weight,
    ])
    .select(pl.col('mispredicted').sum() / pl.col('count').sum())
    .collect(streaming=True)
)

mispredicted
f64
0.095476


In [7]:
# Per-branch statistics (branch = inst_rel_addr), SimPoint-weighted:
#   misprediction rate      – weighted mispredictions / weighted executions
#   share of mispredictions – the branch's part of all weighted mispredictions
# Rows are ordered by share, largest first.
simpoint_weight = pl.col('simpoint_weight')

df_mp = (
    df
    # Raw counts per branch inside each checkpoint.
    .groupby(['inst_rel_addr', 'checkpoint', 'simpoint_weight'])
    .agg([
        pl.col('mispredicted').cast(pl.UInt32).sum(),
        pl.count(),
    ])
    # Scale by the simpoint weight, then fold the checkpoints together.
    .select([
        'inst_rel_addr',
        pl.col('mispredicted') * simpoint_weight,
        pl.col('count') * simpoint_weight,
    ])
    .groupby('inst_rel_addr')
    .agg([
        pl.col('mispredicted').sum(),
        pl.col('count').sum(),
    ])
    .select([
        'inst_rel_addr',
        (pl.col('mispredicted') / pl.col('count')).alias('misprediction rate'),
        (pl.col('mispredicted') / pl.col('mispredicted').sum()).alias('share of mispredictions'),
    ])
    .sort('share of mispredictions', descending=True)
)

# Disabled LaTeX export of the top rows:
# print(df_mp.head(10).to_pandas().to_latex(
#     float_format=lambda x : '{:.2f}%'.format(x * 100),
#     index=False
# ))

df_mp.head(10)

inst_rel_addr,misprediction rate,share of mispredictions
str,f64,f64
"""for_each_rtx+dc""",0.170418,0.027399
"""ggc_mark_rtx_children_1+4e8""",0.143121,0.014501
"""side_effects_p+50""",0.146908,0.013011
"""reload_cse_simplify_operands+4d8""",0.373482,0.012541
"""__memset_generic+b4""",0.368392,0.011242
"""reg_scan_mark_refs+74""",0.251329,0.01121
"""for_each_rtx+124""",0.097084,0.010565
"""record_reg_classes+7bc""",0.392279,0.007988
"""bitmap_clear+24""",0.286331,0.007671
"""side_effects_p+f4""",0.09969,0.007601


In [8]:
# Scatter plot of the per-branch table: misprediction rate on x, share of all
# mispredictions on y, with numbers formatted as percentages.
alt.Chart(df_mp).mark_circle().encode(
    x=alt.X('misprediction rate:Q'),
    y=alt.Y('share of mispredictions:Q'),
).configure(numberFormat='%')

In [9]:
# Per-function statistics, SimPoint-weighted. The function name is the part of
# inst_rel_addr before the first '+'.
#   misprediction rate      – weighted mispredictions / weighted executions
#   share of mispredictions – the function's part of all weighted mispredictions
# Rows are ordered by share, largest first, and then plotted.
simpoint_weight = pl.col('simpoint_weight')

df_mpf = (
    df
    # Raw counts per branch inside each checkpoint.
    .groupby(['inst_rel_addr', 'checkpoint', 'simpoint_weight'])
    .agg([
        pl.col('mispredicted').cast(pl.UInt32).sum(),
        pl.count(),
    ])
    # Map each branch to its function and apply the simpoint weight.
    .select([
        pl.col('inst_rel_addr').str.split('+').list.first().alias('function'),
        pl.col('mispredicted') * simpoint_weight,
        pl.col('count') * simpoint_weight,
    ])
    .groupby('function')
    .agg([
        pl.col('mispredicted').sum(),
        pl.col('count').sum(),
    ])
    .select([
        'function',
        (pl.col('mispredicted') / pl.col('count')).alias('misprediction rate'),
        (pl.col('mispredicted') / pl.col('mispredicted').sum()).alias('share of mispredictions'),
    ])
    .sort('share of mispredictions', descending=True)
)
# Disabled LaTeX export (was wrapped in print(...)):
# .to_pandas().to_latex(
#     float_format=lambda x : '{:.2f}%'.format(x * 100),
#     index=False
# ))

alt.Chart(df_mpf).mark_circle().encode(
    x=alt.X('misprediction rate:Q'),
    y=alt.Y('share of mispredictions:Q'),
).configure(numberFormat='%')

In [10]:
# Looks only at mispredicted executions. Per branch (inst_rel_addr), with
# SimPoint weighting, the result columns are:
#   taken – fraction of the branch's mispredicted executions that were taken
#   count – the branch's share of all weighted mispredictions
# Rows are ordered by that share, largest first.
simpoint_weight = pl.col('simpoint_weight')

(
    df.lazy()
    .filter(pl.col('mispredicted'))
    # Raw counts per branch inside each checkpoint.
    .groupby(['inst_rel_addr', 'checkpoint', 'simpoint_weight'])
    .agg([
        pl.col('taken').cast(pl.UInt32).sum(),
        pl.count(),
    ])
    # Combine checkpoints, weighting each by its simpoint weight.
    .groupby(['inst_rel_addr'])
    .agg([
        (pl.col('taken') * simpoint_weight).sum(),
        (pl.col('count') * simpoint_weight).sum(),
    ])
    # Both expressions read the pre-update 'count', since they are evaluated
    # in the same with_columns call.
    .with_columns([
        pl.col('taken') / pl.col('count'),
        pl.col('count') / pl.col('count').sum(),
    ])
    .sort('count', descending=True)
    .collect()
)

inst_rel_addr,taken,count
str,f64,f64
"""for_each_rtx+dc""",0.509646,0.027399
"""ggc_mark_rtx_children_1+4e8""",0.626587,0.014501
"""side_effects_p+50""",0.071104,0.013011
"""reload_cse_simplify_operands+4d8""",0.62071,0.012541
"""__memset_generic+b4""",0.102924,0.011242
"""reg_scan_mark_refs+74""",0.057871,0.01121
"""for_each_rtx+124""",0.435797,0.010565
"""record_reg_classes+7bc""",0.055658,0.007988
"""bitmap_clear+24""",0.646618,0.007671
"""side_effects_p+f4""",0.714509,0.007601


In [11]:
%%time
# print(
df.lazy().with_columns([
    pl.col('inst_rel_addr').str.split('+').list.first().alias('function'),
    pl.col('ras_rel').list.get(0).alias('RAS Top Element'),
]).filter(
    pl.col('inst_rel_addr') == 'for_each_rtx+dc'
).groupby([
    pl.col('RAS Top Element'),
    pl.col('checkpoint'),
    pl.col('simpoint_weight')
]).agg(
    pl.col('taken').cast(pl.UInt32).sum(),
    pl.col('mispredicted').cast(pl.UInt32).sum(),
    pl.count()
).groupby([
    pl.col('RAS Top Element')
]).agg([
    (pl.col('taken') * pl.col('simpoint_weight')).sum(),
    (pl.col('mispredicted') * pl.col('simpoint_weight')).sum(),
    (pl.col('count') * pl.col('simpoint_weight')).sum()
]).select([
    pl.col('RAS Top Element'),
    (pl.col('taken') / pl.col('count')).alias('Taken Rate'),
    (pl.col('mispredicted') / pl.col('count')).alias('Misprediction Rate'),
    (pl.col('mispredicted') / pl.col('mispredicted').sum()).alias('Share of Mispredictions'),
]).sort(
    'Share of Mispredictions',
    descending=True
).collect(streaming=True)
# .to_pandas().to_latex(
#     float_format=lambda x : '{:.2f}%'.format(x * 100),
#     index=False
# ))

CPU times: user 83.5 ms, sys: 52.1 ms, total: 136 ms
Wall time: 89.4 ms


RAS Top Element,Taken Rate,Misprediction Rate,Share of Mispredictions
str,f64,f64,f64
"""for_each_rtx+148""",0.371938,0.213555,0.76243
"""inherently_necessary_register+24""",0.12384,0.080009,0.062653
"""for_each_rtx+1f4""",0.37479,0.22022,0.053723
"""ssa_eliminate_dead_code+5e4""",0.123313,0.055521,0.03287
"""approx_reg_cost+3c""",0.342087,0.164656,0.025429
"""rename_insn_1+3f8""",0.353087,0.131083,0.020216
"""returnjump_p+44""",0.333979,0.073886,0.013844
"""cse_basic_block+31c""",0.366316,0.214737,0.01118
"""rename_block+c4""",0.404446,0.113357,0.011084
"""rename_block+d8""",0.333249,0.182217,0.004646


In [5]:
# For the loaded `df`: per (branch, first ras_rel entry) pair, compare the
# SimPoint-weighted misprediction rate with min(taken rate, 1 - taken rate).
# Pairs where the misprediction rate is higher contribute their excess, scaled
# by the pair's share of all weighted executions; the total is shown in percent.
simpoint_weight = pl.col('simpoint_weight')
taken_rate = pl.col('taken rate')
mispred_rate = pl.col('misprediction rate')
static_floor = pl.min(taken_rate, 1 - taken_rate)

(
    df.lazy()
    .with_columns([
        pl.col('ras_rel').list.get(0).alias('ras top'),
    ])
    # Raw counts per (branch, ras top) inside each checkpoint.
    .groupby(['inst_rel_addr', 'ras top', 'checkpoint', 'simpoint_weight'])
    .agg([
        pl.col('taken').cast(pl.UInt32).sum(),
        pl.col('mispredicted').cast(pl.UInt32).sum(),
        pl.count(),
    ])
    # Combine checkpoints, weighting each by its simpoint weight.
    .groupby(['inst_rel_addr', 'ras top'])
    .agg([
        (pl.col('taken') * simpoint_weight).sum(),
        (pl.col('mispredicted') * simpoint_weight).sum(),
        (pl.col('count') * simpoint_weight).sum(),
    ])
    .with_columns([
        (pl.col('taken') / pl.col('count')).alias('taken rate'),
        (pl.col('mispredicted') / pl.col('count')).alias('misprediction rate'),
        pl.col('count').sum().alias('total count'),
    ])
    .filter(static_floor < mispred_rate)
    .select([
        100 * ((mispred_rate - static_floor) * pl.col('count') / pl.col('total count')).sum(),
    ])
    .collect(streaming=True)
)


literal
f64
0.608201


In [13]:
# Breakdown of the branch 'find_reg_note+58' by the first entry of ras_rel,
# SimPoint-weighted: occurrences, mispredictions, and the misprediction and
# taken ratios, ordered by weighted mispredictions, largest first.
simpoint_weight = pl.col('simpoint_weight')
occurrences = pl.col('occurences')
n_mispredicted = pl.col('# mispredicted')
n_taken = pl.col('# taken')

(
    df
    .filter(pl.col('inst_rel_addr') == 'find_reg_note+58')
    # Raw counts per first ras_rel entry inside each checkpoint; the key
    # column keeps the name 'ras_rel'.
    .groupby([
        pl.col('ras_rel').list.first(),
        'checkpoint',
        'simpoint_weight',
    ])
    .agg([
        pl.count().alias('occurences'),
        pl.col('mispredicted').cast(pl.UInt32).sum().alias('# mispredicted'),
        pl.col('taken').cast(pl.UInt32).sum().alias('# taken'),
    ])
    # Apply the simpoint weight, then fold the checkpoints together.
    .select([
        'ras_rel',
        occurrences * simpoint_weight,
        n_taken * simpoint_weight,
        n_mispredicted * simpoint_weight,
    ])
    .groupby('ras_rel')
    .agg([
        occurrences.sum(),
        n_mispredicted.sum(),
        n_taken.sum(),
    ])
    .select([
        'ras_rel',
        occurrences,
        n_mispredicted,
        (n_mispredicted / occurrences).alias('misprediction ratio'),
        (n_taken / occurrences).alias('taken ratio'),
    ])
    .sort('# mispredicted', descending=True)
)


ras_rel,occurences,# mispredicted,misprediction ratio,taken ratio
str,f64,f64,f64,f64
"""init_alias_analysis+460""",246.766242,76.249035,0.308993,0.493608
"""propagate_one_insn+7c""",373.733179,40.87141,0.10936,0.86038
"""delete_trivially_dead_insns+1bc""",271.247751,36.145027,0.133255,0.841201
"""delete_trivially_dead_insns+100""",271.229967,33.027379,0.121769,0.841145
"""noop_move_p+50""",196.585379,21.868662,0.111243,0.885153
"""delete_insn+184""",98.626617,21.781141,0.220844,0.791015
"""reload+60c""",45.209251,16.120365,0.356572,0.49528
"""combine_predictions_for_insn+20""",39.274961,15.857727,0.403762,0.418809
"""find_reg_equal_equiv_note+b8""",182.46267,10.91657,0.059829,0.933333
"""purge_reg_equiv_notes+3c""",101.03003,9.47879,0.093822,0.913043
