In [5]:
import sys

import numpy as np
import pandas as pd
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt

import utils

import vegafusion as vf
vf.enable(row_limit=100000)

vegafusion.enable(mimetype='html', row_limit=100000, embed_options=None)

In [6]:
# Location of the compressed trace and the numeric second argument passed to
# the loader (presumably a row limit -- confirm against utils.load_data).
trace_path = '../../m5out/trace_gcc.out.gz'
trace_limit = 10_000_000

full_df = utils.load_data(trace_path, trace_limit)

# Show the parsed column types, then how many rows were loaded.
print(full_df.dtypes)
print(f'Table has {len(full_df)} rows')

KeyboardInterrupt: 

In [3]:
# Peek at the first three trace records to sanity-check the loaded columns.
full_df.head(n=3)

Unnamed: 0,tick,disassembly,inst_addr,inst_rel_addr,pred_addr,pred_rel_addr,pred_taken,mispredicted,ras,regs,ras_rel,taken
0,2256896,bl <__libc_start_main>,4199764,_start+48,5247984,__libc_start_main+0,0,0,[],"{'r0': 0, 'r1': 549755748219, 'r2': 0, 'r3': 0...",[],0
1,4530176,"cbz x7, <__libc_start_main+76>",5248044,__libc_start_main+3c,5248048,__libc_start_main+40,0,1,[4199764],"{'r0': 4195696, 'r1': 16, 'r2': 549755747448, ...",[_start+48],1
2,7356416,"cbnz x1, <__libc_start_main+120>",5248108,__libc_start_main+7c,5248112,__libc_start_main+80,0,0,[4199764],"{'r0': 4195696, 'r1': 0, 'r2': 549755747448, '...",[_start+48],0


In [3]:
# rates = full_df.groupby('inst_addr')['mispredicted'].mean().reset_index(name='aa')
# df = full_df[full_df['inst_addr'].isin(rates[rates['aa'] > 0.3]['inst_addr'])]

# One point per static branch (`inst_rel_addr`): how often it is taken versus
# how many times it appears in the trace.
#
# The aggregated quantity is mean(taken), so the derived field is named
# `taken_rate`. It was previously called `mispred_rate`, which mislabelled
# the x axis.
alt.Chart(full_df[['taken', 'inst_rel_addr']]).mark_circle().encode(
    x=alt.X('taken_rate:Q', title='Fraction of executions taken'),
    y=alt.Y(
        'count:Q',
        scale=alt.Scale(type="log"),  # counts span orders of magnitude
        title='Executions (log scale)',
    ),
    tooltip=['inst_rel_addr:N', 'taken_rate:Q', 'count:Q'],
).transform_aggregate(
    taken_rate='mean(taken)',
    count='count()',
    groupby=['inst_rel_addr'],
)

In [4]:
# One point per static branch: misprediction rate against the entropy of its
# taken/not-taken outcome distribution, coloured by execution count.
data = full_df[['mispredicted', 'taken', 'inst_rel_addr']]

# Binary entropy in bits: H(p) = -(p*log2(p) + (1-p)*log2(1-p)).
# Vega's `log` is the natural logarithm, hence the division by log(2).
# The p == 0 and p == 1 cases are mapped to 0 explicitly, because evaluating
# 0 * log(0) in the expression would yield NaN.
# The previous expression, -log(p) - log(1-p), is not entropy: it is smallest
# at p = 0.5 and infinite for always/never-taken branches.
entropy_expr = (
    'datum.avg_taken <= 0 || datum.avg_taken >= 1 ? 0 : '
    '-(datum.avg_taken * log(datum.avg_taken)'
    ' + (1 - datum.avg_taken) * log(1 - datum.avg_taken)) / log(2)'
)

alt.Chart(data).mark_circle().encode(
    x='mispred_rate:Q',
    y='entropy:Q',
    color=alt.Color('count:Q', scale=alt.Scale(scheme='viridis')),
    tooltip='inst_rel_addr'
).transform_aggregate(
    mispred_rate='mean(mispredicted)',
    avg_taken='mean(taken)',
    count='count()',
    groupby=['inst_rel_addr']
).transform_calculate(
    entropy=entropy_expr
).interactive()

In [6]:
num_to_keep = 10

# branch_name = 'make_node+390' # Cloning Opportunity
branch_name = 'invalidate+114'

# Work on a private copy of the rows belonging to the selected branch.
df = full_df[full_df['inst_rel_addr'] == branch_name].copy()

bhr_len = 64
mask = (1 << bhr_len) - 1

# Actual outcome of each execution, reconstructed as
# `mispredicted XOR pred_taken`: a correct prediction means the outcome
# equals the prediction, a misprediction means it is the opposite.
# Computed once on whole columns instead of two `df.iloc[i][...]` row
# lookups per loop iteration.
outcomes = (
    df['mispredicted'].to_numpy().astype(bool)
    ^ df['pred_taken'].to_numpy().astype(bool)
)

# `current_bhr` is a shift register of the previous outcomes of this branch:
# the newest outcome goes into bit 0 and the value is truncated to `bhr_len`
# bits. The register is recorded *before* the current outcome is shifted in,
# so each row's `bhr` only reflects earlier executions.
current_bhr = 0
bhrs = []
for taken in outcomes:
    bhrs.append(current_bhr)
    current_bhr = ((current_bhr << 1) | int(taken)) & mask

df['bhr'] = bhrs

# # Only taken but mispredicted branches
# df = full_df[(full_df['mispredicted'] == 1) & (full_df['pred_taken'] == 0)]

# # Count all occurences
# occurences = df.groupby('inst_rel_addr').size().sort_values().reset_index(name = 'occurences')

# # Print occurences of best samples
# print(occurences[-num_to_keep:])

# # Filter only best num_to_keep branches
# df = df[df['inst_rel_addr'].isin(occurences[-num_to_keep:]['inst_rel_addr'])]




In [7]:
def list_to_embed(l):
    """Expand a sequence of unsigned 64-bit integers into a flat 0/1 vector.

    Each value contributes 64 consecutive entries, least-significant bit
    first, so the result has ``64 * len(l)`` elements.

    Parameters
    ----------
    l : sequence of int
        Values convertible to ``np.uint64``. May be empty.

    Returns
    -------
    numpy.ndarray
        1-D integer array of zeros and ones. An empty input gives an empty
        array (the previous ``np.concatenate`` form raised ``ValueError``).

    Notes
    -----
    Inputs are not padded: lists of different lengths produce vectors of
    different lengths, so callers that need a fixed width must pad first.
    """
    values = np.asarray(l, dtype=np.uint64).reshape(-1, 1)
    # One mask per bit position: 1, 2, 4, ..., 2**63.
    bit_masks = np.uint64(1) << np.arange(64, dtype=np.uint64)
    # Broadcasting (n, 1) against (64,) gives an (n, 64) table of set bits.
    bits = (values & bit_masks) != 0
    return bits.reshape(-1).astype(int)

# Alternative state sources, kept for reference:
# df['state'] = df['regs'].apply(lambda x : list_to_embed(list(x.values())))
# df['state'] = df['ras'].apply(lambda x : list_to_embed(x))

def _bhr_state(bhr_value):
    """Bit vector for one history value, passed on as a one-element list."""
    return list_to_embed([bhr_value])


# Current choice: embed each row's history register.
df['state'] = df['bhr'].apply(_bhr_state)


In [8]:
subset = df.copy()  # df.sample(5000).copy()

# Stack the per-row state vectors into one (n_rows, n_features) float matrix.
states = np.array(subset['state'].tolist(), dtype=float)

# pca = PCA(50)

# states = pca.fit_transform(states)

# Fixed seed so the layout is the same on every run; without `random_state`
# the embedding changes between runs and the plots cannot be reproduced.
tsne_seed = 0

# On 0/1 vectors the cityblock (L1) distance is the number of differing bits.
tsne = TSNE(verbose=1, n_jobs=20, metric='cityblock', random_state=tsne_seed)

embeddings = tsne.fit_transform(states)

# Keep the 2-D coordinates next to the original rows for plotting.
subset['embed_x'] = embeddings[:, 0]
subset['embed_y'] = embeddings[:, 1]

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 6707 samples in 0.001s...
[t-SNE] Computed neighbors for 6707 samples in 1.519s...
[t-SNE] Computed conditional probabilities for sample 1000 / 6707
[t-SNE] Computed conditional probabilities for sample 2000 / 6707
[t-SNE] Computed conditional probabilities for sample 3000 / 6707
[t-SNE] Computed conditional probabilities for sample 4000 / 6707
[t-SNE] Computed conditional probabilities for sample 5000 / 6707
[t-SNE] Computed conditional probabilities for sample 6000 / 6707
[t-SNE] Computed conditional probabilities for sample 6707 / 6707
[t-SNE] Mean sigma: 0.000000
[t-SNE] KL divergence after 250 iterations with early exaggeration: 81.427826
[t-SNE] KL divergence after 1000 iterations: 1.030666


In [9]:

# Scatter of the 2-D embedding: colour marks mispredictions, marker shape the
# predicted direction, and the tooltip shows the history register value.
plot_columns = ['embed_x', 'embed_y', 'mispredicted', 'pred_taken', 'bhr']
data = subset[plot_columns]

mispredicted_color = alt.Color(
    'mispredicted:O',
    scale=alt.Scale(scheme='tableau10'),
)

embedding_points = alt.Chart(data).mark_point().encode(
    x='embed_x',
    y='embed_y',
    color=mispredicted_color,
    shape='pred_taken:N',
    tooltip='bhr',
)

embedding_points.properties(width=500, height=500).interactive()

In [112]:
def _mean_by_ras_bar(frame, source_column, mean_field):
    """Bar chart of the mean of `source_column` for each `ras_rel` group.

    The mean is exposed as `mean_field`; groups with 10 or fewer rows are
    removed by the filter.
    """
    return alt.Chart(frame).mark_bar().encode(
        x=f'{mean_field}:Q',
        y='ras_rel:O',
        tooltip=['ras_rel', 'count:Q'],
    ).transform_aggregate(
        groupby=['ras_rel'],
        **{mean_field: f'mean({source_column})', 'count': 'count()'},
    ).transform_filter(
        'datum.count > 10'
    )


data = df[['ras_rel', 'mispredicted', 'taken', 'pred_taken']].copy()

# Reduce each `ras_rel` value to its first entry and stringify it so it can
# be used as a grouping key.
data['ras_rel'] = data['ras_rel'].map(lambda ras: str(ras[:1]))

# Actual taken rate (left) next to the predicted taken rate (right).
avg_taken_plot = _mean_by_ras_bar(data, 'taken', 'avg_taken')
avg_pred_taken_plot = _mean_by_ras_bar(data, 'pred_taken', 'avg_pred_taken')

avg_taken_plot | avg_pred_taken_plot

In [10]:
# Same comparison as a single faceted chart: one row per `ras_rel` group with
# the actual and predicted taken rates drawn as two bars.
ras_columns = ['ras_rel', 'mispredicted', 'taken', 'pred_taken']
data = df[ras_columns].copy()

# Group by the first `ras_rel` entry only, rendered as a string.
data['ras_rel'] = data['ras_rel'].map(lambda ras: str(ras[:1]))

# Per-group means and row counts; only groups with more than 100 rows stay.
per_group = alt.Chart(data).transform_aggregate(
    avg_taken='mean(taken)',
    avg_pred_taken='mean(pred_taken)',
    count='count()',
    groupby=['ras_rel'],
).transform_filter(
    'datum.count > 100'
)

# Fold the two mean columns into (type, value) pairs so they share one axis.
folded = per_group.transform_fold(
    ['avg_taken', 'avg_pred_taken'],
    ['type', 'value'],
)

folded.mark_bar().encode(
    row='ras_rel:N',
    color='type:N',
    x='value:Q',
    y='type:O',
    tooltip=['avg_taken:Q', 'avg_pred_taken:Q', 'ras_rel', 'count:Q'],
).properties(width=500, height=50)
