# MNIST Performance
> Performance data for the MNIST benchmark

- toc: true 
- badges: true
- comments: true
- categories: [l3, performance, MNIST]
- image: images/mnist.png

In [None]:
# Location of the benchmark results CSV; read into `master_df` by the data-loading cell.
csv_path = "./data/cleaned_csv/backup.csv"

## Rooflines

## L3 Performance

In [1]:
#hide
import pandas as pd
import numpy as np
import altair as alt

# Shared chart dimensions: passed as `width` / `height` to `.properties()` in the
# plotting cells (the line plot divides W across its side-by-side panels).
W = 600
H = 480

In [None]:
#hide
#%writefile scripts/utils.py
def norm_by_group(df, column, group_col):
    """Add a ``norm-<column>`` column: ``column`` divided by its per-group maximum.

    Args:
        df: DataFrame to normalize. It is modified in place.
        column: Name of the numeric column to normalize.
        group_col: Column name (or list of names) defining the groups.

    Returns:
        The same ``df`` object, with the new ``"norm-" + column`` column added.
    """
    # transform("max") broadcasts each group's maximum back onto the original
    # row index, so the division aligns with `df` on every pandas version.
    # groupby(...).apply(lambda x: x / x.max()) prepends the group key to the
    # index on pandas >= 2.0, which breaks the assignment back into `df`.
    group_max = df.groupby(group_col)[column].transform("max")
    df["norm-" + column] = df[column] / group_max
    return df

def select_color(sel, column):
    """Build an Altair conditional color encoding driven by a selection.

    Where ``sel`` holds, marks are colored by ``column``; everywhere else they
    fall back to the constant color ``'lightgray'``.
    """
    color_when_selected = alt.Color(column)
    color_otherwise = alt.value('lightgray')
    return alt.condition(sel, color_when_selected, color_otherwise)

def get_pareto_df(df, groupcol, xcol, ycol):
    """Build the points of a pareto line from grouped data.

    For each group in ``groupcol`` the result holds ``x`` (the maximum of
    ``xcol``) and ``y`` (the most frequent value of ``ycol``). Groups are
    ordered by descending ``y``, ``x`` is replaced by its running maximum, and
    only the first row for each distinct running maximum is kept.

    Returns:
        DataFrame indexed by group with columns ``x``, ``y`` and ``group``
        (a copy of the index).
    """
    by_group = df.groupby(groupcol)

    frontier = by_group[xcol].max().to_frame("x")
    # Most common ycol value within each group.
    frontier['y'] = by_group[ycol].agg(lambda values: values.value_counts().index[0])

    frontier = (
        frontier
        .sort_values('y', ascending=False)
        # Running maximum of x while walking down from the highest y.
        .assign(x=lambda frame: frame['x'].cummax())
        # Rows that do not raise the running maximum are dropped.
        .drop_duplicates('x', keep='first')
    )
    return frontier.assign(group=frontier.index)

def label_point(x, y, val, ax, rot=0):
    """Write a text label next to each (x, y) point on ``ax``.

    ``x``, ``y`` and ``val`` are combined column-wise into one frame; for every
    row, ``str(val)`` is drawn at ``(x + 0.02, y)`` via ``ax.text`` with
    ``rotation=rot``.

    Adapted from https://stackoverflow.com/questions/46027653/adding-labels-in-x-y-scatter-plot-with-seaborn
    """
    labelled = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for _, row in labelled.iterrows():
        # Small horizontal offset so the text does not sit on the marker.
        text_x = row['x']+.02
        label = str(row['val'])
        ax.text(text_x, row['y'], label, rotation=rot)

In [None]:
#hide
# Load the raw benchmark results from `csv_path` (set in the first code cell).
master_df = pd.read_csv(csv_path)

# Keep every non-GPU row; GPU rows are kept only when the first comma-separated
# entry of "Op mode" is "maxp".
is_maxp = lambda row: row.HWType != "GPU" or row["Op mode"].split(",")[0] == "maxp"
# .copy() makes the filtered frame independent of master_df, so the column
# assignment below writes to a real frame and not to a slice
# (avoids SettingWithCopyWarning / chained-assignment pitfalls).
maxp_df = master_df[master_df.apply(is_maxp, axis=1)].copy()
# Series identifier used for colors and legends, e.g. "<HWType>_<Precision>_<PruningFactor>".
maxp_df["hw_quant_prun"] = maxp_df.apply(lambda r: "_".join([r.HWType, r.Precision, r.PruningFactor]), axis=1)

# MLP rows that have a latency measurement. `hw_quant_prun` is inherited from
# maxp_df (computed above while PruningFactor is still a string), so it is not
# recomputed here.
mlp_df = maxp_df[(maxp_df["NN_Topology"] == "MLP") & maxp_df['lat-comp'].notna()].copy()
# "NN%" strings -> float NN.
mlp_df["PruningFactor"] = mlp_df["PruningFactor"].str.strip("%").astype(float)
# Adds the "norm-lat-comp" column to mlp_df in place.
norm_by_group(mlp_df, "lat-comp", "NN_Topology");
mlp_df["quant_model"] = mlp_df.Precision + '_' + mlp_df.HWType

# MLP rows that have an accuracy measurement; rename() returns a new frame, so
# no in-place mutation of a slice is needed.
mnist_df = maxp_df[(maxp_df.NN_Topology == 'MLP') & maxp_df["top1 [%]"].notna()].rename(
    columns={"top1 [%]": "top1"}
)

## Table Statistics

In [None]:
#hide_input
# Display pandas' default summary statistics for the MNIST accuracy rows.
mnist_df.describe()

In [None]:
#hide
# Split the MLP rows into the two hardware subsets shown side by side in the
# line plot; "A53-gemmlowp" appears in both.
figa_df = mlp_df.loc[
    mlp_df["HWType"].isin(["NCS", "ZCU104-Bismo", "A53-gemmlowp"])
]
figb_df = mlp_df.loc[
    mlp_df["HWType"].isin(["GPU", "ZCU104-FINN", "A53-gemmlowp"])
]



## Line Plot

In [None]:
#hide_input
# One panel per hardware subset; the panels are concatenated horizontally below.
fig25_dfs = [figa_df, figb_df]
fig25s = []
for panel_df in fig25_dfs:
    # Selection bound to the legend; select_color() colors the selected series
    # by hw_quant_prun and draws everything else in light gray.
    legend_sel = alt.selection_multi(fields=["hw_quant_prun"], bind="legend")

    # Scatter layer (log-scaled throughput axis).
    points = (
        alt.Chart(panel_df)
        .mark_point()
        .encode(
            x='lat-comp',
            y=alt.Y('fps-comp', scale=alt.Scale(type="log")),
            color=select_color(legend_sel, 'hw_quant_prun:N'),
            tooltip=['fps-comp', 'lat-comp', 'HWType', 'batch/thread/stream'],
        )
    )
    # Line layer joining the points of each series.
    lines = (
        alt.Chart(panel_df)
        .mark_line()
        .encode(
            x='lat-comp',
            y='fps-comp',
            color=select_color(legend_sel, 'hw_quant_prun:N'),
            tooltip=['fps-comp', 'lat-comp', 'HWType', 'batch/thread/stream'],
        )
    )

    layered = points + lines
    # Each panel takes an equal share of the total width W.
    layered = layered.properties(
        title="Latency versus Performance for Pruned and Quantized MLP Variants",
        width=W/len(fig25_dfs),
        height=H,
    )
    fig25s.append(layered.add_selection(legend_sel).interactive())

alt.hconcat(*fig25s)

## Boxplots

In [None]:
#hide_input
# Latency box plot: one box per pruning factor, faceted into one column per
# quant_model value, with a log-scaled latency axis.
box1 = (
    alt.Chart(mlp_df)
    .mark_boxplot()
    .encode(
        x='PruningFactor:O',
        y=alt.Y("lat-comp", scale=alt.Scale(type="log")),
        color='PruningFactor:O',
    )
    .facet(column="quant_model")
    .properties(title="Latency by Hardware/Framework and Pruning for MLP")
    .interactive()
)
box1

In [None]:
#hide_input
# Throughput box plot: one box per pruning factor, faceted into one column per
# quant_model value, with a log-scaled fps axis.
box1 = (
    alt.Chart(mlp_df)
    .mark_boxplot()
    .encode(
        x='PruningFactor:O',
        y=alt.Y("fps-comp", scale=alt.Scale(type="log")),
        color='PruningFactor:O',
    )
    .facet(column="quant_model")
    .properties(title="Throughput by Hardware/Framework and Pruning for MLP")
    .interactive()
)
box1

## Pareto

In [None]:
#hide_input
# Pareto line points: per hw_quant_prun series, x is the max fps-comp and y the
# most frequent top1 value (see get_pareto_df).
mnist_pareto = get_pareto_df(mnist_df, 'hw_quant_prun', 'fps-comp', 'top1')

# One line (with point markers) per hardware/precision/pruning series.
mnist_lines = alt.Chart(mnist_df).mark_line(point=True).encode(
    x="fps-comp",
    y=alt.Y("top1:Q", scale=alt.Scale(zero=False)),
    color=alt.Color("hw_quant_prun", legend=alt.Legend(columns=1)),
    tooltip=["HWType", "Precision", "PruningFactor", "batch/thread/stream", "top1", "fps-comp"],
)
# Pareto line drawn over the per-series lines.
mnist_pareto_plot = alt.Chart(mnist_pareto).mark_line().encode(
    x="x",
    y=alt.Y("y", scale=alt.Scale(zero=False)),
)
# Title typo fixed: "Cassification" -> "Classification".
(mnist_lines+mnist_pareto_plot).interactive().properties(
    width=W,
    height=H,
    title="MNIST Classification Design Space: Accuracy versus Performance"
)

# Performance Predictions for MNIST

In [3]:
#hide_input
%run scripts/heatmaps.py  #run the heatmaps script
#load mnist dataset and plot it
heatmap(pd.read_csv("data/processed_csv/mnist_heatmap.csv"), 'red')