# Initial Data Analysis
> An exploration of initial data from the paper

- toc: true 
- badges: true
- comments: true
- categories: [jupyter]
- image: images/chart-preview.png

In [None]:
#hide
import matplotlib.pyplot as plt
from pathlib import Path
from os import listdir
import pandas as pd
import chart_studio.plotly as py
import pickledb
from glob import glob
import seaborn as sns
import numpy as np
import altair as alt

# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so use whichever name this install
# provides (falling back to the default style if neither exists).
_GRID_STYLES = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
plt.style.use(next((s for s in _GRID_STYLES if s in plt.style.available), 'default'))
plt.rcParams['figure.figsize'] = 12, 8
plt.rc('text', usetex=False)

# Pixel width/height passed to the Altair charts below.
W = 600
H = 480

## Organize

In [None]:
#hide
master_df = pd.read_csv("../../data/cleaned_csv/backup.csv")


def is_maxp(row):
    """Keep every non-GPU row, and GPU rows whose first "Op mode" entry is "maxp"."""
    return row.HWType != "GPU" or row["Op mode"].split(",")[0] == "maxp"


# .copy() makes maxp_df an independent frame: later cells add columns to it,
# and doing so on a boolean-mask slice of master_df raises
# SettingWithCopyWarning.
maxp_df = master_df[master_df.apply(is_maxp, axis=1)].copy()

## Figure  25

Note: We don't have the data for the left-hand side of the figure, and the INT4/INT2 data for the FPGA is missing.
Original:  

![original](figs/fig25.png)

In [None]:
# Label each row as "<HWType>_<Precision>_<PruningFactor>".
# assign() returns a new frame instead of writing into the filtered slice
# (no SettingWithCopyWarning), and the column-wise concatenation replaces the
# slower row-by-row apply.
maxp_df = maxp_df.assign(
    hw_quant_prun=maxp_df.HWType + "_" + maxp_df.Precision + "_" + maxp_df.PruningFactor
)

In [None]:
# Rows for the MLP topology only. .copy() because the next cell adds a column
# to mlp_df, which would otherwise warn about writing to a slice of maxp_df.
mlp_df = maxp_df[maxp_df["NN_Topology"] == "MLP"].copy()

In [None]:
# (Re)build the "<HWType>_<Precision>_<PruningFactor>" label on the MLP rows.
# assign() avoids writing into a filtered slice, and the vectorized
# concatenation replaces the row-by-row apply.
mlp_df = mlp_df.assign(
    hw_quant_prun=mlp_df.HWType + "_" + mlp_df.Precision + "_" + mlp_df.PruningFactor
)

In [None]:
# One frame per Figure 25 panel, selected by hardware type.
# "A53-gemmlowp" is included in both panels.
_mlp_hw = mlp_df["HWType"]
figa_df = mlp_df.loc[_mlp_hw.isin(["NCS", "ZCU104-Bismo", "A53-gemmlowp"])]
figb_df = mlp_df.loc[_mlp_hw.isin(["GPU", "ZCU104-FINN", "A53-gemmlowp"])]

In [None]:
def select_color(sel, column):
    """Build a colour encoding that depends on a selection.

    Args:
        sel: Altair selection used as the condition.
        column: field shorthand used to colour marks when the condition holds.

    Returns:
        The ``alt.condition`` result: colour by ``column`` when ``sel``
        matches, the constant ``'lightgray'`` otherwise.
    """
    when_selected = alt.Color(column)
    when_unselected = alt.value('lightgray')
    return alt.condition(sel, when_selected, when_unselected)

In [None]:
fig25_dfs = [figa_df, figb_df]
# Split the overall width evenly between the side-by-side panels.
panel_width = W/len(fig25_dfs)

fig25s = []
for df in fig25_dfs:
    # Clicking a legend entry highlights that hw_quant_prun series.
    sel = alt.selection_multi(fields=["hw_quant_prun"], bind="legend")

    # Encodings common to the point layer and the line layer.
    shared = dict(
        x='lat-comp',
        color=select_color(sel, 'hw_quant_prun:N'),
        tooltip=['fps-comp', 'lat-comp', 'HWType', 'batch/thread/stream'],
    )
    # Only the point layer requests the log scale explicitly.
    fig25_dot = alt.Chart(df).mark_point().encode(
        y=alt.Y('fps-comp', scale=alt.Scale(type="log")),
        **shared,
    )
    fig25_line = alt.Chart(df).mark_line().encode(y='fps-comp', **shared)

    layered = (fig25_dot+fig25_line).properties(
        title="Latency versus Performance for Pruned and Quantized MLP Variants",
        width=panel_width,
        height=H,
    )
    fig = layered.add_selection(sel).interactive()
    fig25s.append(fig)

alt.hconcat(*fig25s)

In [None]:
# "<Precision>_<HWType>" label used as the x axis / facet of the box plots.
# assign() builds a new frame rather than writing into the filtered slice,
# which avoids SettingWithCopyWarning.
maxp_df = maxp_df.assign(quant_model=maxp_df.Precision + '_' + maxp_df.HWType)

In [None]:
def norm_by_group(df, column, group_col):
    """Add a ``"norm-<column>"`` column: each value divided by its group's maximum.

    Args:
        df: DataFrame to extend; it is modified in place.
        column: name of the numeric column to normalise.
        group_col: column (or list of columns) defining the groups.

    Returns:
        The same ``df`` object, with the new ``"norm-" + column`` column.
    """
    # transform("max") returns the group maximum aligned to df's own index.
    # groupby(...).apply(...) is avoided because pandas >= 2.0 prepends the
    # group key to the result's index, which breaks the column assignment.
    group_max = df.groupby(group_col)[column].transform("max")
    df["norm-" + column] = df[column] / group_max
    return df

In [None]:
# Adds "norm-lat-comp": latency scaled by the maximum within each NN_Topology.
# norm_by_group returns the frame it was given, so rebinding keeps the same
# object and, like the trailing ";", shows no cell output.
maxp_df = norm_by_group(maxp_df, "lat-comp", "NN_Topology")

In [None]:
# CNV rows that have a normalised latency. .copy() because a later cell adds
# a "pf" column to cnv_df, which would otherwise warn about writing to a slice.
cnv_df = maxp_df[(maxp_df.NN_Topology == 'CNV') & maxp_df['norm-lat-comp'].notna()].copy()
# sns.boxplot returns the matplotlib Axes it drew on (kept under the name `fig`).
fig = sns.boxplot(x="quant_model", y="norm-lat-comp", hue="PruningFactor", data=cnv_df)
fig.set_title("latency by chip and net pruning for CNV")
plt.yscale("log")
plt.xticks(rotation=45);  # trailing ";" hides the (locs, labels) repr

> TODO: get log axis working for grouped bar

In [None]:
# Numeric pruning factor: strip '%' characters from PruningFactor and cast to float.
# assign() returns a new frame, so re-running the cell is safe and no
# SettingWithCopyWarning is raised for writing into a filtered slice.
cnv_df = cnv_df.assign(pf=cnv_df.PruningFactor.str.strip('%').astype(float))

In [None]:
# Box plot of normalised latency per pruning factor (log y axis) ...
cnv_latency_box = alt.Chart(cnv_df).mark_boxplot().encode(
    x='pf:O',
    y=alt.Y("norm-lat-comp", scale=alt.Scale(type="log")),
    color='pf:O'
)
# ... repeated as one column per quant_model value.
box1 = (
    cnv_latency_box
    .facet(column="quant_model")
    .properties(title="Latency by Hardware/Framework and Pruning for CNV")
    .interactive()
)
box1

In [None]:
# CNV rows that have an fps-comp measurement.
cnv_fps_df = maxp_df[(maxp_df.NN_Topology == 'CNV') & maxp_df["fps-comp"].notna()]
# sns.boxplot returns the Axes it drew on, so the log scale is set on it directly.
fig = sns.boxplot(data=cnv_fps_df, x="quant_model", y="fps-comp", hue="PruningFactor")
fig.set_title("fig 13: FPS by chip and net pruning (CNV)")
fig.set_yscale("log")
plt.xticks(rotation=45)

# Paretos

In [None]:
# One frame per dataset, keyed on network topology, keeping only rows that
# report a top-1 accuracy. Each is an explicit copy because later cells
# rename or overwrite columns on these frames; doing that on a slice of
# maxp_df raises SettingWithCopyWarning.
has_top1 = maxp_df["top1 [%]"].notna()
mnist_df    = maxp_df[(maxp_df.NN_Topology == 'MLP') & has_top1].copy()
cifar_df    = maxp_df[(maxp_df.NN_Topology == 'CNV') & has_top1].copy()
imagenet_df = maxp_df[maxp_df.NN_Topology.isin(['GNv1','RN50','MNv1']) & has_top1].copy()

In [None]:
# Precision labels "FP17" .. "FP23" are folded back into "FP16".
# NOTE(review): presumably these are spreadsheet auto-increment artefacts of
# "FP16" - confirm against the raw data.
bad_precisions = ["FP"+str(i) for i in range(17,24)]
# assign() + mask() replaces the attribute-style write into a filtered slice
# (SettingWithCopyWarning) and the row-by-row apply.
imagenet_df = imagenet_df.assign(
    Precision=imagenet_df.Precision.mask(imagenet_df.Precision.isin(bad_precisions), 'FP16')
)

In [None]:
%%writefile utils.py
def get_pareto_df(df, groupcol, xcol, ycol):
    """Reduce ``df`` to one (x, y) point per group and keep the frontier points.

    For each ``groupcol`` group, ``x`` is the maximum of ``xcol`` and ``y`` is
    the most frequent value of ``ycol``. Groups are ordered by descending
    ``y``, ``x`` is replaced by its running maximum, and only the first row
    for each distinct running maximum is kept.

    Returns:
        DataFrame indexed by group with columns ``x``, ``y`` and ``group``
        (a copy of the index labels).
    """
    per_group = df.groupby(groupcol)
    most_common_y = per_group[ycol].agg(lambda vals: vals.value_counts().index[0])
    frontier = (
        per_group[xcol].max().to_frame("x")
        .assign(y=most_common_y)
        .sort_values('y', ascending=False)
        # Running max: a group only survives if it raises x beyond every
        # group with a higher y.
        .assign(x=lambda frame: frame.x.cummax())
        .drop_duplicates('x', keep='first')
    )
    frontier['group'] = frontier.index
    return frontier

def label_point(x, y, val, ax, rot=0):
    """Write each entry of ``val`` as a text label next to its (x, y) point.

    Adapted from https://stackoverflow.com/questions/46027653/adding-labels-in-x-y-scatter-plot-with-seaborn

    Args:
        x, y: Series of point coordinates.
        val: Series of labels; each is converted with ``str``.
        ax: object with a matplotlib-style ``text(x, y, s, rotation=...)`` method.
        rot: rotation passed to every label.
    """
    # Imported locally: this function is written to utils.py, which has no
    # module-level pandas import, so a global `pd` would be undefined there.
    import pandas as pd

    # concat aligns the three Series on their index before iterating.
    a = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for _, point in a.iterrows():
        # Small x offset so the text does not sit on top of the marker.
        ax.text(point['x']+.02, point['y'], str(point['val']), rotation=rot)

### MNIST

In [None]:
# The %%writefile cell above only writes get_pareto_df/label_point to
# utils.py; it does not define them in this kernel. Import them so the
# notebook survives Restart & Run All.
from utils import get_pareto_df, label_point

sns.set(font_scale=0.8)
mnist_pareto = get_pareto_df(mnist_df, 'hw_quant_prun', 'fps-comp', 'top1 [%]')
fig, ax = plt.subplots()
# Drawn first so the frontier is ax.lines[0] and can be dashed below.
sns.lineplot(x='x', y='y', data=mnist_pareto, ax=ax, label="Pareto Frontier")
ax.lines[0].set_linestyle("--")
sns.lineplot(x='fps-comp', y='top1 [%]', hue='hw_quant_prun', data=mnist_df, ax=ax)
sns.scatterplot(x='fps-comp', y='top1 [%]', hue='hw_quant_prun', data=mnist_df, ax=ax, legend=False)
plt.title("MNIST Classification Design Space: Accuracy versus Performance")
plt.xlabel('FPS [hz]')
plt.ylabel('Top1 Accuracy [%]')
plt.legend(loc="upper right", ncol=4)
plt.ylim([96, 102])
plt.xlim([-100000, 4800000])
# Annotate each frontier point with its group name.
label_point(mnist_pareto.x, mnist_pareto.y, mnist_pareto.group, plt.gca(), 35)

In [None]:
# Rename "top1 [%]" to "top1" for the Altair charts below.
# NOTE(review): presumably because Altair field shorthand gives "[" and "]"
# special meaning - confirm.
# Rebinding instead of inplace=True avoids mutating a filtered slice.
mnist_df = mnist_df.rename(columns={"top1 [%]": "top1"})

In [None]:
# One line (with point markers) per hw_quant_prun configuration.
mnist_lines = alt.Chart(mnist_df).mark_line(point=True).encode(
    x="fps-comp",
    y=alt.Y("top1:Q", scale=alt.Scale(zero=False)),
    color=alt.Color("hw_quant_prun", legend=alt.Legend(columns=2)),
    tooltip=["HWType", "Precision", "PruningFactor", "batch/thread/stream", "top1", "fps-comp"],
)
# Pareto frontier computed earlier, layered on top.
mnist_pareto_plot = alt.Chart(mnist_pareto).mark_line().encode(
    x="x",
    y=alt.Y("y", scale=alt.Scale(zero=False)),
)
# Title typo fixed: "Cassification" -> "Classification".
(mnist_lines+mnist_pareto_plot).interactive().properties(
    width=W,
    height=H,
    title="MNIST Classification Design Space: Accuracy versus Performance"
)

### CIFAR
Original:  
![example](figs/fig22.png)

In [None]:
cifar_pareto = get_pareto_df(cifar_df, 'hw_quant_prun', 'fps-comp', 'top1 [%]')
fig, ax = plt.subplots()

# Frontier goes first so it is ax.lines[0] and can be switched to dashed.
sns.lineplot(data=cifar_pareto, x='x', y='y', label="Pareto Frontier", ax=ax)
ax.lines[0].set_linestyle("--")

# Measured points: one coloured line plus markers per hw_quant_prun group.
sns.lineplot(data=cifar_df, x='fps-comp', y='top1 [%]', hue='hw_quant_prun', ax=ax)
sns.scatterplot(data=cifar_df, x='fps-comp', y='top1 [%]', hue='hw_quant_prun', legend=False, ax=ax)

ax.set_title("CIFAR 10 Classification Design Space: Accuracy versus Performance")
ax.set_xlabel('FPS [hz]')
ax.set_ylabel('Top1 Accuracy [%]')
ax.set_ylim([73, 98])
ax.set_xlim([0, 80000])
ax.legend(loc="upper right", ncol=3)
# Annotate each frontier point with its group name.
label_point(cifar_pareto.x, cifar_pareto.y, cifar_pareto.group, ax, 30)

In [None]:
# Bracket-free accuracy column for Altair. Rebinding instead of inplace=True
# avoids mutating a filtered slice.
cifar_df = cifar_df.rename(columns={"top1 [%]": "top1"})
# One line (with point markers) per hw_quant_prun configuration.
cifar_lines = alt.Chart(cifar_df).mark_line(point=True).encode(
    x="fps-comp",
    y=alt.Y("top1:Q", scale=alt.Scale(zero=False)),
    color=alt.Color("hw_quant_prun", legend=alt.Legend(columns=1)),
    tooltip=["HWType", "Precision", "PruningFactor", "batch/thread/stream", "top1", "fps-comp"],
)
# Pareto frontier computed earlier, layered on top.
cifar_pareto_plot = alt.Chart(cifar_pareto).mark_line().encode(
    x="x",
    y=alt.Y("y", scale=alt.Scale(zero=False)),
)
# Title typo fixed: "Cassification" -> "Classification".
(cifar_lines+cifar_pareto_plot).interactive().properties(
    width=W,
    height=H,
    title="CIFAR Classification Design Space: Accuracy versus Performance"
)

In [None]:
# Label each row as "<HWType>_<Precision>_<NN_Topology>_<PruningFactor>".
# assign() returns a new frame instead of writing into the filtered slice,
# and the column-wise concatenation replaces the row-by-row apply.
imagenet_df = imagenet_df.assign(
    hw_precision_net_prun=(
        imagenet_df.HWType + "_" + imagenet_df.Precision + "_"
        + imagenet_df.NN_Topology + "_" + imagenet_df.PruningFactor
    )
)

In [None]:
imagenet_pareto = get_pareto_df(imagenet_df, 'hw_precision_net_prun', 'fps-comp', 'top1 [%]')
fig, ax = plt.subplots()

# Frontier goes first so it is ax.lines[0] and can be switched to dashed.
sns.lineplot(data=imagenet_pareto, x='x', y='y', label="Pareto Frontier", ax=ax)
ax.lines[0].set_linestyle("--")

# Measured points: one coloured line plus markers per hw_precision_net_prun group.
sns.lineplot(data=imagenet_df, x='fps-comp', y='top1 [%]', hue='hw_precision_net_prun', ax=ax)
sns.scatterplot(data=imagenet_df, x='fps-comp', y='top1 [%]', hue='hw_precision_net_prun', legend=False, ax=ax)

ax.set_title("ImageNet Classification Design Space: Accuracy versus Performance")
ax.set_xlabel('FPS [hz]')
ax.set_ylabel('Top1 Accuracy [%]')
ax.set_ylim([66, 79])
ax.set_xlim([0, 750])
ax.legend(loc="upper right", ncol=2)
# Annotate each frontier point with its group name.
label_point(imagenet_pareto.x, imagenet_pareto.y, imagenet_pareto.group, ax, 15)

In [None]:
# Bracket-free accuracy column for Altair. Rebinding instead of inplace=True
# avoids mutating a filtered slice.
imagenet_df = imagenet_df.rename(columns={"top1 [%]": "top1"})
# One line (with point markers) per hw_precision_net_prun configuration.
imagenet_lines = alt.Chart(imagenet_df).mark_line(point=True).encode(
    x="fps-comp",
    y=alt.Y("top1:Q", scale=alt.Scale(zero=False)),
    color=alt.Color("hw_precision_net_prun", legend=alt.Legend(columns=1)),
    tooltip=["HWType", "Precision", "PruningFactor", "batch/thread/stream", "top1", "fps-comp"],
)
# Pareto frontier computed earlier, layered on top.
imagenet_pareto_plot = alt.Chart(imagenet_pareto).mark_line().encode(
    x="x",
    y=alt.Y("y", scale=alt.Scale(zero=False)),
)
# Title typo fixed: "Cassification" -> "Classification".
(imagenet_lines+imagenet_pareto_plot).interactive().properties(
    width=W,
    height=H,
    title="ImageNet Classification Design Space: Accuracy versus Performance"
)