# Rooflines 
> Plotting rooflines with Altair

- toc: true 
- badges: true
- comments: true
- categories: [jupyter]

In [1]:
#hide
import altair as alt
from altair import datum
    
import numpy as np
import pandas as pd

import json
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import csv

from tabulate import tabulate
from pandas.plotting import table 

from labellines import labelLine, labelLines
from IPython.display import display, HTML

In [2]:
#hide
## Just some needed functions
#Function to read from a csv file and return a numpy 2D array
def read_from_csv(filename):
    """Read a CSV file and return all of its rows as a NumPy array.

    The file is opened as UTF-8 text and parsed with ``csv.reader``, so
    every cell is kept as a string (no numeric conversion is attempted).

    Parameters
    ----------
    filename : str or path-like
        Location of the CSV file.

    Returns
    -------
    numpy.ndarray
        The parsed rows passed through ``np.asarray``. When every row has
        the same number of cells this is a 2D array of strings.
    """
    with open(filename, newline='', encoding='utf-8') as handle:
        rows = list(csv.reader(handle))
    return np.asarray(rows)

In [3]:
#hide
## Just some needed functions
def load_and_display(filenames):
    """Load each CSV file into a DataFrame with a two-level column header.

    For every file the first two CSV rows become the two levels of the
    column header and the remaining rows become the data. In the first
    column, any value that already appeared in an earlier row is replaced
    with an empty string.

    Despite its name this function does not display anything; it only
    returns the frames.

    Parameters
    ----------
    filenames : iterable of str
        Paths handed to ``read_from_csv`` one by one.

    Returns
    -------
    list of pandas.DataFrame
        One frame per input file, in the same order.
    """
    frames = []
    for path in filenames:
        cells = read_from_csv(path)
        header_top, header_sub, body = cells[0, 0:], cells[1, 0:], cells[2:, :]
        frame = pd.DataFrame(data=body, columns=[header_top, header_sub])

        # Blank out repeated entries in the first column.
        first_col = frame.columns[0]
        repeated = frame.duplicated(first_col)
        frame.loc[repeated, first_col] = ''

        frames.append(frame)
    return frames

In [4]:
#hide
## Loading Hardware platforms and Neural networks csv
# Hardware platforms: later cells read the Name, Peak_Performance and Bandwidth columns.
data = pd.read_csv('Data/peakPerfBandHardPlatf.csv')
df = pd.DataFrame(data)

# Neural-network topologies: later cells read the 'Fwd Ops' and 'Total Params' columns.
# NOTE(review): these paths use 'Data/' while the heatmap cell reads from 'data/';
# confirm which spelling matches the repository on a case-sensitive filesystem.
df_topology = pd.read_csv('Data/topology_details.csv')

# Overview of the experiments done

In [5]:
#hide_input
# One overview table per dataset, rendered as HTML without the row index.
filenames = ['Data/imagenet.csv', 'Data/mnist.csv', 'Data/cifar10.csv']
dataframes = load_and_display(filenames)
for overview in dataframes:
    display(HTML(overview.to_html(index=False)))

Unnamed: 0_level_0,Unnamed: 1_level_0,ImageNet Classification,ImageNet Classification,ImageNet Classification
Hardware,Platform,ResNet50,GoogLeNetV1,MobileNet
FPGA,ZCU102-DPU,"[INT8]*[100%,80%,50%,30%]",INT8,na
,ZCU104-DPU,INT8,INT8,na
,Ultra96-DPU,"[INT8]*[100%,80%,50%,30%]",INT8,INT8
,ZCU104-FINN,na,na,na
,ZCU104-BISMO,na,na,na
GPU,TX2-maxn,"FP16,FP32","FP16,FP32",na
,TX2-maxp,"FP16,FP32","FP16,FP32",na
,TX2-maxq,"FP16,FP32","FP16,FP32",na
TPU,TPU-fast clk,na,INT8,INT8
,TPU-slow clk,na,INT8,INT8


Unnamed: 0_level_0,Unnamed: 1_level_0,MNIST Classification
Hardware,Platform,MLP
FPGA,ZCU102-DPU,na
,ZCU104-DPU,na
,Ultra96-DPU,na
,ZCU104-FINN,"[INT2, INT4] * [100%,50%,25%,12.5%]"
,ZCU104-BISMO,"[INT2, INT4] * [100%,50%,25%,12.5%]"
GPU,TX2-maxn,"[FP16, FP32] * [100%,50%,25%,12.5%]"
,TX2-maxp,"[FP16, FP32] * [100%,50%,25%,12.5%]"
,TX2-maxq,"[FP16, FP32] * [100%,50%,25%,12.5%]"
TPU,TPU-fast clk,na
,TPU-slow clk,na


Unnamed: 0_level_0,Unnamed: 1_level_0,CIFAR10 Classification
Hardware,Platform,CNV
FPGA,ZCU102-DPU,na
,ZCU104-DPU,na
,Ultra96-DPU,na
,ZCU104-FINN,"[INT2,INT4]*[100%,50%,25%,12.5%]"
,ZCU104-BISMO,"[INT2,INT4]*[100%,50%,25%,12.5%]"
GPU,TX2-maxn,"[FP16,FP32]*[100%,50%,25%,12.5%]"
,TX2-maxp,"[FP16,FP32]*[100%,50%,25%,12.5%]"
,TX2-maxq,"[FP16,FP32]*[100%,50%,25%,12.5%]"
TPU,TPU-fast clk,na
,TPU-slow clk,na


In [6]:
#hide
## Calculate the Arithmetic intensity (x axis) for each NN based on Fwd ops and Total params
n_bytes = 1  # multiplier applied to the parameter count in the denominator below


def calc_arith(operations, params, n_bytes):
    """Return the arithmetic intensity: operations / (params * n_bytes)."""
    return operations / (params * n_bytes)


# Computed for the whole column at once instead of row by row.
df_topology['arith_intens'] = calc_arith(
    df_topology['Fwd Ops'], df_topology['Total Params'], n_bytes
)

# Stack four copies of the frame: the cell that adds the 'performance' column
# gives each quarter a different y value, so every network ends up with four
# points that share one x value (a vertical line in the roofline plot).
df_topology = pd.concat([df_topology] * 4)

# 'Fwd Ops' and 'Total Params' were only needed to derive arith_intens.
df_topology = df_topology.drop(columns=['Total Params', 'Fwd Ops'])

In [7]:
#hide
# Preview the first rows of the topology table.
df_topology.head()

Unnamed: 0,Name,arith_intens
0,MobileNet V1,23878
1,AlexNet,2995
2,GoogLeNet V1,29988
3,ResNet-18,39950
4,ResNet-34,43169


In [8]:
#hide
# Preview the first rows of the hardware platform table.
df.head()

Unnamed: 0,Name,Peak_Performance,Bandwidth
0,Ultra96 DPU INT8,0.96,0.00426
1,ZCU104 INT8,4.6,0.0192
2,ZCU102 INT8,6.71,0.0192
3,ZCU104 FINN INT2,30.7,0.0192
4,ZCU104 FINN INT4,8.8,0.0192


In [9]:
#hide
## Preparing the NNs dataset to be plotted as vertical lines later
# Split the frame into four equal runs of rows and give each run one y value,
# in this order: 100, 25, 75, 0.000001. The resulting list looks like
# [100, ..., 100, 25, ..., 25, 75, ..., 75, 0.000001, ..., 0.000001].
quarter = len(df_topology.index) // 4
df_topology['performance'] = [
    level for level in (100, 25, 75, 0.000001) for _ in range(quarter)
]

In [10]:
#hide
## Calculating the rooflines (y axis) for each hardware platform (dataframe = df_topology + df)
#--------------------------------Calculating the values to plot for the roofline model-----------
maxX = 160000
# Sampled arithmetic intensities: 0.1, 1.1, 2.1, ... (step 1, all below maxX).
x_axis = np.arange(0.1, maxX, 1)

# Collect the points first and build the frame once at the end.
# DataFrame.append no longer exists in pandas 2.x, and growing a frame row by
# row copies it on every call.
roofline_rows = []
for _, row in df.iterrows():
    name = row['Name']
    bandwidth = row['Bandwidth']
    peak = row['Peak_Performance']

    # First point of the sloped segment: at intensity 1 the value is the bandwidth itself.
    roofline_rows.append((name, 1, bandwidth))

    # Ridge point: first sampled intensity where bandwidth * intensity exceeds the peak.
    above_peak = bandwidth * x_axis > peak
    if above_peak.any():
        ridge = float(x_axis[above_peak.argmax()])
        roofline_rows.append((name, ridge, peak))
        # Flat segment: hold the peak out to the right edge of the plot.
        roofline_rows.append((name, maxX, peak))
    # If no sample exceeds the peak, only the first point is kept for this platform.

dataframe = pd.DataFrame(roofline_rows, columns=['Name', 'arith_intens', 'performance'])

In [11]:
#hide
# Preview the first roofline points (Name, arith_intens, performance).
dataframe.head()

Unnamed: 0,Name,arith_intens,performance
0,Ultra96 DPU INT8,1.0,0.00426
1,Ultra96 DPU INT8,226.1,0.96
2,Ultra96 DPU INT8,160000.0,0.96
3,ZCU104 INT8,1.0,0.0192
4,ZCU104 INT8,240.1,4.6


In [12]:
#hide
## Merging NNs dataset with Hardware Platforms dataset
# Stack the network rows under the roofline rows so one frame feeds every plot.
# NOTE: not idempotent -- running this cell again appends df_topology a second time.
dataframe = pd.concat([dataframe,df_topology])
#dataframe.head()

#hide
# Plotting Rooflines and NNs Arithmetic intensity: interactive boxes as legend

In [13]:
#hide
# One row per distinct Name; this small frame backs the clickable legend strip.
make = pd.DataFrame({'Name': dataframe.Name.unique()})

# Selecting on the Name field; unselected entries are drawn in light gray.
selection = alt.selection_single(fields=['Name'])
color = alt.condition(
    selection,
    alt.Color('Name:N', legend=None),
    alt.value('lightgray'),
)

# Legend strip: one rectangle per Name, carrying the selection.
make_selector = (
    alt.Chart(make)
    .mark_rect()
    .encode(y='Name', color=color)
    .add_selection(selection)
)

# Both axes are logarithmic with fixed domains.
intensity_axis = alt.X(
    'arith_intens:Q',
    title='ARITHMETIC INTENSITY (OPS/BYTE)',
    scale=alt.Scale(type='log', domain=(0.1, maxX)),
)
performance_axis = alt.Y(
    'performance:Q',
    title='PERFORMANCE (TOPS/S)',
    scale=alt.Scale(type='log', domain=(0.2, 40)),
)

# Main plot: lines are clipped to the axis domains and filtered by the legend selection.
chart = (
    alt.Chart(dataframe, width=700, height=500)
    .mark_line(clip=True)
    .encode(
        x=intensity_axis,
        y=performance_axis,
        color=alt.Color('Name:N', legend=None),
    )
    .transform_filter(selection)
)

alt.hconcat(make_selector, chart)

# Rooflines for all hardware platforms with Neural Networks arithmetic Intensity

In [14]:
#hide
# Checkboxes with on-plot tooltips
def line_chart_w_checkbox(data, condition, selection):
    """Build a 700x500 log-log line chart that carries a selection.

    Parameters
    ----------
    data
        Data passed to ``alt.Chart``; the encodings read its
        ``arith_intens`` and ``performance`` fields.
    condition
        Value used for the ``color`` encoding.
    selection
        Selection attached to the chart with ``add_selection``.

    Returns
    -------
    The configured Altair chart.

    Notes
    -----
    The upper bound of the x domain is read from the module-level ``maxX``.
    """
    intensity = alt.X(
        'arith_intens:Q',
        title='ARITHMETIC INTENSITY (OPS/BYTE)',
        scale=alt.Scale(type='log', domain=(0.1, maxX)),
    )
    performance = alt.Y(
        'performance:Q',
        title='PERFORMANCE (TOPS/S)',
        scale=alt.Scale(type='log', domain=(0.2, 40)),
    )
    base = alt.Chart(data, width=700, height=500).properties(
        title='Comparing Hardware Platforms Rooflines and Neural Networks Arithmetic Intensity with checkboxes'
    )
    return (
        base.mark_line(clip=True)
        .encode(intensity, performance, color=condition)
        .add_selection(selection)
    )

In [15]:
#hide
def line_chart_no_checkbox(data, condition, selection):
    """Build a 600x400 log-log line chart coloured by Name, with no selection.

    Parameters
    ----------
    data
        Data passed to ``alt.Chart``; the encodings read its
        ``arith_intens``, ``performance`` and ``Name`` fields.
    condition
        Not used by this function.
    selection
        Not used by this function.

    Returns
    -------
    The configured Altair chart.

    Notes
    -----
    The upper bound of the x domain is read from the module-level ``maxX``.
    NOTE(review): the title still says "with checkboxes" although no
    selection is attached here -- possibly copied from
    ``line_chart_w_checkbox``; confirm the intended wording.
    """
    intensity = alt.X(
        'arith_intens:Q',
        title='ARITHMETIC INTENSITY (OPS/BYTE)',
        scale=alt.Scale(type='log', domain=(0.1, maxX)),
    )
    performance = alt.Y(
        'performance:Q',
        title='PERFORMANCE (TOPS/S)',
        scale=alt.Scale(type='log', domain=(0.2, 40)),
    )
    base = alt.Chart(data, width=600, height=400).properties(
        title='Comparing Hardware Platforms Rooflines and Neural Networks Arithmetic Intensity with checkboxes'
    )
    return base.mark_line(clip=True).encode(
        intensity,
        performance,
        color=alt.Color("Name:N"),
    )

In [16]:
#hide_input
width = 700
height = 500
data = dataframe


def _rows_named(pattern):
    """Return the rows of ``dataframe`` whose Name matches the pattern ``pattern``."""
    return dataframe[dataframe['Name'].str.contains(pattern)]


# Hardware platform groups -- each subset backs one checkbox below.
FPGA_data   = _rows_named("Ultra96 DPU|ZCU")
NVIDIA_data = _rows_named("TX2")
GOOGLE_data = _rows_named("TPU")
INTEL_data  = _rows_named("NCS")

# Neural network groups.
IMAGENET_data = _rows_named("ResNet|GoogLeNet|MobileNet|VGG|AlexNet")
MNIST_data    = _rows_named("MLP")
CIFAR_data    = _rows_named("CNV")
MASKRCNN_data = _rows_named("MaskRCNN")
GNMT_data     = _rows_named("GNMT")

# Every group selection below is bound to a checkbox input.
filter_checkbox = alt.binding_checkbox()


def _checkbox_selection(name):
    """Create a single selection on the "Hide" field, bound to a checkbox, with the given name."""
    return alt.selection_single(fields=["Hide"], bind=filter_checkbox, name=name)


# Hardware platform groups.
# NOTE(review): "HNVIDIA" looks like a typo for "NVIDIA"; the string is kept unchanged
# because it is the selection's name -- confirm before renaming.
FPGA_select   = _checkbox_selection("FPGAs Ultra96 DPU ZCU")
NVIDIA_select = _checkbox_selection("HNVIDIA TX2 maxn,maxp,maxq")
GOOGLE_select = _checkbox_selection("GOOGLE TPU,fast,slow")
INTEL_select  = _checkbox_selection("INTEL NCS")

# Neural network groups.
IMAGENET_select = _checkbox_selection("IMAGENET ResNet GoogLeNet MobileNet VGG AlexNet")
MNIST_select    = _checkbox_selection("MNIST MLP")
CIFAR_select    = _checkbox_selection("CIFAR10 CNV")
MASKRCNN_select = _checkbox_selection("MASKRCNN")
GNMT_select     = _checkbox_selection("GNMT")

def _name_color_or_gray(group_selection):
    """Colour by Name where ``group_selection`` matches, light gray otherwise."""
    return alt.condition(group_selection, alt.Color("Name:N"), alt.value("lightgray"))


# Colour condition for each hardware platform group.
FPGA_cond     = _name_color_or_gray(FPGA_select)
NVIDIA_cond   = _name_color_or_gray(NVIDIA_select)
GOOGLE_cond   = _name_color_or_gray(GOOGLE_select)
INTEL_cond    = _name_color_or_gray(INTEL_select)

# Colour condition for each neural network group.
IMAGENET_cond = _name_color_or_gray(IMAGENET_select)
MNIST_cond    = _name_color_or_gray(MNIST_select)
CIFAR_cond    = _name_color_or_gray(CIFAR_select)
MASKRCNN_cond = _name_color_or_gray(MASKRCNN_select)
GNMT_cond     = _name_color_or_gray(GNMT_select)

# One line chart per group: each call pairs a data subset with its colour
# condition and attaches the matching checkbox selection.
# Hardware platform groups:
FPGA_chart     = line_chart_w_checkbox(FPGA_data,     FPGA_cond,    FPGA_select)
NVIDIA_chart   = line_chart_w_checkbox(NVIDIA_data,   NVIDIA_cond,  NVIDIA_select)
GOOGLE_chart   = line_chart_w_checkbox(GOOGLE_data,   GOOGLE_cond,  GOOGLE_select)                         
INTEL_chart    = line_chart_w_checkbox(INTEL_data,    INTEL_cond,   INTEL_select)

# Neural network groups:
IMAGENET_chart = line_chart_w_checkbox(IMAGENET_data, IMAGENET_cond, IMAGENET_select)
MNIST_chart    = line_chart_w_checkbox(MNIST_data,    MNIST_cond,    MNIST_select)
CIFAR_chart    = line_chart_w_checkbox(CIFAR_data,    CIFAR_cond,    CIFAR_select)
MASKRCNN_chart = line_chart_w_checkbox(MASKRCNN_data, MASKRCNN_cond, MASKRCNN_select)
GNMT_chart     = line_chart_w_checkbox(GNMT_data,     GNMT_cond,     GNMT_select)

#--------------------------------------------------------------------------------------------------
# On-plot name labels that follow the mouse.
# Adapted from https://stackoverflow.com/questions/53287928/tooltips-in-altair-line-charts
# Step 1: base line chart. It has no data of its own (the data is supplied to alt.layer
# below) and is not layered directly; it only serves as the template for `text` in step 4.
lines = alt.Chart().mark_line(clip=True).interactive().encode(
        alt.X('arith_intens:Q'), 
        alt.Y('performance:Q'),
        alt.Color('Name:N', legend=alt.Legend(columns=2))
)

# Step 2: Selection that chooses the nearest point on mouseover, keyed on the arith_intens field
nearest = alt.selection(type='single', nearest=True, on='mouseover', fields=['arith_intens']) #to leave suggestions on, just replace arith_intens with anything else


# Step 3: Fully transparent points across the chart; they carry the `nearest` selection
# so the mouse position can be resolved to a data point.
selectors = alt.Chart().mark_point(clip=True).encode(
            alt.X('arith_intens:Q'), 
            alt.Y('performance:Q'),
            opacity=alt.value(0),
).add_selection(nearest)

# Step 4: Text marks derived from `lines`: show the Name where the `nearest` selection
# matches, otherwise a single blank.
# NOTE: the name `text` is rebound by later cells in this notebook.
text = (lines).mark_text(align='left', dx=3, dy=-3,clip=True).encode(  text=alt.condition(nearest, 'Name:N', alt.value(' ')))

# Layer everything together: the nine group charts, the invisible selector points and the labels.
# The layer's `data` is the full merged frame; the group charts were built with their own subsets.
alt.layer(FPGA_chart + NVIDIA_chart + GOOGLE_chart + INTEL_chart + IMAGENET_chart + MNIST_chart + CIFAR_chart + MASKRCNN_chart + GNMT_chart, selectors, text, data=dataframe, width=700, height=500)
#---------------------------------------------------------------------------------------------------

# HEATMAPS

In [20]:
#hide
## Reading csv file and converting data to (Neural network, Platform, Value)
# NOTE: `df` is rebound here; from this point on it holds the GOPperNN table,
# not the hardware platform table loaded at the top of the notebook.
df = pd.read_csv('data/GOPperNN.csv')
pd.set_option('display.max_rows', None)

# ----- Long-format frame: one row per (platform, model) pair -----
# NaN is the only value that differs from itself, so this keeps the non-NaN platform entries.
cleanedList = [platform for platform in df.platform if platform == platform]

# Grids with one row per platform and one column per model.
x, y = np.meshgrid(df.model, cleanedList)
gop_frame = np.meshgrid(df.gop_frame, cleanedList)[0]

# Flatten each grid into a column; gop_frame is an auxiliary column dropped below.
source = pd.DataFrame({
    'x': x.ravel(),
    'y': y.ravel(),
    'gop_frame': gop_frame.ravel(),
})

# ----- Auxiliary column top_second -----
# Every column whose name contains 'top_second' is stacked end to end, in column order.
# This assumes those columns come in the same order as the platforms in cleanedList
# and have one entry per model -- TODO confirm against the CSV layout.
columns = list(df)
tops_second = [df[column] for column in columns if 'top_second' in column]
source['top_second'] = pd.concat(tops_second, ignore_index=True)

# ----- Final column: values = top_second * 1000 / gop_frame -----
source['values'] = source.top_second * 1000 / source.gop_frame

# Drop the auxiliary columns and round to whole numbers.
source = source.drop(columns=['gop_frame', 'top_second']).round(0)

# ----- One frame per dataset, chosen by model name -----
df_imagenet = source[source['x'].str.contains('GoogleNetv|MobileNetv1|ResNet50|EfficientNet')]
df_cifar10 = source[source['x'].str.contains('CNV')]
df_MNIST = source[source['x'].str.contains('MLP')]

# ----- Persist the per-dataset frames as CSV -----
for frame, target in (
    (df_imagenet, 'data/processed_csv/imagenet_heatmap.csv'),
    (df_cifar10, 'data/processed_csv/cifar10_heatmap.csv'),
    (df_MNIST, 'data/processed_csv/mnist_heatmap.csv'),
):
    frame.to_csv(target, index=False)

In [None]:
#hide
# Heatmap  with all models together
middleOfScale = 10000

# Hover selection: follows the mark nearest to the mouse.
single_nearest = alt.selection_single(on='mouseover', nearest=True)

# Log-scaled colour for the cell value. The same object is also bound to `color`.
color = alt.Color(
    'values:Q',
    title='Input/second',
    scale=alt.Scale(type='log', scheme='lightmulti'),
)
color_selection = color

# NOTE(review): 'Hardware Platfroms' is probably a typo for 'Platforms'. It is kept
# identical in both layers on purpose -- layers with different axis titles would be
# rendered with a merged title.

# Rectangle layer: one cell per (model, platform) pair.
heatmap = (
    alt.Chart(source, width=1300, height=350)
    .mark_rect(stroke='black', strokeWidth=1, invalid=None)
    .encode(
        alt.X('x:O', title='Models'),
        alt.Y('y:O', title='Hardware Platfroms'),
        color=alt.condition(single_nearest, alt.value('lightgrey'), color_selection),
        tooltip=[
            alt.Tooltip('values:Q', title='Input/sec'),
            alt.Tooltip('x:N', title='Model'),
            alt.Tooltip('y:N', title='Hardware Platform'),
        ],
    )
    .properties(title='Performance predictions for all models')
    .add_selection(single_nearest)
)

# Label layer: the value printed in each cell, black when it is above 1 and white otherwise.
label_color = alt.condition(
    (alt.datum.values > 1),
    alt.value('black'),
    alt.value('white'),
)
text = alt.Chart(source).mark_text(color='white').encode(
    alt.X('x:O', title='Models'),
    alt.Y('y:O', title='Hardware Platfroms'),
    text=alt.Text('values:Q', format='.0f'),
    color=label_color,
    tooltip=[
        alt.Tooltip('values:Q', title='Input/sec', format='.3f'),
        alt.Tooltip('x:N', title='Model'),
        alt.Tooltip('y:N', title='Hardware Platform'),
    ],
)

# Same as alt.layer(heatmap, text)
heatmap + text

In [None]:
#hide
#Function to create a heatmap
def heatmap(df, title, color):
    """Build the 700x350 rectangle layer of a model-by-platform heatmap.

    Parameters
    ----------
    df
        Data passed to ``alt.Chart``; the encodings read its ``x``, ``y``
        and ``values`` fields.
    title
        Chart title.
    color
        Fixed colour value used for the marks that satisfy the
        ``single_nearest`` selection; all other marks use
        ``color_selection``.

    Returns
    -------
    The configured Altair chart, carrying the ``single_nearest`` selection.

    Notes
    -----
    ``single_nearest`` and ``color_selection`` are module-level names looked
    up when the function is called, so they must be defined beforehand.
    NOTE(review): the y-axis title 'Hardware Platfroms' is probably a typo;
    it matches the title used by ``text`` and the two should change together.
    """
    hover_tooltip = [
        alt.Tooltip('values:Q', title='Input/sec'),
        alt.Tooltip('x:N', title='Model'),
        alt.Tooltip('y:N', title='Hardware Platform'),
    ]
    cell_color = alt.condition(single_nearest, alt.value(color), color_selection)
    tiles = alt.Chart(df, width=700, height=350).mark_rect(
        stroke='black', strokeWidth=1, invalid=None
    )
    return (
        tiles.encode(
            alt.X('x:O', title='Models'),
            alt.Y('y:O', title='Hardware Platfroms'),
            color=cell_color,
            tooltip=hover_tooltip,
        )
        .properties(title=title)
        .add_selection(single_nearest)
    )

In [None]:
#hide
#Function to create a text to sum with heatmap
def text(df, color_condition):
    """Build the text layer that prints each cell's value on a heatmap.

    Parameters
    ----------
    df
        Data passed to ``alt.Chart``; the encodings read its ``x``, ``y``
        and ``values`` fields.
    color_condition
        Value used for the ``color`` encoding of the labels.

    Returns
    -------
    The configured Altair chart. Values are formatted with '.0f' in both
    the label and the tooltip.

    Notes
    -----
    NOTE(review): the y-axis title 'Hardware Platfroms' is probably a typo;
    it matches the title used by ``heatmap`` and the two should change together.
    """
    label_tooltip = [
        alt.Tooltip('values:Q', format='.0f', title='Input/sec'),
        alt.Tooltip('x:N', title='Model'),
        alt.Tooltip('y:N', title='Hardware Platform'),
    ]
    labels = alt.Chart(df).mark_text(color='white')
    return labels.encode(
        alt.X('x:O', title='Models'),
        alt.Y('y:O', title='Hardware Platfroms'),
        text=alt.Text('values:Q', format='.0f'),
        color=color_condition,
        tooltip=label_tooltip,
    )

# Heatmap for Imagenet classification

In [None]:
#hide_input
# heatmap() reads the module-level names `single_nearest` and `color_selection`
# when it is called, so each section sets them right before building its chart.
# `middleOfScale` is assigned in each section but not read by any visible cell.

#-----IMAGENET plot--------
middleOfScale = 1000
# Own hover selection, like the CIFAR 10 and MNIST sections below. Without this line
# the section would reuse whatever `single_nearest` an earlier cell left behind.
single_nearest = alt.selection_single(on='mouseover', nearest=True)
color_selection = alt.Color('values:Q', title = 'Input/second', scale=alt.Scale(type='log', scheme = 'lightmulti'))
# Labels are black where the value is above 1 and white otherwise.
color_condition = alt.condition((alt.datum.values > 1), alt.value('black'), alt.value('white'))

ImagenetHeatmap = heatmap(df_imagenet, 'Performance predictions for ImageNet', 'lightgrey')
text_i = text(df_imagenet, color_condition)
ImagenetHeatmap = ImagenetHeatmap + text_i

#----CIFAR 10 plot------
middleOfScale = 50000
single_nearest = alt.selection_single(on='mouseover', nearest=True)
color_selection = alt.Color('values:Q', title= 'Input/second',scale=alt.Scale(type='log', scheme='lightmulti'))
color_condition=alt.condition(alt.datum.values > 1, alt.value('black'), alt.value('white'))

Cifar10Heatmap = heatmap(df_cifar10, 'Performance predictions for CIFAR 10', 'pink')
text_c = text(df_cifar10, color_condition) 
Cifar10Heatmap = Cifar10Heatmap + text_c

#----MNIST plot------
middleOfScale = 50000
single_nearest = alt.selection_single(on='mouseover', nearest=True)
color_selection = alt.Color('values:Q', title= 'Input/second',scale=alt.Scale(type='log', scheme='lightmulti'))
color_condition = alt.condition( alt.datum.values > 1, alt.value('black'), alt.value('white'))

MMNIST_Heatmap = heatmap(df_MNIST, 'Performance predictions for MNIST', 'red')
text_m = text(df_MNIST, color_condition) 
MMNIST_Heatmap = MMNIST_Heatmap + text_m


# Stack the three heatmaps vertically; each keeps its own colour scale.
alt.vconcat(ImagenetHeatmap, Cifar10Heatmap, MMNIST_Heatmap).resolve_scale(color='independent')