# Notebook to process and clean CSV files

In [6]:
import pandas as pd
import numpy as np
import altair as alt
import csv

# Performance Predictions for MNIST, CIFAR-10 and ImageNet - Heatmaps
## Read CSV > process data > save to another CSV

In [5]:
#hide
## Read the raw predictions and reshape them to long form: (Neural network, Platform, Value)
df = pd.read_csv('data/performance_predictions_imagenet_mnist_cifar.csv')
pd.set_option('display.max_rows', None)  # display every row of any frame shown afterwards

#----- Long-form frame with three columns: x (model), y (platform), gop_frame
# `p == p` is False only for NaN, so this keeps the platform entries that are not NaN.
cleanedList = [p for p in df.platform if p == p]

# meshgrid returns grids of shape (n_platforms, n_models); ravel() flattens them row by row,
# so the models cycle fastest and each platform is repeated once per model.
x, y = np.meshgrid(df.model, cleanedList)
# Index [0] instead of unpacking into `_`, which would overwrite IPython's last-output variable.
gop_frame = np.meshgrid(df.gop_frame, cleanedList)[0]

source = pd.DataFrame({
    'x': x.ravel(),
    'y': y.ravel(),
    'gop_frame': gop_frame.ravel(),  # auxiliary column, dropped below
})

#--- Auxiliary column top_second: every df column whose name contains 'top_second', stacked end to end
tops_second = [df[name] for name in df.columns if 'top_second' in name]
# NOTE(review): the assignment aligns on the row index, so this presumably relies on the
# top_second columns appearing in the same order as the platforms - confirm against the CSV layout.
source['top_second'] = pd.concat(tops_second, ignore_index=True)

#------ Value column: top_second * 1000 / gop_frame -----------
source['values'] = source.top_second * 1000 / source.gop_frame

#--- Drop the auxiliary columns and round the numeric values to whole numbers ----
source = source.drop(columns=['gop_frame', 'top_second']).round(0)

# Split by dataset, using the model name stored in column x.
# na=False treats a missing model name as "no match" instead of putting NaN into the boolean mask,
# which would make the row selection raise.
df_imagenet = source[source['x'].str.contains('GoogleNetv|MobileNetv1|ResNet50|EfficientNet', na=False)]
df_cifar10 = source[source['x'].str.contains('CNV', na=False)]
df_MNIST = source[source['x'].str.contains('MLP', na=False)]

# Save the per-dataset frames and the combined frame (row index not written)
df_imagenet.to_csv('data/processed_csv/performance_prediction_imagenet.csv', index=False)
df_cifar10.to_csv('data/processed_csv/performance_prediction_cifar10.csv', index=False)
df_MNIST.to_csv('data/processed_csv/performance_prediction_mnist.csv', index=False)
source.to_csv('data/processed_csv/performance_prediction_imagenet_mnist_cifar10.csv', index=False)

# --------------------------------------------------
# --------------------Rooflines----------------
# --------------------------------------------------

In [7]:
#hide
## Load the hardware-platform table and the neural-network topology table
# NOTE(review): these paths start with 'Data/' while other cells in this notebook use 'data/';
# on a case-sensitive filesystem only one spelling can match - confirm the directory name.
data = pd.read_csv('Data/peakPerfBandHardPlatf.csv')  # comma-separated (the read_csv default)
df = pd.DataFrame(data)
df_topology = pd.read_csv('Data/topology_details.csv')

In [8]:
## Arithmetic intensity (x axis) of each NN, computed from 'Fwd Ops' and 'Total Params'
n_bytes = 1  # multiplier applied to 'Total Params' in the intensity formula


def calc_arith(operations, params, n_bytes):
    """Return operations / (params * n_bytes).

    Works element-wise, so it accepts scalars as well as whole DataFrame columns.
    """
    return operations / (params * n_bytes)


# Computed for the whole column at once; unlike a row-by-row loop this also creates
# the 'arith_intens' column when df_topology has no rows.
df_topology['arith_intens'] = calc_arith(df_topology['Fwd Ops'], df_topology['Total Params'], n_bytes)

# Stack four copies of the table (the next cell assigns a different performance value to each
# copy so every network can be drawn as a vertical line), then drop the two input columns,
# which are no longer needed.
# NOTE: this cell is not re-runnable without reloading df_topology, because the input columns are dropped.
df_topology = pd.concat([df_topology] * 4).drop(columns=['Total Params', 'Fwd Ops'])

In [9]:
## Give the NN rows a performance value so each network can be plotted as a vertical line
# The rows are split into four equal consecutive runs that receive, in order,
# 100, 25, 75 and 0.000001.
quarter = round((len(df_topology.index)) / 4)
performance_levels = []
for level in (100, 25, 75, 0.000001):
    performance_levels.extend([level] * quarter)
df_topology['performance'] = performance_levels

In [10]:
## Roofline points (y axis) for each hardware platform in df, collected in `dataframe`
#--------------------------------Calculating the values to plot for the roofline model-----------
maxX = 160000
x_axis = np.arange(0.1, maxX, 1)  # candidate arithmetic intensities: 0.1, 1.1, 2.1, ... (all below maxX)

# Rows are gathered in a plain list and converted to a frame once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every step.
roofline_rows = []
for name, bandwidth, peak in zip(df['Name'], df['Bandwidth'], df['Peak_Performance']):
    # First point of the sloped segment: intensity 1, performance equal to the bandwidth.
    roofline_rows.append((name, 1, bandwidth))

    # Knee: the first grid point at which bandwidth * intensity exceeds the peak performance.
    above_peak = bandwidth * x_axis > peak
    if above_peak.any():
        knee = x_axis[above_peak.argmax()]  # argmax of a boolean mask is the index of the first True
        roofline_rows.append((name, knee, peak))
        roofline_rows.append((name, maxX, peak))  # flat segment out to the right edge of the axis
    # If the grid never exceeds the peak, only the first point is recorded for this platform.

dataframe = pd.DataFrame(roofline_rows, columns=['Name', 'arith_intens', 'performance'])

In [11]:
## Stack the hardware roofline rows and the neural-network rows into one frame
# Row labels of both inputs are kept as they are (no re-indexing).
frames_to_merge = [dataframe, df_topology]
dataframe = pd.concat(frames_to_merge)

In [None]:
# Write the merged roofline table to disk; the row index is not written.
dataframe.to_csv(
    'data/processed_csv/rooflines_hardware_neural_networks.csv',
    index=False,
)