In [153]:
import subprocess as sp
import numpy as np
import pandas as pd
from io import StringIO
import os
import re
import shutil

In [154]:
#input and output dirs
datadirs = ["../data"]
outputdir = "."

# Functions

In [155]:
selectkeys = ['Kernel Name', 'Metric Name', 'Metric Type']
resultkeys = ['Kernel Name', 'Calls']

def merge_results(df, name):
    if name+'_x' and name+'_y' in df.columns:
        df[name] = df[name+'_x'] + df[name+'_y']
        del df[name+'_x']
        del df[name+'_y']
        
def transpose_frame(df_results, df_metrics):    
    tc_peak_perf_flops = 125*10**12

    # Cleanup: group metric values
    metricdf = df_metrics.copy()
    metricdf = metricdf.groupby(selectkeys).sum().reset_index()
    metricdf = metricdf[['Kernel Name', 'Calls', 'Metric Name', 'Metric Type', 'Metric Value']]
    metricdf.sort_values(by=resultkeys)
    
    # Raise if data not consistent
    if df_results.empty:
        df_results = metricdf[resultkeys].drop_duplicates().reset_index(drop=True).sort_values(by=resultkeys).copy()
        #display(df_results)
    else:
        tmpMetricdf = metricdf[resultkeys].drop_duplicates().reset_index(drop=True).sort_values(by=resultkeys).copy()
        tmpResultdf = df_results[resultkeys].drop_duplicates().reset_index(drop=True).sort_values(by=resultkeys).copy()
        if not tmpMetricdf.equals(tmpResultdf):
            print("\n##### Data in the current DF")
            df = tmpMetricdf.merge(tmpResultdf, how = 'outer' ,indicator=True).loc[lambda x : x['_merge']=='left_only']
            print(df)
            print("##### Data in the output DF")
            df = tmpResultdf.merge(tmpMetricdf, how = 'outer' ,indicator=True).loc[lambda x : x['_merge']=='left_only']
            print(df)
            raise ValueError("Data not consistent")

    metriclist = metricdf['Metric Name'].unique()
    

    ####### Get number of FLOPs
    
    ### FMA FLOPs = number of FMA instructions x 2
    metricdf.loc[metricdf["Metric Name"].str.contains("fma_pred_on"), ["Metric Value"]] *= 2
    
    
    ### FP32 FLOPs
    metrics = ['smsp__sass_thread_inst_executed_op_fadd_pred_on',
               'smsp__sass_thread_inst_executed_op_ffma_pred_on',
               'smsp__sass_thread_inst_executed_op_fmul_pred_on']
    if any(m in metriclist for m in metrics):
        tmpdf = metricdf.loc[ metricdf["Metric Name"].isin(metrics), resultkeys+["Metric Value"] ].copy()
        metricname = 'FP32 FLOPs'
        tmpdf = tmpdf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        # Merge current df with the result df
        df_results = df_results.merge(tmpdf, on=resultkeys, how="outer")
        # Sum up if exits and remove duplicates
        merge_results(df_results, metricname)
        #print(df_results)
    
    
    ### FP16 FLOPs
    metrics = ['smsp__sass_thread_inst_executed_op_hadd_pred_on',
               'smsp__sass_thread_inst_executed_op_hfma_pred_on',
               'smsp__sass_thread_inst_executed_op_hmul_pred_on']
    if any(m in metriclist for m in metrics):
        tmpdf = metricdf.loc[ metricdf["Metric Name"].isin(metrics), resultkeys+["Metric Value"] ].copy()
        metricname = 'FP16 FLOPs'
        tmpdf = tmpdf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        # Merge current df with the result df
        df_results = df_results.merge(tmpdf, on=resultkeys, how="outer")
        # Sum up if exits and remove duplicates
        merge_results(df_results, metricname)
        #print(df_results)
    
    
    ### TC FLOP Rates
    if any("tensor_op_hmma.avg.pct_of_peak" in m for m in metriclist):
        tmpdf = metricdf.loc[ metricdf["Metric Name"].str.contains("tensor_op_hmma.avg.pct_of_peak"), resultkeys+["Metric Value"] ].reset_index().copy()
        tmpdf["Utilization"] = 0.01 * tmpdf["Metric Value"] / tmpdf['Calls']
        metricname = "TC FLOP Rates"
        tmpdf[metricname] = tc_peak_perf_flops * tmpdf["Utilization"]
        # merge
        df_results = df_results.merge(tmpdf[resultkeys+[metricname]], on=resultkeys, how="outer")
        merge_results(df_results, metricname)
        #print(df_results)
    
    ### Total FLOPs
    #metricdf["FLOPs Avg"] = metricdf["FP32 FLOPs Avg"] + metricdf["FP16 FLOPs Avg"] + metricdf["TC FLOPs Avg"] #+ metricdf["FP64 FLOPs"]

    
    ### FLOPs fractions
    #metricdf["FP64 FLOPs Fraction"] = metricdf["FP64 FLOPs"]/metricdf["FLOPs"]
    #metricdf["FP32 FLOPs Fraction Avg"] = metricdf["FP32 FLOPs Avg"]/metricdf["FLOPs Avg"]
    #metricdf["FP16 FLOPs Fraction Avg"] = metricdf["FP16 FLOPs Avg"]/metricdf["FLOPs Avg"]
    #metricdf["TC FLOPs Fraction Avg"]   = metricdf["TC FLOPs Avg"]/metricdf["FLOPs Avg"]
    #print(metricdf)


    ####### Get timing information

    ### CUDA Time
    if any("smsp__cycles_elapsed" in m for m in metriclist):
        # get cycles
        metricname = "CUDA Cycles"
        cyclesdf = metricdf.loc[(metricdf["Metric Name"]=="smsp__cycles_elapsed") & (metricdf["Metric Type"]=="total"),
                               resultkeys+["Metric Value"]].reset_index(drop=True).rename(columns={"Metric Value": metricname}).copy()
        df_results = df_results.merge(cyclesdf, on=resultkeys, how="outer")
        # get rates
        metricname = "CUDA Rates"
        ratesdf = metricdf.loc[(metricdf["Metric Name"]=="smsp__cycles_elapsed") & (metricdf["Metric Type"]=="rate"),
                               resultkeys+["Metric Value"]].reset_index(drop=True).rename(columns={"Metric Value": metricname}).copy()        
        df_results = df_results.merge(ratesdf[resultkeys+[metricname]], on=resultkeys, how="outer")
        #print(df_results)
    
    
    ### Tensor Core Time
    if any("smsp__pipe_tensor_op_hmma_cycles_active" in m for m in metriclist):
        # get cycles
        metricname = "TC Cycles"
        cyclesdf = metricdf.loc[(metricdf["Metric Name"]=="smsp__pipe_tensor_op_hmma_cycles_active") & (metricdf["Metric Type"]=="total"),
                               resultkeys+["Metric Value"]].reset_index(drop=True).rename(columns={"Metric Value": metricname}).copy()
        df_results = df_results.merge(cyclesdf, on=resultkeys, how="outer")
        # get rates
        metricname = "TC Rates"
        ratesdf = metricdf.loc[(metricdf["Metric Name"]=="smsp__pipe_tensor_op_hmma_cycles_active") & (metricdf["Metric Type"]=="rate"),
                               resultkeys+["Metric Value"]].reset_index(drop=True).rename(columns={"Metric Value": metricname}).copy()        
        df_results = df_results.merge(ratesdf[resultkeys+[metricname]], on=resultkeys, how="outer")
        #print(df_results)
        


    ####### Get number of bytes

    ### Shared transactions
    #project out
    if any("l1tex__data_pipe_lsu_wavefronts_mem_shared_op" in m for m in metriclist):
        shareddf = metricdf.loc[metricdf["Metric Name"].str.contains("l1tex__data_pipe_lsu_wavefronts_mem_shared_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'Shared Transactions'
        shareddf = shareddf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(shareddf, on=resultkeys, how="outer")
        merge_results(df_results, metricname)

    ### L1 atomic transactions
    # project out
    metrics = ['l1tex__t_set_accesses_pipe_lsu_mem_global_op_atom',
               'l1tex__t_set_accesses_pipe_lsu_mem_global_op_red',
               'l1tex__t_set_accesses_pipe_tex_mem_surface_op_atom',
               'l1tex__t_set_accesses_pipe_tex_mem_surface_op_red']
    if any(m in metriclist for m in metrics):
        tmpdf = metricdf.loc[ metricdf["Metric Name"].isin(metrics), resultkeys+["Metric Value"] ].copy()
        metricname = 'L1 Atomic Transactions'
        tmpdf = tmpdf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        # Merge current df with the result df
        df_results = df_results.merge(tmpdf, on=resultkeys, how="outer")
        # Sum up if exits and remove duplicates
        merge_results(df_results, metricname)
        #print(df_results)
        
    ### Local transactions
    #project out
    if any("l1tex__t_sectors_pipe_lsu_mem_local_op" in m for m in metriclist):
        localdf = metricdf.loc[metricdf["Metric Name"].str.contains("l1tex__t_sectors_pipe_lsu_mem_local_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'Local Transactions'
        localdf = localdf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(localdf, on=resultkeys, how="outer")
            
    ### Global transactions
    #project out
    if any("l1tex__t_sectors_pipe_lsu_mem_global_op" in m for m in metriclist):
        globaldf = metricdf.loc[metricdf["Metric Name"].str.contains("l1tex__t_sectors_pipe_lsu_mem_global_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'Global Transactions'
        globaldf = globaldf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(globaldf, on=resultkeys, how="outer")
        
    
    ### L2 atomic & reduction
    metricdf.loc[(metricdf["Metric Name"].str.contains("lts__t_sectors_op")) & (metricdf["Metric Type"]=="total"), ["Metric Value"]] *= 2
    
    ### L2 transactions
    #project out
    if any("lts__t_sectors_op" in m for m in metriclist):
        l2df = metricdf.loc[metricdf["Metric Name"].str.contains("lts__t_sectors_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'L2 Transactions'
        l2df = l2df.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(l2df, on=resultkeys, how="outer")
        merge_results(df_results, metricname)
        
    
    ### DRAM transactions
    #project out
    if any("dram__sectors" in m for m in metriclist):
        dramdf = metricdf.loc[metricdf["Metric Name"].str.contains("dram__sectors"), resultkeys+["Metric Value"] ].copy()
        metricname = 'DRAM Transactions'
        dramdf = dramdf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        display(dramdf)
        df_results = df_results.merge(dramdf, on=resultkeys, how="outer")
        
        
    ### SYSMEM transactions
    #project out
    if any("lts__t_sectors_aperture_sysmem_op" in m for m in metriclist):
        sysmemdf = metricdf.loc[metricdf["Metric Name"].str.contains("lts__t_sectors_aperture_sysmem_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'SYSMEM Transactions'
        sysmemdf = sysmemdf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(sysmemdf, on=resultkeys, how="outer")
    
    
    return df_results
    
    ####### Clean up and return:
    del metricdf["Metric Value"]
    del metricdf["Metric Name"]
    del metricdf["Metric Type"]
    #del metricdf["Invocations"]
    metricdf.drop_duplicates(keep = 'first', inplace = True)
    

    ### Get performance
    metricdf["Performance GFlop/s"]      = metricdf["FLOPs Avg"]      / (metricdf["CUDA Time Avg"]*10**9)
    metricdf["FP32 Performance GFlop/s"] = metricdf["FP32 FLOPs Avg"] / (metricdf["CUDA Time Avg"]*10**9)
    metricdf["FP16 Performance GFlop/s"] = metricdf["FP16 FLOPs Avg"] / (metricdf["CUDA Time Avg"]*10**9)
    metricdf["TC Performance GFlop/s"]   = metricdf["TC FLOPs Avg"]   / (metricdf["TC Time Avg"]*10**9)

    
    ### Get AI
    # L1
    metricdf["L1 AI"]        = metricdf["FLOPs Avg"]      / metricdf["L1 Bytes Avg"]
    metricdf["FP32 L1 AI"]   = metricdf["FP32 FLOPs Avg"] / metricdf["L1 Bytes Avg"]
    metricdf["FP16 L1 AI"]   = metricdf["FP16 FLOPs Avg"] / metricdf["L1 Bytes Avg"]
    metricdf["TC L1 AI"]     = metricdf["TC FLOPs Avg"]   / metricdf["L1 Bytes Avg"]
    # L2
    metricdf["L2 AI"]        = metricdf["FLOPs Avg"]      / metricdf["L2 Bytes Avg"]
    metricdf["FP32 L2 AI"]   = metricdf["FP32 FLOPs Avg"] / metricdf["L2 Bytes Avg"]
    metricdf["FP16 L2 AI"]   = metricdf["FP16 FLOPs Avg"] / metricdf["L2 Bytes Avg"]
    metricdf["TC L2 AI"]     = metricdf["TC FLOPs Avg"]   / metricdf["L2 Bytes Avg"]
    # DRAM
    metricdf["DRAM AI"]      = metricdf["FLOPs Avg"]      / metricdf["DRAM Bytes Avg"]
    metricdf["FP32 DRAM AI"] = metricdf["FP32 FLOPs Avg"] / metricdf["DRAM Bytes Avg"]
    metricdf["FP16 DRAM AI"] = metricdf["FP16 FLOPs Avg"] / metricdf["DRAM Bytes Avg"]
    metricdf["TC DRAM AI"]   = metricdf["TC FLOPs Avg"]   / metricdf["DRAM Bytes Avg"]


    ### Cleanup
    metricdf.sort_values(by=selectkeys).reset_index(drop=True, inplace=True)
    #print(metricdf[['CUDA Time Avg', 'TC Time Avg']])
    
    return metricdf

# Import Data

In [156]:
#get all the files
files = []
for datadir in datadirs:
    files += [ os.path.join(datadir,x) for x in os.listdir(datadir) if ((os.path.splitext(x)[-1] == ".log"))]

#recs
records = []

#build feature list:
for path in files:
    
    #filename
    file = os.path.basename(path)
    
    #path
    path = os.path.dirname(path)
    
    #splitup
    splt = file.split(".")
    
    prefix = ".".join(splt[0:-1])
    
    #append to records
    records.append({"prefix": prefix, "file": os.path.join(path, file)})

#put in df
recorddf = pd.DataFrame(records).sort_values(["prefix"])
#with pd.option_context('display.max_rows', None, 'display.max_columns', None):
display(recorddf)

Unnamed: 0,prefix,file
2,cuda_time,../data/cuda_time.log
9,dram,../data/dram.log
0,ffma_flops,../data/ffma_flops.log
7,hp_flops,../data/hp_flops.log
4,l1_atom,../data/l1_atom.log
3,l1_red,../data/l1_red.log
6,shared,../data/shared.log
8,sp_flops,../data/sp_flops.log
1,tc_flops,../data/tc_flops.log
5,tc_time,../data/tc_time.log


In [157]:
profiledf = pd.DataFrame(columns=resultkeys)

#metrics
for fin in recorddf["file"]:
#for fin in ['../data/time.log']:
    #project frame
    metricdf = pd.read_csv(fin)
    
    #fuse read/write metrics together:
    unique_metrics = metricdf["Metric Name"].unique()
    unique_metrics = set([x.replace(".sum","").replace("_write","").replace("_read","").replace("_ld","").replace("_st","") for x in unique_metrics])
    #add the metric type
    metricdf["Metric Type"] = "total"
    metricdf["Calls"] = 1
    #read
    metricdf.loc[ metricdf[ "Metric Name" ].str.contains("_read"), "Metric Type" ] = "read"
    metricdf.loc[ metricdf[ "Metric Name" ].str.contains("_ld"), "Metric Type" ] = "read"
    #write
    metricdf.loc[ metricdf[ "Metric Name" ].str.contains("_write"), "Metric Type" ] = "write"
    metricdf.loc[ metricdf[ "Metric Name" ].str.contains("_st"), "Metric Type" ] = "write"
    #rate
    metricdf.loc[ metricdf[ "Metric Name" ].str.contains(".per_second"), "Metric Type" ] = "rate"

    for metric in unique_metrics:
        metricdf.loc[ metricdf[ "Metric Name"].str.startswith(metric), "Metric Name" ] = metric
    #cleanups
    tmpdf = metricdf[["Kernel Name", "Calls", "Metric Name", "Metric Type", "Metric Value"]]
    #print(tmpdf)

    #compute the profile
    profiledf = transpose_frame(profiledf,tmpdf)
    #df_profiles.append(profiledf)

Unnamed: 0,Kernel Name,Calls,DRAM Transactions
0,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x1...,2,4569197
1,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x6...,2,900782
2,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_256x1...,4,882535
3,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x1...,4,1280723
4,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x6...,1,22003005
...,...,...,...
91,volta_fp16_sgemm_fp16_32x32_sliced1x4_nt,1,14423539
92,volta_s884cudnn_fp16_128x128_ldg8_wgrad_exp_in...,1,20945976
93,volta_s884cudnn_fp16_128x128_ldg8_wgrad_idx_ex...,7,17649723
94,volta_s884cudnn_fp16_256x128_ldg8_wgrad_idx_ex...,5,184473192


In [158]:
display(profiledf)
#tmplist = ['Name', 'Invocations', 'Pass', 'L1 Transactions Avg', 'L2 Transactions Avg', 'DRAM Transactions Avg']
#display(profiledf[tmplist])

Unnamed: 0,Kernel Name,Calls,CUDA Cycles,CUDA Rates,DRAM Transactions,FP16 FLOPs,L1 Atomic Transactions,Shared Transactions,FP32 FLOPs,TC FLOP Rates,TC Cycles,TC Rates
0,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x1...,2,7.619451e+08,8.363713e+11,4569197,0,0,94010988,56623104,9.594375e+13,5.733089e+08,6.279286e+11
1,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x6...,2,8.214659e+07,8.306355e+11,900782,0,0,6712990,14155776,5.355625e+13,3.185050e+07,3.220133e+11
2,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_256x1...,4,1.461480e+08,1.667595e+12,882535,0,0,9782171,7077888,8.161563e+13,6.370099e+07,7.198852e+11
3,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x1...,4,1.804578e+08,1.663145e+12,1280723,0,540,10690909,13565952,5.822188e+13,6.370099e+07,5.886100e+11
4,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x6...,1,1.029495e+09,4.191045e+11,22003005,0,279,87788488,3465216,6.655000e+13,5.096079e+08,2.086477e+11
...,...,...,...,...,...,...,...,...,...,...,...,...
91,volta_fp16_sgemm_fp16_32x32_sliced1x4_nt,1,7.027400e+08,4.197608e+11,14423539,0,0,56761763,14497857536,0.000000e+00,0.000000e+00,0.000000e+00
92,volta_s884cudnn_fp16_128x128_ldg8_wgrad_exp_in...,1,7.401358e+08,4.191068e+11,20945976,0,0,93227925,188743680,9.648750e+13,5.662310e+08,3.179701e+11
93,volta_s884cudnn_fp16_128x128_ldg8_wgrad_idx_ex...,7,1.361048e+09,2.925763e+12,17649723,0,0,156792337,283115520,8.827321e+13,9.555149e+08,1.898086e+12
94,volta_s884cudnn_fp16_256x128_ldg8_wgrad_idx_ex...,5,1.628665e+10,2.096957e+12,184473192,0,0,2007529644,132120576,1.126075e+14,1.350461e+10,1.686956e+12


# Compute AI Results

In [159]:
### CUDA Time
profiledf['CUDA Time'] = profiledf['CUDA Cycles'] / profiledf['CUDA Rates']
del profiledf['CUDA Cycles']
del profiledf['CUDA Rates']

### TC Time
profiledf['TC Time'] = profiledf['TC Cycles'] / profiledf['TC Rates']
profiledf['TC Time'] = profiledf['TC Time']
del profiledf['TC Cycles']
del profiledf['TC Rates']

### TC FLOPs
profiledf['TC FLOPs'] = profiledf['TC FLOP Rates'] * profiledf['TC Time']
del profiledf['TC FLOP Rates']

#df = profiledf[profiledf["FP16 FLOPs"]!=0]
#print(df)

### DRAM Bytes
profiledf['DRAM Bytes'] = profiledf['DRAM Transactions'] * 32.
del profiledf['DRAM Transactions']

profiledf = profiledf.fillna(0.)
print("Total invocations: {}".format(profiledf['Calls'].sum()))
display(profiledf)

Total invocations: 3957


Unnamed: 0,Kernel Name,Calls,FP16 FLOPs,L1 Atomic Transactions,Shared Transactions,FP32 FLOPs,CUDA Time,TC Time,TC FLOPs,DRAM Bytes
0,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x1...,2,0,0,94010988,56623104,0.000911,0.000913,8.759819e+10,1.462143e+08
1,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x6...,2,0,0,6712990,14155776,0.000099,0.000099,5.297276e+09,2.882502e+07
2,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_256x1...,4,0,0,9782171,7077888,0.000088,0.000088,7.221980e+09,2.824112e+07
3,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x1...,4,0,540,10690909,13565952,0.000109,0.000108,6.300931e+09,4.098314e+07
4,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x6...,1,0,279,87788488,3465216,0.002456,0.002442,1.625438e+11,7.040962e+08
...,...,...,...,...,...,...,...,...,...,...
91,volta_fp16_sgemm_fp16_32x32_sliced1x4_nt,1,0,0,56761763,14497857536,0.001674,0.000000,0.000000e+00,4.615532e+08
92,volta_s884cudnn_fp16_128x128_ldg8_wgrad_exp_in...,1,0,0,93227925,188743680,0.001766,0.001781,1.718219e+11,6.702712e+08
93,volta_s884cudnn_fp16_128x128_ldg8_wgrad_idx_ex...,7,0,0,156792337,283115520,0.000465,0.000503,4.443759e+10,5.647911e+08
94,volta_s884cudnn_fp16_256x128_ldg8_wgrad_idx_ex...,5,0,0,2007529644,132120576,0.007767,0.008005,9.014583e+11,5.903142e+09


In [160]:
#sum over all kernels
combinedselectkeys = ["Precision", "Network Name", "Data Format", "Input Shape", "Kernel Shape", "Stride Size", \
                     "Batch Size", "Pass"]

#copy profiledf
combineddf = profiledf.copy()

#get the aggregated performance, including all kernels:
#compute weights: multiply all measures by the number of invocations
weighted = True
if weighted:
    #first, get all the names of metrics which need to be weighted
    metrics = [x for x in combineddf.columns if "Avg" in x]
    for metric in metrics:
        combineddf[metric] *= combineddf["Invocations"]

#sum up
combineddf = combineddf.groupby(by=combinedselectkeys).sum()#.reset_index()


#the flop fractions need to be recomputed
combineddf["FP32 FLOPs Fraction Avg"] = combineddf["FP32 FLOPs Avg"] / combineddf["FLOPs Avg"]
combineddf["FP16 FLOPs Fraction Avg"] = combineddf["FP16 FLOPs Avg"] / combineddf["FLOPs Avg"]
combineddf["TC FLOPs Fraction Avg"]   = combineddf["TC FLOPs Avg"]   / combineddf["FLOPs Avg"]

### Get performance
combineddf["Performance GFlop/s"]      = combineddf["FLOPs Avg"]      / (combineddf["CUDA Time Avg"]*10**9)
combineddf["FP32 Performance GFlop/s"] = combineddf["FP32 FLOPs Avg"] / (combineddf["CUDA Time Avg"]*10**9)
combineddf["FP16 Performance GFlop/s"] = combineddf["FP16 FLOPs Avg"] / (combineddf["CUDA Time Avg"]*10**9)
combineddf["TC Performance GFlop/s"]   = combineddf["TC FLOPs Avg"]   / (combineddf["TC Time Avg"]*10**9)


### Get AI
# L1
combineddf["L1 AI"]        = combineddf["FLOPs Avg"]      / combineddf["L1 Bytes Avg"]
combineddf["FP32 L1 AI"]   = combineddf["FP32 FLOPs Avg"] / combineddf["L1 Bytes Avg"]
combineddf["FP16 L1 AI"]   = combineddf["FP16 FLOPs Avg"] / combineddf["L1 Bytes Avg"]
combineddf["TC L1 AI"]     = combineddf["TC FLOPs Avg"]   / combineddf["L1 Bytes Avg"]
# L2
combineddf["L2 AI"]        = combineddf["FLOPs Avg"]      / combineddf["L2 Bytes Avg"]
combineddf["FP32 L2 AI"]   = combineddf["FP32 FLOPs Avg"] / combineddf["L2 Bytes Avg"]
combineddf["FP16 L2 AI"]   = combineddf["FP16 FLOPs Avg"] / combineddf["L2 Bytes Avg"]
combineddf["TC L2 AI"]     = combineddf["TC FLOPs Avg"]   / combineddf["L2 Bytes Avg"]
# DRAM
combineddf["DRAM AI"]      = combineddf["FLOPs Avg"]      / combineddf["DRAM Bytes Avg"]
combineddf["FP32 DRAM AI"] = combineddf["FP32 FLOPs Avg"] / combineddf["DRAM Bytes Avg"]
combineddf["FP16 DRAM AI"] = combineddf["FP16 FLOPs Avg"] / combineddf["DRAM Bytes Avg"]
combineddf["TC DRAM AI"]   = combineddf["TC FLOPs Avg"]   / combineddf["DRAM Bytes Avg"]

combineddf.sort_values(by=combinedselectkeys).reset_index(drop=True, inplace=True)

KeyError: 'Precision'

In [None]:
#display(combineddf.columns)
display(combineddf[['CUDA Time Avg', 'FP32 FLOPs Avg', 'FP16 FLOPs Avg', 'TC FLOPs Avg', 'FLOPs Avg', 'L1 Transactions Avg', 'L2 Transactions Avg', 'DRAM Transactions Avg']])
# combineddf.keys
# combineddf.columns

# combineddf['Name']
# combineddf.iloc[0,1]
# combineddf.iloc[2]

In [None]:
#combineddf = combineddf.reset_index()
#seldf = combineddf[ (combineddf["Network Name"]=="ResNet50-2") &\
#           (combineddf["Input Shape"]=="112x112x64") &\
#           (combineddf["Precision"]=="FP32")]
#seldf
#combineddf[["FP32 L2 AI", "FP32 L1 AI"]]
combineddf[["L2 AI", "L1 AI", "DRAM AI", "FP32 DRAM AI", "FP16 DRAM AI"]]

# Export Data

In [None]:
profiledf.to_csv(os.path.join(outputdir,"full_profile.csv"))
combineddf.to_csv(os.path.join(outputdir,"combined_profile.csv"))