In [58]:
import subprocess as sp
import numpy as np
import pandas as pd
from io import StringIO
import os
import re
import shutil

from utils import *

In [59]:
#global parameters
cudadir = "/usr/common/software/cuda/10.1.243"

In [60]:
#input and output dirs
datadirs = ["../scripts/tf_cnn_kernels_2/runs/345569"]
#datadirs = ["../data/tf_2.0b/nsight"]
#outputdir = "./results/tf_2.0b/results_NHWC"

# Functions

In [61]:
def transpose_frame(df_times, df_metrics):
    #Copy the profile frame to make sure not to overwrite it and potentially read it in again if we screwed it up
    selectkeys = ["Precision", "Network Name", "Data Format", "Input Shape", "Kernel Shape", "Stride Size", "Batch Size", "Pass", "Name"]
    tc_peak_perf_flops = 125*10**12

    #just pick the gpu activities for now
    profiledf = df_times.copy()
    profiledf.sort_values(by=selectkeys,inplace=True)
    profiledf.reset_index(drop=True, inplace=True)

    #remove the calibration
    alignkeys = selectkeys[:-2]
    profiledf = profiledf.groupby(alignkeys).apply(lambda x: x[ (~x["Name"].isin(x.loc[x["Pass"].str.startswith("calibrate"), "Name"].values)) ])
    profiledf.reset_index(drop=True, inplace=True)
    
    #as metricdf use df_summary
    metricdf = df_metrics.copy()
    metricdf.sort_values(by=selectkeys,inplace=True)
    metricdf.reset_index(drop=True, inplace=True)

    #now, get the AI-relevant stuff:
    #FLOPS 32
    metrics = ['smsp__sass_thread_inst_executed_op_fadd_pred_on',
               'smsp__sass_thread_inst_executed_op_ffma_pred_on',
               'smsp__sass_thread_inst_executed_op_fmul_pred_on']
    tmpdf = metricdf.loc[ metricdf["Metric Name"].isin(metrics), selectkeys+["Metric Value"] ].copy()
    tmpdf = tmpdf.groupby(selectkeys).sum().reset_index().rename(columns={"Metric Value": "FP32 Flops Avg"})
    #add to timings
    print("\nName from Nsight Systems:\n", profiledf["Name"])
    print("\nName from Nsight Compute\n", tmpdf["Name"])
    profiledf = profiledf.merge(tmpdf[selectkeys+["FP32 Flops Avg"]], on=selectkeys, how="inner")
    
    return profiledf
    
    
    #monitor that: if that changes be warned
    numrows = profiledf.shape[0]

    #FLOPS 16 non-TC
    flopdf = metricdf[ metricdf["Metric Name"].str.contains("flop_count_hp") ].sort_values(selectkeys).rename(columns={"Avg": "FP16 non-TC Flops Avg"})
    #add to timings
    mergedf = profiledf.merge(flopdf[selectkeys+["FP16 non-TC Flops Avg"]], on=selectkeys, how="inner")

    #check
    if mergedf.shape[0] != numrows:
        #print(profiledf, flopdf)
        raise ValueError("Something went wrong, check consistency of inputs")
    else:
        profiledf = mergedf
    
    
    #FLOPS TC
    flopdf = metricdf[ metricdf["Metric Name"].str.contains("tensor_precision_fu_utilization") ].sort_values(selectkeys).rename(columns={"Avg": "TC Flops Avg"})
    tmpdf = flopdf.merge(profiledf, how="inner", on=selectkeys).sort_values(selectkeys)
    tmpdf["TC Flops Avg"] *= tc_peak_perf_flops/10. * tmpdf["Time Avg"]
    #add to timings
    mergedf = profiledf.merge(tmpdf[selectkeys+["TC Flops Avg"]], on=selectkeys, how="inner")

    #check
    if mergedf.shape[0] != numrows:
        raise ValueError("Something went wrong, check consistency of inputs")
    else:
        profiledf = mergedf

    
    #fill NA values here
    profiledf.fillna(0., inplace=True)

    #FLOPS FP16: add TC and non-TC FP16 flops together
    profiledf["FP16 Flops Avg"] = profiledf["TC Flops Avg"] + profiledf["FP16 non-TC Flops Avg"]

    #total flops
    profiledf["Flops Avg"] = profiledf["FP16 Flops Avg"] + profiledf["FP32 Flops Avg"]

    #flop fractions
    profiledf["TC Flops Fraction Avg"] = profiledf["TC Flops Avg"]/profiledf["Flops Avg"]
    profiledf["FP16 Flops Fraction Avg"] = profiledf["FP16 Flops Avg"]/profiledf["Flops Avg"]
    profiledf["FP16 non-TC Flops Fraction Avg"] = profiledf["FP16 non-TC Flops Avg"]/profiledf["Flops Avg"]
    profiledf["FP32 Flops Fraction Avg"] = profiledf["FP32 Flops Avg"]/profiledf["Flops Avg"]


    #shared
    #project out
    shareddf = metricdf[ metricdf["Metric Name"].str.contains("shared") ].sort_values(selectkeys)
    #get reads and writes
    sharedreadsdf = shareddf.loc[(shareddf["Metric Name"]=="shared_transactions") & (shareddf["Metric Mode"]=="read"), selectkeys+["Avg"]]
    sharedwritesdf = shareddf.loc[(shareddf["Metric Name"]=="shared_transactions") & (shareddf["Metric Mode"]=="write"), selectkeys+["Avg"]]
    #combine
    shareddf = sharedwritesdf.merge(sharedreadsdf, on=selectkeys, how="outer").fillna(0.)
    shareddf["Shared Transactions Avg"] = shareddf["Avg_x"] + shareddf["Avg_y"]
    #add to timings
    mergedf = profiledf.merge(shareddf[selectkeys+["Shared Transactions Avg"]], on=selectkeys, how="inner")

    #check
    if mergedf.shape[0] != numrows:
        #get the complement:
        print(profiledf[ ~profiledf.index.isin(mergedf.index) ])
        raise ValueError("Something went wrong, check consistency of inputs")
    else:
        profiledf = mergedf
    
    
    #atomic
    #project out
    atomicdf = metricdf[ metricdf["Metric Name"] == "atomic_transactions" ].sort_values(selectkeys)
    #get reads and writes
    atomicdf = atomicdf[selectkeys+["Avg"]].rename(columns={"Avg": "Atomic Transactions Avg"})
    #add to timings
    mergedf = profiledf.merge(atomicdf[selectkeys+["Atomic Transactions Avg"]], on=selectkeys, how="inner")
    
    #check
    if mergedf.shape[0] != numrows:
        #get the complement:
        print(profiledf[ ~profiledf.index.isin(mergedf.index) ])
        raise ValueError("Something went wrong, check consistency of inputs")
    else:
        profiledf = mergedf

    
    #L1
    #project out
    l1df = metricdf[ (metricdf["Metric Name"].str.contains("gst_")) | (metricdf["Metric Name"].str.contains("gld_")) ].sort_values(selectkeys)
    #get reads and writes
    l1readsdf = l1df.loc[(l1df["Metric Name"]=="gld_transactions"), selectkeys+["Avg"]]
    l1writesdf = l1df.loc[(l1df["Metric Name"]=="gst_transactions"), selectkeys+["Avg"]]
    #combine
    l1df = l1writesdf.merge(l1readsdf, on=selectkeys, how="outer").fillna(0.)
    l1df["L1 Transactions Avg"] = l1df["Avg_x"] + l1df["Avg_y"]
    #add to timings
    mergedf = profiledf.merge(l1df[selectkeys+["L1 Transactions Avg"]], on=selectkeys, how="inner")

    #check
    if mergedf.shape[0] != numrows:
        print(profiledf, l1df)
        raise ValueError("Something went wrong, check consistency of inputs")
    else:
        profiledf = mergedf

    
    #L2
    #project out
    l2df = metricdf[ metricdf["Metric Name"].str.contains("l2") ].sort_values(selectkeys)
    #get reads and writes
    l2readsdf = l2df.loc[(l2df["Metric Name"]=="l2_transactions") & (l2df["Metric Mode"]=="read"), selectkeys+["Avg"]]
    l2writesdf = l2df.loc[(l2df["Metric Name"]=="l2_transactions") & (l2df["Metric Mode"]=="write"), selectkeys+["Avg"]]
    #combine
    l2df = l2writesdf.merge(l2readsdf, on=selectkeys, how="outer").fillna(0.)
    l2df["L2 Transactions Avg"] = l2df["Avg_x"] + l2df["Avg_y"]
    #add to timings
    mergedf = profiledf.merge(l2df[selectkeys+["L2 Transactions Avg"]], on=selectkeys, how="inner")

    #check
    if mergedf.shape[0] != numrows:
        print(profiledf, l2df)
        raise ValueError("Something went wrong, check consistency of inputs")
    else:
        profiledf = mergedf
    
    
    #DRAM
    #project out
    dramdf = metricdf[ metricdf["Metric Name"].str.contains("dram") ].sort_values(selectkeys)
    #get reads and writes
    dramreadsdf = dramdf.loc[(dramdf["Metric Name"]=="dram_transactions") & (dramdf["Metric Mode"]=="read"), selectkeys+["Avg"]]
    dramwritesdf = dramdf.loc[(dramdf["Metric Name"]=="dram_transactions") & (dramdf["Metric Mode"]=="write"), selectkeys+["Avg"]]
    #combine
    dramdf = dramwritesdf.merge(dramreadsdf, on=selectkeys, how="outer").fillna(0.)
    dramdf["DRAM Transactions Avg"] = dramdf["Avg_x"] + dramdf["Avg_y"]
    #add to timings
    mergedf = profiledf.merge(dramdf[selectkeys+["DRAM Transactions Avg"]], on=selectkeys, how="inner")

    #check
    if mergedf.shape[0] != numrows:
        print(profiledf, dramdf)
        raise ValueError("Something went wrong, check consistency of inputs")
    else:
        profiledf = mergedf
    

    #SYSMEM
    #project out
    sysmemdf = metricdf[ metricdf["Metric Name"].str.contains("sysmem") ].sort_values(selectkeys)
    #get reads and writes
    sysmemreadsdf = sysmemdf.loc[(sysmemdf["Metric Name"]=="sysmem_transactions") & (sysmemdf["Metric Mode"]=="read"), selectkeys+["Avg"]]
    sysmemwritesdf = sysmemdf.loc[(sysmemdf["Metric Name"]=="sysmem_transactions") & (sysmemdf["Metric Mode"]=="write"), selectkeys+["Avg"]]
    #combine
    sysmemdf = sysmemwritesdf.merge(sysmemreadsdf, on=selectkeys, how="outer").fillna(0.)
    sysmemdf["Sysmem Transactions Avg"] = sysmemdf["Avg_x"] + sysmemdf["Avg_y"]
    #add to timings
    mergedf = profiledf.merge(sysmemdf[selectkeys+["Sysmem Transactions Avg"]], on=selectkeys, how="inner")

    #check
    if mergedf.shape[0] != numrows:
        print(profiledf, sysmemdf)
        raise ValueError("Something went wrong, check consistency of inputs")
    else:
        profiledf = mergedf
    

    #clean up and sort:
    profiledf.sort_values(selectkeys).reset_index(drop=True, inplace=True)

    #get performance first
    profiledf["Performance GFlop/s"] = profiledf["Flops Avg"]/(profiledf["Time Avg"]*10**9)
    profiledf["FP32 Performance GFlop/s"] = profiledf["FP32 Flops Avg"]/(profiledf["Time Avg"]*10**9)
    profiledf["FP16 Performance GFlop/s"] = profiledf["FP16 Flops Avg"]/(profiledf["Time Avg"]*10**9)
    profiledf["TC Performance GFlop/s"] = profiledf["TC Flops Avg"]/(profiledf["Time Avg"]*10**9)

    #get AI:
    #L1 is L1+shared
    profiledf["L1 AI"] = profiledf["Flops Avg"]/(32.*(profiledf["L1 Transactions Avg"]+profiledf["Shared Transactions Avg"]+profiledf["Atomic Transactions Avg"]))
    profiledf["FP32 L1 AI"] = profiledf["FP32 Flops Avg"]/(32.*(profiledf["L1 Transactions Avg"]+profiledf["Shared Transactions Avg"]+profiledf["Atomic Transactions Avg"]))
    profiledf["FP16 L1 AI"] = profiledf["FP16 Flops Avg"]/(32.*(profiledf["L1 Transactions Avg"]+profiledf["Shared Transactions Avg"]+profiledf["Atomic Transactions Avg"]))
    #L2
    profiledf["L2 AI"] = profiledf["Flops Avg"]/(32.*profiledf["L2 Transactions Avg"])
    profiledf["FP32 L2 AI"] = profiledf["FP32 Flops Avg"]/(32.*profiledf["L2 Transactions Avg"])
    profiledf["FP16 L2 AI"] = profiledf["FP16 Flops Avg"]/(32.*profiledf["L2 Transactions Avg"])
    #DRAM
    profiledf["DRAM AI"] = profiledf["Flops Avg"]/(32.*profiledf["DRAM Transactions Avg"])
    profiledf["FP32 DRAM AI"] = profiledf["FP32 Flops Avg"]/(32.*profiledf["DRAM Transactions Avg"])
    profiledf["FP16 DRAM AI"] = profiledf["FP16 Flops Avg"]/(32.*profiledf["DRAM Transactions Avg"])
    #Sysmem
    profiledf["Sysmem AI"] = profiledf["Flops Avg"]/(32.*profiledf["Sysmem Transactions Avg"])
    profiledf["FP32 Sysmem AI"] = profiledf["FP32 Flops Avg"]/(32.*profiledf["Sysmem Transactions Avg"])
    profiledf["FP16 Sysmem AI"] = profiledf["FP16 Flops Avg"]/(32.*profiledf["Sysmem Transactions Avg"])

    #sort results
    profiledf.sort_values(by=selectkeys).reset_index(drop=True, inplace=True)
    
    return profiledf

# Import Data

In [62]:
#get all the files
files = []
for datadir in datadirs:
    files += [ os.path.join(datadir,x) for x in os.listdir(datadir) if ((os.path.splitext(x)[-1] == ".nsight-cuprof-report")
                                                                        or (os.path.splitext(x)[-1] == ".sqlite"))]

#recs
records = []

#build feature list:
for path in files:
    
    #filename
    file = os.path.basename(path)
    
    #path
    path = os.path.dirname(path)
    
    #splitup
    splt = file.split(".")
    
    prefix = ".".join(splt[0:-2])
    
    #append to records
    records.append({"prefix": prefix, "file": os.path.join(path, file)})

#put in df
recorddf = pd.DataFrame(records).sort_values(["prefix"])

In [63]:
#sort by those keys:
sortkeys = ["Network Name", "Input Shape", "Kernel Shape", \
            "Batch Size", "Stride Size", "Data Format", "Pass", \
            "Precision", "Device", "Name", "Metric Name"]

#limit the input
#recorddf = recorddf[ recorddf["prefix"].str.startswith("profile.name_ResNet50-2.batchsize_16.inputshape_112x112x64.kernelshape_7x7x64x64.stride_2.dataformat_NHWC.fp32") ]

#group by prefixes and files
all_prefixes = set([x.split(".pass")[0] for x in recorddf["prefix"]])
all_passes = set([x.split(".pass_")[1].replace(".pass_","") for x in recorddf["prefix"].unique()])

#print(recorddf.values[0])

#metrics
df_profiles = []

for pref in all_prefixes:
    
    #set empty lists
    df_times = []
    df_timeline = []
    df_summary = []
    
    #print prefix
    #print(pref)
    
    #loop over passes
    df_times = []
    df_metrics = []
    for pas in all_passes:
        
        #project frame
        files = recorddf.loc[ recorddf["prefix"] == pref + ".pass_" + pas, "file" ].values
        
        #project the invididual files
        timefile = [x for x in files if x.endswith(".sqlite")][0]
        metricfile = [x for x in files if x.endswith(".nsight-cuprof-report")][0]
        
        #get the parameters from the filename
        parameters, _ = parse_filename(os.path.basename(metricfile))
        
        #metrics
        metricdf = import_nsight_metric(metricfile, cuda_dir=cudadir)
        for key in parameters:
                metricdf[key] = parameters[key]
        
        #fuse read/write metrics together:
        unique_metrics = metricdf["Metric Name"].unique()
        unique_metrics = set([x.replace(".sum","").replace("_write","").replace("_read","").replace("_ld","").replace("_st","") for x in unique_metrics])
        #add the metric type
        metricdf["Metric Type"] = "total"
        #read
        metricdf.loc[ metricdf[ "Metric Name" ].str.contains("_read"), "Metric Type" ] = "read"
        metricdf.loc[ metricdf[ "Metric Name" ].str.contains("_ld"), "Metric Type" ] = "read"
        #write
        metricdf.loc[ metricdf[ "Metric Name" ].str.contains("_write"), "Metric Type" ] = "write"
        metricdf.loc[ metricdf[ "Metric Name" ].str.contains("_st"), "Metric Type" ] = "write"
        
        for metric in unique_metrics:
            metricdf.loc[ metricdf[ "Metric Name"].str.startswith(metric), "Metric Name" ] = metric
            
        #import time
        timedf = import_nsight_overview(timefile)
        for key in parameters:
                timedf[key] = parameters[key]
                
        #append to DF:
        df_times.append(timedf)
        df_metrics.append(metricdf)
    
    timedf = pd.concat(df_times)
    metricdf = pd.concat(df_metrics)
    
    #compute the profile
    profiledf = transpose_frame(timedf, metricdf)
    #print(profiledf)
    break
        
    #    #loop over metrics
    #    for met in [x for x in all_metrics if x != "time"]:
    #        
    #        #filename
    #        file = selectdf.loc[ selectdf["metric"] == met, "file" ].values[0]
    #    
    #        #extract metric name
    #        parameters, metric = parse_filename(os.path.basename(file))
    #        metrics = metric.split("-")
    # 
    #        #import as timeline
    #        tmpdf = import_nvprof_metric(file, timeline=True, cuda_dir=cudadir)
    #        for key in parameters:
    #            tmpdf[key] = parameters[key]
    #    
    #        #replace "Idle (0)" with 0.:
    #        for metric in metrics:
    #            if metric=="tensor_precision_fu_utilization":
    #                tmpdf[metric] = tmpdf[metric].apply(lambda x: replace_tc_string(x))
    #
    #        #combine read and write metrics
    #        tmpdf = tmpdf.groupby([x for x in tmpdf.columns if x not in metrics]).apply(lambda x: combine_metrics(x, metrics)).reset_index()
    #        lev = [x for x in tmpdf.columns if x.startswith("level_")][0]
    #        del tmpdf[lev]
    #        df_timeline.append(tmpdf)
    # 
    #        #import as summary
    #        tmpdf = import_nvprof_metric(file, timeline=False, cuda_dir=cudadir).sort_values(by="Name").reset_index(drop=True)
    #        tmpdf["Metric Mode"] = "read" if "read" in metric else "write" if "write" in metric else "write" if "store" in metric else "read" if "load" in metric else "total"
    #        tmpdf["Metric Name"] = metric.replace("read","").replace("write","").replace("store","").replace("load","").replace("__","_")
    #        for key in parameters:
    #            tmpdf[key] = parameters[key]
    #        del tmpdf["Metric Description"]
    # 
    #        #replace "Idle (0)" with 0.:
    #        for metric in metrics:
    #            if metric=="tensor_precision_fu_utilization":
    #                tmpdf[ "Min" ] = tmpdf[ "Min" ].apply(lambda x: replace_tc_string(x))
    #                tmpdf[ "Max" ] = tmpdf[ "Max" ].apply(lambda x: replace_tc_string(x))
    #                tmpdf[ "Avg" ] = tmpdf[ "Avg" ].apply(lambda x: replace_tc_string(x))
    #        df_summary.append(tmpdf)
    #    
    #    #do time now
    #    file = selectdf.loc[ selectdf["metric"] == "time", "file" ].values[0]
    #    timedf, markerdf = import_nvprof_overview(file, cuda_dir=cudadir)
    #
    #    #extract metric name
    #    parameters, _ = parse_filename(os.path.basename(file))
    #    for key in parameters:
    #        timedf[key] = parameters[key]
    #    df_times.append(timedf)
        
    ##concat into frame
    #metricdf = pd.concat(df_summary, sort=True)
    #timedf = pd.concat(df_times, sort=True)
    
    ##transpose
    #profiledf = transpose_frame(timedf, metricdf)
    #df_profiles.append(profiledf)

#concat everything
#profiledf = pd.concat(df_profiles)


Name from Nsight Systems:
 0                          cudnnConvolutionBackwardData
1                        cudnnConvolutionBackwardFilter
2                               cudnnConvolutionForward
3     void Eigen::internal::EigenMetaKernel<Eigen::T...
4     void cudnn::detail::convolve_common_engine_flo...
5     void cudnn::detail::convolve_common_engine_flo...
6     void cudnn::detail::wgrad_alg0_engine_NHWC<__h...
7     void foldedNhwcToNhwcKernel<__half, __half, fl...
8     void nhwcToFoldedNhwcKernel<__half, __half, fl...
9     void tensorflow::functor::PadInputCustomKernel...
10    void tensorflow::functor::ShuffleInTensor3Simp...
11    void tensorflow::functor::ShuffleInTensor3Simp...
12                              cudnnConvolutionForward
13    void cudnn::detail::convolve_common_engine_flo...
14    void tensorflow::functor::PadInputCustomKernel...
15    void tensorflow::functor::ShuffleInTensor3Simp...
Name: Name, dtype: object

Name from Nsight Compute
 0                      

In [64]:
metricdf["Metric Name"].unique()

array(['dram__sectors', 'l1tex__t_sectors_pipe_lsu_mem_global_op',
       'l1tex__t_sectors_pipe_lsu_mem_local_op',
       'l1tex__t_set_accesses_pipe_lsu_mem_global_op_atom',
       'l1tex__t_set_accesses_pipe_lsu_mem_global_op_red',
       'l1tex__t_set_accesses_pipe_tex_mem_surface_op_atom',
       'l1tex__t_set_accesses_pipe_tex_mem_surface_op_red',
       'lts__t_sectors_aperture_sysmem_op', 'lts__t_sectors_op',
       'sm__inst_executed_pipe_tensor_op_hmma.avg.pct_of_peak_sustained_active',
       'smsp__inst_executed_op_shared',
       'smsp__sass_thread_inst_executed_op_fadd_pred_on',
       'smsp__sass_thread_inst_executed_op_ffma_pred_on',
       'smsp__sass_thread_inst_executed_op_fmul_pred_on',
       'smsp__sass_thread_inst_executed_op_hadd_pred_on',
       'smsp__sass_thread_inst_executed_op_hfma_pred_on',
       'smsp__sass_thread_inst_executed_op_hmul_pred_on'], dtype=object)

# Compute AI Results

In [65]:
#profiledf[ (profiledf["Network Name"]=="ResNet50-2") &\
#           (profiledf["Input Shape"]=="112x112x64") &\
#           (profiledf["Batch Size"]==16) &\
#           (profiledf["Precision"]=="FP32") &\
#           (profiledf["Stride Size"]==2) &\
#           (profiledf["Pass"]=="forward") &\
#           (profiledf["Kernel Shape"]=="7x7x64x64")
#         ]
profiledf

Unnamed: 0,Name,Time,Invocations,Time Avg,Network Name,Batch Size,Input Shape,Kernel Shape,Stride Size,Data Format,Pass,Precision,FP32 Flops Avg


In [66]:
#sum over all kernels
combinedselectkeys = ["Precision", "Network Name", "Data Format", "Input Shape", "Kernel Shape", "Stride Size", \
                     "Batch Size", "Pass"]

#copy profiledf
combineddf = profiledf.copy()

#get the aggregated performance, including all kernels:
#compute weights: multiply all measures by the number of invocations
weighted = True
if weighted:
    #first, get all the names of metrics which need to be weighted
    metrics = [x for x in combineddf.columns if "Avg" in x]
    for metric in metrics:
        combineddf[metric] *= combineddf["Calls"]
    
#sum up
combineddf = combineddf.groupby(by=combinedselectkeys).sum()

#the flop fractions need to be recomputed
combineddf["TC Flops Fraction Avg"] = combineddf["TC Flops Avg"]/combineddf["Flops Avg"]
combineddf["FP16 Flops Fraction Avg"] = combineddf["FP16 Flops Avg"]/combineddf["Flops Avg"]
combineddf["FP16 non-TC Flops Fraction Avg"] = combineddf["FP16 non-TC Flops Avg"]/combineddf["Flops Avg"]
combineddf["FP32 Flops Fraction Avg"] = combineddf["FP32 Flops Avg"]/combineddf["Flops Avg"]

#get performance first
combineddf["Performance GFlop/s"] = combineddf["Flops Avg"]/(combineddf["Time Avg"]*10**9)
combineddf["FP32 Performance GFlop/s"] = combineddf["FP32 Flops Avg"]/(combineddf["Time Avg"]*10**9)
combineddf["FP16 Performance GFlop/s"] = combineddf["FP16 Flops Avg"]/(combineddf["Time Avg"]*10**9)
combineddf["TC Performance GFlop/s"] = combineddf["TC Flops Avg"]/(combineddf["Time Avg"]*10**9)

#get AI:
#L1 is L1+shared
combineddf["L1 AI"] = combineddf["Flops Avg"]/(32.*(combineddf["L1 Transactions Avg"]+combineddf["Shared Transactions Avg"]+combineddf["Atomic Transactions Avg"]))
combineddf["FP32 L1 AI"] = combineddf["FP32 Flops Avg"]/(32.*(combineddf["L1 Transactions Avg"]+combineddf["Shared Transactions Avg"]+combineddf["Atomic Transactions Avg"]))
combineddf["FP16 L1 AI"] = combineddf["FP16 Flops Avg"]/(32.*(combineddf["L1 Transactions Avg"]+combineddf["Shared Transactions Avg"]+combineddf["Atomic Transactions Avg"]))
combineddf["TC L1 AI"] = combineddf["TC Flops Avg"]/(32.*(combineddf["L1 Transactions Avg"]+combineddf["Shared Transactions Avg"]+combineddf["Atomic Transactions Avg"]))
#L2
combineddf["L2 AI"] = combineddf["Flops Avg"]/(32.*combineddf["L2 Transactions Avg"])
combineddf["FP32 L2 AI"] = combineddf["FP32 Flops Avg"]/(32.*combineddf["L2 Transactions Avg"])
combineddf["FP16 L2 AI"] = combineddf["FP16 Flops Avg"]/(32.*combineddf["L2 Transactions Avg"])
combineddf["TC L2 AI"] = combineddf["TC Flops Avg"]/(32.*combineddf["L2 Transactions Avg"])
#DRAM
combineddf["DRAM AI"] = combineddf["Flops Avg"]/(32.*combineddf["DRAM Transactions Avg"])
combineddf["FP32 DRAM AI"] = combineddf["FP32 Flops Avg"]/(32.*combineddf["DRAM Transactions Avg"])
combineddf["FP16 DRAM AI"] = combineddf["FP16 Flops Avg"]/(32.*combineddf["DRAM Transactions Avg"])
combineddf["TC DRAM AI"] = combineddf["TC Flops Avg"]/(32.*combineddf["DRAM Transactions Avg"])
#Sysmem
combineddf["Sysmem AI"] = combineddf["Flops Avg"]/(32.*combineddf["Sysmem Transactions Avg"])
combineddf["FP32 Sysmem AI"] = combineddf["FP32 Flops Avg"]/(32.*combineddf["Sysmem Transactions Avg"])
combineddf["FP16 Sysmem AI"] = combineddf["FP16 Flops Avg"]/(32.*combineddf["Sysmem Transactions Avg"])
combineddf["TC Sysmem AI"] = combineddf["TC Flops Avg"]/(32.*combineddf["Sysmem Transactions Avg"])

#print
combineddf

KeyError: 'Calls'

In [None]:
display(combineddf)
# combineddf.keys
# combineddf.columns

# combineddf['Name']
# combineddf.iloc[0,1]
# combineddf.iloc[2]

In [None]:
#combineddf = combineddf.reset_index()
#seldf = combineddf[ (combineddf["Network Name"]=="ResNet50-2") &\
#           (combineddf["Input Shape"]=="112x112x64") &\
#           (combineddf["Precision"]=="FP32")]
#seldf
#combineddf[["FP32 L2 AI", "FP32 L1 AI"]]
combineddf[["L2 AI", "L1 AI"]]

# Export Data

In [None]:
profiledf.to_csv(os.path.join(outputdir,"full_profile.csv"))
combineddf.to_csv(os.path.join(outputdir,"combined_profile.csv"))