In [1]:
import subprocess as sp
import numpy as np
import pandas as pd
from io import StringIO
import os
import re

In [2]:
#global parameters
# Root directory of the CUDA installation; the import helpers run
# <cudadir>/bin/nvprof to decode the recorded profiles.
# Export NVPROF_CUDA_DIR to point at a different installation without editing
# the notebook; when it is unset the original location is used.
cudadir = os.environ.get("NVPROF_CUDA_DIR", "/global/common/cori/software/cuda/10.0")

# Functions

In [3]:
def parse_filename(filename):
    """Extract the benchmark parameters encoded in a profile file name.

    The name is expected to be a sequence of dot-separated tokens such as
    ``profile.name_ResNet50-2.batchsize_32.inputshape_112x112x64.``
    ``kernelshape_3x3x64x128.stride_3.dataformat_NHWC.fp32.pass_backward.``
    ``metric_flop_count_sp.nvprof``. Every token has to be followed by a dot.

    Parameters
    ----------
    filename : str
        Name (or path) of the profile file.

    Returns
    -------
    tuple (dict, str)
        The dict holds "Network Name", "Batch Size" (int), "Input Shape",
        "Kernel Shape", "Stride Size" (int), "Data Format", "Pass" and
        "Precision" ("FP16" when the fp token is 16, otherwise "FP32").
        The string is the value of the ``metric_`` token.

    Raises
    ------
    ValueError
        If a token is missing, or a numeric token is not an integer.
    """

    def token(prefix):
        # Greedy '.*' anchors on the LAST '.<prefix>' in the name; the lazy
        # group then captures everything up to the following dot.
        # The argument is used here - previously the global `file` was read,
        # which ignored the parameter entirely.
        found = re.match(r'.*\.' + prefix + r'(.*?)\.', filename)
        if found is None:
            raise ValueError("no '.{}<value>.' token in file name {!r}".format(prefix, filename))
        return found.group(1)

    result = {}
    result["Network Name"] = token("name_")
    result["Batch Size"] = int(token("batchsize_"))
    result["Input Shape"] = token("inputshape_")
    result["Kernel Shape"] = token("kernelshape_")
    result["Stride Size"] = int(token("stride_"))
    result["Data Format"] = token("dataformat_")
    result["Pass"] = token("pass_")
    prec = int(token("fp"))
    result["Precision"] = "FP16" if prec == 16 else "FP32"
    metric = token("metric_")

    return result, metric


def import_nvprof_metric(filename, timeline=False):
    """Decode a recorded profile with nvprof and load its CSV report.

    Runs ``<cudadir>/bin/nvprof --csv -i <filename>`` and parses what the
    tool prints on stderr.

    Parameters
    ----------
    filename : str
        Path of the profile file handed to ``nvprof -i``.
    timeline : bool
        When True ``--print-gpu-trace`` is added and one leading line of the
        report is skipped; otherwise two leading lines are skipped.

    Returns
    -------
    pandas.DataFrame
        The report without all-NaN rows, with a "Kernel" column (if any)
        renamed to "Name" and a constant "Collection Type" of "kernel".
    """
    command = [os.path.join(cudadir, "bin/nvprof"), "--csv", "-i", filename]
    if timeline:
        command.append("--print-gpu-trace")
    leading_lines = 1 if timeline else 2

    #run the tool; only stderr is parsed
    process = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)
    _, report = process.communicate()

    #parse the csv part of the report
    frame = pd.read_csv(StringIO(report.decode("utf-8")), skiprows=leading_lines)
    frame = frame.dropna(how="all")
    frame = frame.rename(columns={"Kernel": "Name"})
    frame["Collection Type"] = "kernel"

    return frame


def import_nvprof_overview(filename, nvtx=False):
    """Decode a recorded profile with nvprof and load its timing overview.

    Runs ``<cudadir>/bin/nvprof --csv -i <filename>`` and parses what the
    tool prints on stderr. Everything before the first
    "======== NVTX result" marker is read as the overview table; columns
    whose unit row says ms/us/ns are scaled by 1e-3/1e-6/1e-9.

    Parameters
    ----------
    filename : str
        Path of the profile file handed to ``nvprof -i``.
    nvtx : bool
        When True the per-range tables after the NVTX marker are parsed too.

    Returns
    -------
    tuple (pandas.DataFrame, pandas.DataFrame)
        ``profiledf``: overview sorted by type and name, with "Type" renamed
        to "Collection Type" (values "gpu_activities"/"api_calls") and a
        constant "Metric Name" of "time".
        ``markerdf``: one row per entry of every NVTX range, tagged with a
        "Range Name" column; empty when ``nvtx`` is False, when the report
        has no NVTX section, or when no range could be found in it.
    """
    #execute nvprof and parse file
    args = [os.path.join(cudadir, "bin/nvprof"),"--csv","-i",filename]

    #open subprocess and communicate; only stderr is parsed
    p = sp.Popen(args, stdout=sp.PIPE, stderr=sp.PIPE)
    _, stderr = p.communicate()
    inp = stderr.decode("utf-8")

    #split the overview from the NVTX ranges
    sections = inp.split("======== NVTX result")
    profile = sections[0]
    # A report without NVTX section has a single piece; indexing [1]
    # unconditionally used to raise IndexError in that case.
    marker = sections[1] if (nvtx and len(sections) > 1) else ""

    #we can readily use the profile info (second header row carries the units)
    profiledf = pd.read_csv(StringIO(profile), skiprows=1, header=[0,1]).dropna(how="all")

    #make the time units the same:
    unit_scale = {"ms": 10**(-3), "us": 10**(-6), "ns": 10**(-9)}
    for col in profiledf.columns:
        scale = unit_scale.get(col[1])
        if scale is not None:
            profiledf[col] *= scale

    #now drop the unit header
    profiledf.columns = profiledf.columns.droplevel(1)

    #now sort
    profiledf = profiledf.sort_values(by=["Type", "Name"]).reset_index(drop=True)
    profiledf["Metric Name"] = "time"

    #some renamings
    profiledf.loc[ profiledf["Type"] == "GPU activities", "Type" ] = "gpu_activities"
    profiledf.loc[ profiledf["Type"] == "API calls", "Type" ] = "api_calls"

    #rename columns
    profiledf.rename(columns={"Type": "Collection Type"}, inplace=True)

    markerdflist = []
    if nvtx:
        for it in re.finditer(r"========\s{1,}Range(.*?)(==|$)", marker, flags=re.DOTALL):
            #read into DF: line 0 is the rest of the range banner, line 2 the unit row
            tmpdf = pd.read_csv(StringIO(it.groups()[0]),skiprows=lambda x: x in [0,2], header=0)
            del tmpdf["Time(%)"]

            #drop rows without info; copy so the assignments below act on an
            #independent frame instead of a slice of the parsed table
            tmpdf = tmpdf[ ~tmpdf["Type"].str.contains("were profiled in this range") ].copy()

            #extract range name: positional access, because the plain [0]
            #was a label lookup that only worked when the row label was 0
            rangename = tmpdf.loc[ tmpdf["Type"] == "Range:", "Name" ].iloc[0]

            #some renamings ("Name" first, while "Type" still says "Range:")
            tmpdf.loc[ tmpdf["Type"] == "Range:", "Name" ] = "total"
            tmpdf.loc[ tmpdf["Type"] == "Range:", "Type" ] = "range"
            tmpdf.loc[ tmpdf["Type"] == "GPU activities", "Type" ] = "gpu_activities"
            tmpdf.loc[ tmpdf["Type"] == "API calls", "Type" ] = "api_calls"

            #add the rangename to the entries
            tmpdf["Range Name"] = rangename

            #renaming
            tmpdf.rename(columns={"Type": "Collection Type"}, inplace=True)

            #add to list
            markerdflist.append(tmpdf)

    if markerdflist:
        #concat all ranges; pd.concat raises on an empty list, hence the guard
        markerdf = pd.concat(markerdflist).sort_values(by=["Range Name", "Time"], ascending=[True, False]).reset_index(drop=True)
    else:
        markerdf = pd.DataFrame()

    return profiledf, markerdf

def combine_metrics(df, metrics):
    """Turn the metric columns of one group into one record per metric.

    For every name in ``metrics`` the value in the first row of ``df`` is
    taken as "Metric Count". "Metric Mode" is "read"/"write" when the name
    contains that word, otherwise "total"; "Metric Name" is the name with
    "read"/"write" removed and a doubled underscore collapsed.

    Returns a DataFrame with one row per entry of ``metrics``.
    """
    rows = []
    for m in metrics:
        count = df[m].values[0]
        name = m.replace("read","").replace("write","").replace("__","_")
        if "read" in m:
            mode = "read"
        elif "write" in m:
            mode = "write"
        else:
            mode = "total"
        rows.append({"Metric Count": count, "Metric Name": name, "Metric Mode": mode})
    return pd.DataFrame.from_records(rows)


def replace_tc_string(value):
    """Return the integer inside the first pair of parentheses of ``value``.

    Example: "Idle (0)" -> 0.
    """
    level = re.match(r".*?\((.*?)\)", value).group(1)
    return int(level)

# Check for Missing Data

In [31]:
#datadir:
datadirs = ["./data/good_new", "./data/good_new_2"]

#get metric list: bare names of all .nvprof/.nvvp files in the data directories
files = []
for datadir in datadirs:
    files += [ x for x in os.listdir(datadir) if os.path.splitext(x)[-1] in (".nvprof", ".nvvp") ]

#recs
records = []

#build feature list:
for file in files:
    splt = file.split(".")

    #everything between the leading token and the metric token identifies the run
    prefix = ".".join(splt[1:-2])
    metric = splt[-2].split("metric_")[1]

    #append to records
    records.append({"prefix": prefix, "metric": metric})

#put in df (explicit columns keep this working when no file was found)
recorddf = pd.DataFrame(records, columns=["prefix", "metric"]).sort_values(["prefix", "metric"])

#get all metrics that occur for at least one prefix
all_metrics = list(recorddf["metric"].unique())

#metrics that are present per prefix
present = {}
for rec in records:
    present.setdefault(rec["prefix"], set()).add(rec["metric"])

#one row per (prefix, missing metric). This replaces a groupby().apply()
#returning variable-length Series, whose result layout depends on the data:
#when every prefix misses the same number of metrics (including zero) pandas
#appears to return one row per prefix, so complete runs could be reported as
#missing.
missingrecorddf = pd.DataFrame(
    [{"prefix": p, "metric": m} for p in sorted(present) for m in all_metrics if m not in present[p]],
    columns=["prefix", "metric"])

#create exclusion list: prefixes with at least one missing metric
excludelist = list(missingrecorddf["prefix"].unique())

#print the missing ones
excludelist

['name_ResNet50-2.batchsize_32.inputshape_112x112x64.kernelshape_3x3x64x128.stride_3.dataformat_NHWC.fp32.pass_backward',
 'name_ResNet50-2.batchsize_32.inputshape_112x112x64.kernelshape_3x3x64x128.stride_3.dataformat_NHWC.fp32.pass_calibrate',
 'name_ResNet50-2.batchsize_32.inputshape_112x112x64.kernelshape_3x3x64x128.stride_3.dataformat_NHWC.fp32.pass_forward',
 'name_ResNet50-2.batchsize_32.inputshape_112x112x64.kernelshape_3x3x64x256.stride_1.dataformat_NHWC.fp16.pass_backward',
 'name_ResNet50-2.batchsize_32.inputshape_112x112x64.kernelshape_3x3x64x256.stride_1.dataformat_NHWC.fp16.pass_calibrate',
 'name_ResNet50-2.batchsize_32.inputshape_112x112x64.kernelshape_3x3x64x256.stride_1.dataformat_NHWC.fp16.pass_forward',
 'name_ResNet50-2.batchsize_32.inputshape_112x112x64.kernelshape_3x3x64x256.stride_1.dataformat_NHWC.fp32.pass_backward',
 'name_ResNet50-2.batchsize_32.inputshape_112x112x64.kernelshape_3x3x64x256.stride_1.dataformat_NHWC.fp32.pass_calibrate',
 'name_ResNet50-2.batch

# Import Data

In [None]:
#directories holding the recorded profiles
datadirs = ["./data/good_new", "./data/good_new_2"]
#datadir = "/global/cscratch1/sd/tkurth/tf_cnn_kernels/runs/65709"
#datadir = "/global/cscratch1/sd/tkurth/tf_cnn_kernels/runs/70633"

#columns used to sort and group the imported frames
sortkeys = [
    "Network Name", "Input Shape", "Kernel Shape",
    "Batch Size", "Stride Size", "Data Format", "Pass",
    "Precision", "Device", "Name", "Metric Name",
]

#accumulators filled by the import loop
df_timeline = []
df_summary = []
df_summary_derived = []

#full paths of all .nvprof/.nvvp files in the data directories
files = []
for datadir in datadirs:
    files.extend(
        os.path.join(datadir, entry)
        for entry in os.listdir(datadir)
        if os.path.splitext(entry)[-1] in (".nvprof", ".nvvp")
    )

##DEBUG
#files = [x for x in files if x.startswith("profile.name_ResNet50-2.batchsize_16.inputshape_112x112x64.kernelshape_9x9x64x64.stride_1.dataformat_NHWC.fp16.pass_forward.metric_")]
##DEBUG

#metrics
def _metric_mode(name):
    """Classify a raw metric name: "read", "write" ("store" counts as write) or "total"."""
    if "read" in name:
        return "read"
    if "write" in name or "store" in name:
        return "write"
    return "total"


def _metric_basename(name):
    """Strip "read"/"write"/"store" from a raw metric name and collapse the doubled underscore."""
    return name.replace("read","").replace("write","").replace("store","").replace("__","_")


for file in files:

    #extract metric name; a file may carry several metrics joined by "-"
    parameters, metric = parse_filename(os.path.basename(file))
    metrics = metric.split("-")

    #print("Reading {}".format(file))

    #timing profiles are imported separately
    if metric == "time":
        continue

    # Checked once, without a `for metric in metrics` loop: those loops used
    # to rebind `metric`, so the summary labels below were derived from the
    # last sub-metric instead of the file's metric.
    has_tc_metric = "tensor_precision_fu_utilization" in metrics

    #import as timeline
    tmpdf = import_nvprof_metric(file, timeline=True)
    for key in parameters:
        tmpdf[key] = parameters[key]

    #replace "Idle (0)" with 0.:
    if has_tc_metric:
        tmpdf["tensor_precision_fu_utilization"] = tmpdf["tensor_precision_fu_utilization"].apply(replace_tc_string)

    #combine read and write metrics: one row per (kernel launch, metric)
    tmpdf = tmpdf.groupby([x for x in tmpdf.columns if x not in metrics]).apply(lambda x: combine_metrics(x, metrics)).reset_index()
    #drop the positional index level that reset_index() turned into a column
    lev = [x for x in tmpdf.columns if x.startswith("level_")][0]
    del tmpdf[lev]
    df_timeline.append(tmpdf)

    #import as summary
    tmpdf = import_nvprof_metric(file, timeline=False).sort_values(by="Name").reset_index(drop=True)
    if len(metrics) == 1:
        #single metric: label every row from the file name (unchanged behaviour)
        tmpdf["Metric Mode"] = _metric_mode(metric)
        tmpdf["Metric Name"] = _metric_basename(metric)
    else:
        #several metrics: label each row by its own metric instead of stamping
        #the last sub-metric on all of them.
        #assumes the summary CSV's "Metric Name" column holds the raw metric
        #identifier as used in the file name - TODO confirm
        rawnames = tmpdf["Metric Name"]
        tmpdf["Metric Mode"] = rawnames.apply(_metric_mode)
        tmpdf["Metric Name"] = rawnames.apply(_metric_basename)
    for key in parameters:
        tmpdf[key] = parameters[key]
    del tmpdf["Metric Description"]

    #replace "Idle (0)" with 0.:
    if has_tc_metric:
        for stat in ["Min", "Max", "Avg"]:
            tmpdf[stat] = tmpdf[stat].apply(replace_tc_string)
    df_summary.append(tmpdf)

#concat the per-file frames
df_timeline = pd.concat(df_timeline, sort=True)
df_timeline = df_timeline.sort_values(by=sortkeys+["Metric Mode", "Correlation_ID"]).reset_index(drop=True)
df_summary = pd.concat(df_summary, sort=True)

#compute summary df: add up the modes (read/write/...) of every timeline entry
groupcols = [x for x in df_timeline.columns if x not in ["Metric Mode", "Metric Count"]]
tmpdf = df_timeline.groupby(groupcols).apply(lambda x: pd.Series({"Metric Count": x["Metric Count"].values.sum(), "Metric Mode": "total"})).reset_index()
# Assign the result: sort_values() returns a new frame, so the former
# `tmpdf.sort_values(...).reset_index(drop=True, inplace=True)` reset the index
# of a temporary and left tmpdf untouched.
tmpdf = tmpdf.sort_values(sortkeys+["Correlation_ID"]).reset_index(drop=True)
#statistics over the invocations of every kernel/metric combination
df_summary_derived = tmpdf.groupby(sortkeys).apply(lambda x: pd.Series({"Invocations": x["Metric Count"].count(), \
                                                               "Min": x["Metric Count"].min(), \
                                                               "Max": x["Metric Count"].max(), \
                                                               "STD": x["Metric Count"].std(), \
                                                               "Average": x["Metric Count"].mean()})).reset_index().sort_values(by="Name").fillna(0.)

#timings: full paths of the profiles recorded with metric_time
files = []
for datadir in datadirs:
    files += [ os.path.join(datadir,x) for x in os.listdir(datadir) if (os.path.splitext(x)[-1] in (".nvprof", ".nvvp")) and "metric_time" in x ]

df_times = []
for file in files:
    # nvprof has to open the file, so it gets the full path; the bare name
    # goes to the parser. The two arguments used to be swapped, which made
    # nvprof look for the profile in the current working directory.
    timedf, markerdf = import_nvprof_overview(file)

    #attach the parameters encoded in the file name
    parameters, _ = parse_filename(os.path.basename(file))
    for key in parameters:
        timedf[key] = parameters[key]

    #append frame
    df_times.append(timedf)

df_time = pd.concat(df_times)

# Compute AI Results

In [None]:
#Work on a derived frame so that df_time stays intact and nothing has to be re-imported if this cell goes wrong
selectkeys = [
    "Precision", "Network Name", "Data Format", "Input Shape", "Kernel Shape", "Stride Size",
    "Batch Size", "Pass", "Name",
]
#peak rate used below to turn the tensor-core utilization into a flop count
tc_peak_perf_flops = 125*10**12

#just pick the gpu activities for now, keep only the average time per kernel
profiledf = (
    df_time[ df_time["Collection Type"] == "gpu_activities" ]
    .sort_values(by=["Name"])
    .reset_index(drop=True)
    .rename(columns={"Avg": "Time Avg"})
    .drop(columns=["Time(%)", "Time", "Min", "Max", "Metric Name", "Collection Type"])
)

#remove the calibration: within every configuration (all keys but pass and
#kernel name) drop each kernel whose name also occurs in a "calibrate" pass
alignkeys = selectkeys[:-2]

def _drop_calibration_kernels(group):
    calibrated = group.loc[group["Pass"].str.startswith("calibrate"), "Name"].values
    return group[ ~group["Name"].isin(calibrated) ]

profiledf = profiledf.groupby(alignkeys).apply(_drop_calibration_kernels)
profiledf = profiledf.reset_index(drop=True)

#the metric values come from df_summary
metricdf = df_summary.copy()

#now, get the AI-relevant stuff:
#FP32 flops and FP16 non-TC flops: take the average count and add it to the timings
for pattern, column in [("flop_count_sp", "FP32 Flops Avg"), ("flop_count_hp", "FP16 non-TC Flops Avg")]:
    flopdf = metricdf[ metricdf["Metric Name"].str.contains(pattern) ].sort_values(selectkeys).rename(columns={"Avg": column})
    profiledf = profiledf.merge(flopdf[selectkeys+[column]], on=selectkeys, how="left")

#FLOPS TC: the utilization value is taken as tenths of the peak rate and
#multiplied with the kernel time
flopdf = metricdf[ metricdf["Metric Name"].str.contains("tensor_precision_fu_utilization") ].sort_values(selectkeys).rename(columns={"Avg": "TC Flops Avg"})
tmpdf = flopdf.merge(profiledf, how="inner", on=selectkeys).sort_values(selectkeys)
tmpdf["TC Flops Avg"] = tmpdf["TC Flops Avg"] * (tc_peak_perf_flops/10. * tmpdf["Time Avg"])
#add to timings
profiledf = profiledf.merge(tmpdf[selectkeys+["TC Flops Avg"]], on=selectkeys, how="left")

#kernels without a given flop metric count as zero
profiledf = profiledf.fillna(0.)

#FLOPS FP16: add TC and non-TC FP16 flops together
profiledf["FP16 Flops Avg"] = profiledf["TC Flops Avg"] + profiledf["FP16 non-TC Flops Avg"]

#total flops
profiledf["Flops Avg"] = profiledf["FP16 Flops Avg"] + profiledf["FP32 Flops Avg"]

#flop fractions relative to the total
for kind in ["TC", "FP16", "FP16 non-TC", "FP32"]:
    profiledf[kind + " Flops Fraction Avg"] = profiledf[kind + " Flops Avg"]/profiledf["Flops Avg"]


#memory transactions per level; the select/merge/sum steps used to be
#copy-pasted for every level and are shared through the helpers below
def _read_write_totals(leveldf, read_mask, write_mask, keys, total_column):
    """Add up the average read and write transactions of one memory level.

    Parameters
    ----------
    leveldf : pandas.DataFrame
        Metric rows of one memory level; needs the ``keys`` columns and "Avg".
    read_mask, write_mask : boolean Series aligned with ``leveldf``
        Select the rows holding the read and the write transactions.
    keys : list of str
        Columns identifying one kernel; used as merge keys.
    total_column : str
        Name of the column receiving reads + writes.

    Returns
    -------
    tuple (readsdf, writesdf, totaldf)
        The selected read rows, the selected write rows and their outer merge
        with ``total_column`` added. A kernel that only has reads or only has
        writes counts the missing side as 0.
    """
    readsdf = leveldf.loc[read_mask, keys+["Avg"]]
    writesdf = leveldf.loc[write_mask, keys+["Avg"]]
    #writes are the left frame, so Avg_x holds the writes and Avg_y the reads
    totaldf = writesdf.merge(readsdf, on=keys, how="outer").fillna(0.)
    totaldf[total_column] = totaldf["Avg_x"] + totaldf["Avg_y"]
    return readsdf, writesdf, totaldf


def _moded_read_write_totals(metricdf, pattern, metric_name, keys, total_column):
    """Like _read_write_totals for levels whose reads and writes share the
    metric name ``metric_name`` and are told apart by "Metric Mode".

    The rows are first projected out with a substring match of ``pattern`` on
    "Metric Name" and sorted by ``keys``.
    """
    leveldf = metricdf[ metricdf["Metric Name"].str.contains(pattern) ].sort_values(keys)
    named = leveldf["Metric Name"] == metric_name
    return _read_write_totals(leveldf,
                              named & (leveldf["Metric Mode"]=="read"),
                              named & (leveldf["Metric Mode"]=="write"),
                              keys, total_column)


#shared
sharedreadsdf, sharedwritesdf, shareddf = _moded_read_write_totals(metricdf, "shared", "shared_transactions", selectkeys, "Shared Transactions Avg")
#add to timings
profiledf = profiledf.merge(shareddf[selectkeys+["Shared Transactions Avg"]], on=selectkeys, how="inner")


#L1: loads and stores are separate metrics (gld_/gst_) instead of modes
#project out
l1df = metricdf[ (metricdf["Metric Name"].str.contains("gst_")) | (metricdf["Metric Name"].str.contains("gld_")) ].sort_values(selectkeys)
#get reads and writes and combine
l1readsdf, l1writesdf, l1df = _read_write_totals(l1df,
                                                 l1df["Metric Name"]=="gld_transactions",
                                                 l1df["Metric Name"]=="gst_transactions",
                                                 selectkeys, "L1 Transactions Avg")
#add to timings
profiledf = profiledf.merge(l1df[selectkeys+["L1 Transactions Avg"]], on=selectkeys, how="inner")


#L2
l2readsdf, l2writesdf, l2df = _moded_read_write_totals(metricdf, "l2", "l2_transactions", selectkeys, "L2 Transactions Avg")
#add to timings
profiledf = profiledf.merge(l2df[selectkeys+["L2 Transactions Avg"]], on=selectkeys, how="inner")


#DRAM
dramreadsdf, dramwritesdf, dramdf = _moded_read_write_totals(metricdf, "dram", "dram_transactions", selectkeys, "DRAM Transactions Avg")
#add to timings
profiledf = profiledf.merge(dramdf[selectkeys+["DRAM Transactions Avg"]], on=selectkeys, how="inner")


#SYSMEM
sysmemreadsdf, sysmemwritesdf, sysmemdf = _moded_read_write_totals(metricdf, "sysmem", "sysmem_transactions", selectkeys, "Sysmem Transactions Avg")
#add to timings
profiledf = profiledf.merge(sysmemdf[selectkeys+["Sysmem Transactions Avg"]], on=selectkeys, how="inner")

#clean up and sort. The result has to be assigned: sort_values() returns a new
#frame, so `profiledf.sort_values(...).reset_index(drop=True, inplace=True)`
#only modified a temporary and profiledf stayed unsorted.
profiledf = profiledf.sort_values(selectkeys).reset_index(drop=True)

#get performance first: flops / (seconds * 1e9)
for kind in ["", "FP32 ", "FP16 ", "TC "]:
    profiledf[kind + "Performance GFlop/s"] = profiledf[kind + "Flops Avg"]/(profiledf["Time Avg"]*10**9)

#get AI: flops / (32 * transactions) per memory level
#(32 is presumably the number of bytes per transaction - TODO confirm)
level_transactions = [
    #L1 is L1+shared
    ("L1", profiledf["L1 Transactions Avg"]+profiledf["Shared Transactions Avg"]),
    ("L2", profiledf["L2 Transactions Avg"]),
    ("DRAM", profiledf["DRAM Transactions Avg"]),
    ("Sysmem", profiledf["Sysmem Transactions Avg"]),
]
for level, transactions in level_transactions:
    for kind in ["", "FP32 ", "FP16 "]:
        profiledf[kind + level + " AI"] = profiledf[kind + "Flops Avg"]/(32.*transactions)

#sort results (assigned, see above)
profiledf = profiledf.sort_values(by=selectkeys).reset_index(drop=True)

In [None]:
#example of a narrower selection, kept for reference:
#profiledf[ (profiledf["Network Name"]=="ResNet50-2") &\
#           (profiledf["Input Shape"]=="112x112x64") &\
#           (profiledf["Batch Size"]==16) &\
#           (profiledf["Precision"]=="FP16") &\
#           (profiledf["Stride Size"]==3) &\
#           (profiledf["Pass"]=="backward") &\
#           (profiledf["Kernel Shape"]=="9x9x64x64")
#         ]
#L1 and L2 arithmetic intensity of all kernels of the backward pass
profiledf.loc[ profiledf["Pass"].eq("backward"), ["L1 AI", "L2 AI"] ]

In [None]:
#sum over all kernels of one configuration and pass
combinedselectkeys = ["Precision", "Network Name", "Data Format", "Input Shape", "Kernel Shape", "Stride Size", \
                     "Batch Size", "Pass"]

#get the aggregated performance, including all kernels.
#numeric_only=True keeps the string column "Name" out of the sum; without it
#recent pandas versions concatenate the kernel names instead of dropping them.
combineddf = profiledf.groupby(by=combinedselectkeys).sum(numeric_only=True)

#ratios cannot be summed, so everything below is recomputed from the summed
#flops, times and transactions

#the flop fractions need to be recomputed
for kind in ["TC", "FP16", "FP16 non-TC", "FP32"]:
    combineddf[kind + " Flops Fraction Avg"] = combineddf[kind + " Flops Avg"]/combineddf["Flops Avg"]

#get performance first: flops / (seconds * 1e9)
for kind in ["", "FP32 ", "FP16 ", "TC "]:
    combineddf[kind + "Performance GFlop/s"] = combineddf[kind + "Flops Avg"]/(combineddf["Time Avg"]*10**9)

#get AI: flops / (32 * transactions) per memory level
#(32 is presumably the number of bytes per transaction - TODO confirm)
combined_level_transactions = [
    #L1 is L1+shared
    ("L1", combineddf["L1 Transactions Avg"]+combineddf["Shared Transactions Avg"]),
    ("L2", combineddf["L2 Transactions Avg"]),
    ("DRAM", combineddf["DRAM Transactions Avg"]),
    ("Sysmem", combineddf["Sysmem Transactions Avg"]),
]
for level, transactions in combined_level_transactions:
    for kind in ["", "FP32 ", "FP16 ", "TC "]:
        combineddf[kind + level + " AI"] = combineddf[kind + "Flops Avg"]/(32.*transactions)

#print
combineddf

In [None]:
#earlier inspections, kept for reference:
#combineddf = combineddf.reset_index()
#seldf = combineddf[ (combineddf["Network Name"]=="ResNet50-2") &\
#           (combineddf["Input Shape"]=="112x112x64") &\
#           (combineddf["Precision"]=="FP32")]
#seldf
#combineddf[["FP32 L2 AI", "FP32 L1 AI"]]
#summed L2 and L1 transaction counts per configuration
combineddf.loc[:, ["L2 Transactions Avg", "L1 Transactions Avg"]]

# Export Data

In [None]:
outputdir = "./results"

#to_csv does not create missing directories, so make sure the target exists
os.makedirs(outputdir, exist_ok=True)

#per-kernel results and the results summed over all kernels
profiledf.to_csv(os.path.join(outputdir,"full_profile.csv"))
combineddf.to_csv(os.path.join(outputdir,"combined_profile.csv"))