In [1]:
import subprocess as sp
import numpy as np
import pandas as pd
from io import StringIO
import os
import re
import shutil

In [2]:
# Input and output locations, relative to the notebook's working directory.
# datadirs: every directory listed here is scanned for "*.log" files by the import cell.
datadirs = ["../data"]
# outputdir: directory the export cell writes its CSV files into.
outputdir = "../results"

# Functions

In [3]:
# Column names that identify one metric row of the long-format table.
# Only referenced by commented-out code inside transpose_frame in this notebook.
selectkeys = ['Kernel Name', 'Metric Name', 'Metric Type']
# Key columns of the wide result table; every merge in transpose_frame joins on these.
resultkeys = ['Kernel Name', 'Calls']

def merge_results(df, name):
    """Collapse the ``<name>_x`` / ``<name>_y`` pair left by a merge into ``<name>``.

    When two frames that both carry a column called ``name`` are merged,
    pandas suffixes the clashing columns with ``_x`` and ``_y``. This helper
    adds the two together, stores the sum under ``name`` and removes the
    suffixed columns. The frame is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to fix up; mutated in place.
    name : str
        Base column name (without suffix).

    Returns
    -------
    None
        If either suffixed column is missing the frame is left untouched.
    """
    left, right = name + '_x', name + '_y'
    # Each membership test is spelled out: `a and b in cols` would only test
    # `b`, because a non-empty string is always truthy.
    if left in df.columns and right in df.columns:
        # Plain addition: a NaN on either side (e.g. from an outer merge)
        # propagates into the summed column.
        df[name] = df[left] + df[right]
        del df[left]
        del df[right]
        
def transpose_frame(df_results, df_metrics):    
    """Fold one long-format metric table into the wide, per-kernel result table.

    Every recognised metric family present in ``df_metrics`` (FP32 / FP16
    instruction counts, tensor-core utilization, elapsed and tensor-pipe
    cycles, shared / L1-atomic / local / global / L2 / DRAM / sysmem
    transactions) is reduced to a single column and outer-merged into
    ``df_results`` on the module-level ``resultkeys``.

    Parameters
    ----------
    df_results : pandas.DataFrame
        Result table accumulated so far. When it is empty it is seeded with
        the per-kernel call counts taken from ``df_metrics`` and displayed.
    df_metrics : pandas.DataFrame
        Long-format table. The columns read here are 'Kernel Name', 'Calls',
        'Metric Name', 'Metric Type' and 'Metric Value'.

    Returns
    -------
    pandas.DataFrame
        ``df_results`` with one additional column per metric family found.

    Raises
    ------
    ValueError
        If the per-kernel call counts of ``df_metrics`` differ from those in
        a non-empty ``df_results``, or if the cycle rows and rate rows of a
        timing metric do not line up row by row.

    Notes
    -----
    Relies on the module-level names ``resultkeys`` and ``merge_results`` and
    on IPython's ``display``.
    """
    # Peak tensor-core throughput in FLOP/s that a 100% utilization reading is
    # scaled by. NOTE(review): presumably the V100 figure - confirm before
    # using data from another GPU.
    tc_peak_perf_flops = 125*10**12

    # Cleanup: work on a sorted private copy so the in-place scaling below
    # never touches the caller's frame.
    metricdf = df_metrics.sort_values(by=resultkeys).copy().reset_index(drop=True)
    #metricdf = metricdf.groupby(selectkeys).sum().reset_index()
    #metricdf = metricdf[['Kernel Name', 'Calls', 'Metric Name', 'Metric Type', 'Metric Value']]
    #display(metricdf)
    
    # The first metric name / first metric type are used as the reference
    # slice from which per-kernel call counts are derived.
    unique_metrics = metricdf["Metric Name"].unique()
    tmpdf = metricdf.loc[metricdf["Metric Name"]==unique_metrics[0], ["Metric Type"] ].copy()
    #print(unique_metrics)
    unique_types = tmpdf['Metric Type'].unique()

    # Seed the result table on first use; otherwise raise if the call counts
    # of this file disagree with what is already in the result table.
    if df_results.empty:
        df_results = metricdf.loc[ (metricdf["Metric Name"]==unique_metrics[0]) & (metricdf["Metric Type"]==unique_types[0]), resultkeys ].sort_values(by=resultkeys).reset_index(drop=True).copy()
        # Summing 'Calls' per kernel yields the number of rows (invocations)
        # when every row carries Calls == 1.
        df_results = df_results.groupby('Kernel Name').sum().reset_index()
        display(df_results)
    else:
        tmpMetricdf = metricdf.loc[ (metricdf["Metric Name"]==unique_metrics[0]) & (metricdf["Metric Type"]==unique_types[0]), resultkeys ].sort_values(by=resultkeys).reset_index(drop=True).copy()
        tmpResultdf = df_results[resultkeys].copy()
        tmpMetricdf = tmpMetricdf.groupby('Kernel Name').sum().reset_index()
        
        if not tmpMetricdf.equals(tmpResultdf):
            #display(tmpMetricdf)
            #display(tmpResultdf)
            # Print the rows that exist on one side only, in both directions,
            # before aborting.
            print("\n##### Data in the current DF")
            df = tmpMetricdf.merge(tmpResultdf, how = 'outer' ,indicator=True).loc[lambda x : x['_merge']=='left_only']
            print(df)
            print("##### Data in the output DF")
            df = tmpResultdf.merge(tmpMetricdf, how = 'outer' ,indicator=True).loc[lambda x : x['_merge']=='left_only']
            print(df)
            raise ValueError("Data not consistent")

    ####### Get number of FLOPs
    
    ### FMA FLOPs = number of FMA instructions x 2
    # Applies to every metric whose name contains "fma_pred_on" (e.g. the
    # ffma and hfma metrics listed below).
    metricdf.loc[metricdf["Metric Name"].str.contains("fma_pred_on"), ["Metric Value"]] *= 2
    
    
    ### FP32 FLOPs
    metrics = ['smsp__sass_thread_inst_executed_op_fadd_pred_on',
               'smsp__sass_thread_inst_executed_op_ffma_pred_on',
               'smsp__sass_thread_inst_executed_op_fmul_pred_on']
    if any(m in unique_metrics for m in metrics):
        tmpdf = metricdf.loc[ metricdf["Metric Name"].isin(metrics), resultkeys+["Metric Value"] ].copy()        
        metricname = 'FP32 FLOPs'
        tmpdf = tmpdf.groupby('Kernel Name').sum().reset_index().rename(columns={"Metric Value": metricname})
        # The group-by summed 'Calls' over the rows of all selected metrics;
        # divide by the number of distinct metric names in this file.
        # NOTE(review): this only restores the true call count when the file
        # contains exactly the metrics selected above - confirm.
        tmpdf['Calls'] = tmpdf['Calls'] / len(unique_metrics)
        #display(tmpdf)
        # Merge current df with the result df
        df_results = df_results.merge(tmpdf, on=resultkeys, how="outer")
        # Sum up if the column already existed and remove the duplicates
        merge_results(df_results, metricname)
        #display(df_results)
    
    
    ### FP16 FLOPs
    metrics = ['smsp__sass_thread_inst_executed_op_hadd_pred_on',
               'smsp__sass_thread_inst_executed_op_hfma_pred_on',
               'smsp__sass_thread_inst_executed_op_hmul_pred_on']
    if any(m in unique_metrics for m in metrics):
        tmpdf = metricdf.loc[ metricdf["Metric Name"].isin(metrics), resultkeys+["Metric Value"] ].copy()        
        metricname = 'FP16 FLOPs'
        tmpdf = tmpdf.groupby('Kernel Name').sum().reset_index().rename(columns={"Metric Value": metricname})
        # Same 'Calls' rescaling as for FP32 above (same caveat applies).
        tmpdf['Calls'] = tmpdf['Calls'] / len(unique_metrics)
        #display(tmpdf)
        # Merge current df with the result df
        df_results = df_results.merge(tmpdf, on=resultkeys, how="outer")
        # Sum up if the column already existed and remove the duplicates
        merge_results(df_results, metricname)
        #display(df_results)
    
    
    ### TC FLOP Rates
    if any("tensor_op_hmma.avg.pct_of_peak" in m for m in unique_metrics):
        tmpdf = metricdf.loc[ metricdf["Metric Name"].str.contains("tensor_op_hmma.avg.pct_of_peak"), resultkeys+["Metric Value"] ].copy()
        tmpdf = tmpdf.groupby('Kernel Name').sum().reset_index()
        # Summed percentage divided by summed 'Calls', converted from percent
        # to a fraction.
        tmpdf["Utilization"] = 0.01 * tmpdf["Metric Value"] / tmpdf['Calls']
        metricname = "TC FLOP Rates"
        tmpdf[metricname] = tc_peak_perf_flops * tmpdf["Utilization"]
        #display(tmpdf)
        # merge
        df_results = df_results.merge(tmpdf[resultkeys+[metricname]], on=resultkeys, how="outer")
        #merge_results(df_results, metricname)
        #display(df_results)


    ####### Get timing information

    ### CUDA Time
    if any("smsp__cycles_elapsed" in m for m in unique_metrics):
        # get cycles (rows of type "total")
        metricname = "CUDA Cycles"
        cyclesdf = metricdf.loc[(metricdf["Metric Name"]=="smsp__cycles_elapsed") & (metricdf["Metric Type"]=="total"),
                               resultkeys+["Metric Value"]].reset_index(drop=True).sort_values(by=resultkeys).rename(columns={"Metric Value": metricname}).copy()
        #display(cyclesdf)
        # get rates (rows of type "rate")
        metricname = "CUDA Rates"
        ratesdf = metricdf.loc[(metricdf["Metric Name"]=="smsp__cycles_elapsed") & (metricdf["Metric Type"]=="rate"),
                               resultkeys+["Metric Value"]].reset_index(drop=True).sort_values(by=resultkeys).rename(columns={"Metric Value": metricname}).copy()
        #display(ratesdf)
        # Put cycles and rates side by side. concat(axis=1) aligns on the
        # index, and the result has duplicated 'Kernel Name' / 'Calls' columns.
        cyclesdf = pd.concat([cyclesdf, ratesdf], axis=1)
        # Disambiguate the duplicated key columns by position:
        # 0,1 come from the cycles frame, 3,4 from the rates frame.
        column_names = cyclesdf.columns.values
        column_names[0] = 'Kernel Name_x'
        column_names[1] = 'Calls_x'
        column_names[3] = 'Kernel Name_y'
        column_names[4] = 'Calls_y'
        cyclesdf.columns = column_names
        # check data consistency: each row must pair the same kernel / calls
        for index, row in cyclesdf.iterrows():
            if (row['Kernel Name_x'] != row['Kernel Name_y']) or (row['Calls_x'] != row['Calls_y']):
                raise ValueError("CUDA Time: Cycles & Rates not consistent")
        # cleanups
        del cyclesdf['Kernel Name_y']
        del cyclesdf['Calls_y']
        
        cyclesdf.rename(columns={"Kernel Name_x": 'Kernel Name'}, inplace=True)
        cyclesdf.rename(columns={"Calls_x": 'Calls'}, inplace=True)
        # Per-row time = cycles / rate, then summed per kernel.
        # NOTE(review): presumably seconds, assuming the rate is in cycles per second - confirm.
        cyclesdf['CUDA Time'] = cyclesdf['CUDA Cycles'] / cyclesdf['CUDA Rates']
        cyclesdf = cyclesdf[['Kernel Name', 'Calls', 'CUDA Time']]
        cyclesdf = cyclesdf.groupby('Kernel Name').sum().reset_index()
        
        # merge with output df
        df_results = df_results.merge(cyclesdf, on=resultkeys, how='outer')
        #print(df_results['CUDA Time'].sum())
        #display(df_results)
               
    ### Tensor Core Time
    if any("smsp__pipe_tensor_op_hmma_cycles_active" in m for m in unique_metrics):
        # get cycles (rows of type "total")
        metricname = "TC Cycles"
        cyclesdf = metricdf.loc[(metricdf["Metric Name"]=="smsp__pipe_tensor_op_hmma_cycles_active") & (metricdf["Metric Type"]=="total"),
                               resultkeys+["Metric Value"]].reset_index(drop=True).rename(columns={"Metric Value": metricname}).copy()
        # get rates (rows of type "rate")
        metricname = "TC Rates"
        ratesdf = metricdf.loc[(metricdf["Metric Name"]=="smsp__pipe_tensor_op_hmma_cycles_active") & (metricdf["Metric Type"]=="rate"),
                               resultkeys+["Metric Value"]].reset_index(drop=True).rename(columns={"Metric Value": metricname}).copy()
        #display(ratesdf)
        # Put cycles and rates side by side (index-aligned); same positional
        # renaming of the duplicated key columns as in the CUDA Time section.
        cyclesdf = pd.concat([cyclesdf, ratesdf], axis=1)
        column_names = cyclesdf.columns.values
        column_names[0] = 'Kernel Name_x'
        column_names[1] = 'Calls_x'
        column_names[3] = 'Kernel Name_y'
        column_names[4] = 'Calls_y'
        cyclesdf.columns = column_names
        # check data consistency: each row must pair the same kernel / calls
        for index, row in cyclesdf.iterrows():
            if (row['Kernel Name_x'] != row['Kernel Name_y']) or (row['Calls_x'] != row['Calls_y']):
                raise ValueError("TC Time: Cycles & Rates not consistent")
        # cleanups
        del cyclesdf['Kernel Name_y']
        del cyclesdf['Calls_y']
        
        cyclesdf.rename(columns={"Kernel Name_x": 'Kernel Name'}, inplace=True)
        cyclesdf.rename(columns={"Calls_x": 'Calls'}, inplace=True)
        # Per-row time = cycles / rate, then summed per kernel.
        cyclesdf['TC Time'] = cyclesdf['TC Cycles'] / cyclesdf['TC Rates']
        cyclesdf = cyclesdf[['Kernel Name', 'Calls', 'TC Time']]
        cyclesdf = cyclesdf.groupby('Kernel Name').sum().reset_index()
        
        # merge with output df
        df_results = df_results.merge(cyclesdf, on=resultkeys, how='outer')
        #print(df_results['TC Time'].sum())
        #display(df_results)
        

    ####### Get number of bytes

    ### Shared transactions
    # project out and sum per (kernel, calls)
    if any("l1tex__data_pipe_lsu_wavefronts_mem_shared_op" in m for m in unique_metrics):
        shareddf = metricdf.loc[metricdf["Metric Name"].str.contains("l1tex__data_pipe_lsu_wavefronts_mem_shared_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'Shared Transactions'
        shareddf = shareddf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(shareddf, on=resultkeys, how="outer")
        merge_results(df_results, metricname)

    ### L1 atomic transactions
    # project out
    metrics = ['l1tex__t_set_accesses_pipe_lsu_mem_global_op_atom',
               'l1tex__t_set_accesses_pipe_lsu_mem_global_op_red',
               'l1tex__t_set_accesses_pipe_tex_mem_surface_op_atom',
               'l1tex__t_set_accesses_pipe_tex_mem_surface_op_red']
    if any(m in unique_metrics for m in metrics):
        tmpdf = metricdf.loc[ metricdf["Metric Name"].isin(metrics), resultkeys+["Metric Value"] ].copy()
        metricname = 'L1 Atomic Transactions'
        tmpdf = tmpdf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        # Merge current df with the result df
        df_results = df_results.merge(tmpdf, on=resultkeys, how="outer")
        # Sum up if the column already existed and remove the duplicates
        merge_results(df_results, metricname)
        #print(df_results)
        
    ### Local transactions
    # project out
    if any("l1tex__t_sectors_pipe_lsu_mem_local_op" in m for m in unique_metrics):
        localdf = metricdf.loc[metricdf["Metric Name"].str.contains("l1tex__t_sectors_pipe_lsu_mem_local_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'Local Transactions'
        localdf = localdf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(localdf, on=resultkeys, how="outer")
            
    ### Global transactions
    # project out
    if any("l1tex__t_sectors_pipe_lsu_mem_global_op" in m for m in unique_metrics):
        globaldf = metricdf.loc[metricdf["Metric Name"].str.contains("l1tex__t_sectors_pipe_lsu_mem_global_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'Global Transactions'
        globaldf = globaldf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(globaldf, on=resultkeys, how="outer")
        
    
    ### L2 atomic & reduction
    # Rows of the lts__t_sectors_op family whose type is still "total" are
    # counted twice. This runs unconditionally and before the L2 projection
    # below, so the doubled values feed into 'L2 Transactions'.
    # NOTE(review): per the heading these are the atomic / reduction sectors - confirm.
    metricdf.loc[(metricdf["Metric Name"].str.contains("lts__t_sectors_op")) & (metricdf["Metric Type"]=="total"), ["Metric Value"]] *= 2
    
    ### L2 transactions
    # project out
    if any("lts__t_sectors_op" in m for m in unique_metrics):
        l2df = metricdf.loc[metricdf["Metric Name"].str.contains("lts__t_sectors_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'L2 Transactions'
        l2df = l2df.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(l2df, on=resultkeys, how="outer")
        merge_results(df_results, metricname)
        
    
    ### DRAM transactions
    # project out
    if any("dram__sectors" in m for m in unique_metrics):
        dramreadsdf = metricdf.loc[(metricdf["Metric Name"]=="dram__sectors") & (metricdf["Metric Type"]=="read"), resultkeys+["Metric Value"]].copy()
        dramwritesdf = metricdf.loc[(metricdf["Metric Name"]=="dram__sectors") & (metricdf["Metric Type"]=="write"), resultkeys+["Metric Value"]].copy()
        metricname = 'DRAM Transactions'
        # Reads and writes get the same column name on purpose: the merge
        # below then produces an _x/_y pair that merge_results adds together.
        dramreadsdf = dramreadsdf.groupby('Kernel Name').sum().reset_index().rename(columns={"Metric Value": metricname})
        dramwritesdf = dramwritesdf.groupby('Kernel Name').sum().reset_index().rename(columns={"Metric Value": metricname})
        
        dramdf = dramreadsdf.merge(dramwritesdf, on=resultkeys, how="outer")
        merge_results(dramdf, metricname)
        df_results = df_results.merge(dramdf, on=resultkeys, how="outer")
        #display(df_results)
            
    ### SYSMEM transactions
    # project out
    if any("lts__t_sectors_aperture_sysmem_op" in m for m in unique_metrics):
        sysmemdf = metricdf.loc[metricdf["Metric Name"].str.contains("lts__t_sectors_aperture_sysmem_op"), resultkeys+["Metric Value"] ].copy()
        metricname = 'SYSMEM Transactions'
        sysmemdf = sysmemdf.groupby(resultkeys).sum().reset_index().rename(columns={"Metric Value": metricname})
        df_results = df_results.merge(sysmemdf, on=resultkeys, how="outer")
    
    
    return df_results

# Import Data

In [4]:
# Collect every "*.log" file found directly inside the configured data directories.
files = [
    os.path.join(datadir, entry)
    for datadir in datadirs
    for entry in os.listdir(datadir)
    if os.path.splitext(entry)[-1] == ".log"
]

# One record per log file: "prefix" is the file name without its last
# extension, "file" is the path the file is read from later on.
records = []
for logpath in files:
    folder, filename = os.path.split(logpath)
    records.append({
        "prefix": ".".join(filename.split(".")[:-1]),
        "file": os.path.join(folder, filename),
    })

# Tabulate the records, ordered by prefix, and show them.
recorddf = pd.DataFrame(records)
recorddf = recorddf.sort_values(["prefix"]).reset_index(drop=True)
display(recorddf)

Unnamed: 0,prefix,file
0,cuda_time,../data/cuda_time.log
1,dram,../data/dram.log
2,ffma_flops,../data/ffma_flops.log
3,hp_flops,../data/hp_flops.log
4,sp_flops,../data/sp_flops.log
5,tc_flops,../data/tc_flops.log
6,tc_time,../data/tc_time.log


In [5]:
# Accumulator for the wide per-kernel profile; transpose_frame seeds it from
# the first file and merges one or more columns per subsequent file.
profiledf = pd.DataFrame(columns=resultkeys)

# (substring of the raw metric name, resulting "Metric Type").
# Applied in this order, so a later match overrides an earlier one.
metric_type_markers = [
    ("_read", "read"),
    ("_ld", "read"),
    ("_write", "write"),
    ("_st", "write"),
    (".per_second", "rate"),
]

for fin in recorddf["file"]:
    print("\n### Processing "+fin)
    metricdf = pd.read_csv(fin)

    # Base metric names: strip the aggregation / direction suffixes so that
    # read/write (ld/st) variants of one metric share a single name.
    unique_metrics = metricdf["Metric Name"].unique()
    unique_metrics = set([x.replace(".sum","").replace("_write","").replace("_read","").replace("_ld","").replace("_st","") for x in unique_metrics])

    # Every row starts as a "total" measurement of a single kernel invocation.
    metricdf["Metric Type"] = "total"
    metricdf["Calls"] = 1

    # Tag rows from the raw metric name. The markers are literal substrings,
    # so regex matching is switched off; otherwise the "." in ".per_second"
    # would match any character.
    for marker, metric_type in metric_type_markers:
        metricdf.loc[metricdf["Metric Name"].str.contains(marker, regex=False), "Metric Type"] = metric_type

    # Collapse the raw names onto their base name. This must happen after the
    # type tagging above, which relies on the suffixes still being present.
    for metric in unique_metrics:
        metricdf.loc[ metricdf[ "Metric Name"].str.startswith(metric), "Metric Name" ] = metric

    # Keep only the columns transpose_frame reads.
    tmpdf = metricdf[["Kernel Name", "Calls", "Metric Name", "Metric Type", "Metric Value"]]

    # Fold this file's metrics into the running profile.
    profiledf = transpose_frame(profiledf,tmpdf)


### Processing ../data/cuda_time.log


Unnamed: 0,Kernel Name,Calls
0,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x1...,2
1,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x6...,2
2,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_256x1...,4
3,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x1...,4
4,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x6...,1
...,...,...
91,volta_fp16_sgemm_fp16_32x32_sliced1x4_nt,1
92,volta_s884cudnn_fp16_128x128_ldg8_wgrad_exp_in...,1
93,volta_s884cudnn_fp16_128x128_ldg8_wgrad_idx_ex...,7
94,volta_s884cudnn_fp16_256x128_ldg8_wgrad_idx_ex...,5



### Processing ../data/dram.log

### Processing ../data/ffma_flops.log

### Processing ../data/hp_flops.log

### Processing ../data/sp_flops.log

### Processing ../data/tc_flops.log

### Processing ../data/tc_time.log


In [6]:
# Show the profile accumulated by the ingestion loop. The commented-out lines
# are optional sanity checks (re-grouping per kernel, total CUDA / TC time).
#profiledf = profiledf.groupby('Kernel Name').sum().reset_index()
display(profiledf)
#print(profiledf['CUDA Time'].sum())
#print(profiledf['TC Time'].sum())

Unnamed: 0,Kernel Name,Calls,CUDA Time,DRAM Transactions,FP16 FLOPs,FP32 FLOPs,TC FLOP Rates,TC Time
0,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x1...,2,0.001822,4569197,0,56623104,9.594375e+13,0.001825
1,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_128x6...,2,0.000198,900782,0,14155776,5.355625e+13,0.000198
2,Volta_hmma_implicit_gemm_fprop_fp32_nhwc_256x1...,4,0.000351,882535,0,7077888,8.161563e+13,0.000354
3,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x1...,4,0.000434,1280723,0,13565952,5.822188e+13,0.000433
4,Volta_hmma_implicit_gemm_wgrad_fp32_nhwc_128x6...,1,0.002456,22003005,0,3465216,6.655000e+13,0.002442
...,...,...,...,...,...,...,...,...
91,volta_fp16_sgemm_fp16_32x32_sliced1x4_nt,1,0.001674,14423539,0,14497857536,0.000000e+00,0.000000
92,volta_s884cudnn_fp16_128x128_ldg8_wgrad_exp_in...,1,0.001766,20945976,0,188743680,9.648750e+13,0.001781
93,volta_s884cudnn_fp16_128x128_ldg8_wgrad_idx_ex...,7,0.003251,17649723,0,283115520,8.827321e+13,0.003240
94,volta_s884cudnn_fp16_256x128_ldg8_wgrad_idx_ex...,5,0.038814,184473192,0,132120576,1.126075e+14,0.038854


# Compute AI Results

In [7]:
# NOTE: this cell consumes the 'TC FLOP Rates' and 'DRAM Transactions' columns,
# so it can only be run once per freshly built profiledf.

# Tensor-core FLOPs = achieved tensor-core FLOP rate x tensor-core time.
# pop() hands back the rate column and removes it from the frame in one step.
profiledf['TC FLOPs'] = profiledf.pop('TC FLOP Rates') * profiledf['TC Time']

# Total FLOPs per kernel: half precision + single precision + tensor core.
non_tc_flops = profiledf['FP16 FLOPs'] + profiledf['FP32 FLOPs']
profiledf['FLOPs'] = non_tc_flops + profiledf['TC FLOPs']

# DRAM traffic in bytes: each transaction is counted as 32 bytes.
profiledf['DRAM Bytes'] = profiledf.pop('DRAM Transactions') * 32.

# Arithmetic intensity with respect to DRAM traffic.
profiledf["DRAM AI"] = profiledf["FLOPs"] / profiledf["DRAM Bytes"]

# Achieved performance, scaled by 1e9 to GFlop/s.
giga_time = profiledf['CUDA Time']*10**9
profiledf['Performance GFlop/s'] = profiledf['FLOPs'] / giga_time

# Most expensive kernels first.
profiledf = profiledf.sort_values('CUDA Time', ascending=False)
profiledf = profiledf.reset_index(drop=True)

total_calls = profiledf['Calls'].sum()
print("Total invocations: {}".format(total_calls))
display(profiledf)

Total invocations: 3957


Unnamed: 0,Kernel Name,Calls,CUDA Time,FP16 FLOPs,FP32 FLOPs,TC Time,TC FLOPs,FLOPs,DRAM Bytes,DRAM AI,Performance GFlop/s
0,volta_fp16_s884cudnn_fp16_128x128_ldg8_dgrad_f...,19,0.051883,0,2251292672,0.052089,5.266367e+12,5.268618e+12,7.386824e+09,713.245423,101547.425981
1,volta_s884cudnn_fp16_256x128_ldg8_wgrad_idx_ex...,5,0.038814,0,132120576,0.038854,4.375246e+12,4.375379e+12,5.903142e+09,741.194844,112728.160619
2,volta_fp16_s884cudnn_fp16_128x128_ldg8_relu_f2...,3,0.032150,70778880,1132462080,0.032146,3.085339e+12,3.086542e+12,3.878862e+09,795.733830,96004.842270
3,volta_fp16_s884cudnn_fp16_256x128_ldg8_relu_f2...,15,0.015570,15982592,255721472,0.015579,1.489262e+12,1.489534e+12,3.028262e+09,491.877581,95664.858569
4,void tensorflow::functor::PadInputCustomKernel...,9,0.014370,0,0,0.000000,0.000000e+00,0.000000e+00,3.575705e+09,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...
91,"void nhwcToFoldedNhwcKernel<__half,__half,floa...",4,0.000031,1806336,1806336,0.000000,0.000000e+00,3.612672e+06,8.642592e+06,0.418008,116.869565
92,void Eigen::internal::EigenMetaKernel<Eigen::T...,1,0.000030,0,21233664,0.000000,0.000000e+00,2.123366e+07,2.129491e+07,0.997124,708.166489
93,void tensorflow::functor::CleanupSegments<Eige...,1,0.000011,528384,0,0.000000,0.000000e+00,5.283840e+05,2.963200e+04,17.831533,49.437126
94,volta_fp16_sgemm_fp16_128x32_nt,1,0.000010,0,8912896,0.000000,0.000000e+00,8.912896e+06,1.256576e+06,7.093002,867.688474


In [8]:
# Aggregate the per-kernel profile into a single application-level row.
# (No copy of profiledf is taken: the summary frame is built from scratch.)

# Totals over all kernels.
totalFLOPs = profiledf['FLOPs'].sum()
totalTime  = profiledf['CUDA Time'].sum()
totalBytes = profiledf['DRAM Bytes'].sum()

# Overall performance (scaled by 1e9 to GFlop/s) and DRAM arithmetic intensity.
finalPerf  = totalFLOPs / (totalTime*10**9)
finalAI    = totalFLOPs / totalBytes

# One-row summary frame; the totals are appended after the derived metrics.
df = {'Kernel Name': ['TensorFlow'], 'Performance GFlop/s': [finalPerf], 'DRAM AI': [finalAI]}
combineddf = pd.DataFrame(data=df)
combineddf['FLOPs'] = totalFLOPs
combineddf['CUDA Time'] = totalTime
combineddf['DRAM Bytes'] = totalBytes

display(combineddf)

Unnamed: 0,Kernel Name,Performance GFlop/s,DRAM AI,FLOPs,CUDA Time,DRAM Bytes
0,TensorFlow,60732.288782,265.939498,17556340000000.0,0.289078,66016310000.0


# Export Data

In [9]:
# to_csv does not create missing directories, so make sure the output
# directory exists before writing (no-op when it is already there).
os.makedirs(outputdir, exist_ok=True)

# Per-kernel profile and the single-row application summary.
profiledf.to_csv(os.path.join(outputdir,"full_profile.csv"))
combineddf.to_csv(os.path.join(outputdir,"combined_profile.csv"))