In [1]:
import os
from numpy import *
import matplotlib.pyplot as plt
import pandas
import numpy as np
import seaborn as sb
from tabulate import tabulate

import dataframe_image as dfi
sb.set()

In [2]:
# Re-import edited modules automatically before each cell executes, so
# changes written to local .py files are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [3]:
# Default figure size, [width, height] in inches.
plt.rcParams["figure.figsize"] = [7, 5]
# Set up with a higher resolution screen (useful on Mac)
%config InlineBackend.figure_format = 'retina'

<hr style="border-width:4px; border-color:coral"/>

# Timing : Read the data

<hr style="border-width:4px; border-color:coral"/>

In this notebook, you will 

* Read in the data from a data file

* Create a Pandas DataFrame

* Display the DataFrame

* Explore the information in the DataFrame and practice MultiIndex indexing in pandas.

In [4]:
# Examples to load; each name is also a sub-directory of path_to_data.
ex_list = ['shockbubble','bump','radial','swirl']  
#ex_list = ['bump'] 
example = ex_list[:]   # shallow copy of the example list

path_to_data = './results'   # Root results directory, one sub-directory per example.

<hr style="border-width:4px; border-color:coral"/>

# Data Tools (file)

<hr style="border-width:4px; border-color:coral"/>

In [5]:
%%file data_tools.py
import os
import pandas
import numpy as np

# Names of the columns of interest in a results.out table.
cols = ['walltime','advance','ghostfill','regrid','adapt',
            'adv_steps','adv_step2', 'mx', 'patch_comm', 'output', 'grids_proc',
            'memcopy_h2h','memcopy_h2d','memcopy_d2h']

    
# Intended dtype for each entry of `cols` (one key per column name):
# counters and sizes are int, everything else is float.
dtypes = {'walltime': float,
          'advance': float,
          'ghostfill': float,
          'regrid': float,
          'adapt': float,
          'adv_steps': int,
          'adv_step2': int,
          'mx': int,
          'patch_comm': float,
          'output' : float,
          'grids_proc' : int,          
          'memcopy_h2h': float,
          'memcopy_h2d': float,
          'memcopy_d2h': float}    

def read_data(dir,device):
    """Load the results table for one example/device pair.

    Reads ``<dir>/<device>/results.out`` as a whitespace-delimited table and
    returns its rows ordered by the ``p`` column, together with a set of
    per-column display formatters.

    Parameters
    ----------
    dir : str or os.PathLike
        Directory that holds the per-device sub-directories.  The name
        shadows the ``dir`` builtin; it is kept so that existing keyword
        callers continue to work.
    device : str
        Name of the sub-directory to read from (e.g. ``'gpu'`` or ``'cpu'``).

    Returns
    -------
    df : pandas.DataFrame
        Contents of the file, sorted by ascending ``p``.  The original row
        labels are kept (the index is not reset).
    fstr : dict
        Maps column names to formatting callables, suitable for
        ``DataFrame.style.format``.  Some keys name columns that this
        function does not create itself.

    Raises
    ------
    FileNotFoundError
        If ``results.out`` does not exist at the expected location.
    KeyError
        If the table has no ``p`` column.
    """
    fname = os.path.join(dir, device, 'results.out')

    # sep=r'\s+' is the supported equivalent of the deprecated
    # delim_whitespace=True option.
    df = pandas.read_table(fname, sep=r'\s+').sort_values('p')

    two_decimals = '{:.2f}'.format
    fstr = {'p' : '{:3d}'.format,
            'walltime' : two_decimals,
            'advance' : two_decimals,
            'ghostfill' : two_decimals,
            'patch_comm' : two_decimals,
            'regrid' : two_decimals,
            'partition' : two_decimals,
            'adapt' : two_decimals,
            'cfl' : two_decimals,
            'grids_proc' : '{:4d}'.format,
            'DOF/s' : '{:.1e}'.format,
            'Speedup': '{:.1f}'.format,
            'Eff.' : '{:.1f}%'.format,
            'output':'{:.1f}'.format,
            'memcopy_h2h' : two_decimals,
            'memcopy_d2h' : two_decimals,
            'memcopy_h2d' : two_decimals}

    return df,fstr

Overwriting data_tools.py


<hr style="border-width:4px; border-color:coral"/>

# Read data and set up Pandas MultiIndex

<hr style="border-width:4px; border-color:coral"/>

In [6]:
import data_tools
import os

idx = pandas.IndexSlice

procs = [1,2,4,8,16]
#procs = [1,2,4,6,8,16,24,48]

# Reuse the column list declared in data_tools instead of repeating it here,
# so it cannot drift out of sync with data_tools.dtypes.
cols = list(data_tools.cols)

# One row per (example, device, procs) combination; sort_index() orders the
# procs level ascending within each (example, device) group.
iterables = [ex_list, ['GPU','CPU'], procs]
index = pandas.MultiIndex.from_product(iterables,names=['example','device','procs'])
df = pandas.DataFrame(index=index,columns=cols).sort_index()

for d in ex_list:
    data_dir = os.path.join(path_to_data,d)
    df_gpu,fstr = data_tools.read_data(data_dir,'gpu')
    df_cpu,fstr = data_tools.read_data(data_dir,'cpu')

    for device,device_df in (('GPU',df_gpu),('CPU',df_cpu)):
        # Rows are copied by position (read_data returns them sorted by 'p'),
        # so each file must hold exactly one row per entry of `procs`.
        if len(device_df) != len(procs):
            raise ValueError(
                "{}/{}: results file has {} rows but {} process counts are expected".format(
                    d,device,len(device_df),len(procs)))
        df.loc[idx[d,device,:],:] = device_df[cols].values

# The frame starts out empty (object dtype); convert every column to the
# dtype declared in data_tools.
df = df.astype(data_tools.dtypes)

example_data = df

#df = df.style.background_gradient() #style the table
#dfi.export(df,"results.png") #save the table
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,walltime,advance,ghostfill,regrid,adapt,adv_steps,adv_step2,mx,patch_comm,output,grids_proc,memcopy_h2h,memcopy_h2d,memcopy_d2h
example,device,procs,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
bump,CPU,1,353.98,322.3,29.933,1.466,0.097312,1058390,321,32,0.005469,0.0,89,0.0,0.0,0.0
bump,CPU,2,243.57,162.28,16.521,0.70938,0.42092,529196,162,32,63.236,0.0,44,0.0,0.0,0.0
bump,CPU,4,72.55,57.378,6.9163,0.24586,0.17582,264598,57,32,7.6186,0.0,22,0.0,0.0,0.0
bump,CPU,8,40.94,29.756,4.2617,0.13932,0.16273,132299,29,32,6.3987,0.0,11,0.0,0.0,0.0
bump,CPU,16,23.781,15.332,2.4226,0.083104,0.14016,66150,15,32,5.5155,0.0,5,0.0,0.0,0.0
bump,GPU,1,40.77,30.313,8.136,1.4683,0.009016,212758,30,32,0.004478,0.0,23,3.0259,1.0299,0.95308
bump,GPU,2,37.837,21.431,4.2861,0.72755,0.35804,106379,21,32,10.126,0.0,12,1.4053,0.46034,0.3865
bump,GPU,4,31.854,23.516,1.8977,0.29661,0.025699,53190,23,32,4.801,0.0,5,0.41039,0.20793,0.15123
bump,GPU,8,31.802,22.215,1.2723,0.1405,0.060096,26595,22,32,6.4482,0.0,3,0.17542,0.1254,0.088389
bump,GPU,16,31.485,18.625,0.78441,0.064288,0.08767,13297,18,32,10.505,0.0,1,0.075427,0.078034,0.050153


<hr style="border-width:4px; border-color:coral"/>

# Exploring the data

<hr style="border-width:4px; border-color:coral"/>

The following will show you how to use a MultiIndex to extract data for a variety of purposes. 

In [7]:
# Keep only the rows of a single example; the 'example' level is dropped,
# leaving a frame indexed by (device, procs).

example_name = 'bump'
example_data = df.loc[example_name]

In [8]:
# Tabulate the 'adv_steps' counter of the selected example with one row per
# device and one column per process count, so the CPU and GPU codes can be
# checked for the same number of patch updates.

idx = pandas.IndexSlice
df_adv = example_data['adv_steps']
df_adv.unstack(level=-1)

procs,1,2,4,8,16
device,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CPU,1058390,529196,264598,132299,66150
GPU,212758,106379,53190,26595,13297


In [9]:
# 'adv_steps' counter for every example: rows are (example, device),
# columns are the process counts.
all_examples_cpu_to_gpu = idx[:, 'CPU':'GPU']
adv = df.loc[all_examples_cpu_to_gpu, 'adv_steps'].unstack(level=-1)
#dfi.export(adv,"results.png") #save the table
adv

[0516/033700.031309:INFO:headless_shell.cc(660)] Written to file /var/folders/wb/55mw2drx2y15qr4p01jy43lw0000gn/T/tmpse9zoaus/temp.png.


Unnamed: 0_level_0,procs,1,2,4,8,16
example,device,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
bump,CPU,1058390,529196,264598,132299,66150
bump,GPU,212758,106379,53190,26595,13297
radial,CPU,1058390,529196,264598,132299,66150
radial,GPU,1058390,529196,264598,132299,66150
shockbubble,CPU,258034,129017,64508,32254,16127
shockbubble,GPU,66640,42868,64508,32254,16127
swirl,CPU,3751550,1875780,937888,468944,234472
swirl,GPU,3751440,1875720,937860,468930,234465


In [10]:
# GPU rows of the selected example (device level dropped, indexed by procs)

df_gpu = example_data.loc['GPU']
df_gpu

Unnamed: 0_level_0,walltime,advance,ghostfill,regrid,adapt,adv_steps,adv_step2,mx,patch_comm,output,grids_proc,memcopy_h2h,memcopy_h2d,memcopy_d2h
procs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,40.77,30.313,8.136,1.4683,0.009016,212758,30,32,0.004478,0.0,23,3.0259,1.0299,0.95308
2,37.837,21.431,4.2861,0.72755,0.35804,106379,21,32,10.126,0.0,12,1.4053,0.46034,0.3865
4,31.854,23.516,1.8977,0.29661,0.025699,53190,23,32,4.801,0.0,5,0.41039,0.20793,0.15123
8,31.802,22.215,1.2723,0.1405,0.060096,26595,22,32,6.4482,0.0,3,0.17542,0.1254,0.088389
16,31.485,18.625,0.78441,0.064288,0.08767,13297,18,32,10.505,0.0,1,0.075427,0.078034,0.050153


In [11]:
# CPU rows of the selected example (device level dropped, indexed by procs)
df_cpu = example_data.loc['CPU']
df_cpu

Unnamed: 0_level_0,walltime,advance,ghostfill,regrid,adapt,adv_steps,adv_step2,mx,patch_comm,output,grids_proc,memcopy_h2h,memcopy_h2d,memcopy_d2h
procs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,353.98,322.3,29.933,1.466,0.097312,1058390,321,32,0.005469,0.0,89,0.0,0.0,0.0
2,243.57,162.28,16.521,0.70938,0.42092,529196,162,32,63.236,0.0,44,0.0,0.0,0.0
4,72.55,57.378,6.9163,0.24586,0.17582,264598,57,32,7.6186,0.0,22,0.0,0.0,0.0
8,40.94,29.756,4.2617,0.13932,0.16273,132299,29,32,6.3987,0.0,11,0.0,0.0,0.0
16,23.781,15.332,2.4226,0.083104,0.14016,66150,15,32,5.5155,0.0,5,0.0,0.0,0.0


In [12]:
# Extract data from columns that account for most of the time spent.
# .copy() makes df_cols an independent frame, so adding a column below
# cannot touch example_data or trigger pandas' chained-assignment warning.

df_cols = example_data.loc[:,['walltime','advance','ghostfill','patch_comm']].copy()

# Share of the wall time covered by advance + ghostfill + patch_comm
percent_of_time = df_cols['advance'] + df_cols['ghostfill'] + df_cols['patch_comm']
df_cols['Total (%)'] = 100*percent_of_time/df_cols['walltime']

# Register a formatter for the new column alongside the per-column formatters.
fstr['Total (%)'] = '{:.1f}%'.format
df_cols.style.format(fstr).set_caption(
    'Most of the time is spent in '
    'advance, ghostfill and patch communication.  The last column '
    'indicates what percent of total time is spent in these '
    'three columns.')

Unnamed: 0_level_0,Unnamed: 1_level_0,walltime,advance,ghostfill,patch_comm,Total (%)
device,procs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CPU,1,353.98,322.3,29.93,0.01,99.5%
CPU,2,243.57,162.28,16.52,63.24,99.4%
CPU,4,72.55,57.38,6.92,7.62,99.1%
CPU,8,40.94,29.76,4.26,6.4,98.7%
CPU,16,23.78,15.33,2.42,5.52,97.9%
GPU,1,40.77,30.31,8.14,0.0,94.3%
GPU,2,37.84,21.43,4.29,10.13,94.7%
GPU,4,31.85,23.52,1.9,4.8,94.9%
GPU,8,31.8,22.21,1.27,6.45,94.1%
GPU,16,31.48,18.62,0.78,10.51,95.0%


In [13]:
# CPU and GPU timings of the selected example side by side: the first level
# of the row index (the device) is pivoted into the columns.

timing_cols = ['walltime','advance','ghostfill','patch_comm']
df_gpu = example_data[timing_cols]   # holds both devices despite the name
df_gpu.unstack(level=0).style.format('{:.2f}'.format)

Unnamed: 0_level_0,walltime,walltime,advance,advance,ghostfill,ghostfill,patch_comm,patch_comm
device,CPU,GPU,CPU,GPU,CPU,GPU,CPU,GPU
procs,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,353.98,40.77,322.3,30.31,29.93,8.14,0.01,0.0
2,243.57,37.84,162.28,21.43,16.52,4.29,63.24,10.13
4,72.55,31.85,57.38,23.52,6.92,1.9,7.62,4.8
8,40.94,31.8,29.76,22.21,4.26,1.27,6.4,6.45
16,23.78,31.48,15.33,18.62,2.42,0.78,5.52,10.51


In [14]:
# Ratio of CPU time to GPU time at equal process counts for the selected
# example; the largest ratio in each column is highlighted.

idx = pandas.IndexSlice
df1 = example_data[['walltime','advance','ghostfill','patch_comm']]
cpu_over_gpu = df1.loc['CPU'].div(df1.loc['GPU'])
cpu_over_gpu.style.format('{:.1f}'.format).highlight_max()

Unnamed: 0_level_0,walltime,advance,ghostfill,patch_comm
procs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,8.7,10.6,3.7,1.2
2,6.4,7.6,3.9,6.2
4,2.3,2.4,3.6,1.6
8,1.3,1.3,3.3,1.0
16,0.8,0.8,3.1,0.5


In [15]:
# Timings of the GPU run with procs == 2, kept as a one-row DataFrame.
# The row is selected by its 'procs' label rather than by position, so it
# stays correct if the list of process counts changes.
df2_gpu = df1.loc['GPU'].loc[[2]]

df2_gpu

Unnamed: 0_level_0,walltime,advance,ghostfill,patch_comm
procs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,37.837,21.431,4.2861,10.126


In [16]:
# df48_cpu = df1.loc['CPU'].iloc[[7]]
# df48_cpu 

In [17]:
# Wall time per process count, one column per device

idx = pandas.IndexSlice
df_scale = example_data['walltime'].unstack(level=0)
df_scale

device,CPU,GPU
procs,Unnamed: 1_level_1,Unnamed: 2_level_1
1,353.98,40.77
2,243.57,37.837
4,72.55,31.854
8,40.94,31.802
16,23.781,31.485


In [18]:
# Strong scaling: T_1/T_p, the wall time on one process divided by the wall
# time on p processes, computed separately for each device.
df_scale = example_data['walltime'].unstack(level=0)
for device in ('CPU','GPU'):
    walltime = df_scale[device]
    df_scale['{}_scaling'.format(device)] = walltime.loc[1]/walltime
(
    df_scale.style
      .format('{:.1f}'.format)
      .background_gradient(subset=['CPU_scaling','GPU_scaling'],cmap='YlOrBr',low=0,high=1)
)

device,CPU,GPU,CPU_scaling,GPU_scaling
procs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,354.0,40.8,1.0,1.0
2,243.6,37.8,1.5,1.1
4,72.5,31.9,4.9,1.3
8,40.9,31.8,8.6,1.3
16,23.8,31.5,14.9,1.3


In [19]:
# Ratio of the CPU wall time at each process count to the GPU wall time
# measured with procs == 1 and with procs == 2.
df_scale = example_data['walltime'].unstack(level=0)

cpu_walltime = df_scale['CPU']
for gpu_procs,label in ((1,'CPUs/1 GPU'),(2,'CPUs/2 GPUs')):
    df_scale[label] = cpu_walltime/df_scale.loc[gpu_procs,'GPU']
(
    df_scale.style
      .format('{:.1f}'.format)
      .background_gradient(subset=['CPUs/1 GPU','CPUs/2 GPUs'],cmap='YlOrBr',low=0,high=1)
      .set_caption("Numbers indicate speed-up of multiple CPUs vs. 1 or 2 GPUs")
)

device,CPU,GPU,CPUs/1 GPU,CPUs/2 GPUs
procs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,354.0,40.8,8.7,9.4
2,243.6,37.8,6.0,6.4
4,72.5,31.9,1.8,1.9
8,40.9,31.8,1.0,1.1
16,23.8,31.5,0.6,0.6


In [20]:
# # Compare speed-up of GPU over the CPU: 24 CPUs Vs 1 GPU, 48 CPUS VS 6 GPUs
# df1_gpu = df1.loc['GPU'].iloc[[0]]
# df6_gpu = df1.loc['GPU'].iloc[[3]]
# df24_cpu = df1.loc['CPU'].iloc[[6]]
# df48_cpu = df1.loc['CPU'].iloc[[7]]



# col_names = ['Resources','walltime','advance','ghostfill','patch_comm','Total']
# data = [["24 CPU Cores",np.array(df24_cpu['walltime'])[0]
#         ,np.array(df24_cpu['advance'])[0]
#         ,np.array(df24_cpu['ghostfill'])[0]
#         ,np.array(df24_cpu['patch_comm'])[0], sum([np.array(df24_cpu['walltime'])[0]
#         ,np.array(df24_cpu['advance'])[0]
#         ,np.array(df24_cpu['ghostfill'])[0]
#         ,np.array(df24_cpu['patch_comm'])[0]])
#         ],
#        ["1 GPU",np.array(df1_gpu['walltime'])[0]
#         ,np.array(df1_gpu['advance'])[0]
#         ,np.array(df1_gpu['ghostfill'])[0]
#         ,np.array(df1_gpu['patch_comm'])[0],sum([np.array(df1_gpu['advance'])[0]
#         ,np.array(df1_gpu['ghostfill'])[0]
#         ,np.array(df1_gpu['patch_comm'])[0]])
#        ],
#        ['Speedup',np.array(df24_cpu['walltime'])[0]/np.array(df1_gpu['walltime'])[0],
#         np.array(df24_cpu['advance'])[0]/np.array(df1_gpu['advance'])[0],
#         np.array(df24_cpu['ghostfill'])[0]/np.array(df1_gpu['ghostfill'])[0],
#         np.array(df24_cpu['patch_comm'])[0]/np.array(df1_gpu['patch_comm'])[0],sum([np.array(df24_cpu['walltime'])[0]
#         ,np.array(df24_cpu['advance'])[0]
#         ,np.array(df24_cpu['ghostfill'])[0]
#         ,np.array(df24_cpu['patch_comm'])[0]])/sum([np.array(df1_gpu['advance'])[0]
#         ,np.array(df1_gpu['ghostfill'])[0]
#         ,np.array(df1_gpu['patch_comm'])[0]])

#        ],
#        ["48 CPU Cores",np.array(df48_cpu['walltime'])[0]
#         ,np.array(df48_cpu['advance'])[0]
#         ,np.array(df48_cpu['ghostfill'])[0]
#         ,np.array(df48_cpu['patch_comm'])[0],sum([np.array(df48_cpu['walltime'])[0]
#         ,np.array(df48_cpu['advance'])[0]
#         ,np.array(df48_cpu['ghostfill'])[0]
#         ,np.array(df48_cpu['patch_comm'])[0]])
#        ],
#        ["6 GPUs",np.array(df6_gpu['walltime'])[0]
#         ,np.array(df6_gpu['advance'])[0]
#         ,np.array(df6_gpu['ghostfill'])[0]
#         ,np.array(df6_gpu['patch_comm'])[0],sum([np.array(df1_gpu['walltime'])[0]
#         ,np.array(df6_gpu['advance'])[0]
#         ,np.array(df6_gpu['ghostfill'])[0]
#         ,np.array(df6_gpu['patch_comm'])[0]])
#        ],
#        ['Speedup',np.array(df48_cpu['walltime'])[0]/np.array(df6_gpu['walltime'])[0],
#         np.array(df48_cpu['advance'])[0]/np.array(df6_gpu['advance'])[0],
#         np.array(df48_cpu['ghostfill'])[0]/np.array(df6_gpu['ghostfill'])[0],
#         np.array(df48_cpu['patch_comm'])[0]/np.array(df6_gpu['patch_comm'])[0],sum([np.array(df48_cpu['walltime'])[0]
#         ,np.array(df48_cpu['advance'])[0]
#         ,np.array(df48_cpu['ghostfill'])[0]
#         ,np.array(df48_cpu['patch_comm'])[0]])/sum([np.array(df6_gpu['walltime'])[0]
#         ,np.array(df6_gpu['advance'])[0]
#         ,np.array(df6_gpu['ghostfill'])[0]
#         ,np.array(df6_gpu['patch_comm'])[0]])
#        ]
#        ]

# print('Table: Bump GPU vs CPU speed up for walltime, advance, ghostfill, and patch communication')
# print(tabulate(data, headers=col_names, tablefmt="fancy_grid"))


## Plotting

In [21]:
# #extract rows
# sw = df.loc['swirl']
# sb = df.loc['shockbubble']
# bp = df.loc['bump']
# rd = df.loc['radial']

# sw_gpu = sw.loc[('GPU'),:]
# sb_gpu = sb.loc[('GPU'),:]
# bp_gpu = bp.loc[('GPU'),:]
# rd_gpu = rd.loc[('GPU'),:]

# sw_cpu = sw.loc[('CPU'),:]
# sb_cpu = sb.loc[('CPU'),:]
# bp_cpu = bp.loc[('CPU'),:]
# rd_cpu = rd.loc[('CPU'),:]

# #cpu_walltime
# sw_cpu_w = np.array(sw_cpu['walltime'])
# sb_cpu_w = np.array(sb_cpu['walltime'])
# bp_cpu_w = np.array(bp_cpu['walltime'])
# rd_cpu_w = np.array(rd_cpu['walltime'])

# #gpu_walltime
# sw_gpu_w = np.array(sw_gpu['walltime'])
# sb_gpu_w = np.array(sb_gpu['walltime'])
# bp_gpu_w = np.array(bp_gpu['walltime'])
# rd_gpu_w = np.array(rd_gpu['walltime'])

# wall_cpu = [max(sb_cpu_w), max(bp_cpu_w), max(rd_cpu_w), max(sw_cpu_w)]
# wall_gpu = [max(sb_gpu_w), max(bp_gpu_w), max(rd_gpu_w), max(sw_gpu_w)]

# dfp = pandas.DataFrame({'cpu': wall_cpu,
#                    'gpu': wall_gpu}, index=ex_list)

# ax = dfp.plot.bar(rot=0)
# plt.ylabel('walltime')
# plt.xlabel('examples')
# plt.savefig('wall_time')
# plt.show()


In [22]:
# Use the explicit np.sqrt so this cell does not depend on a wildcard import.
1/np.sqrt(12)

0.2886751345948129