In [1]:
%matplotlib notebook

import os
import pandas
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Re-import edited modules automatically, so changes written to data_tools.py
# below are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

<hr style="border-width:4px; border-color:coral"/>

# Set example

<hr style="border-width:4px; border-color:coral"/>

In [3]:
# Benchmark problems covered by this notebook.
ex_list = ['swirl', 'acoustics', 'radialdam', 'shockbubble']

# Timing pairs (advance_step2, nvprof_time) recorded over 100 steps of each code.
# nvprof_time is the time nvprof reports for cudaclaw_flux2_and_update_batch;
# the quantity of interest is the ratio of the first entry to the second.
nvidia_factors = dict(
    radialdam=(34.0728, 20.7257),
    swirl=(2.48783, 1.54757),
    shockbubble=(10.7283, 8.06588),
    acoustics=(19.7589, 11.6447),
)

# Example used by the single-example cells further down.
example = ex_list[3]

<hr style="border-width:4px; border-color:coral"/>

# Data Tools (file)

<hr style="border-width:4px; border-color:coral"/>

In [4]:
%%file data_tools.py
import os
import pandas
import numpy as np
import matplotlib.pyplot as plt

# The 20 'tab20' colours; consecutive entries form one two-colour pair.
cmap = plt.get_cmap('tab20').colors

# One colour pair per AMR task: entry k of the tuple below gets cmap[2k:2k+2].
amr_colors = {
    task: cmap[2*k:2*k + 2]
    for k, task in enumerate((
        'advance',   # blue
        'ghost',     # orange
        'regrid',    # green
        'comm',      # red
        'memcopy',   # purple
        'other',     # brown
        'extra1',    # pink
        'extra2',    # grey
        'extra3',    # olive
        'extra4',    # cyan
    ))
}
  
    
# Columns of results.out that the analysis works with, in display order.
cols = [
    'walltime', 'advance', 'ghostfill', 'regrid', 'adapt',
    'adv_steps', 'adv_step2', 'mx', 'patch_comm', 'output', 'grids_proc',
    'memcopy_h2h', 'memcopy_h2d', 'memcopy_d2h',
]

# Target dtype for every column above: float by default, int for the four
# columns named below.
# NOTE(review): adv_step2 is cast to int; if results.out stores it as a
# fractional time the cast truncates it -- confirm against the data files.
dtypes = {name: float for name in cols}
dtypes.update(dict.fromkeys(('adv_steps', 'adv_step2', 'mx', 'grids_proc'), int))

def read_data(dir,device):
    """Load ``<dir>/<device>/results.out`` and return it sorted by process count.

    Parameters
    ----------
    dir : str or path-like
        Directory holding the per-device result folders.
    device : str or path-like
        Sub-directory name (the notebook passes 'gpu' or 'cpu').

    Returns
    -------
    df : pandas.DataFrame
        Whitespace-delimited table read from the file, rows ordered by the
        'p' column.  The original row labels are kept.
    fstr : dict
        Maps column names to string-formatting callables.  Not every key
        needs to be a column of ``df``.
    """
    fname = os.path.join(dir, device, 'results.out')

    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True; sorting returns a new frame instead of mutating.
    df = pandas.read_table(fname, sep=r'\s+').sort_values('p')

    two_decimals = '{:.2f}'.format
    fstr = {'p' : '{:3d}'.format,
            'walltime' : two_decimals,
            'advance' : two_decimals,
            'ghostfill' : two_decimals,
            'patch_comm' : two_decimals,
            'regrid' : two_decimals,
            'partition' : two_decimals,
            'adapt' : two_decimals,
            'cfl' : two_decimals,
            'grids_proc' : '{:4d}'.format,
            'DOF/s' : '{:.1e}'.format,
            'Speedup': '{:.1f}'.format,
            'Eff.' : '{:.1f}%'.format,
            'output': '{:.1f}'.format,
            'memcopy_h2h' : two_decimals,
            'memcopy_d2h' : two_decimals,
            'memcopy_h2d' : two_decimals}

    return df,fstr

def strong_scaling(df,field='walltime'):
    """Draw a log-log strong-scaling plot of ``df[field]`` on the current figure.

    The x values are taken from level 0 of ``df.index``.  Three curves are
    drawn: the data, a straight-line fit in log-log space (computed without
    the last data point), and a line with the same intercept but slope -1.
    Finishes by calling ``plt.legend()`` and ``plt.show()``.
    """
    nprocs = df.index.get_level_values(0).values
    timings = df[field].values
    plt.loglog(nprocs, timings, '.-', markersize=15, label=field.capitalize())

    # Linear fit of log(time) against log(procs); the last point is excluded.
    log_p = np.log(nprocs)
    log_t = np.log(np.array(df[field].values))
    coeffs = np.polyfit(log_p[:-1], log_t[:-1], 1)
    fit_label = 'Best-fit (slope={:6.2f})'.format(coeffs[0])
    plt.loglog(nprocs, np.exp(np.polyval(coeffs, log_p)), 'r-',
               label=fit_label, linewidth=1)

    # Reuse the fitted intercept with the slope forced to -1.
    coeffs[0] = -1
    plt.loglog(nprocs, np.exp(np.polyval(coeffs, log_p)), 'k--',
               label='Theoretical', linewidth=0.5)

    # Pad the x range by a factor of two on either side; one tick per run.
    lo, hi = np.log2(nprocs[0]), np.log2(nprocs[-1])
    plt.xlim([2**(lo-1), 2**(hi+1)])
    plt.xticks(nprocs, [str(p) for p in nprocs])

    plt.legend()
    plt.show()
    

def efficiency(df,field='walltime'):
    """Plot parallel efficiency (in percent) of ``df[field]`` on the current figure.

    The process counts are read from level 0 of ``df.index``.  The run with the
    smallest process count is the baseline, so with baseline count p0 and time
    T0 the efficiency is ``100 * (T0*p0/T) / p``.  When a 1-process run is
    present this reduces to ``100 * (T0/T) / p``.  Frames without a 1-process
    row are therefore accepted; looking up index label 1 directly would raise
    KeyError for them.

    Also draws a dashed 100% reference line, then calls ``plt.show()``.
    """
    procs = df.index.get_level_values(0).values
    times = df[field].values

    # Baseline = run on the fewest processes (multiplying by 1 is exact when
    # that run is the 1-process one).
    base = np.argmin(procs)
    S = (times[base]*procs[base])/times
    E = 100*S/procs

    plt.semilogx(procs,E,'.-',markersize=15)
    plt.semilogx(procs,[100]*len(procs),'k--',linewidth=2)

    plt.xlabel('Cores',fontsize=16)
    plt.ylabel('Efficiency (%)',fontsize=16)
    plt.title("Efficiency (%)")
    # The first curve is the measured efficiency, not a time.
    plt.legend(['Efficiency', 'Perfect efficiency'])

    # Pad the x range by a factor of two on either side; one tick per run.
    p0 = np.log2(procs[0])
    p1 = np.log2(procs[-1])
    plt.xlim([2**(p0-1), 2**(p1+1)])
    plt.xticks(procs,[str(p) for p in procs])

    plt.ylim([10,110])
    plt.grid()
    plt.show()
    
def bar_plot(df_in):
    """Grouped horizontal bars of Walltime, Advance and Ghost, one bar per run.

    ``df_in`` must provide the columns 'walltime', 'advance', 'ghostfill' and
    'patch_comm'; level 0 of its index supplies the process counts used in
    the legend.  'Ghost' is ghostfill + patch_comm.  The input is not
    modified.  Ends with ``plt.show()`` and returns nothing.
    """
    nprocs = df_in.index.get_level_values(0).values
    run_labels = ['{:d} proc(s)'.format(int(p)) for p in nprocs]

    # ------------------------- Pre-processing ---------------------------
    pretty = {'walltime' : 'Walltime', 'advance' : 'Advance', 'ghost' : 'Ghost'}
    timings = df_in.copy()
    timings['ghost'] = timings['ghostfill'] + timings['patch_comm']
    timings = timings.rename(columns=pretty)

    # Rows reversed, then transposed: three y-groups, one column per run.
    groups = [pretty['ghost'], pretty['advance'], pretty['walltime']]
    df_plot = timings[groups].iloc[::-1].transpose()
    df_plot.columns = run_labels[::-1]

    # ------------------------ Plotting command --------------------------
    ax = df_plot.plot.barh(width=0.85)
    plt.xlabel('Time (seconds)')

    # --------------- Post-processing (legend, axis tick marks) ----------
    # Flip the legend so it is listed in the opposite order to the columns.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], loc='lower right')

    ax.xaxis.set_major_locator(plt.MultipleLocator(10))   # every 10 seconds
    ax.xaxis.set_minor_locator(plt.MultipleLocator(5))    # every 5 seconds
    ax.xaxis.set_major_formatter(
        plt.FuncFormatter(lambda secs, _pos: '{:.2f}'.format(secs)))

    plt.grid()
    plt.show()
    
def barh_plot(df_in):
    """Stacked horizontal bars of the per-task timings, one bar per run.

    ``df_in`` must provide the columns 'walltime', 'advance', 'ghostfill',
    'patch_comm', 'adapt', 'regrid' and 'output'; level 0 of its index
    supplies the process counts used as bar labels.  The input is not
    modified.  Ends with ``plt.show()`` and returns nothing.

    The stack is advance, ghostfill, regrid, patch_comm and 'Other', where
    Other = walltime - advance - ghostfill - patch_comm - adapt - regrid - output.
    'adapt' and 'output' are subtracted but not plotted, so each bar totals
    walltime - adapt - output.

    The x axis is in seconds.  The tick labels show seconds as well, so they
    agree with the axis label (they used to be the value divided by 60).
    """
    procs = df_in.index.get_level_values(0).values

    # -------------------------- Pre-processing ----------------------------
    df = df_in.copy()
    df['Other'] = df['walltime'] - df['advance'] - df['ghostfill'] - df['patch_comm'] \
            -df['adapt']-df['regrid']-df['output']

    # iloc[::-1] reverses the rows; the labels are reversed to match.
    df_plot = df[['advance','ghostfill','regrid','patch_comm','Other']].iloc[::-1].copy()
    df_plot.index = ['{:d} proc(s)'.format(int(p)) for p in procs][::-1]

    # -------------------------- Plotting command -------------------------
    ax = df_plot.plot.barh(width=0.85,stacked=True)
    plt.xlabel('Time (seconds)')

    # --------------------------- Post-processing -------------------------
    ax.xaxis.set_major_locator(plt.MultipleLocator(10))   # every 10 seconds
    ax.xaxis.set_minor_locator(plt.MultipleLocator(5))    # every 5 seconds
    ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda value,tick_number : '{:.2f}'.format(value)))

    plt.grid()
    plt.show()
    


Writing data_tools.py


<hr style="border-width:4px; border-color:coral"/>

# Read data and set up Pandas MultiIndex

<hr style="border-width:4px; border-color:coral"/>

In [5]:
import data_tools
import os

idx = pandas.IndexSlice

procs = [1, 2, 4, 8, 16]

cols = [
    'walltime', 'advance', 'ghostfill', 'regrid', 'adapt',
    'adv_steps', 'adv_step2', 'mx', 'patch_comm', 'output', 'grids_proc',
    'memcopy_h2h', 'memcopy_h2d', 'memcopy_d2h',
]

# Empty table with one row per (example, device, procs) combination.
index = pandas.MultiIndex.from_product(
    [ex_list, ['GPU', 'CPU'], procs],
    names=['example', 'device', 'procs'])
df = pandas.DataFrame(index=index, columns=cols).sort_index()

# Fill the table block by block from results/<example>/<device>/results.out.
# Values are assigned positionally, so each file must contain exactly one row
# per entry of `procs` (read_data returns them sorted by 'p').
for name in ex_list:
    data_dir = os.path.join('results', name)
    for device, subdir in (('GPU', 'gpu'), ('CPU', 'cpu')):
        frame, fstr = data_tools.read_data(data_dir, subdir)
        df.loc[idx[name, device, :], :] = frame[cols].values

# The frame was created without data; cast every column to its declared dtype.
df = df.astype(data_tools.dtypes)

example_data = df
df

FileNotFoundError: [Errno 2] File b'results/swirl/gpu/results.out' does not exist: b'results/swirl/gpu/results.out'

<hr style="border-width:4px; border-color:coral"/>

# Practice using a MultiIndex table

<hr style="border-width:4px; border-color:coral"/>

In [23]:
# All rows (both devices, every process count) belonging to a single example;
# the 'example' index level is dropped from the result.

example_data.xs('acoustics')

Unnamed: 0_level_0,Unnamed: 1_level_0,walltime,advance,ghostfill,regrid,adapt,adv_steps,adv_step2,mx,patch_comm,output,grids_proc,memcopy_h2h,memcopy_h2d,memcopy_d2h
device,procs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
CPU,1,1674.9,1493.8,171.05,5.4031,4.3781,6062850,1491,32,0.01466,0.0,2099,0.0,0.0,0.0
CPU,2,850.02,747.17,93.365,2.933,2.346,3031420,745,32,4.0228,0.0,1049,0.0,0.0,0.0
CPU,4,430.38,374.45,49.575,1.4712,1.2657,1515710,373,32,3.497,0.0,524,0.0,0.0,0.0
CPU,8,236.51,187.23,29.654,1.0435,0.73539,757856,186,32,16.681,0.0,262,0.0,0.0,0.0
CPU,16,127.19,93.668,17.554,0.54539,0.50712,378928,93,32,13.876,0.0,131,0.0,0.0,0.0
GPU,1,480.81,292.26,173.17,10.422,4.1868,6062850,290,32,0.032309,0.0,2099,54.551,15.321,14.761
GPU,2,258.29,151.48,94.702,4.673,2.2565,3031420,150,32,4.6466,0.0,1049,31.151,8.151,7.3223
GPU,4,132.72,76.484,50.008,2.048,1.1441,1515710,75,32,2.7627,0.0,524,15.576,4.1627,3.6495
GPU,8,75.873,39.399,28.534,1.0685,0.66606,757856,39,32,5.3637,0.0,262,7.146,2.0542,1.8523
GPU,16,61.776,30.747,18.244,0.68175,0.45191,378928,30,32,10.56,0.0,131,3.9825,1.1273,0.9807


In [24]:
# GPU rows of the selected example: a partial key on the first two index
# levels leaves a frame indexed by 'procs' only.

gpu_key = (example, 'GPU')
df_gpu = example_data.loc[gpu_key, :]
df_gpu

Unnamed: 0_level_0,walltime,advance,ghostfill,regrid,adapt,adv_steps,adv_step2,mx,patch_comm,output,grids_proc,memcopy_h2h,memcopy_h2d,memcopy_d2h
procs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,458.98,342.98,112.42,2.3,1.103,3162510,341,32,0.04646,0.0,382,35.785,10.985,10.485
2,272.85,186.24,70.239,1.3477,0.6573,1581250,185,32,13.867,0.0,190,19.448,5.8454,5.3197
4,171.32,102.78,43.592,0.73535,0.46007,790627,102,32,22.924,0.0,95,7.8659,3.0234,2.6898
8,109.09,59.014,25.037,0.4057,0.32909,395314,58,32,23.268,0.0,47,3.06,1.6402,1.3716
16,124.09,71.691,16.732,0.31207,0.30489,197657,71,32,33.504,0.0,23,1.6573,0.96905,0.73838


In [25]:
# Extract a subset of columns for one example.  The result holds BOTH devices,
# so it gets its own name instead of overwriting the GPU-only frame `df_gpu`.

df_example = example_data.loc[example,['walltime','advance','adv_step2','ghostfill']]
df_example


Unnamed: 0_level_0,Unnamed: 1_level_0,walltime,advance,adv_step2,ghostfill
device,procs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CPU,1,2505.1,2388.0,2386,112.96
CPU,2,1381.8,1193.8,1192,68.496
CPU,4,775.29,598.27,597,45.187
CPU,8,433.98,298.91,298,28.14
CPU,16,244.74,149.29,149,16.434
GPU,1,458.98,342.98,341,112.42
GPU,2,272.85,186.24,185,70.239
GPU,4,171.32,102.78,102,43.592
GPU,8,109.09,59.014,58,25.037
GPU,16,124.09,71.691,71,16.732


In [26]:
# Extract CPU/GPU data for one example and display side-by-side.  The frame
# holds both devices, so it is not stored under the GPU-only name `df_gpu`.

df_swirl = example_data.loc['swirl',['walltime','advance','adv_step2','ghostfill']]
df_swirl.unstack(level=0)   # move 'device' into the columns

Unnamed: 0_level_0,walltime,walltime,advance,advance,adv_step2,adv_step2,ghostfill,ghostfill
device,CPU,GPU,CPU,GPU,CPU,GPU,CPU,GPU
procs,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,1028.1,137.29,980.23,89.99,386,89,41.572,42.294
2,533.92,76.546,490.36,48.804,193,48,22.63,23.549
4,279.63,41.438,247.18,25.291,97,25,13.427,12.698
8,158.94,24.441,124.51,13.483,48,13,7.6878,6.7997
16,85.533,21.943,62.392,13.154,24,13,4.2401,4.3541


In [27]:
# Show the number of patch updates for every run so the CPU and GPU rows of
# each example can be compared by eye (one column per process count).

s_adv = example_data['adv_steps']
s_adv.unstack()

Unnamed: 0_level_0,procs,1,2,4,8,16
example,device,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
acoustics,CPU,6062850,3031420,1515710,757856,378928
acoustics,GPU,6062850,3031420,1515710,757856,378928
radialdam,CPU,1525260,762632,381316,190658,95329
radialdam,GPU,1525260,762632,381316,190658,95329
shockbubble,CPU,3162510,1581250,790627,395314,197657
shockbubble,GPU,3162510,1581250,790627,395314,197657
swirl,CPU,3611870,1805930,902967,451484,225742
swirl,GPU,3611870,1805930,902967,451484,225742


In [28]:
# Timing series of one AMR task (ghost filling) for the GPU runs of the
# selected example, indexed by process count.

s_ghost = example_data['ghostfill'].loc[(example, 'GPU')]
s_ghost

procs
1     112.420
2      70.239
4      43.592
8      25.037
16     16.732
Name: ghostfill, dtype: float64

In [29]:
# Check MPI scaling between CPU and GPU for the selected example.

idx = pandas.IndexSlice
df_scale = example_data.loc[idx[example,:],('walltime')].unstack(level=1)

# Speed-up relative to the first row of each column.  .iloc[0] selects that
# row by position explicitly; plain [0] on this non-integer index relied on a
# deprecated positional fallback.
df_scale['CPU_scaling'] = df_scale['CPU'].iloc[0]/df_scale['CPU']
df_scale['GPU_scaling'] = df_scale['GPU'].iloc[0]/df_scale['GPU']
df_scale.style.format('{:.1f}'.format)\
      .background_gradient(subset=['CPU_scaling','GPU_scaling'],cmap='YlOrBr',low=0,high=1)

Unnamed: 0_level_0,device,CPU,GPU,CPU_scaling,GPU_scaling
example,procs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
shockbubble,1,2505.1,459.0,1.0,1.0
shockbubble,2,1381.8,272.9,1.8,1.7
shockbubble,4,775.3,171.3,3.2,2.7
shockbubble,8,434.0,109.1,5.8,4.2
shockbubble,16,244.7,124.1,10.2,3.7


In [30]:
# Ratio of CPU time to GPU time for a single example, per process count.
# The largest ratio in each column is highlighted.

idx = pandas.IndexSlice
df1 = example_data.loc['shockbubble', ['walltime', 'advance', 'adv_step2', 'ghostfill']]
df1.loc['CPU'].div(df1.loc['GPU']).style.format('{:.1f}'.format).highlight_max()

Unnamed: 0_level_0,walltime,advance,adv_step2,ghostfill
procs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.5,7.0,7.0,1.0
2,5.1,6.4,6.4,1.0
4,4.5,5.8,5.9,1.0
8,4.0,5.1,5.1,1.1
16,2.0,2.1,2.1,1.0


In [31]:
# Speed-up for all examples, step 1: reshape so that the CPU and GPU timings
# of each (example, procs) pair sit side by side in the columns.

cols = ['walltime', 'advance', 'adv_step2']
df1 = example_data[cols].unstack(level='device')
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,walltime,walltime,advance,advance,adv_step2,adv_step2
Unnamed: 0_level_1,device,CPU,GPU,CPU,GPU,CPU,GPU
example,procs,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
acoustics,1,1674.9,480.81,1493.8,292.26,1491,290
acoustics,2,850.02,258.29,747.17,151.48,745,150
acoustics,4,430.38,132.72,374.45,76.484,373,75
acoustics,8,236.51,75.873,187.23,39.399,186,39
acoustics,16,127.19,61.776,93.668,30.747,93,30
radialdam,1,846.74,162.01,796.11,109.51,795,108
radialdam,2,430.45,89.544,398.41,57.87,397,57
radialdam,4,219.79,47.014,199.09,30.033,198,29
radialdam,8,122.22,27.776,99.495,15.947,99,15
radialdam,16,68.692,25.46,49.722,15.065,49,15


In [32]:
# Extract the desired columns per device and drop the 'device' index level so
# the CPU and GPU frames share the same (example, procs) index.
th_prop = {"selector":"th", "props":[("vertical-align","top")] }   # top-align header cells

cols = ['walltime','advance','patch_comm','ghostfill']

proc = [1,2,4,8]

df_gpu = example_data.loc[idx[:,'GPU',proc],cols].copy()
df_gpu.index = df_gpu.index.droplevel(1)

df_cpu = example_data.loc[idx[:,'CPU',proc],cols].copy()
df_cpu.index = df_cpu.index.droplevel(1)

# Ratio of CPU time to GPU time (> 1 means the GPU run is faster).
R = df_cpu/df_gpu

# The caption describes what is actually shown: a plain ratio on the
# sequential 'YlOrBr' map, which has no red/blue ends.
R.style.format('{:.2f}'.format) \
          .background_gradient(cmap='YlOrBr',low=0,high=1) \
          .set_table_styles([th_prop])\
          .set_caption("CPU time / GPU time (darker = larger ratio)")

Unnamed: 0_level_0,Unnamed: 1_level_0,walltime,advance,patch_comm,ghostfill
example,procs,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
acoustics,1,3.48,5.11,0.45,0.99
acoustics,2,3.29,4.93,0.87,0.99
acoustics,4,3.24,4.9,1.27,0.99
acoustics,8,3.12,4.75,3.11,1.04
radialdam,1,5.23,7.27,0.84,0.99
radialdam,2,4.81,6.88,1.46,0.98
radialdam,4,4.67,6.63,2.92,1.05
radialdam,8,4.4,6.24,4.68,1.04
shockbubble,1,5.46,6.96,0.79,1.0
shockbubble,2,5.06,6.41,8.47,0.98


<hr style="border-width:4px; border-color:coral"/>

# Strong scaling (GPU)

<hr style="border-width:4px; border-color:coral"/>

In [33]:
import data_tools

# New figure for the GPU walltime strong-scaling curve of the selected example.
plt.figure()

df_gpu = example_data.loc[(example, 'GPU'), :]
data_tools.strong_scaling(df_gpu, 'walltime')

plt.title(f'{example.capitalize():s} (GPU)', fontsize=16);


<IPython.core.display.Javascript object>

## Efficiency (GPU)

In [34]:
import data_tools

# Parallel efficiency of the GPU walltime; df_gpu comes from the
# strong-scaling cell above.
plt.figure()
data_tools.efficiency(df_gpu, 'walltime')

data_tools.plt.title(f'Efficiency (%) ({example.capitalize():s}; GPU)', fontsize=16);

<IPython.core.display.Javascript object>

## Bar plot (GPU)

In [38]:
import data_tools

# Grouped bars for the GPU runs; h keeps whatever bar_plot returns.
h = data_tools.bar_plot(df_gpu)

plt = data_tools.plt
plt.title(f'Timing ({example.capitalize():s}; GPU)', fontsize=16);

# Override bar_plot's ticks: labelled every 60 s, minor ticks every 20 s,
# labels printed as whole seconds.
ax = plt.gca()
ax.xaxis.set_major_locator(plt.MultipleLocator(60))
ax.xaxis.set_minor_locator(plt.MultipleLocator(20))
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda secs, _pos: format(secs, '.0f')))

<IPython.core.display.Javascript object>

## Stacked bar (GPU)

In [39]:
import data_tools

# Stacked per-task bars for the GPU runs.
data_tools.barh_plot(df_gpu)

plt = data_tools.plt
plt.title(f'Timing ({example.capitalize():s}; GPU)', fontsize=16);

# Override barh_plot's ticks: labelled every 100 s, minor ticks every 25 s,
# labels printed in seconds with one decimal.
ax = plt.gca()
ax.xaxis.set_major_locator(plt.MultipleLocator(100))
ax.xaxis.set_minor_locator(plt.MultipleLocator(25))
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda secs, _pos: format(secs, '.1f')))

<IPython.core.display.Javascript object>

<hr style="border-width:4px; border-color:coral"/>

# CPU Results

<hr style="border-width:4px; border-color:coral"/>

## Strong scaling (CPU)

In [40]:
import data_tools

# CPU rows of the selected example, indexed by process count.
df_cpu = example_data.loc[(example, 'CPU'), :]

# Strong scaling of the (default) walltime field on a fresh figure.
plt.figure()
data_tools.strong_scaling(df_cpu)

data_tools.plt.title(f'Speed-up ({example.capitalize():s}; CPU)', fontsize=16);

<IPython.core.display.Javascript object>

## Efficiency (CPU)

In [41]:
import data_tools

# Parallel efficiency of the CPU walltime; df_cpu comes from the
# strong-scaling cell above.
cpu_title = f'Efficiency (%) ({example.capitalize():s}; CPU)'

data_tools.plt.figure()
data_tools.efficiency(df_cpu, 'walltime')
data_tools.plt.title(cpu_title, fontsize=16);

<IPython.core.display.Javascript object>

## Bar plot (CPU)

In [42]:
import data_tools

# Grouped Walltime / Advance / Ghost bars for the CPU runs.
data_tools.bar_plot(df_cpu)
# Capitalise the example name so the title matches the other figures.
data_tools.plt.title('Timing ({:s}; CPU)'.format(example.capitalize()),fontsize=16);

# The plotted data are in seconds; tick every 5 minutes (minor: every minute)
# and label the ticks in minutes.
plt = data_tools.plt
ax=data_tools.plt.gca()
ax.xaxis.set_major_locator(plt.MultipleLocator(5*60))
ax.xaxis.set_minor_locator(plt.MultipleLocator(60))
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda value,tick_number : '{:.0f}'.format(value/60)))

plt.xlabel('Time (minutes)')
plt.show();

<IPython.core.display.Javascript object>

## Stacked bar (CPU)

In [43]:
import data_tools

# Stacked per-task bars for the CPU runs, drawn from a private copy.
df1 = df_cpu.copy()
data_tools.barh_plot(df1)

plt = data_tools.plt
plt.title(f'Timing ({example.capitalize():s}; CPU)', fontsize=16);

# The plotted data are in seconds: tick every 5 minutes (minor: every minute)
# and print the tick labels in minutes.
ax = plt.gca()
ax.xaxis.set_major_locator(plt.MultipleLocator(5*60))
ax.xaxis.set_minor_locator(plt.MultipleLocator(1*60))
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda secs, _pos: format(secs/60, '.0f')))
plt.xlabel('Time (minutes)')

plt.show()

<IPython.core.display.Javascript object>

<hr style="border-width:4px; border-color:coral"/>

# Comparisons between CPU and GPU

<hr style="border-width:4px; border-color:coral"/>

In [44]:
# Side-by-side stacked horizontal bars comparing the CPU and GPU time spent in
# each AMR task for the selected example, with the GPU memcopy times overlaid.
import data_tools
from data_tools import amr_colors
plt.figure()

idx = pandas.IndexSlice
# NOTE: `pd` is a DataFrame (rows of the selected example indexed by
# device/procs), not the pandas module, which this notebook imports as `pandas`.
pd = example_data.loc[example].copy()

# Bar thickness and the gap between the CPU and GPU bar of one run.
width = 0.375
d = 0.05

# One integer y slot per run (five runs are hard-coded).  The CPU bars (i1) sit
# just above each slot and the GPU bars (i2) just below; the positions are
# reversed so the first data row is drawn in the top slot.
i0 = np.array([0,1,2,3,4])
i1 = list(reversed(i0 + (d + width)/2))
i2 = list(reversed(i0 - (d + width)/2))

ac = 1
ag = 1

plt.grid(axis='x')


# ---------------------------------- bar plots ---------------------------------------
# Each task is drawn as one segment starting at the running total `tot`
# (a Series indexed by device/procs), which is then advanced by that task.

# s_ghost = pd.loc[idx[:,'ghostfill'],:]
# s_ghost.loc[idx[:,'GPU']]

s_ghost =  pd.loc[idx[:,:],'ghostfill']

plt.barh(i1, s_ghost['CPU'], width,color=amr_colors['ghost'][0], label='Ghost')
plt.barh(i2, s_ghost['GPU'], width,color=amr_colors['ghost'][1], label='Ghost (GPU)')
# Copy so that the in-place additions below do not modify s_ghost.
tot = s_ghost.copy()


s_regrid = pd.loc[idx[:,:],'regrid']
plt.barh(i1, s_regrid['CPU'], width,color=amr_colors['regrid'][0], left=tot['CPU'], label='Regrid')
plt.barh(i2, s_regrid['GPU'], width,color=amr_colors['regrid'][1], left=tot['GPU'], label='Regrid (GPU)')
tot += s_regrid

s_comm =  pd.loc[idx[:,:],'patch_comm']
plt.barh(i1, s_comm['CPU'], width,color=amr_colors['comm'][0], left=tot['CPU'], label='Comm')
plt.barh(i2, s_comm['GPU'], width,color=amr_colors['comm'][1], left=tot['GPU'], label='Comm (GPU)')
tot += s_comm

s_adv = pd.loc[idx[:,:],'advance']
plt.barh(i1, s_adv['CPU'], width,color=amr_colors['advance'][0], left=tot['CPU'],label='Advance')
plt.barh(i2, s_adv['GPU'], width,color=amr_colors['advance'][1], left=tot['GPU'],label='Advance (GPU)')
tot += s_adv.copy()

# Whatever is left of walltime after the four tasks above and 'output'.
s_other = pd.loc[idx[:,:],'walltime'] - tot - pd.loc[idx[:,:],'output']
plt.barh(i1, s_other['CPU'], width,color=amr_colors['other'][0], left=tot['CPU'], \
         label='Other')
plt.barh(i2, s_other['GPU'], width,color=amr_colors['other'][1], left=tot['GPU'], label='Other (GPU)')
tot += s_other

s_last = s_other

# Add memcopy info
# The two memcopy bars are drawn on top of the GPU bar so that together they
# end where the 'Other' segment begins (tot - s_last), i.e. they overlay the
# right-hand end of the GPU 'Advance' segment: host-to-host first, then the
# combined host-to-device + device-to-host copies.
s_h2h = pd.loc[idx[:,:],'memcopy_h2h']
s_dev = pd.loc[idx[:,:],'memcopy_h2d'] + pd.loc[idx[:,:,],'memcopy_d2h']
l_h2h =  tot['GPU'] - s_last['GPU'] - s_h2h['GPU'] - s_dev['GPU'];
l_dev = l_h2h + s_h2h['GPU']

plt.barh(i2, s_h2h['GPU'], width, fill=True, edgecolor=None, color=amr_colors['memcopy'][0],\
          alpha=1,left = l_h2h, label='Memcopy (patches)')

plt.barh(i2, s_dev['GPU'], width, fill=True, edgecolor=None, color=amr_colors['memcopy'][1], \
          alpha=1, left = l_dev, linewidth=0.5,label='Memcopy (device)')


# ---------------------------------- Tick marks, legend ---------------------------------------
# The bar lengths are in seconds; the tick labels are printed in minutes.
plt = data_tools.plt
ax=data_tools.plt.gca()
ax.xaxis.set_major_locator(plt.MultipleLocator(5*60))   # major tick every 5 minutes
ax.xaxis.set_minor_locator(plt.MultipleLocator(2.5*60))   # minor tick every 2.5 minutes
ax.xaxis.set_major_formatter(plt.FuncFormatter(lambda value,tick_number : '{:0.0f}'.format(value/60)))

# One y tick per slot; labels are reversed to match the reversed bar positions.
ax.yaxis.set_major_locator(plt.FixedLocator(i0, nbins=None))
ax.set_yticklabels(reversed(['1 Proc','2 Procs','4 Procs','8 Procs','16 Procs']))

plt.title('{:s} example'.format(example.capitalize()))
plt.xlabel('Time (minutes)')
plt.legend()

plt.show()

<IPython.core.display.Javascript object>

In [45]:
# Speed-up bar chart: for every CPU process count, three bars showing the
# CPU-over-GPU ratio of (walltime - output), of 'advance', and of 'advance'
# with the GPU host-to-host memcopy time removed.

# Imported here so the cell does not depend on an earlier cell having run.
from data_tools import amr_colors

# NOTE: `pd` is a DataFrame (rows of the selected example indexed by
# device/procs), not the pandas module.
pd = example_data.loc[example].copy()

# 'one2one' : CPU run on p processes vs. GPU run on p processes
# 'redhawk' : every CPU run vs. the 1-process GPU run (Redhawk title)
# otherwise : every CPU run vs. the 1-process GPU run
mode = 'other'
procs = [1,2,4,8,16]

s_wall = pd["walltime"] - pd["output"]  # Series indexed by (device, procs)

# Strings are compared with ==.  `is` tests object identity, which only
# happens to work for interned literals and raises a SyntaxWarning on
# Python >= 3.8.
if mode == 'one2one':
    speed_up_wall = s_wall["CPU"]/s_wall["GPU"]
    speed_up_adv = pd.loc['CPU','advance']/pd.loc['GPU','advance']
    speed_up_adv_memcopy = pd.loc['CPU','advance']/(pd.loc['GPU','advance']-pd.loc['GPU','memcopy_h2h'])
else:
    # 'redhawk' and the default mode use the same numbers; only the title differs.
    speed_up_wall = s_wall['CPU']/s_wall[('GPU',1)]
    speed_up_adv = pd.loc['CPU','advance']/pd.loc[('GPU',1),'advance']
    memcpy = pd.loc[('GPU',1),'memcopy_h2h']
    speed_up_adv_memcopy = pd.loc['CPU','advance']/(pd.loc[('GPU',1),'advance']-memcpy)

# "Optimal" speed-up quoted in the legend: 1/(1 - f), where f is the fraction
# of the 1-process CPU walltime spent in 'advance'.
pct_advance = pd.loc[('CPU',1),'advance']/pd.loc[('CPU',1),'walltime']
optimal = 1/(1-pct_advance)

# Bar geometry: groups of `total_bars` unit-spaced bars, with one empty slot
# between consecutive groups.
width = 0.85
d = 1 - width
total_bars = 3   # bars per proc group

i0 = np.arange(0,(total_bars+1)*len(procs),total_bars+1)   # group centres
i1 = list(i0 - (total_bars//2) )
i2 = list(i0 )
i3 = list(i0 + (total_bars)//2)

plt.figure()

plt.bar(i1,speed_up_wall,color='gold',edgecolor='k', linewidth=1, width=width, \
               label='Walltime (optimal : {:.1f})'.format(optimal))
plt.bar(i2,speed_up_adv,color=amr_colors['advance'][0],width=width,label='Advance')
plt.bar(i3,speed_up_adv_memcopy,color=amr_colors['memcopy'][0],width=width,label='Advance-memcopy')

ax = plt.gca()
ax.xaxis.set_major_locator(plt.FixedLocator(i0))   # one labelled tick per group
# Minor ticks in the empty slot after each group ([2,6,10,14,18] for 5 groups of 3).
ax.xaxis.set_minor_locator(plt.FixedLocator(i0 + (total_bars+1)/2))

ax.set_xticklabels(procs)

ax.yaxis.set_major_locator(plt.MultipleLocator(1))   # speed-up ticks at every integer
ax.yaxis.set_minor_locator(plt.MultipleLocator(1))

if mode == 'one2one':
    plt.title('{:s} : Speed-up : GPU vs. CPU'.format(example.capitalize()))
elif mode == 'redhawk':
    plt.title('{:s} : 2 GPUs over 4 CPUs (Redhawk)'.format(example.capitalize()))
else:
    plt.title('{:s} : One GPU over multi-core CPU'.format(example.capitalize()))

plt.xlabel('Number of CPU cores (MPI processes)')
plt.ylabel('Speed-up')

plt.ylim([0, 8])
plt.grid(axis='y', which='both')
plt.legend(loc='upper right')

plt.show()

<IPython.core.display.Javascript object>

In [None]:
# 'advance' time of every CPU run divided by that of the 1-process GPU run
# (`pd` is the per-example DataFrame built in the previous cell).
pd.loc['CPU', 'advance'] / pd.loc[('GPU', 1), 'advance']