# Parameters for the Scaling Studies

Assume that:

    N = 2**n
    M = 2**m
    T = 2**(t+23)

such that:

    Segment Size = N * M * T
    Block Size   = M * T
    Total Size   = 120 * Segment Size

In [1]:
import itertools
import pandas as pd
import numpy as np

In [20]:
# Exponent grids for the parameter sweep: n feeds N = 2**n, m feeds M = 2**m
# and t feeds T = 2**(t+23). They hold 7, 4 and 5 consecutive exponents
# starting at 0, respectively.
n, m, t = (np.arange(count) for count in (7, 4, 5))

In [21]:
def N(n):
    """Return two raised to the power ``n``.

    Works element-wise when ``n`` is a NumPy array or pandas Series.
    The notebook reports the result as the number of nodes.
    """
    return pow(2, n)

def M(m):
    """Return two raised to the power ``m``.

    Works element-wise when ``m`` is a NumPy array or pandas Series.
    The result is one factor of both the block size and the segment size.
    """
    return pow(2, m)

def T(t):
    """Return two raised to the power ``t + 23``.

    The offset of 23 makes ``t = 0`` map to 2**23 = 8388608.
    Works element-wise when ``t`` is a NumPy array or pandas Series.
    """
    exponent = t + 23
    return pow(2, exponent)

So, we see that:

In [22]:
N(n)

array([ 1,  2,  4,  8, 16, 32, 64])

In [23]:
M(m)

array([1, 2, 4, 8])

In [24]:
T(t)

array([  8388608,  16777216,  33554432,  67108864, 134217728])

In [25]:
# One row per (n, m, t) exponent combination over the whole parameter grid.
df = pd.DataFrame(list(itertools.product(n, m, t)), columns=['n', 'm', 't'])
# Power-of-two quantities derived from each exponent column.
df = df.assign(N=N(df['n']), M=M(df['m']), T=T(df['t']))
# Derived sizes: the segment size is the product of all three factors and the
# block size omits N.
df['Segment Size'] = df['N'].mul(df['M']).mul(df['T'])
df['Block Size'] = df['M'].mul(df['T'])
# NOTE(review): the factor 120 is not explained in this notebook; confirm
# what it represents.
df['Total Size'] = df['Segment Size'].mul(120)

In [26]:
df

Unnamed: 0,n,m,t,N,M,T,Segment Size,Block Size,Total Size
0,0,0,0,1,1,8388608,8388608,8388608,1006632960
1,0,0,1,1,1,16777216,16777216,16777216,2013265920
2,0,0,2,1,1,33554432,33554432,33554432,4026531840
3,0,0,3,1,1,67108864,67108864,67108864,8053063680
4,0,0,4,1,1,134217728,134217728,134217728,16106127360
5,0,1,0,1,2,8388608,16777216,16777216,2013265920
6,0,1,1,1,2,16777216,33554432,33554432,4026531840
7,0,1,2,1,2,33554432,67108864,67108864,8053063680
8,0,1,3,1,2,67108864,134217728,134217728,16106127360
9,0,1,4,1,2,134217728,268435456,268435456,32212254720


So, we need to perform 140 IOR runs (7 × 4 × 5 parameter combinations) for each API we are considering, if we consider the entire range of the parameter space.

The following two frames show, for each **Total Size** and each **Block Size** respectively, the node counts of the individual runs that can produce it; these correspond to separate *strong* and *weak* scaling studies.

In [27]:
# For every distinct segment size, report it in MB, the matching total size
# (120 segments' worth) in GB, and the sorted node counts that can produce it.
pd.DataFrame(
    data=[
        (seg_bytes / 2**20, 120 * seg_bytes / 2**30, sorted(node_counts.unique()))
        for seg_bytes, node_counts in df.groupby('Segment Size')['N']
    ],
    columns=['Segment Size [MB]', 'Total Size [GB]', 'Number of Nodes'],
)

Unnamed: 0,Segment Size [MB],Total Size [GB],Number of Nodes
0,8.0,0.9375,[1]
1,16.0,1.875,"[1, 2]"
2,32.0,3.75,"[1, 2, 4]"
3,64.0,7.5,"[1, 2, 4, 8]"
4,128.0,15.0,"[1, 2, 4, 8, 16]"
5,256.0,30.0,"[1, 2, 4, 8, 16, 32]"
6,512.0,60.0,"[1, 2, 4, 8, 16, 32, 64]"
7,1024.0,120.0,"[1, 2, 4, 8, 16, 32, 64]"
8,2048.0,240.0,"[2, 4, 8, 16, 32, 64]"
9,4096.0,480.0,"[4, 8, 16, 32, 64]"


This indicates that if we want scaling plots with points for all 7 node counts (1 to 64), then we can only consider the 512MB and 1GB segment sizes, which are 2 different curves.  If we want to allow for 5 or more points in our plots, then the segment size can range from 128MB to 4GB.

In [28]:
# For every distinct block size, report it in MB together with the sorted
# node counts of the runs that use it.
pd.DataFrame(
    data=[
        (block_bytes / 2**20, sorted(node_counts.unique()))
        for block_bytes, node_counts in df.groupby('Block Size')['N']
    ],
    columns=['Block Size [MB]', 'Number of Nodes'],
)

Unnamed: 0,Block Size [MB],Number of Nodes
0,8.0,"[1, 2, 4, 8, 16, 32, 64]"
1,16.0,"[1, 2, 4, 8, 16, 32, 64]"
2,32.0,"[1, 2, 4, 8, 16, 32, 64]"
3,64.0,"[1, 2, 4, 8, 16, 32, 64]"
4,128.0,"[1, 2, 4, 8, 16, 32, 64]"
5,256.0,"[1, 2, 4, 8, 16, 32, 64]"
6,512.0,"[1, 2, 4, 8, 16, 32, 64]"
7,1024.0,"[1, 2, 4, 8, 16, 32, 64]"


This indicates that this scaling study will give full range plots for all block sizes considered.