# Comparison of HPL traces

In [1]:
import io
import zipfile
import pandas
from plotnine import *
import plotnine
plotnine.options.figure_size = (12, 8)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning) # removing annoying Pandas warning
import os
import json
import re

def read_csv(archive_name, csv_name, columns=None, filter_func=lambda x: x):
    archive = zipfile.ZipFile(archive_name)
    df= pandas.read_csv(io.BytesIO(filter_func(archive.read(csv_name))), names=columns)
    df.columns = df.columns.str.strip()
    return df

def filter_pajeng(pajeng_output, keep):
    lines = pajeng_output.split(b'\n')
    lines = [l for l in lines if l.startswith(keep.encode())]
    return b'\n'.join(lines)

def _read_paje_state(archive_name, csv_name, has_msg_size=False, replace_func=('MPI', 'MPI')):
    columns=['type', 'rank', 'container', 'start', 'end', 'duration', 'level', 'function']
    if has_msg_size:
        columns.append('tmp')
    df = read_csv(archive_name, csv_name, columns=columns, filter_func=lambda x: filter_pajeng(x, 'State'))
    df['function'] = df['function'].str.replace(*replace_func).str.strip()  # for some reason Simgrid uses PMPI_Wait and not MPI_Wait
    df['msg_size'] = -1
    if has_msg_size:
        df.loc[df['function'].isin(['MPI_Recv', 'MPI_Send', 'MPI_Isend', 'MPI_Irecv']), 'msg_size'] = df['tmp']
        df['msg_size'] = df['msg_size'].astype(int)
    df['rank'] = df['rank'].str.slice(5).astype(int)  # changing 'rank42' into 42
    return df

def read_mpi_state(archive_name, csv_name):
    df = _read_paje_state(archive_name, csv_name, True)
    return df

def read_smpi_state(archive_name, csv_name):
    df = _read_paje_state(archive_name, csv_name, True, ('PMPI', 'MPI'))
    df['rank'] = -df['rank']  # in SMPI, we have 'rank-42' and not 'rank42', so there remains a - that we should remove
    return df

def read_smpi_link(archive_name, csv_name):
    df = read_csv(archive_name, csv_name,
                  columns=['type', 'level', 'container', 'start', 'end', 'duration', 'commType', 'src', 'dst', 'msg_size'],
                  filter_func=lambda x: filter_pajeng(x, 'Link'))
    df['src'] = df['src'].str.slice(6).astype(int)  # changing 'rank42' into 42
    df['dst'] = df['dst'].str.slice(6).astype(int)  # changing 'rank42' into 42
#    df['remote'] = (df['src'] // 32) != (df['dst'] // 32)
    return df

def read_archive(archive_name, mode, exp_id, drop_func=['MPI_Comm_rank', 'MPI_Comm_size', 'MPI_Comm_split', 'MPI_Comm_free', 'MPI_Init', 'MPI_Finalize']):
    assert mode in ('reality', 'simgrid')
    print('# Mode %s, exp %d' % (mode, exp_id))
    blas_trace = read_csv(archive_name, 'trace_blas_%d.csv' % exp_id)
    print('\tBLAS trace: %6d lines' % len(blas_trace))
    blas_trace['start'] = blas_trace.timestamp
    blas_trace['end'] = blas_trace.timestamp + blas_trace.duration
    blas_trace = blas_trace[['file', 'line', 'function', 'start', 'end', 'rank', 'm', 'n', 'k']]
    blas_trace['msg_size'] = -1
    blas_trace['kind'] = 'BLAS'
    if mode == 'reality':
        mpi_trace = read_mpi_state(archive_name, 'trace_mpi_%d.csv' % exp_id)
    else:
        mpi_trace = read_smpi_state(archive_name, 'trace_mpi_%d.csv' % exp_id)
    print('\tMPI  trace: %6d lines' % len(mpi_trace))
    mpi_trace = mpi_trace[['function', 'start', 'end', 'rank', 'msg_size']]
    mpi_trace['kind'] = 'MPI'
    df = pandas.concat([blas_trace, mpi_trace])
    df['mode'] = mode
    df['rank+1'] = df['rank'] + 1
    old_len = len(df)
    df = df[~df['function'].str.strip().isin(drop_func)]
    print('Removed %d events with functions in %s' % (old_len-len(df), ', '.join(drop_func)))
    if mode == 'reality':
        df_links = None
    else:
        df_links = read_smpi_link(archive_name, 'trace_mpi_%d.csv' % exp_id)
    df['duration'] = df['end'] - df['start']
    df['exp_id'] = exp_id
    return df, df_links

reality_file = 'grenoble_2018-12-17_1828789.zip'
smpi_dir = '../smpi_hpl/'
simgrid_files = [smpi_dir + f for f in os.listdir(smpi_dir) if f.startswith('grenoble_2018-12-17') or f.startswith('grenoble_2018-12-18')]

def get_optimization(filename):
    history = json.loads(zipfile.ZipFile(filename).read('history.json'))
    make_cmd = [cmd for cmd in history if 'SMPI_OPTIMIZATION' in cmd['command']][0]['command']
    opt = re.findall('SMPI_OPTIMIZATION[^\s-]*', make_cmd)[0]
    if opt == 'SMPI_OPTIMIZATION':
        return 4
    else:
        assert opt[-2] == '='
        return int(opt[-1])

traces, _ = read_archive(reality_file, 'reality', 0)
traces['optimization'] = -1

for file in simgrid_files:
    trace_simgrid, _ = read_archive(file, 'simgrid', 0)
    trace_simgrid['optimization'] = get_optimization(file)
    traces = pandas.concat([traces, trace_simgrid])

print(traces['optimization'].unique())

traces.head()

# Mode reality, exp 0
	BLAS trace: 540758 lines
	MPI  trace: 860312 lines
Removed 541286 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
# Mode simgrid, exp 0
	BLAS trace: 615684 lines
	MPI  trace: 1060892 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
# Mode simgrid, exp 0
	BLAS trace: 695298 lines
	MPI  trace: 1111131 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
# Mode simgrid, exp 0
	BLAS trace: 241460 lines
	MPI  trace: 944102 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
# Mode simgrid, exp 0
	BLAS trace: 694590 lines
	MPI  trace: 1104774 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
# Mode simgrid, exp 0
	

Unnamed: 0,end,file,function,k,kind,line,m,msg_size,n,rank,start,mode,rank+1,duration,exp_id,optimization
0,6.977182,../HPL_pdrpanrlT.c,dtrsm,-1.0,BLAS,172.0,2.0,-1,2.0,0,6.97714,reality,1,4.2e-05,0,-1
1,6.977283,../HPL_pdrpanrlT.c,dgemm,2.0,BLAS,224.0,25038.0,-1,2.0,0,6.97719,reality,1,9.3e-05,0,-1
2,6.97745,../HPL_pdrpanrlT.c,dtrsm,-1.0,BLAS,172.0,0.0,-1,2.0,0,6.97745,reality,1,0.0,0,-1
3,6.97746,../HPL_pdrpanrlT.c,dgemm,2.0,BLAS,224.0,25036.0,-1,0.0,0,6.97746,reality,1,0.0,0,-1
4,6.977461,../HPL_pdrpanrlT.c,dtrsm,-1.0,BLAS,172.0,4.0,-1,4.0,0,6.97746,reality,1,1e-06,0,-1


## HPL result

In [2]:
res = read_csv(reality_file, 'results.csv')
res[['gflops', 'time']]

Unnamed: 0,gflops,time
0,2505.0,33.27


In [3]:
for file in sorted(simgrid_files):
    res = read_csv(file, 'results.csv')
    print('Optimization level: %d' % get_optimization(file))
    print('%s Gflops' % res['gflops'].max())
    print('%s seconds' % res['time'].max())
    print()

Optimization level: 4
2490.0 Gflops
33.47 seconds

Optimization level: 1
2028.0 Gflops
41.09 seconds

Optimization level: 2
2483.0 Gflops
33.56 seconds

Optimization level: 3
2485.0 Gflops
33.54 seconds

Optimization level: 0
3075.0 Gflops
27.1 seconds



## Checking the parameters

In [4]:
read_csv(reality_file, 'exp.csv')

Unnamed: 0,matrix_size,block_size,proc_p,proc_q,pfact,rfact,bcast,depth,swap,mem_align,process_per_node,thread_per_process
0,50000,128,2,4,1,2,2,1,0,8,1,32


In [5]:
read_csv(simgrid_files[0], 'exp_smpi.csv')

Unnamed: 0,matrix_size,block_size,proc_p,proc_q,pfact,rfact,bcast,depth,swap,mem_align,process_per_node,thread_per_process,dgemm_coefficient,dgemm_intercept,dtrsm_coefficient,dtrsm_intercept
0,50000,128,2,4,1,2,2,1,0,8,1,32,2.548998e-12,9.9e-05,4.524788e-12,4e-05


In [6]:
print(zipfile.ZipFile(simgrid_files[0]).read('dahu.xml').decode('ascii'))

<?xml version="1.0"?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">

<platform version="4.1">
    <config id="General">
        <prop id="smpi/os" value="0:2.9654918472829634e-07:9.636195922971908e-11;8133:0.0:0.0;15831:0.0:0.0;33956:0.0:0.0;63305:0.0:0.0"/>
        <prop id="smpi/or" value="0:1.3754300044375224e-06:8.456019002037343e-11;8133:0.0:0.0;15831:0.0:0.0;33956:0.0:0.0;63305:0.0:0.0"/>
        <prop id="smpi/ois" value="0:6.93122952108322e-07:7.051204536228499e-11;8133:3.601990790153314e-07:1.2449128895712003e-10;15831:2.977016881030904e-06:4.130261264964205e-11;33956:3.1334661540668004e-06:3.293458765282178e-11;63305:6.939993663604218e-07:0.0"/>
        <prop id="smpi/bw-factor" value="0:0.4709912642540426;8133:0.6770561715947873;15831:0.7091971477925826;33956:0.7033449540683548;63305:0.9867951082730284"/>
        <prop id="smpi/lat-factor" value="0:1.1347371681807956;8133:29.84698135078078;15831:30.274775156187765;33956:64.63917831305292;633

In [7]:
for mode in traces['optimization'].unique():
    for kind in traces['kind'].unique():
        df = traces[(traces['optimization'] == mode) & (traces['kind'] == kind)]
        print('optimization %s' % mode, kind, df.start.min(), df.end.max())
    print()

optimization -1 BLAS 6.9771399999999995 40.191413000000004
optimization -1 MPI 2.7000000000000002e-05 49.946581

optimization 1 BLAS 7.18629 47.981477000000005
optimization 1 MPI 0.0 58.618389

optimization 2 BLAS 0.00020899999999999998 33.556677
optimization 2 MPI 0.0 33.558231

optimization 0 BLAS 7.16707 34.225914
optimization 0 MPI 0.0 44.208057000000004

optimization 4 BLAS 0.007087 33.475977
optimization 4 MPI 0.0 33.477055

optimization 3 BLAS 0.006964 33.546477
optimization 3 MPI 0.0 33.547427



## Comparing dgemm traces

In [8]:
def dump_trace(df, function, columns, exp_col, rank=None):
    columns = columns + ['rank']
    for exp in df[exp_col].unique():
        tmp = df[(df['function'] == function) & (df[exp_col] == exp)][columns]
        if rank is not None:
            tmp = tmp[tmp['rank'] == rank]
        else:
            tmp = tmp.sort_values(by=['rank'])
        filename = '/tmp/trace_%d' % exp
        tmp.to_csv(filename, index=False)
        print('Created file %s' % filename)

In [9]:
dgemm = traces[traces['function'] == 'dgemm'].sort_values(by=['start'])
dgemm['size_product'] = dgemm.m * dgemm.n * dgemm.k
group = dgemm.groupby(['optimization'])[['size_product', 'm', 'n', 'k', 'duration']].agg(['sum'])
group['count'] = dgemm.groupby(['optimization']).count()['m']
group

Unnamed: 0_level_0,size_product,m,n,k,duration,count
Unnamed: 0_level_1,sum,sum,sum,sum,sum,Unnamed: 6_level_1
optimization,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
-1,41586800000000.0,2309108000.0,19781982.0,22608512.0,119.046785,270379
0,41586800000000.0,1433152000.0,19781982.0,3453440.0,82.708438,120730
1,41586800000000.0,3077160000.0,19781982.0,27403776.0,195.486246,307842
2,41586800000000.0,3663246000.0,19781982.0,32499072.0,206.519294,347649
3,41586800000000.0,3659851000.0,19781982.0,32462848.0,206.42063,347366
4,41586800000000.0,3658805000.0,19781982.0,32453760.0,206.413651,347295


In [10]:
dgemm.groupby(['rank', 'optimization'])[['size_product']].agg(['count', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,size_product,size_product
Unnamed: 0_level_1,Unnamed: 1_level_1,count,sum
rank,optimization,Unnamed: 2_level_2,Unnamed: 3_level_2
0,-1,22271,5198147000000.0
0,0,15101,5198147000000.0
0,1,37132,5198147000000.0
0,2,41866,5198147000000.0
0,3,41787,5198147000000.0
0,4,41795,5198147000000.0
1,-1,19956,5218160000000.0
1,0,14823,5218160000000.0
1,1,64950,5218160000000.0
1,2,76198,5218160000000.0


## Comparing dtrsm traces

In [11]:
dtrsm = traces[traces['function'] == 'dtrsm'].sort_values(by=['start'])
dtrsm['size_product'] = dtrsm.m * dtrsm.n ** 2
group = dtrsm.groupby(['optimization'])[['size_product', 'm', 'n', 'duration']].agg(['sum'])
group['count'] = dtrsm.groupby(['optimization']).count()['m']
group

Unnamed: 0_level_0,size_product,m,n,duration,count
Unnamed: 0_level_1,sum,sum,sum,sum,Unnamed: 5_level_1
optimization,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
-1,319466800000.0,19781982.0,22608512.0,3.500695,270379
0,319466800000.0,19781982.0,3453440.0,3.862529,120730
1,319466800000.0,19781982.0,27403776.0,8.340963,307842
2,319466800000.0,19781982.0,32499072.0,9.183471,347649
3,319466800000.0,19781982.0,32462848.0,9.177387,347366
4,319466800000.0,19781982.0,32453760.0,9.176314,347295


In [12]:
dtrsm.groupby(['rank', 'optimization'])[['size_product']].agg(['count', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,size_product,size_product
Unnamed: 0_level_1,Unnamed: 1_level_1,count,sum
rank,optimization,Unnamed: 2_level_2,Unnamed: 3_level_2
0,-1,22271,39905300000.0
0,0,15101,39905300000.0
0,1,37132,39905300000.0
0,2,41866,39905300000.0
0,3,41787,39905300000.0
0,4,41795,39905300000.0
1,-1,19956,40110820000.0
1,0,14823,40110820000.0
1,1,64950,40110820000.0
1,2,76198,40110820000.0


## Comparing MPI_Recv traces

In [13]:
recv = traces[traces['function'] == 'MPI_Recv'].sort_values(by=['start'])
group = recv.groupby(['optimization'])[['msg_size', 'duration']].agg(['sum'])
group['count'] = recv.groupby(['optimization']).count()['start']
group

Unnamed: 0_level_0,msg_size,duration,count
Unnamed: 0_level_1,sum,sum,Unnamed: 3_level_1
optimization,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
-1,30238677876,13.849804,3682
0,30238677876,3.293772,3682
1,30238677876,3.630675,3682
2,35232869308,3.781353,3402
3,35232869308,3.777262,3402
4,35232869308,3.798053,3402


In [14]:
recv.groupby(['rank', 'optimization'])[['msg_size']].agg(['count', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,msg_size,msg_size
Unnamed: 0_level_1,Unnamed: 1_level_1,count,sum
rank,optimization,Unnamed: 2_level_2,Unnamed: 3_level_2
0,-1,421,3776632664
0,0,421,3776632664
0,1,421,3776632664
0,2,305,3775030280
0,3,305,3775030280
0,4,305,3775030280
1,-1,505,3775431788
1,0,505,3775431788
1,1,505,3775431788
1,2,301,3775030332


In [15]:
dump_trace(traces, 'MPI_Recv', ['msg_size'], 'optimization')

Created file /tmp/trace_-1
Created file /tmp/trace_1
Created file /tmp/trace_2
Created file /tmp/trace_0
Created file /tmp/trace_4
Created file /tmp/trace_3


In [16]:
!head /tmp/trace_0
!wc -l /tmp/trace_*
!sha256sum /tmp/trace_*

msg_size,rank
8,0
14894088,0
14763016,0
14763016,0
14631944,0
14500872,0
14500872,0
14369800,0
14238728,0
  3683 /tmp/trace_0
  3683 /tmp/trace_1
  3683 /tmp/trace_-1
  3403 /tmp/trace_2
  3403 /tmp/trace_3
  3403 /tmp/trace_4
 21258 total
62fe773b68fa098bc959c9c322b997c12a57936163c9775e3c5e7befbcd58a1a  /tmp/trace_0
62fe773b68fa098bc959c9c322b997c12a57936163c9775e3c5e7befbcd58a1a  /tmp/trace_1
62fe773b68fa098bc959c9c322b997c12a57936163c9775e3c5e7befbcd58a1a  /tmp/trace_-1
3aaefb09e9daaf4b835fdd02d0fea88a9e0daba1935b6890261ebcfd3e2dcfb3  /tmp/trace_2
be60e024b30a99fbffc4eabad63c66eb60bb63218752a0ce14c4fa93694d7a30  /tmp/trace_3
297f74e27a7ae659bb6ff2a887d5c627a1d6088d05f4adb32a387b8c833272b3  /tmp/trace_4


In [17]:
!git diff --color --no-index /tmp/trace_1 /tmp/trace_2 | head -n 100

[1mdiff --git a/tmp/trace_1 b/tmp/trace_2[m
[1mindex 3e62d29..c009245 100644[m
[1m--- a/tmp/trace_1[m
[1m+++ b/tmp/trace_2[m
[36m@@ -1,7 +1,43 @@[m
 msg_size,rank[m
 8,0[m
[31m-14894088,0[m
[31m-14763016,0[m
[32m+[m[32m18039816,0[m
[32m+[m[32m17908744,0[m
[32m+[m[32m17908744,0[m
[32m+[m[32m17777672,0[m
[32m+[m[32m17646600,0[m
[32m+[m[32m17646600,0[m
[32m+[m[32m17515528,0[m
[32m+[m[32m17384456,0[m
[32m+[m[32m17384456,0[m
[32m+[m[32m17253384,0[m
[32m+[m[32m17122312,0[m
[32m+[m[32m17122312,0[m
[32m+[m[32m16991240,0[m
[32m+[m[32m16860168,0[m
[32m+[m[32m16860168,0[m
[32m+[m[32m18170888,0[m
[32m+[m[32m16729096,0[m
[32m+[m[32m18170888,0[m
[32m+[m[32m18433032,0[m
[32m+[m[32m19743752,0[m
[32m+[m[32m19612680,0[m
[32m+[m[32m19481608,0[m
[32m+[m[32m19481608,0[m
[32m+[m[32m19350536,0[m
[32m+[m[32m19219464,0[m
[32m+[m[32m19219464,0[m
[32m+[m[32m19088392,0[m
[32m+[m[32m189573

## Comparing MPI_Send traces

In [18]:
send = traces[traces['function'] == 'MPI_Send'].sort_values(by=['start'])
group = send.groupby(['optimization'])[['msg_size', 'duration']].agg(['sum'])
group['count'] = send.groupby(['optimization']).count()['start']
group

Unnamed: 0_level_0,msg_size,duration,count
Unnamed: 0_level_1,sum,sum,Unnamed: 3_level_1
optimization,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
-1,45357274452,66.724003,107564
0,45357274452,52.341034,107564
1,45355757692,32.306767,107564
2,40403077028,9.49784,105342
3,40403077028,9.454018,105342
4,40403077028,9.465298,105342


In [19]:
send.groupby(['rank', 'optimization'])[['msg_size']].agg(['count', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,msg_size,msg_size
Unnamed: 0_level_1,Unnamed: 1_level_1,count,sum
rank,optimization,Unnamed: 2_level_2,Unnamed: 3_level_2
0,-1,13557,5684327908
0,0,13557,5684327908
0,1,13557,5683351676
0,2,13346,5069757052
0,3,13346,5069757052
0,4,13346,5069757052
1,-1,13431,5684827848
1,0,13431,5684827848
1,1,13431,5683851192
1,2,13327,5069704024


In [20]:
dump_trace(traces, 'MPI_Send', ['msg_size'], 'optimization')

Created file /tmp/trace_-1
Created file /tmp/trace_1
Created file /tmp/trace_2
Created file /tmp/trace_0
Created file /tmp/trace_4
Created file /tmp/trace_3


In [21]:
!head /tmp/trace_0
!wc -l /tmp/trace_*
!sha256sum /tmp/trace_*

msg_size,rank
8,0
2080,0
2080,0
2080,0
2080,0
2080,0
2080,0
2080,0
2080,0
 107565 /tmp/trace_0
 107565 /tmp/trace_1
 107565 /tmp/trace_-1
 105343 /tmp/trace_2
 105343 /tmp/trace_3
 105343 /tmp/trace_4
 638724 total
553c4e19f8381e01a4a5b1d915cf100697ba2d66775e0c00f4d3c800fe500451  /tmp/trace_0
b735e6edc7fff316797dade5592c38701fb037fc9438a7bf3e0743631e9cae32  /tmp/trace_1
935bc7ba61924ecbc01741ccfa78e682212a7e4de5cf7d9a2ab21054ab0525a7  /tmp/trace_-1
554325a3d63a00aa16695c4895d9630e83fd379726dcc029e3938c3e740c2335  /tmp/trace_2
c279cce1f5eb2d681df26d7a7c80e861c26fa8cfbb9c6352f11feafc50ec7876  /tmp/trace_3
c279cce1f5eb2d681df26d7a7c80e861c26fa8cfbb9c6352f11feafc50ec7876  /tmp/trace_4


In [22]:
!git diff --color --no-index /tmp/trace_-1 /tmp/trace_0 | head -n 100

[1mdiff --git a/tmp/trace_-1 b/tmp/trace_0[m
[1mindex c629e14..7f16a5f 100644[m
[1m--- a/tmp/trace_-1[m
[1m+++ b/tmp/trace_0[m
[36m@@ -18,8 +18,8 @@[m [mmsg_size,rank[m
 2080,0[m
 61920,0[m
 8519680,0[m
[31m-3994080,0[m
 17122312,0[m
[32m+[m[32m3994080,0[m
 8519680,0[m
 4859464,0[m
 2080,0[m
[36m@@ -145,8 +145,8 @@[m [mmsg_size,rank[m
 2080,0[m
 63984,0[m
 8388608,0[m
[31m-3998184,0[m
 16860168,0[m
[32m+[m[32m3998184,0[m
 8388608,0[m
 3932640,0[m
 16991240,0[m
[36m@@ -395,7 +395,7 @@[m [mmsg_size,rank[m
 2080,0[m
 3987928,0[m
 8650752,0[m
[31m-17384456,0[m
[32m+[m[32m4123112,0[m
 2080,0[m
 2080,0[m
 2080,0[m
[36m@@ -418,7 +418,7 @@[m [mmsg_size,rank[m
 2080,0[m
 75336,0[m
 8650752,0[m
[31m-4123112,0[m
[32m+[m[32m17384456,0[m
 2080,0[m
 2080,0[m
 2080,0[m
[36m@@ -509,8 +509,8 @@[m [mmsg_size,rank[m
 2080,0[m
 58824,0[m
 8126464,0[m
[31m-3936752,0[m
 16335880,0[m
[32m+[m[32m3936752,0[m
 8126464,0[m
 

In [23]:
!git diff --color --no-index /tmp/trace_0 /tmp/trace_1 | head -n 100

[1mdiff --git a/tmp/trace_0 b/tmp/trace_1[m
[1mindex 7f16a5f..2c72a6b 100644[m
[1m--- a/tmp/trace_0[m
[1m+++ b/tmp/trace_1[m
[36m@@ -19,9 +19,9 @@[m [mmsg_size,rank[m
 61920,0[m
 8519680,0[m
 17122312,0[m
[31m-3994080,0[m
[32m+[m[32m3594672,0[m
 8519680,0[m
[31m-4859464,0[m
[32m+[m[32m4193784,0[m
 2080,0[m
 17253384,0[m
 2080,0[m
[36m@@ -143,10 +143,10 @@[m [mmsg_size,rank[m
 2080,0[m
 2080,0[m
 2080,0[m
[31m-63984,0[m
[32m+[m[32m60888,0[m
 8388608,0[m
 16860168,0[m
[31m-3998184,0[m
[32m+[m[32m4260360,0[m
 8388608,0[m
 3932640,0[m
 16991240,0[m
[36m@@ -393,9 +393,9 @@[m [mmsg_size,rank[m
 2080,0[m
 2080,0[m
 2080,0[m
[31m-3987928,0[m
[32m+[m[32m4596256,0[m
 8650752,0[m
[31m-4123112,0[m
[32m+[m[32m4596256,0[m
 2080,0[m
 2080,0[m
 2080,0[m
[36m@@ -416,7 +416,7 @@[m [mmsg_size,rank[m
 2080,0[m
 2080,0[m
 2080,0[m
[31m-75336,0[m
[32m+[m[32m65016,0[m
 8650752,0[m
 17384456,0[m
 2080,0[m
[36m@@ -50

In [24]:
!git diff --color --no-index /tmp/trace_1 /tmp/trace_2 | head -n 100

[1mdiff --git a/tmp/trace_1 b/tmp/trace_2[m
[1mindex 2c72a6b..d27f7ec 100644[m
[1m--- a/tmp/trace_1[m
[1m+++ b/tmp/trace_2[m
[36m@@ -16,14 +16,6 @@[m [mmsg_size,rank[m
 2080,0[m
 2080,0[m
 2080,0[m
[31m-61920,0[m
[31m-8519680,0[m
[31m-17122312,0[m
[31m-3594672,0[m
[31m-8519680,0[m
[31m-4193784,0[m
[31m-2080,0[m
[31m-17253384,0[m
 2080,0[m
 2080,0[m
 2080,0[m
[36m@@ -49,10 +41,6 @@[m [mmsg_size,rank[m
 2080,0[m
 2080,0[m
 2080,0[m
[31m-17253384,0[m
[31m-2080,0[m
[31m-2080,0[m
[31m-2080,0[m
 2080,0[m
 2080,0[m
 2080,0[m
[36m@@ -143,21 +131,6 @@[m [mmsg_size,rank[m
 2080,0[m
 2080,0[m
 2080,0[m
[31m-60888,0[m
[31m-8388608,0[m
[31m-16860168,0[m
[31m-4260360,0[m
[31m-8388608,0[m
[31m-3932640,0[m
[31m-16991240,0[m
[31m-16991240,0[m
[31m-2080,0[m
[31m-2080,0[m
[31m-2080,0[m
[31m-2080,0[m
[31m-2080,0[m
[31m-2080,0[m
[31m-2080,0[m
 2080,0[m
 2080,0[m
 2080,0[m
[36m@@ -183,10 +156,17 @@[m [mmsg_size,ra

## Comparing MPI_Irecv traces

In [25]:
irecv = traces[traces['function'] == 'MPI_Irecv'].sort_values(by=['start'])
group = irecv.groupby(['optimization'])[['msg_size', 'duration']].agg(['sum'])
group['count'] = recv.groupby(['optimization']).count()['start']
group

Unnamed: 0_level_0,msg_size,duration,count
Unnamed: 0_level_1,sum,sum,Unnamed: 3_level_1
optimization,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
-1,15118596576,1.361971,3682
0,15118596576,0.0,3682
1,15117079816,0.0,3682
2,5170207720,0.0,3402
3,5170207720,0.0,3402
4,5170207720,0.0,3402


In [26]:
irecv.groupby(['rank', 'exp_id'])[['msg_size']].agg(['count', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,msg_size,msg_size
Unnamed: 0_level_1,Unnamed: 1_level_1,count,sum
rank,exp_id,Unnamed: 2_level_2,Unnamed: 3_level_2
0,0,77586,9421262048
1,0,77304,9517513136
2,0,77319,9446640752
3,0,76524,9420597288
4,0,77586,5795016112
5,0,77304,5756332248
6,0,77319,5809741520
7,0,76524,5697793024


In [27]:
dump_trace(traces, 'MPI_Irecv', ['msg_size'], 'optimization')

Created file /tmp/trace_-1
Created file /tmp/trace_1
Created file /tmp/trace_2
Created file /tmp/trace_0
Created file /tmp/trace_4
Created file /tmp/trace_3


In [28]:
!head /tmp/trace_0
!wc -l /tmp/trace_*
!sha256sum /tmp/trace_*

msg_size,rank
1056,0
1056,0
1056,0
1056,0
1056,0
1056,0
1056,0
1056,0
1056,0
 103883 /tmp/trace_0
 103883 /tmp/trace_1
 103883 /tmp/trace_-1
 101941 /tmp/trace_2
 101941 /tmp/trace_3
 101941 /tmp/trace_4
 617472 total
e751a8721e7470781cfbd8dc74d7dd9d9d0da7dbcd2a5ab4138918bdfcdd3520  /tmp/trace_0
cc7fda8d6eda53fc117c199de33ab2e1ffa4e51a759438d9409d11fa9c4fbaec  /tmp/trace_1
e751a8721e7470781cfbd8dc74d7dd9d9d0da7dbcd2a5ab4138918bdfcdd3520  /tmp/trace_-1
4a3797784ccd68a97082b0b1afb29a7bac2060cea48ed27a491eb48cbdf09ff4  /tmp/trace_2
4a3797784ccd68a97082b0b1afb29a7bac2060cea48ed27a491eb48cbdf09ff4  /tmp/trace_3
4a3797784ccd68a97082b0b1afb29a7bac2060cea48ed27a491eb48cbdf09ff4  /tmp/trace_4


In [29]:
!git diff --color --no-index /tmp/trace_0 /tmp/trace_1 | head -n 100

[1mdiff --git a/tmp/trace_0 b/tmp/trace_1[m
[1mindex 0d9fd34..f362925 100644[m
[1m--- a/tmp/trace_0[m
[1m+++ b/tmp/trace_1[m
[36m@@ -163,9 +163,9 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 131072,0[m
[31m-4393488,0[m
[32m+[m[32m3860944,0[m
 8519680,0[m
[31m-3927512,0[m
[32m+[m[32m4193784,0[m
 8519680,0[m
 1056,0[m
 1056,0[m
[36m@@ -279,7 +279,7 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 1056,0[m
[31m-4391424,0[m
[32m+[m[32m4048344,0[m
 8781824,0[m
 1056,0[m
 1056,0[m
[36m@@ -289,7 +289,7 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 1056,0[m
[31m-4254192,0[m
[32m+[m[32m4460040,0[m
 1056,0[m
 1056,0[m
 1056,0[m
[36m@@ -348,9 +348,9 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 131072,0[m
[31m-3785152,0[m
[32m+[m[32m4325888,0[m
 8650752,0[m
[31m-4055520,0[m
[32m+[m[32m4461072,0[m
 8650752,0[m
 1056,0[m
 1056,0[m
[36m@@ -507,7 +507,7 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 8126464,0[m
[31m-4063

In [30]:
!git diff --color --no-index /tmp/trace_1 /tmp/trace_2 | head -n 100

[1mdiff --git a/tmp/trace_1 b/tmp/trace_2[m
[1mindex f362925..cf7f5ad 100644[m
[1m--- a/tmp/trace_1[m
[1m+++ b/tmp/trace_2[m
[36m@@ -161,11 +161,8 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 1056,0[m
[31m-1056,0[m
 131072,0[m
[31m-3860944,0[m
 8519680,0[m
[31m-4193784,0[m
 8519680,0[m
 1056,0[m
 1056,0[m
[36m@@ -251,7 +248,6 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 1056,0[m
[31m-1056,0[m
 131072,0[m
 8781824,0[m
 1056,0[m
[36m@@ -276,11 +272,11 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 1056,0[m
[32m+[m[32m8781824,0[m
[32m+[m[32m1056,0[m
 1056,0[m
 1056,0[m
 1056,0[m
[31m-4048344,0[m
[31m-8781824,0[m
 1056,0[m
 1056,0[m
 1056,0[m
[36m@@ -289,7 +285,6 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 1056,0[m
[31m-4460040,0[m
 1056,0[m
 1056,0[m
 1056,0[m
[36m@@ -348,9 +343,7 @@[m [mmsg_size,rank[m
 1056,0[m
 1056,0[m
 131072,0[m
[31m-4325888,0[m
 8650752,0[m
[31m-4461072,0[m
 8650752,0[m
 1056,0[m
 1