# Comparison of HPL traces

In [1]:
import io
import zipfile
import pandas
from plotnine import *
import plotnine
plotnine.options.figure_size = (12, 8)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning) # removing annoying Pandas warning
import os
import json
import re

def read_csv(archive_name, csv_name, columns=None, filter_func=lambda x: x):
    """Load one CSV member of a zip archive into a pandas DataFrame.

    Args:
        archive_name: Path (or file-like object) handed to ``zipfile.ZipFile``.
        csv_name: Name of the member to read inside the archive.
        columns: Column names forwarded to ``pandas.read_csv`` as ``names``.
            When ``None``, pandas takes the names from the first row.
        filter_func: Callable applied to the raw member bytes before parsing;
            it must return bytes. The default leaves the content untouched.

    Returns:
        The parsed DataFrame, with surrounding whitespace stripped from the
        column names.

    Raises:
        KeyError: If ``csv_name`` is not a member of the archive.
    """
    # The context manager releases the archive handle as soon as the member
    # has been read; the original left it open until garbage collection.
    with zipfile.ZipFile(archive_name) as archive:
        raw = archive.read(csv_name)
    df = pandas.read_csv(io.BytesIO(filter_func(raw)), names=columns)
    df.columns = df.columns.str.strip()
    return df

def filter_pajeng(pajeng_output, keep):
    """Keep only the lines of a pajeng dump that start with a given prefix.

    Args:
        pajeng_output: Raw dump as bytes, one record per ``\\n``-separated line.
        keep: Prefix (a ``str``) that a line must start with to be retained.

    Returns:
        The retained lines, in their original order, joined with ``\\n``.
    """
    prefix = keep.encode()
    kept = filter(lambda line: line.startswith(prefix), pajeng_output.split(b'\n'))
    return b'\n'.join(kept)

def _read_paje_state(archive_name, csv_name, has_msg_size=False, replace_func=('MPI', 'MPI')):
    """Read the 'State' records of a pajeng CSV dump stored in a zip archive.

    Args:
        archive_name: Zip archive containing the dump.
        csv_name: Name of the dump inside the archive.
        has_msg_size: When true, one extra trailing column (named 'tmp') is
            expected; it supplies the message size of point-to-point calls.
        replace_func: Arguments forwarded to ``Series.str.replace`` on the
            'function' column (Simgrid reports PMPI_Wait rather than MPI_Wait).

    Returns:
        A DataFrame with one row per state record. 'function' is renamed and
        stripped, 'rank' is converted to int, and 'msg_size' is -1 except for
        MPI_Recv / MPI_Send / MPI_Isend / MPI_Irecv rows when ``has_msg_size``
        is set. The 'tmp' column, when present, is left in the result.
    """
    header = ['type', 'rank', 'container', 'start', 'end', 'duration', 'level', 'function']
    if has_msg_size:
        header = header + ['tmp']

    def only_states(raw):
        return filter_pajeng(raw, 'State')

    states = read_csv(archive_name, csv_name, columns=header, filter_func=only_states)

    renamed = states['function'].str.replace(*replace_func)
    states['function'] = renamed.str.strip()

    # -1 marks "no message size"; only point-to-point calls get a real value.
    states['msg_size'] = -1
    if has_msg_size:
        point_to_point = states['function'].isin(['MPI_Recv', 'MPI_Send', 'MPI_Isend', 'MPI_Irecv'])
        states.loc[point_to_point, 'msg_size'] = states['tmp']
        states['msg_size'] = states['msg_size'].astype(int)

    # Drop the first five characters of the rank field and parse the rest.
    # Presumably the raw field carries a leading space (' rank42' -> 42) -- TODO confirm.
    rank_digits = states['rank'].str.slice(5)
    states['rank'] = rank_digits.astype(int)
    return states

def read_smpi_state(archive_name, csv_name):
    """Read the state records of an SMPI trace.

    Message sizes are extracted and PMPI_* function names are rewritten to
    MPI_*. Returns the DataFrame built by ``_read_paje_state`` with the sign
    of 'rank' flipped.
    """
    states = _read_paje_state(
        archive_name,
        csv_name,
        has_msg_size=True,
        replace_func=('PMPI', 'MPI'),
    )
    # SMPI names its containers 'rank-42' rather than 'rank42', so the parsed
    # value comes out negative; flip the sign to recover the rank.
    negated = -states['rank']
    states['rank'] = negated
    return states

def read_smpi_link(archive_name, csv_name):
    """Read the 'Link' records of a pajeng CSV dump stored in a zip archive.

    Returns a DataFrame with the columns type, level, container, start, end,
    duration, commType, src, dst and msg_size, where 'src' and 'dst' have been
    converted to integers.
    """
    link_columns = [
        'type', 'level', 'container', 'start', 'end', 'duration',
        'commType', 'src', 'dst', 'msg_size',
    ]

    def only_links(raw):
        return filter_pajeng(raw, 'Link')

    links = read_csv(archive_name, csv_name, columns=link_columns, filter_func=only_links)

    # Drop the first six characters of each endpoint and parse the rest.
    # Presumably the raw field looks like ' rank-42' (-> 42) -- TODO confirm.
    for endpoint in ('src', 'dst'):
        links[endpoint] = links[endpoint].str.slice(6).astype(int)
    # Disabled in the original notebook:
    #    links['remote'] = (links['src'] // 32) != (links['dst'] // 32)
    return links

def read_archive(archive_name, exp_id, drop_func=['MPI_Comm_rank', 'MPI_Comm_size', 'MPI_Comm_split', 'MPI_Comm_free', 'MPI_Init', 'MPI_Finalize']):
    """Load the MPI trace of one experiment from an archive.

    Reads 'trace_mpi_<exp_id>.csv', keeps the function / start / end / rank /
    msg_size columns, removes the events whose function is listed in
    ``drop_func``, and adds the derived columns 'kind', 'rank+1', 'duration'
    and 'exp_id'. Progress information is printed along the way.

    Args:
        archive_name: Zip archive holding the trace.
        exp_id: Integer experiment index, used to build the member name.
        drop_func: Names of the MPI functions whose events are discarded.
            The default list is only read, never modified.

    Returns:
        A new DataFrame with one row per remaining event.

    Raises:
        KeyError: If the archive has no trace for ``exp_id`` (propagated from
            the zip member lookup).
    """
    mpi_trace = read_smpi_state(archive_name, 'trace_mpi_%d.csv' % exp_id)
    print('\tMPI  trace: %6d lines' % len(mpi_trace))
    # Take an explicit copy of the column subset: assigning new columns on the
    # bare selection can raise pandas' SettingWithCopyWarning.
    df = mpi_trace[['function', 'start', 'end', 'rank', 'msg_size']].copy()
    df['kind'] = 'MPI'
    df['rank+1'] = df['rank'] + 1
    old_len = len(df)
    df = df[~df['function'].str.strip().isin(drop_func)].copy()
    print('Removed %d events with functions in %s' % (old_len-len(df), ', '.join(drop_func)))
    df['duration'] = df['end'] - df['start']
    df['exp_id'] = exp_id
    return df

# Directory that holds the experiment archives.
smpi_dir = '../smpi_hpl/'
# Keep only the archives whose file name starts with 'grenoble_2018-12-21'.
simgrid_files = [
    smpi_dir + name
    for name in filter(lambda entry: entry.startswith('grenoble_2018-12-21'), os.listdir(smpi_dir))
]

def get_optimization(filename):
    """Return the SMPI optimization level recorded in an archive.

    The level is taken from the first command in the archive's 'history.json'
    that mentions 'SMPI_OPTIMIZATION'. A bare 'SMPI_OPTIMIZATION' token maps
    to 4; otherwise the token must end in '=<digit>' and that digit is
    returned.

    Args:
        filename: Zip archive (path or file-like object) with 'history.json'.

    Returns:
        The optimization level as an int.

    Raises:
        KeyError: If the archive has no 'history.json' member.
        IndexError: If no recorded command mentions 'SMPI_OPTIMIZATION'.
        AssertionError: If the token does not end in '=<digit>' (only
            single-digit levels are supported).
    """
    # Close the archive as soon as the history has been read.
    with zipfile.ZipFile(filename) as archive:
        history = json.loads(archive.read('history.json'))
    make_cmd = [cmd for cmd in history if 'SMPI_OPTIMIZATION' in cmd['command']][0]['command']
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    # The token extends up to the next whitespace or '-'.
    opt = re.findall(r'SMPI_OPTIMIZATION[^\s-]*', make_cmd)[0]
    if opt == 'SMPI_OPTIMIZATION':
        return 4
    assert opt[-2] == '='
    return int(opt[-1])

# Load every experiment of every archive into a single DataFrame.
traces = []
for file in simgrid_files:
    i = 0
    trace_simgrid = []
    # Experiments are numbered 0, 1, 2, ...; keep reading until the archive
    # has no 'trace_mpi_<i>.csv' member (the zip lookup raises KeyError).
    # NOTE(review): any other KeyError raised inside read_archive also ends
    # the loop silently.
    while True:
        try:
            tmp = read_archive(file, i)
            trace_simgrid.append(tmp)
        except KeyError:
            break
        i += 1
    trace_simgrid = pandas.concat(trace_simgrid)
    # Tag every row of this archive with its optimization level.
    trace_simgrid['optimization'] = get_optimization(file)
    traces.append(trace_simgrid)

traces = pandas.concat(traces)

# Sanity check: which optimization levels and experiment ids were loaded.
print(traces['optimization'].unique())
print(traces['exp_id'].unique())

traces.head()

	MPI  trace: 1113989 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
	MPI  trace: 1114677 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
	MPI  trace: 1112696 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
	MPI  trace: 1134436 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
	MPI  trace: 1169766 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
	MPI  trace: 1165361 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_free, MPI_Init, MPI_Finalize
	MPI  trace: 954964 lines
Removed 16 events with functions in MPI_Comm_rank, MPI_Comm_size, MPI_Comm_split, MPI_Comm_f

Unnamed: 0,function,start,end,rank,msg_size,kind,rank+1,duration,exp_id,optimization
1,computing,0.0,7e-06,7,-1,MPI,8,7e-06,0,4
2,MPI_Send,7e-06,7e-06,7,4,MPI,8,0.0,0,4
3,computing,7e-06,1e-05,7,-1,MPI,8,3e-06,0,4
4,MPI_Recv,1e-05,0.000111,7,4,MPI,8,0.000101,0,4
5,computing,0.000111,0.000278,7,-1,MPI,8,0.000167,0,4


## HPL result

In [2]:
# For each archive (in sorted file-name order), print its optimization level
# and the maximum of the 'gflops' and 'time' columns of results.csv.
for file in sorted(simgrid_files):
    res = read_csv(file, 'results.csv')
    print('Optimization level: %d' % get_optimization(file))
    for column, unit in (('gflops', 'Gflops'), ('time', 'seconds')):
        print('%s %s' % (res[column].max(), unit))
    print()

Optimization level: 0
3044.0 Gflops
29.77 seconds

Optimization level: 1
1927.0 Gflops
44.69 seconds

Optimization level: 2
2440.0 Gflops
34.17 seconds

Optimization level: 3
2481.0 Gflops
33.6 seconds

Optimization level: 4
2488.0 Gflops
33.51 seconds



## Checking the parameters

In [3]:
# Gather exp_smpi.csv from every archive and display the distinct rows.
exp_params = [read_csv(file, 'exp_smpi.csv') for file in simgrid_files]
pandas.concat(exp_params).drop_duplicates()

Unnamed: 0,matrix_size,block_size,proc_p,proc_q,pfact,rfact,bcast,depth,swap,mem_align,process_per_node,thread_per_process,dgemm_coefficient,dgemm_intercept,dtrsm_coefficient,dtrsm_intercept
0,50000,128,2,4,1,2,2,1,0,8,1,32,2.548998e-12,9.9e-05,4.524788e-12,4e-05


In [4]:
# Print the platform description (dahu.xml) stored in the first archive.
# The context manager closes the archive once the member has been read.
with zipfile.ZipFile(simgrid_files[0]) as archive:
    print(archive.read('dahu.xml').decode('ascii'))

<?xml version="1.0"?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">

<platform version="4.1">
    <config id="General">
        <prop id="smpi/os" value="0:2.9654918472829634e-07:9.636195922971908e-11;8133:0.0:0.0;15831:0.0:0.0;33956:0.0:0.0;63305:0.0:0.0"/>
        <prop id="smpi/or" value="0:1.3754300044375224e-06:8.456019002037343e-11;8133:0.0:0.0;15831:0.0:0.0;33956:0.0:0.0;63305:0.0:0.0"/>
        <prop id="smpi/ois" value="0:6.93122952108322e-07:7.051204536228499e-11;8133:3.601990790153314e-07:1.2449128895712003e-10;15831:2.977016881030904e-06:4.130261264964205e-11;33956:3.1334661540668004e-06:3.293458765282178e-11;63305:6.939993663604218e-07:0.0"/>
        <prop id="smpi/bw-factor" value="0:0.4709912642540426;8133:0.6770561715947873;15831:0.7091971477925826;33956:0.7033449540683548;63305:0.9867951082730284"/>
        <prop id="smpi/lat-factor" value="0:1.1347371681807956;8133:29.84698135078078;15831:30.274775156187765;33956:64.63917831305292;633

## Removing the initialization and finalization from the traces

In [5]:
# Print the earliest start and latest end of every (optimization, experiment) pair.
for opt in sorted(traces['optimization'].unique()):
    same_opt = traces['optimization'] == opt
    for exp_id in sorted(traces['exp_id'].unique()):
        df = traces[same_opt & (traces['exp_id'] == exp_id)]
        print('optim %d, exp %d' % (opt, exp_id), df['start'].min(), df['end'].max())
    print()

optim 0, exp 0 0.0 46.910866999999996
optim 0, exp 1 0.0 44.407853
optim 0, exp 2 0.0 45.125308000000004

optim 1, exp 0 0.0 60.485409999999995
optim 1, exp 1 0.0 61.741327000000005
optim 1, exp 2 0.0 61.818925

optim 2, exp 0 0.0 34.171738
optim 2, exp 1 0.0 34.152488
optim 2, exp 2 0.0 34.165638

optim 3, exp 0 0.0 33.617737
optim 3, exp 1 0.0 33.631519
optim 3, exp 2 0.0 33.628278

optim 4, exp 0 0.0 33.527409999999996
optim 4, exp 1 0.0 33.538357
optim 4, exp 2 0.0 33.525057000000004



In [6]:
def split(df):
    """Partition a trace by rank, experiment and optimization level.

    Returns a list with one DataFrame per combination of the distinct values
    of 'rank', 'exp_id' and 'optimization' (rank varying slowest, optimization
    fastest). Combinations with no matching row yield an empty DataFrame.
    """
    ranks = df['rank'].unique()
    exp_ids = df['exp_id'].unique()
    opts = df['optimization'].unique()

    pieces = []
    for rank in ranks:
        same_rank = df['rank'] == rank
        for exp_id in exp_ids:
            same_run = same_rank & (df['exp_id'] == exp_id)
            for opt in opts:
                pieces.append(df[same_run & (df['optimization'] == opt)])
    return pieces

def crop(df, func_name):
    """Keep the events lying strictly between the two calls to ``func_name``.

    ``df`` must contain exactly two rows whose 'function' equals
    ``func_name`` (asserted). The rows kept are those that start after the
    earliest end of these two calls and end before their latest start. The
    'start' and 'end' times of the result are shifted so that the earliest
    remaining start is 0. ``df`` itself is not modified.
    """
    markers = df.loc[df['function'] == func_name]
    assert len(markers) == 2
    window_open = markers['end'].min()
    window_close = markers['start'].max()

    inside = (df['start'] > window_open) & (df['end'] < window_close)
    cropped = df.loc[inside].copy()

    # Re-base the timestamps on the first remaining event.
    offset = cropped['start'].min()
    cropped['start'] = cropped['start'] - offset
    cropped['end'] = cropped['end'] - offset
    return cropped

def crop_all(df, func_name):
    """Crop every piece returned by ``split(df)`` and concatenate the results.

    Each piece is cropped independently with ``crop(piece, func_name)``.
    """
    cropped_pieces = []
    for piece in split(df):
        cropped_pieces.append(crop(piece, func_name))
    return pandas.concat(cropped_pieces)

# For every (rank, experiment, optimization) piece, keep only the events
# between its two MPI_Bcast calls, with timestamps re-based to start at 0.
traces = crop_all(traces, 'MPI_Bcast')

In [7]:
# Same report as before cropping: earliest start and latest end of every
# (optimization, experiment) pair in the current `traces`.
for opt in sorted(traces['optimization'].unique()):
    opt_mask = traces['optimization'] == opt
    for exp_id in sorted(traces['exp_id'].unique()):
        df = traces[opt_mask & (traces['exp_id'] == exp_id)]
        first_start = df['start'].min()
        last_end = df['end'].max()
        print('optim %d, exp %d' % (opt, exp_id), first_start, last_end)
    print()

optim 0, exp 0 0.0 29.772697
optim 0, exp 1 0.0 27.376062000000005
optim 0, exp 2 0.0 27.929152000000002

optim 1, exp 0 0.0 43.242209
optim 1, exp 1 0.0 44.579739
optim 1, exp 2 0.0 44.684934999999996

optim 2, exp 0 0.0 34.164941000000006
optim 2, exp 1 0.0 34.14657699999999
optim 2, exp 2 0.0 34.159951

optim 3, exp 0 0.0 33.586679
optim 3, exp 1 0.0 33.600991
optim 3, exp 2 0.0 33.59711800000001

optim 4, exp 0 0.0 33.495732999999994
optim 4, exp 1 0.0 33.506638
optim 4, exp 2 0.0 33.494074999999995



## Comparing MPI_Recv traces

In [8]:
# MPI_Recv events ordered by start time, then summarised per
# (optimization, experiment): total message size, total duration, event count.
is_recv = traces['function'] == 'MPI_Recv'
recv = traces[is_recv].sort_values(by=['start'])
recv_by_run = recv.groupby(['optimization', 'exp_id'])
group = recv_by_run[['msg_size', 'duration']].agg(['sum'])
group['count'] = recv_by_run['start'].count()
group

Unnamed: 0_level_0,Unnamed: 1_level_0,msg_size,duration,count
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,sum,Unnamed: 4_level_1
optimization,exp_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,0,30234276176,2.623757,3517
0,1,30234276176,2.616444,3517
0,2,30234276176,2.622546,3517
1,0,30234276176,2.66261,3517
1,1,30234276176,2.69126,3517
1,2,30234276176,2.684018,3517
2,0,35232868048,3.740902,3318
2,1,35232868048,3.731154,3318
2,2,35232868048,3.708331,3318
3,0,30234276176,2.585149,3517


In [9]:
# Number of MPI_Recv events and total message size per (rank, optimization, experiment).
recv.groupby(['rank', 'optimization', 'exp_id'])[['msg_size']].agg(['count', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,msg_size,msg_size
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,sum
rank,optimization,exp_id,Unnamed: 3_level_2,Unnamed: 4_level_2
0,0,0,391,3775229864
0,0,1,391,3775229864
0,0,2,391,3775229864
0,1,0,391,3775229864
0,1,1,391,3775229864
0,1,2,391,3775229864
0,2,0,293,3775030184
0,2,1,293,3775030184
0,2,2,293,3775030184
0,3,0,391,3775229864


## Comparing MPI_Send traces

In [10]:
# MPI_Send events ordered by start time, then summarised per
# (optimization, experiment): total message size, total duration, event count.
is_send = traces['function'] == 'MPI_Send'
send = traces[is_send].sort_values(by=['start'])
send_by_run = send.groupby(['optimization', 'exp_id'])
group = send_by_run[['msg_size', 'duration']].agg(['sum'])
group['count'] = send_by_run['start'].count()
group

Unnamed: 0_level_0,Unnamed: 1_level_0,msg_size,duration,count
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,sum,Unnamed: 4_level_1
optimization,exp_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,0,45352872752,59.015004,107399
0,1,45352872752,52.215746,107399
0,2,45352872752,54.122222,107399
1,0,45351355992,40.556704,107399
1,1,45351355992,45.320012,107399
1,2,45351355992,45.676266,107399
2,0,45316132304,9.645956,105258
2,1,45316001232,9.615561,105258
2,2,45316094080,9.60989,105258
3,0,45333071336,10.805152,107399


In [11]:
# Number of MPI_Send events and total message size per (rank, optimization, experiment).
send.groupby(['rank', 'optimization', 'exp_id'])[['msg_size']].agg(['count', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,msg_size,msg_size
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,sum
rank,optimization,exp_id,Unnamed: 3_level_2,Unnamed: 4_level_2
0,0,0,13517,5684327336
0,0,1,13517,5684327336
0,0,2,13517,5684327336
0,1,0,13517,5683351104
0,1,1,13517,5683351104
0,1,2,13517,5683351104
0,2,0,13322,6300344048
0,2,1,13322,6300311280
0,2,2,13322,6300334848
0,3,0,13517,5676621080


## Comparing MPI_Irecv traces

In [12]:
# MPI_Irecv events ordered by start time, then summarised per
# (optimization, experiment): total message size, total duration, event count.
is_irecv = traces['function'] == 'MPI_Irecv'
irecv = traces[is_irecv].sort_values(by=['start'])
irecv_by_run = irecv.groupby(['optimization', 'exp_id'])
group = irecv_by_run[['msg_size', 'duration']].agg(['sum'])
group['count'] = irecv_by_run['start'].count()
group

Unnamed: 0_level_0,Unnamed: 1_level_0,msg_size,duration,count
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,sum,Unnamed: 4_level_1
optimization,exp_id,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,0,15118596576,0.0,103882
0,1,15118596576,0.0,103882
0,2,15118596576,0.0,103882
1,0,15117079816,0.0,103882
1,1,15117079816,0.0,103882
1,2,15117079816,0.0,103882
2,0,10083264256,0.0,101940
2,1,10083133184,0.0,101940
2,2,10083226032,0.0,101940
3,0,15098795160,0.0,103882


In [13]:
# Number of MPI_Irecv events and total message size per (rank, optimization, experiment).
irecv.groupby(['rank', 'optimization', 'exp_id'])[['msg_size']].agg(['count', 'sum'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,msg_size,msg_size
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,sum
rank,optimization,exp_id,Unnamed: 3_level_2,Unnamed: 4_level_2
0,0,0,13028,1880993888
0,0,1,13028,1880993888
0,0,2,13028,1880993888
0,1,0,13028,1881623584
0,1,1,13028,1881623584
0,1,2,13028,1881623584
0,2,0,12834,1259216896
0,2,1,12834,1259216896
0,2,2,12834,1259216896
0,3,0,13028,1883750856
