List the files in the results directory:

In [1]:
!ls ../results/

ior_h5_n16_M1_T128.out  ior_mpi_n4_M4_T8.out    ior_psx_n2_M2_T256.out
ior_h5_n16_M1_T16.out   ior_mpi_n4_M8_T128.out  ior_psx_n2_M2_T32.out
ior_h5_n16_M1_T256.out  ior_mpi_n4_M8_T16.out   ior_psx_n2_M2_T64.out
ior_h5_n16_M1_T32.out   ior_mpi_n4_M8_T256.out  ior_psx_n2_M2_T8.out
ior_h5_n16_M1_T64.out   ior_mpi_n4_M8_T32.out   ior_psx_n2_M4_T128.out
ior_h5_n16_M1_T8.out    ior_mpi_n4_M8_T64.out   ior_psx_n2_M4_T16.out
ior_h5_n16_M2_T128.out  ior_mpi_n4_M8_T8.out    ior_psx_n2_M4_T256.out
ior_h5_n16_M2_T16.out   ior_mpi_n64_M1_T128.out ior_psx_n2_M4_T32.out
ior_h5_n16_M2_T256.out  ior_mpi_n64_M1_T16.out  ior_psx_n2_M4_T64.out
ior_h5_n16_M2_T32.out   ior_mpi_n64_M1_T256.out ior_psx_n2_M4_T8.out
ior_h5_n16_M2_T64.out   ior_mpi_n64_M1_T32.out  ior_psx_n2_M8_T128.out
ior_h5_n16_M2_T8.out    ior_mpi_n64_M1_T64.out  ior_psx_n2_M8_T16.out
ior_h5_n16_M4_T128.out  ior_mpi_n64_M1_T8.out   ior_psx_n2_M8_T256.out
ior_h5_n16_M4_T16.out   ior_mpi_n64_M2_T128.out ior_psx_n2_M8_T32.out
ior

Now, let's pick an IOR output file and see what it looks like:

In [2]:
!cat ../results/ior_mpi_n16_M1_T8.out

IOR-3.2.0: MPI Coordinated Test of Parallel I/O
Began               : Thu Jun  6 17:14:12 2019
Command line        : /glade/work/kpaul/software/ncar-ior/bin/ior -a MPIIO -w -r -C -i10 -g -t 8m -b 8m -c
Machine             : Linux r14i0n9
TestID              : 0
StartTime           : Thu Jun  6 17:14:12 2019
Path                : /gpfs/fs1/scratch/kpaul/ior_scaling
FS                  : 15000.0 TiB   Used FS: 59.6%   Inodes: 2355.6 Mi   Used Inodes: 62.6%

Options: 
api                 : MPIIO
apiVersion          : (3.1)
test filename       : testFile
access              : single-shared-file
type                : collective
segments            : 1
ordering in a file  : sequential
ordering inter file : constant task offset
task offset         : 1
tasks               : 16
clients per node    : 1
repetitions         : 10
xfersize            : 8 MiB
blocksize           : 8 MiB
aggregate filesize  : 128 MiB

Results: 

access    bw(MiB/s)  block(KiB) xfer(KiB)  op

Okay, now let's write a function that parses a single IOR output file:

In [3]:
def read_ior_out(fname):
    ior_out = {}
    with open(fname) as f:
        ior_out['title'] = f.readline().strip()
        line = f.readline().strip()
        ior_out['header'] = {}
        while ':' in line:
            key,value = [x.strip() for x in line.split(':', 1)]
            ior_out['header'][key] = value
            line = f.readline().strip()
        f.readline()
        line = f.readline()
        ior_out['options'] = {}
        while ':' in line:
            key,value = [x.strip() for x in line.split(':', 1)]
            ior_out['options'][key] = value
            line = f.readline().strip()
        f.readline()
        f.readline()
        columns = f.readline().split()
        f.readline()
        line = f.readline().strip()
        ior_out['results'] = []
        while ':' not in line:
            vals = line.split()
            for i,v in enumerate(vals[1:-1]):
                vals[i+1] = None if v == '-' else float(v)
            vals[-1] = int(vals[-1])
            ior_out['results'].append({key:val for key, val in zip(columns, vals)})
            line = f.readline().strip()
        ior_out['max'] = {}
        ior_out['max']['write [MiB/sec]'] = float(line.split()[2])
        ior_out['max']['read [MiB/sec]'] = float(f.readline().split()[2])
        f.readline()
        f.readline()
        ior_out['summary'] = []
        columns = f.readline().split()
        vals = f.readline().split()
        vals[1:10] = [float(v) for v in vals[1:10]]
        vals[10:22] = [int(v) for v in vals[10:22]]
        vals[22] = float(vals[22])
        vals[-1] = int(vals[-1])
        ior_out['summary'].append({key:val for key, val in zip(columns, vals)})
        vals = f.readline().split()
        vals[1:10] = [float(v) for v in vals[1:10]]
        vals[10:22] = [int(v) for v in vals[10:22]]
        vals[22] = float(vals[22])
        vals[-1] = int(vals[-1])
        ior_out['summary'].append({key:val for key, val in zip(columns, vals)})
        key, value = [x.strip() for x in f.readline().split(' : ')]
        ior_out['header'][key] = value
    return ior_out

And test it...

In [4]:
read_ior_out('../results/ior_mpi_n16_M1_T8.out')

{'title': 'IOR-3.2.0: MPI Coordinated Test of Parallel I/O',
 'header': {'Began': 'Thu Jun  6 17:14:12 2019',
  'Command line': '/glade/work/kpaul/software/ncar-ior/bin/ior -a MPIIO -w -r -C -i10 -g -t 8m -b 8m -c',
  'Machine': 'Linux r14i0n9',
  'TestID': '0',
  'StartTime': 'Thu Jun  6 17:14:12 2019',
  'Path': '/gpfs/fs1/scratch/kpaul/ior_scaling',
  'FS': '15000.0 TiB   Used FS: 59.6%   Inodes: 2355.6 Mi   Used Inodes: 62.6%',
  'Finished': 'Thu Jun  6 17:14:12 2019'},
 'options': {'api': 'MPIIO',
  'apiVersion': '(3.1)',
  'test filename': 'testFile',
  'access': 'single-shared-file',
  'type': 'collective',
  'segments': '1',
  'ordering in a file': 'sequential',
  'ordering inter file': 'constant task offset',
  'task offset': '1',
  'tasks': '16',
  'clients per node': '1',
  'repetitions': '10',
  'xfersize': '8 MiB',
  'blocksize': '8 MiB',
  'aggregate filesize': '128 MiB'},
 'results': [{'access': 'write',
   'bw(MiB/s)': 9825.0,
   'block(KiB)': 8192.0,
   'xfer(KiB)': 81

Great!  Now, let's write a function that extracts out the information we want into a DataFrame.  What information do we want?  For each dictionary in the `summary` section, we extract out:

- `Max(MiB)` ==> `max[MiB/sec]`
- `Min(MiB)` ==> `min[MiB/sec]`
- `Mean(MiB)` ==> `mean[MiB/sec]`
- `StdDev` ==> `std[MiB/sec]`
- `#Tasks` ==> `nranks`
- `blksiz` ==> `blocksize[B]`
- `xsize` ==> `xfersize[B]`
- `aggs(MiB)` ==> `totalsize[MiB]`
- `API` ==> `api`

This constitutes a single row in the DataFrame.

In [5]:
import glob
import pandas as pd

def make_dataframe(fnames='../results/ior_*.out'):
    data = {'api': [], 'nranks': [], 'xfersize[MiB]': [], 'blocksize[MiB]': [], 'totalsize[MiB]': [],
            'operation': [], 'min[MiB/sec]': [], 'max[MiB/sec]': [], 'mean[MiB/sec]': [], 'std[MiB/sec]': []}
    fnames = glob.glob(fnames)
    for fname in fnames:
        ior_out = read_ior_out(fname)
        for sumitem in ior_out['summary']:
            data['operation'].append(sumitem['Operation'])
            data['api'].append(sumitem['API'])
            data['nranks'].append(sumitem['#Tasks'])
            data['xfersize[MiB]'].append(sumitem['xsize'] / 1024**2)
            data['blocksize[MiB]'].append(sumitem['blksiz'] / 1024**2)
            data['totalsize[MiB]'].append(sumitem['aggs(MiB)'])
            data['min[MiB/sec]'].append(sumitem['Min(MiB)'])
            data['max[MiB/sec]'].append(sumitem['Max(MiB)'])
            data['mean[MiB/sec]'].append(sumitem['Mean(MiB)'])
            data['std[MiB/sec]'].append(sumitem['StdDev'])
    return pd.DataFrame(data)

In [6]:
df = make_dataframe()

In [7]:
df

Unnamed: 0,api,nranks,xfersize[MiB],blocksize[MiB],totalsize[MiB],operation,min[MiB/sec],max[MiB/sec],mean[MiB/sec],std[MiB/sec]
0,Z5,1,256.0,2048.0,2048.0,write,462.11,568.80,553.34,0.12
1,Z5,1,256.0,2048.0,2048.0,read,90.59,90.88,90.84,0.00
2,HDF5,32,8.0,64.0,2048.0,write,732.16,5806.65,4404.77,192.54
3,HDF5,32,8.0,64.0,2048.0,read,6128.66,14749.42,10915.75,300.26
4,NCMPI,16,128.0,128.0,2048.0,write,2141.02,3824.29,2741.30,3.97
5,NCMPI,16,128.0,128.0,2048.0,read,5265.25,15086.45,7540.23,21.22
6,HDF5,4,8.0,8.0,32.0,write,172.85,378.61,294.42,9.40
7,HDF5,4,8.0,8.0,32.0,read,1090.59,2140.71,1890.39,44.33
8,MPIIO,1,64.0,128.0,128.0,write,3051.63,3160.40,3105.38,0.65
9,MPIIO,1,64.0,128.0,128.0,read,1671.03,3088.90,2677.78,6.24


In [8]:
df.to_csv('../results/ior_out.csv', index=False)