# darshan-job-summary Reproducer

This notebook demonstrates how to use pytokio to reproduce the plots created by the `darshan-job-summary.pl` script that is included with Darshan.

In [None]:
# Select matplotlib's inline backend so figures render in the cell output.
%matplotlib inline

In [None]:
import os

import pandas
import matplotlib.pyplot
import matplotlib
# Set matplotlib's default font size to 18 pt for the figures in this notebook.
matplotlib.rcParams['font.size'] = 18

import tokio

In [None]:
# Location of the sample Darshan log, relative to the current working directory
sample_log_path = os.path.join('..', 'tests', 'inputs', 'sample.darshan')
# Wrap the log in pytokio's Darshan connector object
darshanlog = tokio.connectors.darshan.Darshan(sample_log_path)

In [None]:
# Run the connector's three darshan_parser_* methods in sequence.
# NOTE(review): presumably these populate the 'header' and 'counters' entries
# that the cells below read -- confirm against the tokio Darshan connector docs.
darshanlog.darshan_parser_base()
darshanlog.darshan_parser_total()
# Bind the final return value to `_` so the cell does not echo it as output.
_ = darshanlog.darshan_parser_perf()

## Create the I/O time stacked bar chart

In [None]:
# Aggregate time budget of the job: number of processes times wall time
job_header = darshanlog['header']
cpu_time = job_header['nprocs'] * job_header['walltime']

In [None]:
# Build one record per Darshan module with the time spent reading, writing
# and doing metadata operations, plus the total time they are compared to.
# Despite its name, `dataframe_dict` is a list of row dicts.
#
# `total_time` reuses `cpu_time` (nprocs * walltime) from the previous cell
# rather than re-deriving the same loop-invariant product for every module.
dataframe_dict = []
for module in 'posix', 'mpiio', 'stdio':
    # Look up this module's aggregated counters once
    module_totals = darshanlog['counters'][module]['_total']
    dataframe_dict.append({
        'module': module,
        # Negative counter values are clamped to zero
        'read_time': max(0.0, module_totals['F_READ_TIME']),
        'write_time': max(0.0, module_totals['F_WRITE_TIME']),
        'meta_time': max(0.0, module_totals['F_META_TIME']),
        'total_time': cpu_time,
    })

In [None]:
# Turn the per-module records into a frame indexed by module name, then
# express each I/O time as a percentage of that module's total_time.
df = pandas.DataFrame.from_dict(dataframe_dict).set_index('module')
for pct_column, time_column in [('Read', 'read_time'),
                                ('Write', 'write_time'),
                                ('Metadata', 'meta_time')]:
    df[pct_column] = df[time_column] / df['total_time'] * 100.0
# Whatever remains of the 100% is attributed to everything else
df['Other (including application compute)'] = 100.0 - df['Read'] - df['Write'] - df['Metadata']
df

In [None]:
fig, ax = matplotlib.pyplot.subplots(figsize=(8,6))

# One stacked bar per module, split into the four percentage columns
stacked_columns = ['Read', 'Write', 'Metadata', 'Other (including application compute)']
df[stacked_columns].plot.bar(ax=ax, width=0.8, stacked=True)

ax.set_ylabel("Percentage of run time")
ax.set_xlabel("")
# Upper-case the module names on the x axis and spell MPIIO as MPI-IO
xticklabels = [tick.get_text().upper().replace("MPIIO", "MPI-IO")
               for tick in ax.get_xticklabels()]
ax.set_xticklabels(xticklabels, rotation=30)

# Shrink the axes to 90% of their height (top edge unchanged) so the
# legend can sit underneath them
box = ax.get_position()
shrunk_bounds = [box.x0, box.y0 + box.height * 0.1,
                 box.width, box.height * 0.9]
ax.set_position(shrunk_bounds)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.17),
          fancybox=True, shadow=True)

# Grid lines for the y axis only, drawn behind the bars
ax.yaxis.grid()
ax.set_axisbelow(True)
ax.set_title("Average I/O cost per process")

## Create the total ops chart

In [None]:
# Counters to read for each plotted series. A key is a module name,
# optionally followed by '_<mode>' for series whose counters carry a
# '<mode>_' prefix.
module_ops = {
    'posix': [
        'reads', 'writes', 'opens', 'stats', 'seeks', 'mmaps', 'fsyncs',
    ],
    'mpiio_coll': [
        'coll_reads', 'coll_writes', 'coll_opens',
    ],
    'mpiio_indep': [
        'indep_reads', 'indep_writes', 'indep_opens',
    ],
    'stdio': [
        'reads', 'writes', 'opens', 'seeks',
    ],
}
# Legend label of each series; iteration order here is the record order
plot_module_labels = {
    'posix': "POSIX",
    'mpiio_indep': "MPI-IO Indep.",
    'mpiio_coll': "MPI-IO Coll.",
    'stdio': "STDIO",
}

dataframe_dict = []
for module_key, module_descr in plot_module_labels.items():
    # e.g. 'mpiio_coll' -> module 'mpiio', mode 'coll'; plain keys get mode None
    module, separator, mode = module_key.partition('_')
    if not separator:
        mode = None
    module_totals = darshanlog['counters'][module]['_total']

    record = {'module': module_descr}
    for counter_key in module_ops[module_key]:
        # Drop the mode prefix so that every series uses the same op names
        has_mode_prefix = bool(mode) and counter_key.startswith(mode)
        op = counter_key.split('_', 1)[-1] if has_mode_prefix else counter_key
        # Counter names are looked up in upper case; a missing one yields None
        record[op] = module_totals.get(counter_key.upper())
    dataframe_dict.append(record)

In [None]:
# Row order used when the operation counts are plotted
plot_order = [
    'reads', 'writes', 'opens',
    'stats', 'seeks', 'mmaps', 'fsyncs',
]

# Transpose so each row is an operation and each column is a module series
df = (
    pandas.DataFrame.from_dict(dataframe_dict)
    .set_index('module')
    .transpose()
)
df.index.name = "op"
df.columns.name = None
df

In [None]:
fig, ax = matplotlib.pyplot.subplots(figsize=(8,6))

# Grouped bars: one group per operation (in plot_order), one bar per series
ops_in_plot_order = df.loc[plot_order]
ops_in_plot_order.plot.bar(ax=ax, width=0.8)

ax.set_ylabel("Ops (Total, All Processes)")
ax.set_xlabel("")
# Show operation names without trailing 's' characters and title-cased
xticklabels = [tick.get_text().rstrip('s').title()
               for tick in ax.get_xticklabels()]
ax.set_xticklabels(xticklabels, rotation=30)

# Shrink the axes to 90% of their height (top edge unchanged) so the
# two-column legend can sit underneath them
box = ax.get_position()
shrunk_bounds = [box.x0, box.y0 + box.height * 0.1,
                 box.width, box.height * 0.9]
ax.set_position(shrunk_bounds)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.17),
          fancybox=True, shadow=True, ncol=2)

# Grid lines for both axes, drawn behind the bars
ax.grid()
ax.set_axisbelow(True)
ax.set_title("I/O Operation Counts")

# Optional: switch to a logarithmic y axis
# ax.set_yscale('log')