# Benchmark for the emission test tracer

The emission test tracer is transported in ICON-AES. Its tendency is updated using Python code via one of the established bridges, selected by the field attribute `echam_ttr_config%icon_ml_bridge`. The benchmarked settings are:

- `fortran`: Pure Fortran
- `cffi`
- `pipes`
- `mpi`

The benchmark is done on a one-month atm amip simulation where no output is written beyond the basic necessities.

## Read the times directly from log files

Keep the human out of the loop to avoid errors.

Cycle through all the log files, check whether each run finished successfully, and retrieve the wall time.

We take the lazy route and load each log file in full; in the worst case, the JupyterHub session crashes.

In [None]:
import pandas as pd
import itertools
import os
from datetime import datetime
import numpy as np

In [None]:
# True: start from an empty table and re-parse every log file.
# False: reuse the cached ./benchmark.csv (faster).
refresh_frame = True

if os.path.exists('./benchmark.csv') and not refresh_frame:
    df = pd.read_csv('./benchmark.csv')
    # A CSV that was written together with its index comes back with an extra
    # 'Unnamed: N' column, and gains one more on every write/read round trip.
    # Drop those columns so the cached table keeps the schema used below.
    df = df.loc[:, ~df.columns.str.startswith('Unnamed')]
else:
    # One row per benchmark run; the columns are filled by the log-file loop.
    df = pd.DataFrame(columns=['count', 'resolution', 'bridge', 'nodes', 'procs_pernode', 't_start', 't_end', 'logfile', 'is_logfile', 'is_finished_ok'])
    df.t_start = df.t_start.astype(object)
    df = df.replace('nan', None)
df

In [None]:
%%time

# Directory holding the LOG_exp.*.run files of the benchmark runs
exp_dir = '/work/ka1176/caroline/jobs/hereon_iconml/one_month/R02B04'

# Parameter space of the benchmark: every combination is one expected log file
var_nodes = [1, 2, 4]
var_procs_pernode = [128, 64, 32]
var_counts = [1, 2, 3]
var_bridges = ['fortran', 'cffi', 'pipes', 'mpi']
var_resolutions = ['R02B04']

new_rows = []  # entries parsed in this pass; appended to df once, after the loop
i = 0          # number of entries added in this pass
for nodes, procs_pernode, count, bridge, resolution in itertools.product(
        var_nodes, var_procs_pernode, var_counts, var_bridges, var_resolutions):

    # check if entry already exists in dataframe
    # (plain boolean masks handle the numeric and the string columns alike)
    is_this_run = (
        (df['count'] == count)
        & (df['nodes'] == nodes)
        & (df['procs_pernode'] == procs_pernode)
        & (df['bridge'] == bridge)
        & (df['resolution'] == resolution)
    )
    entry = df[is_this_run]

    assert len(entry) <= 1

    if len(entry) == 1 and entry['is_finished_ok'].all():
        # entry exists and the run finished: nothing to re-parse
        continue

    if len(entry) == 1:
        # The cached entry is unfinished and is re-parsed below. Remove it so
        # the fresh row replaces it instead of duplicating it (a duplicate
        # would trip the uniqueness assert on the next pass).
        df = df[~is_this_run]

    logfile = f'LOG_exp.iconml_month_{bridge}_{nodes}-{procs_pernode}_{count}.run'
    logpath = os.path.join(exp_dir, logfile)
    is_logfile = os.path.exists(logpath)

    # none-initialize the values that are parsed from logfiles
    t_start = None
    t_end = None
    is_finished_ok = False

    # check if the run finished OK
    if is_logfile:
        with open(logpath) as f:
            ll = f.readlines()

        # the success marker occurs twice in the log script
        n_ok = sum('Script run successfully:  OK' in lll for lll in ll)
        is_finished_ok = n_ok == 2

        # read the start and end time
        if is_finished_ok:
            log_lines = np.asarray(ll)

            # the time stamp is the line following each traced `date` command
            ix = np.where(log_lines == '+ date\n')[0]
            if len(ix) != 2:
                raise ValueError(
                    f"Expected 2 '+ date' lines in {logpath}, found {len(ix)}"
                )
            t_start = ll[ix[0]+1].strip()
            t_end   = ll[ix[1]+1].strip()

            # find lines matching pattern
            # TODO turn this into regex
            # TODO load the file again and loop
            # TODO calculate the overhead time (wall time - total time)
            # TODO report the total time spent in .... .... submodules especially echam_ttr
            pattern = '0:  name                              # calls    t_min          min rank   t_avg          t_max          max rank   total min (s)   total min rank   total max (s)   total max rank\n'
            ix = np.where(log_lines == pattern)[0]
            print(ix)

    new_rows.append(dict(count=count,
                         resolution=resolution,
                         bridge=bridge,
                         nodes=nodes,
                         procs_pernode=procs_pernode,
                         t_start=t_start,
                         t_end=t_end,
                         logfile=logfile,
                         is_logfile=is_logfile,
                         is_finished_ok=is_finished_ok))

    i += 1

if new_rows:
    # Append all new rows in one go. ignore_index keeps the row labels unique
    # even when df was loaded from the cache, so later column assignments
    # that align on the index stay unambiguous.
    df = pd.concat([df, pd.DataFrame(new_rows)], axis=0, ignore_index=True)

print(f'Added {i} new entries to dataframe')

In [None]:
# Column summary restricted to the runs that finished successfully
df[df['is_finished_ok']].info()

In [None]:
# Cache the table for later sessions. The row index is not written: the cache
# is read back with a plain pd.read_csv, which would otherwise turn the index
# into an extra 'Unnamed: 0' data column.
df.to_csv('./benchmark.csv', index=False)

## Process

- Calculate wall time
- Calculate total number of MPI cores

In [None]:
# Total number of MPI processes of a run = nodes x processes per node
df['MPI_processes'] = df['nodes'].mul(df['procs_pernode'])

In [None]:
def time_stamp_to_seconds(s, tsformat='%a %b %d %H:%M:%S %Z %Y', to_sec=True):
    '''
    Converts a time stamp from an ICON log script to UNIX seconds

    Parameters:

    s - Time stamp string. Missing values (None, NaN, or their string
        typecasts 'None' / 'nan' / 'NaN' / 'NaT' / '<NA>') are accepted
    tsformat - Format see https://strftime.org/
    to_sec - Return as Epoch seconds (default: True)

    Returns:
    Parsed time stamp in requested format (int seconds or datetime),
    or None if the time stamp is missing

    Note:
    The parsed datetime is naive (strptime does not attach a time zone for
    %Z), so the epoch seconds are computed in the local time zone of the
    machine. Differences between two stamps are unaffected by this as long
    as both stamps are in the same zone.
    '''

    # Missing values show up as None / NaN, or as their str() typecast when
    # the column went through .astype(str) or a CSV round trip
    if s is None or str(s) in ('None', 'nan', 'NaN', 'NaT', '<NA>'):
        return None

    x = datetime.strptime(s, tsformat)

    if to_sec:
        # datetime.timestamp() is the portable way to get epoch seconds;
        # strftime('%s') is a platform-specific extension
        return int(x.timestamp())
    return x

In [None]:
# Parse the start / end time stamps of every run. The columns are typecast to
# str first, so missing stamps reach the parser as strings.
# Series.map is applied per column instead of DataFrame.applymap, which is
# deprecated in recent pandas releases; the element-wise result is the same.
df_sec = df[['t_start', 't_end']].astype(str).apply(
    lambda column: column.map(time_stamp_to_seconds)
)
df_sec

In [None]:
# Wall time of each run in seconds (end stamp minus start stamp)
df['delta_t'] = df_sec['t_end'].sub(df_sec['t_start'])
# Cost of each run in node hours: wall time x number of nodes, seconds -> hours
df['node_hours'] = df['delta_t'].mul(df['nodes']).div(3600)

## Plot

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt

# Global seaborn settings for all figures below:
# 'whitegrid' axes style and the 'talk' plotting context
sns.set_style('whitegrid')
sns.set_context('talk')

### For different numbers of mpi_procs

In [None]:
# One bar chart per total MPI process count: wall time against node count,
# with one bar colour per bridge
for procs in np.sort(df['MPI_processes'].unique()):
    print(procs)

    fig, ax = plt.subplots(figsize=(8, 5))

    sns.barplot(
        data=df[df['MPI_processes'] == procs],
        x='nodes',
        y='delta_t',
        hue='bridge',
        ax=ax,
    )

    ax.set(
        title=f'One month R02B04 with {procs} MPI processes',
        ylabel='Wall time (seconds)',
    )

    fig.tight_layout()
    plt.show()

### Bridge scaling compared

In [None]:
# Mean wall time (seconds) for every (MPI process count, bridge) combination
df.groupby(['MPI_processes', 'bridge'])['delta_t'].agg('mean')

In [None]:
# Distinct MPI process counts, ascending; used as x ticks in the scaling plots
xtix = np.sort(df['MPI_processes'].unique()).astype(int)
print(xtix)

# Ideal scaling reference: 4140 s is taken as the fixed value at 32 processes,
# and the wall time is assumed inversely proportional to the process count
perfect_scale = 4140 * 32 / xtix

In [None]:
# Log-log scaling plot for the single-node runs: wall time against MPI
# processes, one line per bridge, plus the ideal scaling reference (dotted)
fig, ax = plt.subplots(figsize=(8, 5))

sns.lineplot(
    data=df[df['nodes'] == 1],
    x='MPI_processes',
    y='delta_t',
    hue='bridge',
    legend='brief',
    marker='o',
    ax=ax,
)

ax.set_xscale("log")
ax.set_yscale("log")
ax.set(xlabel='MPI processes', ylabel='Wall time (seconds)')

# explicit ticks: plain numbers instead of the default log-axis labels
ax.set_xticks(xtix)
ax.set_xticklabels(xtix)

ax.set_yticks([500, 1000, 2000, 5000])
ax.set_yticklabels([500, 1000, 2000, 5000])

ax.plot(xtix, perfect_scale, ':', color='C7')
ax.legend(ncol=2)

plt.show()


In [None]:
# Log-log scaling plot for the two-node runs: wall time against MPI
# processes, one line per bridge, plus the ideal scaling reference (dotted)
fig, ax = plt.subplots(figsize=(8, 5))

sns.lineplot(
    data=df[df['nodes'] == 2],
    x='MPI_processes',
    y='delta_t',
    hue='bridge',
    legend='brief',
    marker='o',
    ax=ax,
)

ax.set_xscale("log")
ax.set_yscale("log")
ax.set(xlabel='MPI processes', ylabel='Wall time (seconds)')

# explicit ticks: plain numbers instead of the default log-axis labels
ax.set_xticks(xtix)
ax.set_xticklabels(xtix)

ax.set_yticks([500, 1000, 2000, 5000])
ax.set_yticklabels([500, 1000, 2000, 5000])

ax.plot(xtix, perfect_scale, ':', color='C7')
ax.legend(ncol=2)

plt.show()


In [None]:
# Log-log scaling plot for the four-node runs: wall time against MPI
# processes, one line per bridge, plus the ideal scaling reference (dotted)
fig, ax = plt.subplots(figsize=(8, 5))

sns.lineplot(
    data=df[df['nodes'] == 4],
    x='MPI_processes',
    y='delta_t',
    hue='bridge',
    legend='brief',
    marker='o',
    ax=ax,
)

ax.set_xscale("log")
ax.set_yscale("log")
ax.set(xlabel='MPI processes', ylabel='Wall time (seconds)')

# explicit ticks: plain numbers instead of the default log-axis labels
ax.set_xticks(xtix)
ax.set_xticklabels(xtix)

ax.set_yticks([500, 1000, 2000, 5000])
ax.set_yticklabels([500, 1000, 2000, 5000])

ax.plot(xtix, perfect_scale, ':', color='C7')
ax.legend(ncol=2)

plt.show()


Compare the runtime when a bridge is included with the original pure-Fortran runtime.

In [None]:
# For every MPI process count, print the mean wall time of each bridge as a
# percentage increase over the pure Fortran mean. The 'mpi' bridge is left out.
for procs in np.sort(df['MPI_processes'].unique()):
    print('-'*40)
    print(' MPI processes: ', procs)

    has_procs = df['MPI_processes'] == procs
    not_mpi_bridge = df['bridge'] != 'mpi'
    tmp = df[has_procs & not_mpi_bridge].groupby('bridge')['delta_t'].mean()

    # only report process counts with at least one measured wall time
    if not tmp.isnull().all():
        tmp = 100 * (tmp - tmp.fortran) / tmp.fortran

        print('Relative increase (%) compared to FORTRAN runtime\n', tmp)

### Node hours

In [None]:
# Wall time converted from seconds to hours
df['delta_t_hrs'] = df['delta_t'].div(3600)

In [None]:
# For each bridge, print the key figures of its first run when sorted by wall
# time. NOTE: despite its name, min_node_hours is selected by the smallest
# delta_t (wall time), not by the smallest node_hours.
for bridge in var_bridges:
    print('**', bridge)
    runs_of_bridge = df[df['bridge'] == bridge]
    min_node_hours = runs_of_bridge.sort_values(by='delta_t').iloc[0]
    print(min_node_hours[['delta_t_hrs', 'node_hours', 'nodes', 'procs_pernode']])

In [None]:

# Wall time against node hours for every run, coloured by bridge
ax = sns.scatterplot(data=df, x='node_hours', y='delta_t_hrs', hue='bridge')
ax.set(xlabel='Node hours', ylabel='Wall time (hours)')
ax.legend(ncol=2, loc='upper left')

# dotted guide lines: wall time = node hours / 1, / 2 and / 4
xx = np.linspace(0.3, 2.0)
for divisor in (1, 2, 4):
    ax.plot(xx, xx / divisor, ':', color='C7')
plt.show()

### Variability

In [None]:
# Relative spread of the wall time across repeated runs of one configuration:
# standard deviation divided by the mean, per (MPI processes, bridge, nodes)
wall_time_by_config = df.groupby(['MPI_processes', 'bridge', 'nodes'])['delta_t']
wall_time_by_config.std() / wall_time_by_config.mean()

In [None]:
# Runs that have a log file but did not finish successfully
df.loc[df['is_logfile'] & ~df['is_finished_ok']]

In [None]:
# Runs with a positive repetition count that finished successfully
df.loc[df['count'].gt(0) & df['is_finished_ok']]

In [None]:
# All repetitions of the cffi bridge on one node with 32 processes per node
df.loc[df['bridge'].eq('cffi') & df['nodes'].eq(1) & df['procs_pernode'].eq(32)]

In [None]:
# TODO grep for the time spent in the interface_echam_ttr module