In [None]:
%matplotlib inline

In [None]:
import collections
import datetime

import matplotlib
matplotlib.rcParams['font.size'] = 16
import matplotlib.pyplot
import pandas

import tokio
import tokio.connectors.nersc_isdct

In [None]:
# Day being reported on.  The query window is the 24 hours that begin at
# 00:00:00 on this date.
TARGET_DATE = datetime.datetime(2018, 12, 7)

date_start, date_end = TARGET_DATE, TARGET_DATE + datetime.timedelta(days=1)

print("Returning data from %s to %s" % (date_start, date_end))

In [None]:
# Accumulator for the whole notebook: io_traffic[<storage system>][<'read' or
# 'write'>] holds a byte count.  The defaultdict creates each per-system dict
# on first access.
io_traffic = collections.defaultdict(dict)

## Lustre file systems

In [None]:
# For every file system and direction, load the 'datatargets/<read|write>bytes'
# dataset covering the reporting window and collapse it to one grand total.
# A missing dataframe (None) is recorded as -1.0 rather than skipped.
for fs_name in ('cscratch', 'scratch1', 'scratch2', 'scratch3', 'coribb'):
    for direction in ('read', 'write'):
        traffic_df = tokio.tools.hdf5.get_dataframe_from_time_range(
            fsname=fs_name,
            dataset_name='datatargets/%sbytes' % direction,
            datetime_start=date_start,
            datetime_end=date_end)
        # First .sum() totals each column, the second totals those column sums.
        io_traffic[fs_name][direction] = (
            -1.0 if traffic_df is None else traffic_df.sum().sum())

## Cori Burst Buffer - via ISDCT

In [None]:
# Find the ISDCT dumps for the reporting window.  The totals can only be
# computed when exactly two files are found: the first is used as the earlier
# snapshot and the last as the later one, and their difference is summed.
isdct_file = tokio.tools.common.enumerate_dated_files(
    start=date_start,
    end=date_end,
    template=tokio.config.CONFIG['isdct_files'])

zero_reads = 0
zero_writes = 0

# Bind the totals before branching so the cell that stores them in io_traffic
# never raises NameError on a fresh kernel, and never silently reuses totals
# left in the kernel by an earlier run with a different TARGET_DATE.
# -1.0 is the same "no data" marker the Lustre cell records.
read_tot = -1.0
write_tot = -1.0

if len(isdct_file) == 2:
    yesterday_isdct = tokio.connectors.nersc_isdct.NerscIsdct(isdct_file[0])
    today_isdct = tokio.connectors.nersc_isdct.NerscIsdct(isdct_file[-1])
    isdct_diff = today_isdct.diff(yesterday_isdct)
    read_tot = 0.0
    write_tot = 0.0
    for devicedata in isdct_diff['devices'].values():
        # A device whose diff lacks a counter is tallied as having moved zero
        # bytes in that direction.
        # NOTE(review): this assumes diff() omits unchanged counters - confirm
        # against the diff() report_zeros option.
        if 'data_units_written_bytes' not in devicedata:
            zero_writes += 1
        if 'data_units_read_bytes' not in devicedata:
            zero_reads += 1
        read_tot += devicedata.get('data_units_read_bytes', 0.0)
        write_tot += devicedata.get('data_units_written_bytes', 0.0)
else:
    print("WARNING: expected 2 ISDCT files but found %d; "
          "Burst Buffer totals are unavailable" % len(isdct_file))

print("%d devices showed zero writes" % zero_writes)
print("%d devices showed zero reads" % zero_reads)

In [None]:
# Replace whatever the file-system loop stored under 'coribb' with the totals
# computed from the ISDCT diff.
io_traffic['coribb'] = dict(read=read_tot, write=write_tot)

## HPSS

In [None]:
# Look up the HPSS daily report for the target day.  The search window ends
# one second before date_end, i.e. at 23:59:59 of TARGET_DATE - presumably so
# the following day's report is not matched; the assert requires exactly one.
hpss_window_end = date_end - datetime.timedelta(seconds=1)
hpss_file = tokio.tools.common.enumerate_dated_files(
    start=date_start,
    end=hpss_window_end,
    template=tokio.config.CONFIG['hpss_report_files'])
assert len(hpss_file) == 1
hpss_dict = tokio.connectors.hpss.HpssDailyReport(hpss_file[0])

In [None]:
# Despite its name, total_read is the whole 'total' record of the archive's
# per-client-application I/O table; both directions are read from it.
total_read = hpss_dict['archive']['io totals by client application']['total']

# The report's *_gb figures are multiplied by 2**30 to express them in bytes,
# the unit used for every other entry in io_traffic.
io_traffic['archive'] = {
    'read': total_read['read_gb'] * 2**30,
    'write': total_read['write_gb'] * 2**30,
}

## Report on all storage systems

In [None]:
# Copy the read/write byte counts of every storage system into a fresh
# mapping (later turned into a DataFrame) and print one human-readable line
# per system.
summaries_for_df = collections.defaultdict(dict)
for system, iovolumes in io_traffic.items():
    row = summaries_for_df[system]
    for direction in ('read', 'write'):
        row[direction] = iovolumes[direction]
    read_text = tokio.common.humanize_bytes(iovolumes['read'], fmt="%6.1f %3s")
    write_text = tokio.common.humanize_bytes(iovolumes['write'], fmt="%6.1f %3s")
    print("%12s %s read, %s written" % (system, read_text, write_text))

In [None]:
# One row per storage system, columns 'read' and 'write', in bytes ...
summary_bytes = pandas.DataFrame.from_dict(summaries_for_df, orient='index')
# ... rescaled to TiB (2**40 bytes) for plotting.
summary_tibs = summary_bytes / 2**40

In [None]:
fig, ax = matplotlib.pyplot.subplots(figsize=(8, 6))

# Transposing puts 'read' and 'write' on the x axis, with one stacked segment
# per storage system in each bar.
summary_tibs.transpose().plot.bar(stacked=True, ax=ax, width=0.9)

# Horizontal grid lines, drawn underneath the bars.
ax.yaxis.grid()
ax.set_axisbelow(True)

ax.set_ylabel("Data Moved (TiB)")
ax.set_title("Storage Activity at NERSC on %s" % TARGET_DATE.strftime("%b %d, %Y"))