Skip to content
This repository has been archived by the owner on Nov 5, 2022. It is now read-only.

Commit

Permalink
Merge 60716a6 into 7593223
Browse files Browse the repository at this point in the history
  • Loading branch information
glennklockwood committed Oct 8, 2019
2 parents 7593223 + 60716a6 commit eac6f90
Show file tree
Hide file tree
Showing 16 changed files with 519 additions and 85 deletions.
51 changes: 16 additions & 35 deletions examples/tokio.tools.hdf5-heatmap.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,7 @@
"metadata": {},
"outputs": [],
"source": [
"print \"Valid file systems are:\"\n",
"for file_system in tokio.config.HDF5_FILES.keys():\n",
" print \" \", file_system"
"print(\"Valid file systems are: \" + \", \".join(tokio.config.CONFIG['hdf5_files'].keys()))"
]
},
{
Expand All @@ -61,8 +59,8 @@
"end_time = datetime.datetime(2017, 10, 17, 21, 56, 14)\n",
"file_system = 'cscratch'\n",
"\n",
"print \"Start time: %s\" % start_time\n",
"print \"End time: %s\" % end_time"
"print(\"Start time: %s\" % start_time)\n",
"print(\"End time: %s\" % end_time)"
]
},
{
Expand All @@ -74,30 +72,6 @@
"You shouldn't have to modify any code below here."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokio.DEBUG = True\n",
"tokio.tools.hdf5.enumerate_h5lmts(fsname=file_system,\n",
" datetime_start=start_time,\n",
" datetime_end=end_time)\n",
"\n",
"inputf = '/global/project/projectdirs/pma/www/daily/2017-10-17/cori_snx11168.h5lmt'\n",
"inputf = '/global/u2/g/glock/src/git/pytokio-dev/tests/inputs/snx11025_2018-01-28.h5lmt'\n",
"\n",
"with tokio.connectors.hdf5.Hdf5(inputf) as f:\n",
" f.to_dataframe('/datatargets/readbytes')\n",
"\n",
"#tokio.tools.hdf5.get_dataframe_from_time_range(\n",
"# file_name=h5lmt_file,\n",
"# dataset_name='/datatargets/readbytes',\n",
"# datetime_start=start_time,\n",
"# datetime_end=end_time)"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -227,7 +201,7 @@
" zlabel=\"GiB/sec\",\n",
" ylabel=\"GiB/sec\",\n",
" title=plot_dataset[1])\n",
" print \"Total %.f GiB %s\" % (plot_dataset[0].sum().sum() * 5.0, plot_dataset[1].split()[0])"
" print(\"Total %.f GiB %s\" % (plot_dataset[0].sum().sum() * 5.0, plot_dataset[1].split()[0]))"
]
},
{
Expand All @@ -253,25 +227,32 @@
" ylabel=\"% Load\",\n",
" title=plot_dataset[1])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"display_name": "Python 3",
"language": "python",
"name": "python2"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.14"
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
Expand Down
40 changes: 32 additions & 8 deletions share/nersc/mmperfmon2hdf5.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,24 @@ MMPERFMON_DIR_BASES[project2]="/global/project/iotest/kkr/gpfs-for-glenn/output"

# you shouldn't have to modify anything below here

# Parse arguments
# usage: prints a synopsis built from the "X) # comment" lines of the getopts
# case statement below, then exits.
usage() { echo "$0 usage:" && grep " .)\ #" $0; exit 0; }

# FORCE=1 means "do not abort when a day's output file fails to materialize"
FORCE=0

while getopts "fh" arg; do
    case $arg in
        f) # do not abort on invalid dates
            FORCE=1
            ;;
        h | *) # display help
            usage
            exit 0
            ;;
    esac
done
# drop the parsed options so $1/$2 below are the positional arguments
shift $((OPTIND-1))

# positional arguments: file system name and date to archive
FSNAME=$1
DATE=$2

Expand Down Expand Up @@ -55,24 +73,30 @@ while true; do
tomorrow=$(date -d "$today + 1 day" "+%Y-%m-%d")
echo "[$(date)] Archiving ${FSNAME} for ${today}"

output_file="${FSNAME}_${today}.hdf5"
if [ ! -d "$today" ]; then
mkdir -vp "$today"
fi

output_file="$today/${FSNAME}.hdf5"

# Because each *.out file starts on the first, not zeroth, minute of the hour,
# we need to go back an hour to get the first data point for each day. This is
# why the following globs refer to the 23rd hour of yesterday
tstart=$(date +%s)
${ARCHIVE_MMPERFMON} --init-start "${today}T00:00:00" \
--init-end "${tomorrow}T00:00:00" \
--timestep ${TIMESTEP} \
${MMPERFMON_DIR_BASE}/*/ngfsv*.nersc.gov.${today}-*.out* \
${MMPERFMON_DIR_BASE}/*/ngfsv*.nersc.gov.${yesterday}-23:*.out* \
--output "$output_file"
--filesystem "$FSNAME" \
--output "$output_file" \
"${today}T00:00:00" \
"${tomorrow}T00:00:00"
# "${today}T23:59:59"

ret=$?
tend=$(date +%s)

if [ ! -f "$output_file" ]; then
echo "[$(date)] ERROR: did not create $output_file" >&2
exit $ret
if [ ! $FORCE ]; then
exit $ret
fi
else
echo "[$(date)] Wrote output to $output_file in $((tend - tstart)) seconds"
fi
Expand Down
Empty file.
1 change: 1 addition & 0 deletions tests/inputs/mmperfmon.2019-05-16-mini.tgz.README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is just an empty file to test tools.nersc_mmperfmon correctness. It is never loaded.
20 changes: 20 additions & 0 deletions tests/inputs/site.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,26 @@
"inputs/%Y-%m-%d/ost-map.txt",
"inputs/%Y-%m-%d/ost-map_edison.txt"
],
"mmperfmon_output_files": {
"project2": [
"/global/project/iotest/kkr/gpfs-for-glenn/output/*/ngfsv*.nersc.gov.%Y-%m-%d-%H:00:00.out",
"/global/project/iotest/kkr/gpfs-for-glenn/output/*/ngfsv*.nersc.gov.%Y-%m-%d-%H:00:00.out.gz"
],
"projecta": [
"/global/projecta/iotest/kkr/gpfs-for-glenn/output/*/ngfsv*.nersc.gov.%Y-%m-%d-%H:00:00.out",
"/global/projecta/iotest/kkr/gpfs-for-glenn/output/*/ngfsv*.nersc.gov.%Y-%m-%d-%H:00:00.out.gz"
],
"projectb": [
"/global/projectb/iotest/kkr/gpfs-for-glenn/output/*/ngfsv*.nersc.gov.%Y-%m-%d-%H:00:00.out",
"/global/projectb/iotest/kkr/gpfs-for-glenn/output/*/ngfsv*.nersc.gov.%Y-%m-%d-%H:00:00.out.gz"
],
"testfs-mini": [
"inputs/mmperfmon.%Y-%m-%d-mini.tgz"
],
"testfs-micro": [
"inputs/mmperfmon.%Y-%m-%d-micro.tgz"
]
},
"darshan_log_dirs": {
"testsystem": "inputs/darshanlogs"
},
Expand Down
69 changes: 66 additions & 3 deletions tests/test_cli_archive_mmperfmon.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,26 @@
import datetime
import h5py
import tokio
import tokio.connectors.hdf5
import tokio.cli.archive_mmperfmon
import tokiotest

def generate_tts(output_file,
input_file=tokiotest.SAMPLE_MMPERFMON_MULTI,
init_start=tokiotest.SAMPLE_MMPERFMON_MULTI_START,
init_end=tokiotest.SAMPLE_MMPERFMON_MULTI_END):
init_end=tokiotest.SAMPLE_MMPERFMON_MULTI_END,
query_start=tokiotest.SAMPLE_MMPERFMON_MINI_START,
query_end=tokiotest.SAMPLE_MMPERFMON_MINI_END):
"""Create a TokioTimeSeries output file
"""
argv = [
'--init-start', init_start,
'--init-end', init_end,
'--timestep', str(tokiotest.SAMPLE_MMPERFMON_TIMESTEP),
'--output', output_file,
input_file
'--filesystem', 'testfs-mini',
query_start,
query_end
]
print("Running [%s]" % ' '.join(argv))
tokio.cli.archive_mmperfmon.main(argv)
Expand All @@ -37,7 +42,9 @@ def update_tts(output_file):

argv = [
'--output', output_file,
tokiotest.SAMPLE_MMPERFMON_MULTI_SUBSET,
'--filesystem', 'testfs-micro',
tokiotest.SAMPLE_MMPERFMON_MICRO_START,
tokiotest.SAMPLE_MMPERFMON_MICRO_END
]

print("Running [%s]" % ' '.join(argv))
Expand Down Expand Up @@ -111,3 +118,59 @@ def test_bin_archive_mmperfmon_overlaps():
h5_file.close()

tokiotest.identical_datasets(summary0, summary1)

@nose.tools.with_setup(tokiotest.create_tempfile, tokiotest.delete_tempfile)
def test_bin_archive_mmperfmon_edges():
    """cli.archive_mmperfmon: test boundary correctness
    """
    tokiotest.TEMP_FILE.close()

    # build a fresh TimeSeries over the mini sample window and persist it as HDF5
    generate_tts(
        tokiotest.TEMP_FILE.name,
        query_start=tokiotest.SAMPLE_MMPERFMON_MINI_START,
        query_end=tokiotest.SAMPLE_MMPERFMON_MINI_END)

    iso_fmt = "%Y-%m-%dT%H:%M:%S"
    start_ts = datetime.datetime.strptime(tokiotest.SAMPLE_MMPERFMON_MINI_START, iso_fmt)
    end_ts = datetime.datetime.strptime(tokiotest.SAMPLE_MMPERFMON_MINI_END, iso_fmt)

    hdf5_handle = tokio.connectors.hdf5.Hdf5(tokiotest.TEMP_FILE.name, 'r')
    readbytes = hdf5_handle.to_dataframe('datatargets/readbytes')
    hdf5_handle.close()

    # the first timestamp of the query range must appear in the resulting dataset
    print(readbytes.loc[start_ts].sum())
    assert not readbytes.loc[start_ts].isna().all()

    # the final timestamp of the query range must be excluded from the dataset
    print(readbytes.loc[end_ts].sum())
    assert readbytes.loc[end_ts].isna().all()

@nose.tools.with_setup(tokiotest.create_tempfile, tokiotest.delete_tempfile)
def test_bin_archive_mmperfmon_endofday():
    """cli.archive_mmperfmon: test boundary correctness, end of day

    Exactly fills an HDF5 file.  It should have exactly zero unfilled rows,
    and in combination with test_bin_archive_mmperfmon_edges(), confirms that
    the time ranges aren't pulling in or leaving out the first or last row.
    """
    tokiotest.TEMP_FILE.close()

    query_start = tokiotest.SAMPLE_MMPERFMON_MINI_START
    query_end = tokiotest.SAMPLE_MMPERFMON_MINI_END

    # initialize a TimeSeries whose bounds exactly match the query range,
    # populate it, and write it out as HDF5
    generate_tts(
        tokiotest.TEMP_FILE.name,
        init_start=query_start,
        init_end=query_end,
        query_start=query_start,
        query_end=query_end)

    h5_file = tokio.connectors.hdf5.Hdf5(tokiotest.TEMP_FILE.name, 'r')
    df = h5_file.to_dataframe('datatargets/readbytes')
    h5_file.close()

    # no row may be entirely NaN.  The previous assertion chained a second
    # .isna() onto the boolean Series returned by .all(axis=1); a boolean
    # Series never contains NaN, so that assert could never fail.
    print(df.isna().all(axis=1).any())
    assert not df.isna().all(axis=1).any()

0 comments on commit eac6f90

Please sign in to comment.