# tools.hdf5 Interface

This notebook demonstrates the basic syntax for retrieving Lustre server-side data from LMT via the pytokio API.

In [None]:
%matplotlib inline

In [None]:
import matplotlib
# Set the default font size to 14 for every figure drawn after this point.
matplotlib.rcParams.update({'font.size': 14})
import matplotlib.pyplot as plt
import numpy as np
import datetime
import tokio.tools

## Define input time range

`start_time` and `end_time` define the time range of interest.  Note that LMT stores data every five seconds, so requesting a large time range (e.g., multiple days) can result in very long query times and very slow plotting.

In [None]:
# File system to query and the time window of interest.
file_system = 'cscratch'
start_time = datetime.datetime(year=2017, month=7, day=6,
                               hour=0, minute=37, second=49)
end_time = datetime.datetime(year=2017, month=7, day=6,
                             hour=0, minute=54, second=45)

## Retrieve LMT data from HDF5

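The arguments for `tokio.tools.hdf5.get_dataframe_from_time_range` require a bit of arcane knowledge.  Note that the cells below pass the `fsname` and `dataset_name` keyword arguments (e.g., `fsname='cscratch'`, `dataset_name='datatargets/readrates'`), whereas the lists here use H5LMT-style file and group names.  Specifically: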

`file_name` can be:

* `cori_snx11168.h5lmt` for cscratch
* `edison_snx11025.h5lmt` for edison scratch1
* `edison_snx11035.h5lmt` for edison scratch2
* `edison_snx11036.h5lmt` for edison scratch3

`group_name` can be:

* `OSTReadGroup/OSTBulkReadDataSet` for read bytes/sec
* `OSTWriteGroup/OSTBulkWriteDataSet` for write bytes/sec
* `OSSCPUGroup/OSSCPUDataSet` for OSS CPU loads (out of 100.0)
* `MDSCPUGroup/MDSCPUDataSet` for MDS CPU loads (out of 100.0)

In [None]:
# Fetch the 'datatargets/readrates' dataset for the chosen file system and
# time window, then preview the first rows of the returned frame.
read_query = {
    'fsname': file_system,
    'dataset_name': 'datatargets/readrates',
    'datetime_start': start_time,
    'datetime_end': end_time,
}
result_reads = tokio.tools.hdf5.get_dataframe_from_time_range(**read_query)
result_reads.head()

In [None]:
# Fetch the 'datatargets/writerates' dataset for the chosen file system and
# time window, then preview the first rows of the returned frame.
write_query = {
    'fsname': file_system,
    'dataset_name': 'datatargets/writerates',
    'datetime_start': start_time,
    'datetime_end': end_time,
}
result_writes = tokio.tools.hdf5.get_dataframe_from_time_range(**write_query)
result_writes.head()

## Plot Overall OST I/O Rates

In [None]:
# Element-wise sum of the write and read frames, divided by 2**30 so that
# byte-based rates are expressed in GiB.
plot_title = "Overall OST I/O Rates"
combined_rates = result_writes + result_reads
plot_df = combined_rates / 2.0 ** 30

In [None]:
# Area chart of plot_df, one band per column.  plot_df was already divided
# by 2**30 when it was built, so no unit conversion happens in this cell.
fig, ax = plt.subplots(figsize=(10, 8))
fig.suptitle(plot_title)

# legend=False stops pandas from drawing the per-column legend at all,
# instead of drawing it and then deleting it through ax.legend_.
plot_df.plot.area(ax=ax, legend=False)

ax.grid(True)
ax.set_ylabel("GiB/sec")
ax.set_xlabel("Time");  # trailing ';' keeps the Text repr out of the output

In [None]:
# Heatmap of plot_df: one row per plot_df column (y axis) and one column
# per plot_df index entry, i.e. per timestamp (x axis).
heat_df = plot_df.transpose()
n_times, n_series = plot_df.shape

fig, ax = plt.subplots(figsize=(15, 10))
fig.suptitle(plot_title)
heatmap = ax.matshow(heat_df,
                     cmap='hot',
                     # float() forces true division even under Python 2
                     aspect=float(n_series) / n_times)
ax.xaxis.tick_bottom()
ax.invert_yaxis()
cbar = plt.colorbar(heatmap)
cbar.ax.set_ylabel("I/O Rate (GiB/sec)")

### set the OST name labels (y axis)
# Keep only the ticks that map onto a real column and pin them with
# set_yticks, so the fixed labels cannot drift away from their positions.
y_ticks = [tick for tick in ax.get_yticks() if 0 <= int(tick) < n_series]
ax.set_yticks(y_ticks)
ax.set_yticklabels([plot_df.columns[int(tick)] for tick in y_ticks])

### set the timestamp labels (x axis)
# Same approach for the time axis; each label is the text form of the
# timestamp found at that row of plot_df.
x_ticks = [tick for tick in ax.get_xticks() if 0 <= int(tick) < n_times]
ax.set_xticks(x_ticks)
ax.set_xticklabels([str(plot_df.index[int(tick)].to_pydatetime())
                    for tick in x_ticks])

# Timestamps are long, so draw them vertically.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)

plt.show()

## Plot OST Read Rates

In [None]:
# Read frame divided by 2**30 so that byte-based rates are expressed in GiB.
plot_title = "OST Read Rates"
bytes_per_gib = 2.0 ** 30
plot_df = result_reads / bytes_per_gib

In [None]:
# Area chart of plot_df, one band per column.  plot_df was already divided
# by 2**30 when it was built, so no unit conversion happens in this cell.
fig, ax = plt.subplots(figsize=(10, 8))
fig.suptitle(plot_title)

# legend=False stops pandas from drawing the per-column legend at all,
# instead of drawing it and then deleting it through ax.legend_.
plot_df.plot.area(ax=ax, legend=False)

ax.grid(True)
ax.set_ylabel("GiB/sec")
ax.set_xlabel("Time");  # trailing ';' keeps the Text repr out of the output

In [None]:
# Heatmap of plot_df: one row per plot_df column (y axis) and one column
# per plot_df index entry, i.e. per timestamp (x axis).
heat_df = plot_df.transpose()
n_times, n_series = plot_df.shape

fig, ax = plt.subplots(figsize=(15, 10))
fig.suptitle(plot_title)
heatmap = ax.matshow(heat_df,
                     cmap='hot',
                     # float() forces true division even under Python 2
                     aspect=float(n_series) / n_times)
ax.xaxis.tick_bottom()
ax.invert_yaxis()
cbar = plt.colorbar(heatmap)
cbar.ax.set_ylabel("I/O Rate (GiB/sec)")

### set the OST name labels (y axis)
# Keep only the ticks that map onto a real column and pin them with
# set_yticks, so the fixed labels cannot drift away from their positions.
y_ticks = [tick for tick in ax.get_yticks() if 0 <= int(tick) < n_series]
ax.set_yticks(y_ticks)
ax.set_yticklabels([plot_df.columns[int(tick)] for tick in y_ticks])

### set the timestamp labels (x axis)
# Same approach for the time axis; each label is the text form of the
# timestamp found at that row of plot_df.
x_ticks = [tick for tick in ax.get_xticks() if 0 <= int(tick) < n_times]
ax.set_xticks(x_ticks)
ax.set_xticklabels([str(plot_df.index[int(tick)].to_pydatetime())
                    for tick in x_ticks])

# Timestamps are long, so draw them vertically.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)

plt.show()

## Plot OST Write Rates

In [None]:
# Write frame divided by 2**30 so that byte-based rates are expressed in GiB.
plot_title = "OST Write Rates"
bytes_per_gib = 2.0 ** 30
plot_df = result_writes / bytes_per_gib

In [None]:
# Area chart of plot_df, one band per column.  plot_df was already divided
# by 2**30 when it was built, so no unit conversion happens in this cell.
fig, ax = plt.subplots(figsize=(10, 8))
fig.suptitle(plot_title)

# legend=False stops pandas from drawing the per-column legend at all,
# instead of drawing it and then deleting it through ax.legend_.
plot_df.plot.area(ax=ax, legend=False)

ax.grid(True)
ax.set_ylabel("GiB/sec")
ax.set_xlabel("Time");  # trailing ';' keeps the Text repr out of the output

In [None]:
# Heatmap of plot_df: one row per plot_df column (y axis) and one column
# per plot_df index entry, i.e. per timestamp (x axis).
heat_df = plot_df.transpose()
n_times, n_series = plot_df.shape

fig, ax = plt.subplots(figsize=(15, 10))
fig.suptitle(plot_title)
heatmap = ax.matshow(heat_df,
                     cmap='hot',
                     # float() forces true division even under Python 2
                     aspect=float(n_series) / n_times)
ax.xaxis.tick_bottom()
ax.invert_yaxis()
cbar = plt.colorbar(heatmap)
cbar.ax.set_ylabel("I/O Rate (GiB/sec)")

### set the OST name labels (y axis)
# Keep only the ticks that map onto a real column and pin them with
# set_yticks, so the fixed labels cannot drift away from their positions.
y_ticks = [tick for tick in ax.get_yticks() if 0 <= int(tick) < n_series]
ax.set_yticks(y_ticks)
ax.set_yticklabels([plot_df.columns[int(tick)] for tick in y_ticks])

### set the timestamp labels (x axis)
# Same approach for the time axis; each label is the text form of the
# timestamp found at that row of plot_df.
x_ticks = [tick for tick in ax.get_xticks() if 0 <= int(tick) < n_times]
ax.set_xticks(x_ticks)
ax.set_xticklabels([str(plot_df.index[int(tick)].to_pydatetime())
                    for tick in x_ticks])

# Timestamps are long, so draw them vertically.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)

plt.show()

## Plot CPU Load

In [None]:
# Fetch the 'dataservers/cpuload' dataset for the chosen file system and
# time window; it is plotted as-is (no unit scaling) by the next cell.
cpu_query = {
    'fsname': file_system,
    'dataset_name': 'dataservers/cpuload',
    'datetime_start': start_time,
    'datetime_end': end_time,
}
plot_df = tokio.tools.hdf5.get_dataframe_from_time_range(**cpu_query)

In [None]:
# Heatmap of plot_df: one row per plot_df column (y axis) and one column
# per plot_df index entry, i.e. per timestamp (x axis).
heat_df = plot_df.transpose()
n_times, n_series = plot_df.shape

fig, ax = plt.subplots(figsize=(15, 10))
fig.suptitle('OSS CPU Loads')
heatmap = ax.matshow(heat_df,
                     cmap='hot',
                     # float() forces true division even under Python 2
                     aspect=float(n_series) / n_times)
ax.xaxis.tick_bottom()
ax.invert_yaxis()
cbar = plt.colorbar(heatmap)
cbar.ax.set_ylabel("CPU Load (%)")

### set the server name labels (y axis)
# Labels come from plot_df's column names (presumably one per OSS, given
# the plot title -- confirm against the dataset).  Keep only the ticks that
# map onto a real column and pin them with set_yticks, so the fixed labels
# cannot drift away from their positions.
y_ticks = [tick for tick in ax.get_yticks() if 0 <= int(tick) < n_series]
ax.set_yticks(y_ticks)
ax.set_yticklabels([plot_df.columns[int(tick)] for tick in y_ticks])

### set the timestamp labels (x axis)
# Same approach for the time axis; each label is the text form of the
# timestamp found at that row of plot_df.
x_ticks = [tick for tick in ax.get_xticks() if 0 <= int(tick) < n_times]
ax.set_xticks(x_ticks)
ax.set_xticklabels([str(plot_df.index[int(tick)].to_pydatetime())
                    for tick in x_ticks])

# Timestamps are long, so draw them vertically.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)

plt.show()