# SSD Endurance Requirements

This notebook demonstrates how we determine the minimum required drive endurance for the SSDs to be deployed in Perlmutter.  The master equation is

$
        {DWPD}^{new} =
        {SSI}
        \cdot
        {FSWPD}^{ref}
        \cdot
        {WAF}
        \cdot
        \left ( \frac{1}{\chi} \right )
        \left ( \frac{N^{ref}}{N^{new}} \right )
        \left ( \frac{c^{ref}}{c^{new}} \right )
        \left ( \frac{R^{ref}}{R^{new}} \right )
$

where

- ${SSI}$ is the sustained system improvement
- ${FSWPD}^{ref}$ is the reference file system's total write volume expressed in units of file system writes per day
- ${WAF}$ is the write amplification factor that results from factors intrinsic to the application workload
- $\chi$ is the fraction of Lustre capacity available after formatting, typically ranging from 0.95 to 0.97
- $N^{ref}$ and $N^{new}$ are the number of drives in the reference and new systems
- $c^{ref}$ and $c^{new}$ are the per-drive capacities in the reference and new systems
- $R^{ref}$ and $R^{new}$ are the code rates of the reference and new systems

In [None]:
%matplotlib inline

In [None]:
import datetime

import numpy
import matplotlib.pyplot
import pandas

# Enlarge the default font size for every figure drawn below.
matplotlib.rcParams.update({'font.size': 16})

In [None]:
def pibs_to_pbs(pibs):
    """Convert a quantity in pebibytes (2**50 bytes) to petabytes (10**15 bytes)."""
    bytes_per_pib = 2**50
    bytes_per_pb = 10**15
    return pibs * bytes_per_pib / bytes_per_pb

## Define constants

In [None]:
# Analysis window (both ends inclusive) used to select daily I/O records
START_TIME = datetime.datetime(year=2017, month=4, day=1)
END_TIME = datetime.datetime(year=2019, month=3, day=31)

# Size of the reference file system as reported by `df -k` (1 KiB blocks)
CSCRATCH_KIBS = 29763608416864
CSCRATCH_BYTES = 1024 * CSCRATCH_KIBS
CSCRATCH_PIBS = CSCRATCH_BYTES / 2**50

# SSI: low and high estimates
PARAM_SSI_LOW, PARAM_SSI_HIGH = 3.0, 4.0

# chi
PARAM_CHI = 0.95

# Reference system: drive count, per-drive capacity (bytes), code rate
PARAM_N_REF = 248 * 41  # presumably (number of arrays) x (drives per array) -- TODO confirm
PARAM_C_REF = 4 * 10**12
PARAM_R_REF = 8.0 / (8.0 + 2.0)

# New system: drive count and per-drive capacity are unknown/undisclosed
PARAM_N_NEW = None
PARAM_C_NEW = None
# NOTE(review): the other code rates here have the form k / (k + m); this one is
# 8 / (10 + 2), which does not match that form -- confirm the intended layout.
PARAM_R_NEW_LOW = 8.0 / (10.0 + 2.0)
PARAM_R_NEW_HIGH = 8.0 / (8.0 + 2.0)

## Calculate FSWPD term

We use daily I/O rates collected from LMT to determine the value for the $FSWPD$ parameter we will use in our endurance calculation here.

In [None]:
# Read the daily I/O totals, parse the date column, keep only the days inside
# [START_TIME, END_TIME], and index the result by date.
daily_iorates = pandas.read_csv('datasets/cscratch_daily_iorates.csv')
daily_iorates['date'] = [datetime.datetime.strptime(day, "%Y-%m-%d") for day in daily_iorates['date']]
in_window = (daily_iorates['date'] >= START_TIME) & (daily_iorates['date'] <= END_TIME)
cscratch_df = daily_iorates[in_window].set_index('date')

In [None]:
# Express each day's read and write volume as a multiple of CSCRATCH_BYTES
# (file system reads/writes per day).
cscratch_df['fsrpd'] = cscratch_df['read_bytes'].div(CSCRATCH_BYTES)
cscratch_df['fswpd'] = cscratch_df['write_bytes'].div(CSCRATCH_BYTES)

In [None]:
# Summary statistics for the daily I/O volumes.
#
# The byte counts are converted to TiB *before* calling describe(). Dividing the
# output of describe() instead would also divide the `count` row by 2**40 and
# report a meaningless number of days.
print("Distribution of FSWPD:")
print(cscratch_df['fswpd'].describe())

print()

print("Distribution of daily read+write volumes in TiB:")
print(((cscratch_df['read_bytes'] + cscratch_df['write_bytes']) / 2**40).describe())

print()

print("Distribution of daily write volumes in TiB:")
print((cscratch_df['write_bytes'] / 2**40).describe())

### Generate the FSWPD distribution histogram

In [None]:
# Histogram of daily FSWPD, normalized to the fraction of days per bin.
BINWIDTH = 0.0125
fig, ax = matplotlib.pyplot.subplots(figsize=(8, 3.5))

# Days whose FSWPD falls outside the bin range are not counted by
# numpy.histogram and are therefore also excluded from the normalization.
hist, bins = numpy.histogram(cscratch_df['fswpd'],
                             bins=numpy.arange(0, 0.3, BINWIDTH))
total_days = hist.sum()

# Center each bar on its bin (left edge + half a bin width). ax.bar() centers
# bars on the x values it is given, so passing the left edges directly would
# draw every bar half a bin too far to the left.
bars = ax.bar(bins[:-1] + BINWIDTH / 2,
              hist.astype(numpy.float64) / total_days,
              width=BINWIDTH * 0.8,
              color='C0',
              edgecolor='black')

# x axis: a minor tick at every bin edge, a labeled major tick every fourth edge
majtick = matplotlib.ticker.MultipleLocator(4*BINWIDTH)
mintick = matplotlib.ticker.MultipleLocator(BINWIDTH)
majtickfmt = matplotlib.ticker.FormatStrFormatter("%.2f")
ax.xaxis.set_major_locator(majtick)
ax.xaxis.set_minor_locator(mintick)
ax.xaxis.set_major_formatter(majtickfmt)
ax.tick_params(which='major', length=7)

ax.set_xlabel("File System Writes per Day")
ax.set_ylabel("Fraction of days")

# Build the date strings from the datetime fields rather than with "%-d",
# which is a platform-specific strftime extension.
caption = "Cori scratch (%.1f PB)\n%s - %s" % (
    pibs_to_pbs(CSCRATCH_PIBS),
    "%s %d, %d" % (START_TIME.strftime("%b"), START_TIME.day, START_TIME.year),
    "%s %d, %d" % (END_TIME.strftime("%b"), END_TIME.day, END_TIME.year))
ax.text(0.98, 0.78, caption, fontsize='medium',
        ha='right', transform=ax.transAxes, backgroundcolor='#FFFFFFFF')

ax.set_axisbelow(True)
ax.xaxis.grid(False)

# Label sparsely populated bins with their absolute day count. The integer
# counts in `hist` are used directly; recovering them from the normalized bar
# heights is subject to floating-point error, which breaks both the "%d"
# formatting and the singular/plural test.
for bar, ndays in zip(bars, hist):
    if 0 < ndays < 10:
        ax.annotate("%d day%s" % (ndays, "" if ndays == 1 else "s"),
                    xy=(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01),
                    xycoords='data',
                    xytext=(0, 45),
                    textcoords='offset points',
                    arrowprops={'facecolor': 'black', 'width': 1, "headwidth": 7, "shrink": 0.05},
                    ha='center',
                    va='bottom',
                    rotation=90
                   )

# y axis: major ticks every 0.1, minor ticks every 0.05, horizontal grid lines
majtick = matplotlib.ticker.MultipleLocator(0.1)
mintick = matplotlib.ticker.MultipleLocator(0.05)
ax.yaxis.set_major_locator(majtick)
ax.yaxis.set_minor_locator(mintick)
ax.yaxis.grid()
ax.set_ylim(-0.05, None)

In [None]:
# Save the FSWPD histogram as a PDF whose name encodes the analysis window.
date_range = tuple(when.strftime("%Y%m%d") for when in (START_TIME, END_TIME))
output_file = 'cscratch_daily_iorates_%s-%s.pdf' % date_range
fig.savefig(output_file, bbox_inches='tight', transparent=True, dpi=200)
print("Wrote output to", output_file)

### Pick a value for ${FSWPD}^{ref}$

In [None]:
# Report on extreme values
PERCENTILE = 99

print("%dth percentile is %.2f TiB written per day" % (
    PERCENTILE,
    (cscratch_df['write_bytes'] / 2**40).quantile(q=PERCENTILE / 100.0)))

print("%dth percentile is %.6f FSWPD" % (
    PERCENTILE, 
    cscratch_df['fswpd'].quantile(q=PERCENTILE / 100.0)))

In [None]:
# FSWPD^ref is taken to be the mean of the daily FSWPD values in the window.
PARAM_FSWPD = cscratch_df['fswpd'].mean()

print("\n".join([
    "=" * 80,
    "Using %.3f as the value for FSWPD^ref" % PARAM_FSWPD,
    "=" * 80,
]))

## Calculate WAF term

Here we calculate the write amplification factors of all SSDs in Cori's burst buffer to estimate the effect that misaligned writes have on SSDs under Cori's production workload.

This is a very imperfect analysis because

1. Cori's burst buffer (DataWarp) does server-side write-back caching which can even out misaligned but sequential I/Os
2. Cori's burst buffer has no parity, so there is no read-modify-write penalty
3. Cori's burst buffer uses multilevel striping, and the four-SSD RAID0 configuration uses 512 KiB stripes
4. Cori's burst buffer workload is not representative of the full Cori workload since only a small subset of NERSC users opts in to using the burst buffer

But in the absence of both application-level write data to Lustre (provided by LMT) and device-level write data to disks (provided by smartctl or a RAID appliance), we can't calculate the true WAF.  Sadly, the vast majority of Cori's Lustre HDDs were affected by a firmware bug which caused them to stop reporting total bytes written.

In [None]:
INPUT_CSV = 'datasets/isdct_summary_20190401.csv'

ssd_data = pandas.read_csv(INPUT_CSV)

# The snapshot date is the part of the file name after the last underscore
# and before the first dot that follows it; it is used to name output files.
SSD_DATA_DATE = INPUT_CSV.rpartition('_')[2].partition('.')[0]

# Bin width used by the WAF plots below
BINWIDTH = 0.5

In [None]:
# WAF: bytes written to NAND divided by bytes written by the host
host_bytes_written = ssd_data['smart_host_bytes_written_bytes']
ssd_data['waf'] = ssd_data['smart_nand_bytes_written_bytes'].div(host_bytes_written)

# Lifetime drive writes: host bytes written divided by maximum_lba * 512 (512 bytes per LBA)
lba_bytes = ssd_data['maximum_lba'].mul(512)
ssd_data['lifetime_drive_writes'] = host_bytes_written.div(lba_bytes)

In [None]:
def plot_waf_hist(ssd_data, ax=None, binwidth=None):
    """Plot a normalized histogram of per-SSD write amplification factors.

    Bins start at 0 and are ``binwidth`` wide; the last bin edge is roughly
    ``10 + binwidth``. WAF values outside the bin range are not counted and do
    not contribute to the normalization. Bins holding fewer than 20 drives are
    annotated with their absolute drive count.

    Args:
        ssd_data: table with a ``'waf'`` column (one row per SSD).
        ax: matplotlib axes to draw on; a new 8x3.5 figure is created if None.
        binwidth: histogram bin width; defaults to the module-level
            ``BINWIDTH`` at call time when None.

    Returns:
        The matplotlib axes that were drawn on.
    """
    if binwidth is None:
        binwidth = BINWIDTH
    if ax is None:
        _, ax = matplotlib.pyplot.subplots(figsize=(8, 3.5))

    hist, bins = numpy.histogram(ssd_data['waf'],
                                 bins=numpy.arange(0, 10 + 2*binwidth, binwidth))
    total_drives = hist.sum()

    # Bars are centered on their bins and fill the full bin width.
    bars = ax.bar(bins[:-1] + binwidth / 2,
                  hist.astype(numpy.float64) / total_drives,
                  width=binwidth,
                  color='C1',
                  edgecolor='black')

    ax.set_xlabel("Write Amplification Factor")
    ax.set_ylabel("Fraction of SSDs")

    ax.set_axisbelow(True)
    ax.xaxis.grid(True)

    majtick = matplotlib.ticker.MultipleLocator(0.1)
    ax.yaxis.set_major_locator(majtick)
    ax.yaxis.grid()
    ax.set_ylim(-0.05, None)

    # Annotate sparsely populated bins. Only the bars drawn by this call are
    # visited (not everything already on `ax`), and the integer counts are used
    # directly so that "%d" and the singular/plural test are exact.
    for bar, ndrives in zip(bars, hist):
        if 0 < ndrives < 20:
            ax.annotate("%d drive%s" % (ndrives, "" if ndrives == 1 else "s"),
                        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01),
                        xycoords='data',
                        xytext=(0, 45),
                        textcoords='offset points',
                        arrowprops={'facecolor': 'black', 'width': 1, "headwidth": 7, "shrink": 0.05},
                        ha='center',
                        va='bottom',
                        rotation=90
                       )

    return ax

In [None]:
def plot_waf_scatter(ssd_data, ax=None):
    """Scatter-plot each SSD's lifetime drive writes against its WAF.

    Args:
        ssd_data: table with ``'waf'`` and ``'lifetime_drive_writes'`` columns
            (one row per SSD).
        ax: matplotlib axes to draw on; a new 8x3.5 figure is created if None.

    Returns:
        The matplotlib axes that were drawn on.
    """
    if ax is None:
        _, ax = matplotlib.pyplot.subplots(figsize=(8, 3.5))

    ax.scatter(ssd_data['waf'],
               ssd_data['lifetime_drive_writes'],
               marker='x',
               color='black',
               alpha=0.25)

    ax.set_xlabel("WAF")
    ax.set_ylabel("Lifetime\nDrive Writes")
    ax.grid()
    ax.set_axisbelow(True)

    # Major x ticks every two bin widths (module-level BINWIDTH), labeled as integers
    majtick = matplotlib.ticker.MultipleLocator(2*BINWIDTH)
    majtickfmt = matplotlib.ticker.FormatStrFormatter("%d")
    ax.xaxis.set_major_locator(majtick)
    ax.xaxis.set_major_formatter(majtickfmt)
    ax.tick_params(which='major', length=7)

    return ax

In [None]:
# Two stacked panels sharing the WAF axis, with no gap between them:
# the WAF histogram on top and the WAF-vs-lifetime-writes scatter below.
fig, axes = matplotlib.pyplot.subplots(nrows=2, ncols=1, figsize=(8, 6), sharex=True)
fig.subplots_adjust(hspace=0.0, wspace=0.0)

ax_top, ax_bottom = axes
plot_waf_hist(ssd_data, ax=ax_top)
plot_waf_scatter(ssd_data, ax=ax_bottom)

In [None]:
# Draw the WAF histogram on its own figure and save it as a PDF.
ax = plot_waf_hist(ssd_data)
output_file = 'coribb_ssd_wafs_hist_%s.pdf' % SSD_DATA_DATE
standalone_fig = ax.get_figure()
standalone_fig.savefig(output_file, bbox_inches='tight', transparent=True, dpi=200)
print("Wrote output to", output_file)

In [None]:
# Save the figure currently bound to `fig` (the two-panel figure, when the
# notebook is run top to bottom) as a PDF.
output_file = 'coribb_ssd_wafs_%s.pdf' % (SSD_DATA_DATE,)
fig.savefig(output_file, transparent=True, bbox_inches='tight', dpi=200)
print("Wrote output to", output_file)

### Pick a value for WAF

In [None]:
# Convert power-on hours to years (24 h/day, 365.25 days/year) and summarize.
print("Distribution of years in service:")
years_in_service = ssd_data['power_on_hours'] / 24 / 365.25
print(years_in_service.describe())

In [None]:
# Summary statistics of the per-SSD WAF; the last expression is shown as the cell output.
print("Distribution of WAFs:")
ssd_data['waf'].describe()

In [None]:
# Percentiles of the WAF distribution used for the low and high estimates
PERCENTILE_LOW, PERCENTILE_HIGH = 50, 95

waf_at_low = ssd_data['waf'].quantile(q=PERCENTILE_LOW / 100.0)
waf_at_high = ssd_data['waf'].quantile(q=PERCENTILE_HIGH / 100.0)
print("%2dth percentile: %.2f" % (PERCENTILE_LOW, waf_at_low))
print("%2dth percentile: %.2f " % (PERCENTILE_HIGH, waf_at_high))

In [None]:
# Low/high WAF estimates are the PERCENTILE_LOW-th / PERCENTILE_HIGH-th
# percentiles of the per-SSD WAF values.
PARAM_WAF_LOW = ssd_data['waf'].quantile(q=PERCENTILE_LOW / 100.0)
PARAM_WAF_HIGH = ssd_data['waf'].quantile(q=PERCENTILE_HIGH / 100.0)

print("\n".join([
    "=" * 80,
    "Using %.2f as the value for WAF_low" % PARAM_WAF_LOW,
    "Using %.2f as the value for WAF_high" % PARAM_WAF_HIGH,
    "=" * 80,
]))

## Calculate DWPD

We can finally calculate $DWPD$ using

> $
        {DWPD}^{new} =
        {SSI}
        \cdot
        {FSWPD}^{ref}
        \cdot
        {WAF}
        \cdot
        \left ( \frac{1}{\chi} \right )
        \left ( \frac{N^{ref}}{N^{new}} \right )
        \left ( \frac{c^{ref}}{c^{new}} \right )
        \left ( \frac{R^{ref}}{R^{new}} \right )
$

Note that we actually use the form

> $
        {DWPD}^{new} =
        {SSI}
        \cdot
        {FSWPD}^{ref}
        \cdot
        {WAF}
        \cdot
        \left ( \frac{N^{ref} \cdot c^{ref} \cdot R^{ref}}{C^{new}} \right )
$

where $C^{new}$ is defined either as

- $C^{new} = \chi \cdot R^{new} \cdot N^{new} \cdot c^{new}$, if we know what $c^{new}$ and $N^{new}$ will be on our new file system, or
- $C^{new} = SSI \cdot \left ( \frac{ \lambda_{purge}}{{PF}} \right ) \cdot \left ( \frac{\partial C^{ref}}{\partial t} \right )$, which is how we determined the minimum required capacity for the new file system

In [None]:
# Determine C^new, the denominator of the DWPD expression, for the low and high cases.
#
# These are the file system capacity requirements from cscratch_daily_growth.ipynb:
# daily_fill_pct.mean() * CSCRATCH_KIBS * 1024 = 133116697694451.58 bytes/day = 133 TB/day
DAILY_GROWTH_BYTES = 133116697694451.58
# NOTE(review): 28 and 0.5 presumably are lambda_purge and PF from the
# C^new = SSI * (lambda_purge / PF) * dC/dt form -- confirm against that notebook.
LAMBDA_PURGE = 28
PURGE_FRACTION = 0.5

new_system_params = (PARAM_CHI, PARAM_N_NEW, PARAM_C_NEW, PARAM_R_NEW_LOW, PARAM_R_NEW_HIGH)
if None not in new_system_params:
    # Every new-system parameter is known: C^new = chi * R * N * c
    PARAM_BIGC_NEW_LOW = PARAM_CHI * PARAM_R_NEW_LOW * PARAM_N_NEW * PARAM_C_NEW
    PARAM_BIGC_NEW_HIGH = PARAM_CHI * PARAM_R_NEW_HIGH * PARAM_N_NEW * PARAM_C_NEW
else:
    # At least one parameter is undefined: fall back to the capacity requirement
    print("NOTE: deriving chi * R * N * c from C^new because chi, R, N, or c is undefined!\n")
    PARAM_BIGC_NEW_LOW = PARAM_SSI_LOW * LAMBDA_PURGE / PURGE_FRACTION * DAILY_GROWTH_BYTES
    PARAM_BIGC_NEW_HIGH = PARAM_SSI_HIGH * LAMBDA_PURGE / PURGE_FRACTION * DAILY_GROWTH_BYTES

bytes_per_pb = 10**15
print("Using chi * (R * N * c)^new = %.0f PB (low)" % (PARAM_BIGC_NEW_LOW / bytes_per_pb))
print("                            = %.0f PB (high)" % (PARAM_BIGC_NEW_HIGH / bytes_per_pb))

In [None]:
# DWPD^new = SSI * FSWPD^ref * WAF * (R^ref * N^ref * c^ref) / C^new,
# evaluated with the low and the high parameter sets.
DWPD_NEW_LOW = (
    PARAM_SSI_LOW * PARAM_FSWPD * PARAM_WAF_LOW
    * PARAM_R_REF * PARAM_N_REF * PARAM_C_REF
    / PARAM_BIGC_NEW_LOW
)
DWPD_NEW_HIGH = (
    PARAM_SSI_HIGH * PARAM_FSWPD * PARAM_WAF_HIGH
    * PARAM_R_REF * PARAM_N_REF * PARAM_C_REF
    / PARAM_BIGC_NEW_HIGH
)

print("\n".join([
    "=" * 80,
    "Required DWPD: %.2f (low)" % DWPD_NEW_LOW,
    "               %.2f (high)" % DWPD_NEW_HIGH,
    "=" * 80,
]))