# Particle Data Exploration
This notebook serves to explore a particle dataset traced from an NTLP simulation.

Goals include:
- Understanding ranges of data and their distributions
- Identifying invalid data and outliers, including backward Euler (BE) failures and cold particles
- Visualizing particle events as reported via NTLP's `history.nc` NetCDF file
- Visualizing particle behavior and its environment throughout their lifetimes
- Resampling particles onto a common timeline so their characteristics/environment are comparable

Provides configuration for the following datasets:
- Pi chamber - 1-way coupling, relative humidity set at 103%
- Pi chamber - 2-way coupling, normal relative humidity
- cfog
- Fatima
- Spray

Unless otherwise noted, the datasets were traced with 2-way coupling.  Additional datasets are easily added.

In [None]:
%matplotlib notebook

import glob
import multiprocessing
import os
import warnings

import matplotlib.pyplot as plt
import netCDF4 as nc
import numpy as np
import pandas as pd

# To compute the mode of our parameters.
from scipy import stats

import droplet_approximation

In [None]:
# Path to the top-level simulations/ data directory.  Each simulation lives in
# its own sub-directory beneath this root.
#
# NOTE: This is set to an invalid value as there is no way to set a sensible default.
#       The next cell halts execution if it's not set.
#
simulations_data_root = None

# Name of the simulation we're investigating.  Exactly one of the assignments
# below should be uncommented.
#
# NOTE: This must match one of the names in the next cell!
#
simulation_name = "Pi Chamber - 1-way Coupling, RH~103%"
#simulation_name = "Pi Chamber - 2-way Coupling"
#simulation_name = "cfog"
#simulation_name = "Fatima"
#simulation_name = "Spray"

# Number of processes to use for parallel operations.  Zero means use one process
# per core on the system; the next cell replaces zero with the detected core count.
number_processes = 0

# Use a large fanout degree for our raw particle files directory hierarchy.
# This value is handed to the particle readers along with the particles root.
#
# NOTE: This must match how the particle directories were constructed.  Do *not* change
#       this unless you've updated the common datasets as well.
#
dirs_per_level   = 256

In [None]:
# Ensure we know where our data resides, otherwise stop execution.
if simulations_data_root is None:
    raise ValueError( "Must set simulations_data_root to run this notebook!" )

# Do we need to default the number of processes to use?
#
# NOTE: os.cpu_count() returns None when the number of cores cannot be
#       determined, so fall back to a single process in that case.
#
if number_processes == 0:
    number_processes = os.cpu_count() or 1

# Map each supported simulation name to its directory beneath the simulations
# data root.  New datasets are added by extending this table.
simulation_directory_names = {
    "Pi Chamber - 1-way Coupling, RH~103%": "pi_chamber-1way_rh103",
    "Pi Chamber - 2-way Coupling":          "pi_chamber-2way",
    "cfog":                                 "cfog",
    "Fatima":                               "fatima",
    "Spray":                                "spray"
}

if simulation_name not in simulation_directory_names:
    raise ValueError( "Unknown simulation_name!" )

simulation_directory_name = simulation_directory_names[simulation_name]

# Top-level directory of this simulation.
#
# NOTE: The trailing separator is kept so anything that appends to this path
#       directly continues to work.  Paths derived below are built with
#       os.path.join() so they do not pick up a doubled separator.
#
simulation_root = "{:s}/{:s}/".format( simulations_data_root, simulation_directory_name )

# Path to the top of the raw particle files directory hierarchy and its index.
particles_root       = os.path.join( simulation_root, "particles" )
particles_index_path = os.path.join( particles_root, "particles.index" )

# Path to the NTLP particle trace root.
ntlp_trace_root = simulation_root

# Path to the NTLP history NetCDF file.
ntlp_history_path = os.path.join( simulation_root, "history.nc" )

# List of NTLP particle trace paths.
ntlp_trace_paths = glob.glob( os.path.join( ntlp_trace_root, "be_dump_*.data" ) )

print( "Investigating '{:s}'.  Key directories are:\n"
       "\n"
       "    Top-level:         {:s}\n"
       "    Particles:         {:s}\n"
       "    Particles index:   {:s}\n"
       "    NTLP traces:       {:s}\n"
       "    NTLP history.nc:   {:s}\n"
       "\n"
       "{:d} NTLP trace{:s} found.".format(
           simulation_name,
           simulation_directory_name,
           particles_root,
           particles_index_path,
           ntlp_trace_root,
           ntlp_history_path,
           len( ntlp_trace_paths ),
           "" if len( ntlp_trace_paths ) == 1 else "s" ) )

In [None]:
def extract_floats_from_binary_file( file_path ):
    """
    Reads 36-byte binary records from a file and returns the floating point
    portion of each record as a 2D NumPy array.  Suitable for debugging.

    Each record is treated as nine 32-bit values: two leading integers followed
    by seven floats.  The whole file is read as 32-bit floats and the two leading
    columns (the integers, whose bit patterns are meaningless as floats) are
    dropped.

    Takes 1 argument:

      file_path - Path to the binary file to read.

    Returns 1 value:

      floats_array - NumPy array, shaped N x 7 where N is the number of records,
                     containing the last seven values of each record in file_path
                     interpreted as 32-bit floating point values.

    Raises ValueError if the file does not contain a whole number of records.

    """

    # Layout of a single record, in 32-bit values.
    values_per_record       = 9
    number_leading_integers = 2

    # Read all data as 32-bit floats
    data = np.fromfile( file_path, dtype=np.float32 )

    # Refuse truncated files rather than failing inside reshape() with a
    # message that doesn't mention the file.
    if data.size % values_per_record != 0:
        raise ValueError( "'{}' holds {:d} 32-bit values which is not a multiple of "
                          "the {:d} values per record!".format(
                              file_path,
                              data.size,
                              values_per_record ) )

    # Reshape to records of 9 elements (2 integers + 7 floats).
    records = data.reshape( -1, values_per_record )

    # Extract only the last 7 columns (floats).
    floats_array = records[:, number_leading_integers:]

    return floats_array

def flag_invalid_data( df_row ):
    """
    Flags invalid data in one row of a particles DataFrame.  Each quantity has a
    range of acceptable values and a row is flagged for that quantity when any of
    its observations falls outside of the range:

      Input BE radii:         (0, 1e-2), exclusive
      Input BE temperatures:  (270, 320), exclusive
      Salt masses:            [1e-22, 1e-10], inclusive
      Air temperatures:       (270, 320), exclusive
      Relative humidities:    [0.85, 1.15], inclusive
      Air densities:          [0.8, 1.2], inclusive
      Integration times:      strictly positive

    Values that cannot be compared against a range, such as NaNs, are flagged as
    invalid.  Salt masses are compared directly rather than through log10() so
    that negative values are flagged instead of silently becoming NaNs.

    This function is intended to be applied to a particles DataFrame, one row at
    a time (e.g. via DataFrame.apply( ..., axis=1 )).

    Takes 1 argument:

      df_row - Particles DataFrame with one row to examine for invalid values.

    Returns 1 value:

      new_columns - Pandas Series containing the newly created columns representing
                    invalid data in df_row.  The Series is indexed by the global
                    flag_column_names and contains the following, in order:

                      1. Invalid input BE radii
                      2. Invalid input BE particle temperatures
                      3. Invalid salt masses
                      4. Invalid air temperatures
                      5. Invalid relative humidities
                      6. Invalid air densities
                      7. Invalid integration times

    """

    def any_outside_exclusive( values, lower, upper ):
        # True when any value is not strictly between lower and upper.
        values = np.asarray( values )
        return not np.all( (values > lower) & (values < upper) )

    def any_outside_inclusive( values, lower, upper ):
        # True when any value is not within [lower, upper].
        values = np.asarray( values )
        return not np.all( (values >= lower) & (values <= upper) )

    invalid_input_radius_flag      = any_outside_exclusive( df_row["input be radii"], 0.0, 1e-2 )
    invalid_input_temperature_flag = any_outside_exclusive( df_row["input be temperatures"], 270.0, 320.0 )
    invalid_salt_flag              = any_outside_inclusive( df_row["salt masses"], 1e-22, 1e-10 )
    invalid_air_temperature_flag   = any_outside_exclusive( df_row["air temperatures"], 270.0, 320.0 )
    invalid_rh_flag                = any_outside_inclusive( df_row["relative humidities"], 0.85, 1.15 )
    invalid_air_density_flag       = any_outside_inclusive( df_row["air densities"], 0.8, 1.2 )
    invalid_dt_flag                = not np.all( np.asarray( df_row["integration times"] ) > 0.0 )

    new_columns = pd.Series( [invalid_input_radius_flag,
                              invalid_input_temperature_flag,
                              invalid_salt_flag,
                              invalid_air_temperature_flag,
                              invalid_rh_flag,
                              invalid_air_density_flag,
                              invalid_dt_flag],
                             index=flag_column_names )

    return new_columns

# Reading Particle Observations

In [None]:
# Particle identifiers are stored in the index file as 32-bit integers.
unique_particle_ids = np.fromfile( particles_index_path, dtype=np.int32 )

# Selects which of the two readers loads the particles.
parallel_read_flag = True

if parallel_read_flag:
    read_particles = droplet_approximation.batch_read_particles_data
else:
    read_particles = droplet_approximation.read_particles_data

particles_df = read_particles( particles_root,
                               unique_particle_ids,
                               dirs_per_level )

# Summarize what was loaded.
loaded_particle_count   = len( particles_df )
total_observation_count = particles_df["number observations"].sum()
last_observation_time   = particles_df["death time"].max()

print( "{:d} particles in the DataFrame.".format( loaded_particle_count ) )
print( "{:d} total observations.".format( total_observation_count ) )
print( "Last observation at {:.2f} seconds.".format( last_observation_time ) )

## Identify the Data Ranges
Show the ranges for each of the droplet parameters based on all of the particles' observations.

In [None]:
# Reduce each particle's observations to its extrema, then reduce those to the
# extrema across all particles.  Radii, salt masses, and integration times are
# reported as log10() of their values, relative humidities as percentages.
radius_min            = np.log10( particles_df["input be radii"].apply( np.min ).min() )
radius_max            = np.log10( particles_df["input be radii"].apply( np.max ).max() )
temperature_min       = particles_df["input be temperatures"].apply( np.min ).min()
temperature_max       = particles_df["input be temperatures"].apply( np.max ).max()
salt_mass_min         = np.log10( particles_df["salt masses"].apply( np.min ).min() )
salt_mass_max         = np.log10( particles_df["salt masses"].apply( np.max ).max() )
air_temperature_min   = particles_df["air temperatures"].apply( np.min ).min()
air_temperature_max   = particles_df["air temperatures"].apply( np.max ).max()
relative_humidity_min = particles_df["relative humidities"].apply( np.min ).min() * 100
relative_humidity_max = particles_df["relative humidities"].apply( np.max ).max() * 100
air_density_min       = particles_df["air densities"].apply( np.min ).min()
air_density_max       = particles_df["air densities"].apply( np.max ).max()
dt_min                = np.log10( particles_df["integration times"].apply( np.min ).min() )
dt_max                = np.log10( particles_df["integration times"].apply( np.max ).max() )

# All three reports below list the ranges in the same order, so gather them once.
range_values = (radius_min, radius_max,
                temperature_min, temperature_max,
                salt_mass_min, salt_mass_max,
                air_temperature_min, air_temperature_max,
                relative_humidity_min, relative_humidity_max,
                air_density_min, air_density_max,
                dt_min, dt_max)

# Display the ranges in a human-readable form.
#
# NOTE: Salt mass is a mass, reported in kilograms, matching the salt mass
#       histogram's axis label later in this notebook.
#
print( "Data ranges:\n"
       "\n"
       "  log10(Radius):      [{:.2f}, {:.2f}] m\n"
       "  Temperature:        [{:.2f}, {:.2f}] K\n"
       "  log10(Salt mass):   [{:.2f}, {:.2f}] kg\n"
       "  Air temperature:    [{:.2f}, {:.2f}] K\n"
       "  Relative humidity:  [{:.2f}, {:.2f}] %\n"
       "  Air density:        [{:.2f}, {:.2f}] kg/m^3\n"
       "  log10(dt):          [{:.2f}, {:.2f}] s".format( *range_values ) )

# Now display them in a way that can be pasted into code.
print()
print( "parameter_ranges = {{\n"
       "    \"radius\":             ({:f}, {:f}),\n"
       "    \"temperature\":        ({:f}, {:f}),\n"
       "    \"salt_mass\":          ({:f}, {:f}),\n"
       "    \"air_temperature\":    ({:f}, {:f}),\n"
       "    \"relative_humidity\":  ({:f}, {:f}),\n"
       "    \"rhoa\":               ({:f}, {:f}),\n"
       "    \"time\":               ({:f}, {:f})\n"
       "}}".format( *range_values ) )

# Finally, display them in a format that can be pasted into the data generation script.
print()
print( "For use with generate_training_data.py:\n" )
print( "{:f}:{:f},{:f}:{:f},{:f}:{:f},{:f}:{:f},{:f}:{:f},{:f}:{:f},{:f}:{:f}".format(
           *range_values ) )

In [None]:
# Plot each particle's observation count against its identifier, with the mean
# and median overlaid, to get a feel for how long particles live.
observation_counts = particles_df["number observations"]

fig_h, ax_h = plt.subplots( 1, 1, figsize=(10, 6) )

mean_number_observations   = observation_counts.mean()
median_number_observations = np.median( observation_counts )
max_particle_id            = particles_df.index.max()

ax_h.plot( observation_counts, ".", label="Observations" )

# Horizontal reference lines spanning the particle identifiers.
for reference_level, line_style, line_label in ((mean_number_observations,   "-.", "Mean"),
                                                (median_number_observations, "-",  "Median")):
    ax_h.plot( [0, max_particle_id],
               [reference_level, reference_level],
               line_style,
               label=line_label )

figure_title = ("Observations per Particle\n"
                "{:d} Particles".format(
                len( particles_df ) ))

ax_h.set_title( figure_title )
ax_h.set_xlabel( "Particle Identifier" )
ax_h.set_ylabel( "Number Observations" )
ax_h.legend( loc="upper right" )

fig_h.tight_layout()

# Text summary of the same data.  Mean and median are truncated to integers.
print( "Number of observations' statistics:\n"
       "\n"
       "  [min, max]:  [{:d}, {:d}]\n"
       "  Mean:        {:d}\n"
       "  Median:      {:d}\n".format(
           observation_counts.min(),
           observation_counts.max(),
           int( observation_counts.mean() ),
           int( observation_counts.median() ) ) )

In [None]:
# Report statistics on particle life times in simulation time.

# Life time of each particle.  Subtracting the columns directly avoids building
# a Series for every row as a row-wise apply() would.
particles_duration = particles_df["death time"] - particles_df["birth time"]

# The simulation's end is taken to be the last time any particle was seen, so
# particles dying at that time are counted as having survived.
simulation_end = particles_df["death time"].max()

number_surviving_particles = (particles_df["death time"] == simulation_end).sum()

print( "Particle life times span [{:.2f}s, {:.2f}s] with a mean of {:.2f}s and median of {:.2f}s".format(
    particles_duration.min(),
    particles_duration.max(),
    particles_duration.mean(),
    particles_duration.median() ) )

print( "{:d} particle{:s} ({:.2f}%) survived to the end of the simulation ({:.2f}s).".format(
    number_surviving_particles,
    "" if number_surviving_particles == 1 else "s",
    number_surviving_particles / len( particles_df ) * 100,
    simulation_end ) )

In [None]:
# Names of the per-particle flag columns, in the order flag_invalid_data()
# produces them.
flag_column_names = ["invalid input be radius",
                     "invalid input be temperature",
                     "invalid salt mass",
                     "invalid air temperature",
                     "invalid relative humidity",
                     "invalid air density",
                     "invalid dt"]

# Add one flag column per category to every particle.
particles_df[flag_column_names] = particles_df.apply( flag_invalid_data, axis=1 )

number_particles = len( particles_df )

# Count the flagged particles per category, in flag_column_names order.
(number_invalid_radius,
 number_invalid_temperature,
 number_invalid_salt_mass,
 number_invalid_air_temperature,
 number_invalid_rh,
 number_invalid_air_density,
 number_invalid_dt) = [particles_df[flag_name].sum() for flag_name in flag_column_names]

# Each category is reported as a count followed by its percentage of all
# particles.
report_values = [number_particles]
for invalid_count in (number_invalid_radius,
                      number_invalid_temperature,
                      number_invalid_salt_mass,
                      number_invalid_air_temperature,
                      number_invalid_rh,
                      number_invalid_air_density,
                      number_invalid_dt):
    report_values.append( invalid_count )
    report_values.append( invalid_count / number_particles * 100.0 )

# Report what we found.
print( "Invalid counts for {:d} particles:\n"
       "\n"
       "    input radius:           {:d} ({:.2f}%)\n"
       "    input temperature:      {:d} ({:.2f}%)\n"
       "    input salt mass:        {:d} ({:.2f}%)\n"
       "    input air temperature:  {:d} ({:.2f}%)\n"
       "    input rh:               {:d} ({:.2f}%)\n"
       "    input air density:      {:d} ({:.2f}%)\n"
       "    dt:                     {:d} ({:.2f}%)\n"
       .format( *report_values ) )

In [None]:
# For each category of invalid data, print the smallest observed value of every
# flagged particle.  Each pair is (flag column, observations column).
flagged_columns = (("invalid input be radius",      "input be radii"),
                   ("invalid input be temperature", "input be temperatures"),
                   ("invalid salt mass",            "salt masses"),
                   ("invalid air temperature",      "air temperatures"),
                   ("invalid relative humidity",    "relative humidities"),
                   ("invalid air density",          "air densities"),
                   ("invalid dt",                   "integration times"))

for flag_column, data_column in flagged_columns:
    flagged_particles_df = particles_df[particles_df[flag_column]]

    print( flagged_particles_df[data_column].apply( np.min ) )

In [None]:
# Plot backward Euler errors as a function of simulation time so we can see structure
# in the failures.  Particles with multiple failures will stand out, as will systemic
# failures shortly after particle creation.

def get_failure_times_by_id( row ):
    """
    Pairs the simulation time of each BE failure with the particle's identifier.

    Takes 1 argument:

      row - Particles DataFrame row to examine.  The row's name is used as
            the particle's identifier.

    Returns 1 value:

      times_and_ids - NumPy array, shaped number_failures x 2, whose first column
                      holds simulation times and whose second column holds the
                      particle's identifier.

    """

    integration_times = row["integration times"]

    # Observations with a positive BE status are the failures.
    failed_mask     = (row["be statuses"] > 0)
    number_failures = failed_mask.sum()

    # Build the particle's whole timeline before selecting the failures.
    elapsed_times  = np.cumsum( integration_times )
    particle_times = row["birth time"] + elapsed_times - integration_times[0]
    failure_times  = particle_times[failed_mask]

    # One copy of the identifier per failure.
    particle_ids = row.name * np.ones( number_failures, dtype=np.int32 )

    return np.column_stack( [failure_times, particle_ids] )
    
# Identify the particles that had at least one BE failure and count them.
be_failure_mask     = (particles_df["number be failures"] > 0)
number_be_particles = be_failure_mask.sum()

# Get the simulation times and particle identifiers for BE failures.  Each
# particle contributes an array with one (time, identifier) row per failure
# which are stacked into a single array.
failures_df   = particles_df[be_failure_mask]
times_and_ids = np.vstack( failures_df.apply( get_failure_times_by_id, axis=1 ) )

fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

# One point per BE failure: time on the X axis, particle identifier on the Y axis.
ax_h.plot( times_and_ids[:, 0],
           times_and_ids[:, 1],
           "." )
ax_h.set_ylabel( "Particle ID" )
ax_h.set_xlabel( "Time (s)" )
ax_h.set_title( "{:d} BE Failures Across {:d} Particles\n"
                "{:.2f}% Particles Had a Failure".format( 
    times_and_ids.shape[0],
    number_be_particles,
    number_be_particles / len( particles_df ) * 100 ) )

fig_h.tight_layout()

In [None]:
# Only a small fraction of particles will fail BE and, of those, the vast
# majority fail once.  We construct our bins such that we always have a bin
# covering one failure and then distribute the remaining bins across the
# rest of the data.
bin1_end = 1

# Get a largish number of bins so we can see the shape of the failure
# distribution.
number_bins = 100

# Get a convenience variable for the per-particle BE failure counts.
number_be_failures = failures_df["number be failures"]

# Histogram bins are half-open, [low, high), with only the last bin including
# its upper edge.  Failure counts are integers, so we put the first bin's upper
# edge halfway between bin1_end and the next possible count.  Using bin1_end
# itself as the edge would place every single-failure particle in the second
# bin and leave the first bin empty.
first_bin_edge = bin1_end + 0.5

# We always have a bin from [0, first_bin_edge).  Construct the remaining
# bin edges.
remaining_data = number_be_failures[number_be_failures > bin1_end]
if len( remaining_data ) > 0:
    number_remaining_bins = number_bins - 1
    remaining_edges       = np.linspace( first_bin_edge,
                                         remaining_data.max(),
                                         number_remaining_bins )
else:
    remaining_edges = [first_bin_edge]

# Build the full array.  The first of the remaining edges duplicates
# first_bin_edge so it is skipped.
failure_edges = np.concatenate( [[0, first_bin_edge], remaining_edges[1:]] )

failure_counts, failure_edges = np.histogram( number_be_failures, bins=failure_edges )

# Show the failures as a function of particle as well as the distribution.
fig_h, ax_h = plt.subplots( 1, 2, figsize=(10, 6) )

fig_h.suptitle( "{:d} Backward Euler Failures Across {:d} Particles".format(
    number_be_failures.sum(),
    number_be_particles ) )

ax_h[0].plot( failures_df.index,
              failures_df["number be failures"],
              "." )
ax_h[0].set_title( "Individual Particles" )
ax_h[0].set_xlabel( "Particle Identifiers" )
ax_h[0].set_ylabel( "BE Failure Count" )

ax_h[1].hist( failures_df["number be failures"], bins=failure_edges )
ax_h[1].set_title( "Distribution of Failure Counts\n"
                   "{:d} Single BE Failures ({:.2f}%)".format(
                    failure_counts[0],
                    failure_counts[0] / number_be_failures.sum() * 100 ) )
ax_h[1].set_xlabel( "Number of BE Failures Per Particle" )

# Clip the Y axis to the tallest multiple-failure bin so the single failures
# bin doesn't hide the shape of the rest of the distribution.
if len( failure_counts ) > 1:
    tallest_remaining_count = failure_counts[1:].max()
    if tallest_remaining_count > 0:
        ax_h[1].set_ylim( (0, tallest_remaining_count*1.1) )
fig_h.tight_layout()

nonzero_failures_mask = (particles_df["number be failures"] > 0)
lotsof_failures_mask  = (particles_df["number be failures"] > 1)

print( "{:d} particles with no failures".format( (~nonzero_failures_mask).sum() ) )
print( "{:d} particles with 1 or more failures".format( nonzero_failures_mask.sum() ) )
print( "{:d} particles with 2 or more failures".format( lotsof_failures_mask.sum() ) )

# Life Cycle Analysis

In [None]:
# Controls whether the per-variable summaries at the end of this cell are printed.
# When False a warning is issued instead.
life_cycle_flag = False

def print_variable_summary( variable, variable_name ):
    """
    Prints the count, range, mean, and mode of a collection of values.

    Takes 2 arguments:

      variable      - Collection of values to summarize.
      variable_name - Name printed as the summary's heading.

    Returns nothing.

    """

    # keepdims=False reduces the mode and its count to scalars.
    mode_result = stats.mode( variable, keepdims=False )

    summary_values = (variable_name,
                      len( variable ),
                      np.min( variable ), np.max( variable ),
                      np.mean( variable ),
                      mode_result.mode, mode_result.count)

    summary_template = ("{:s}:\n"
                        "\n"
                        "  Count:  {:d} values\n"
                        "  Range:  [{:g}, {:g}]\n"
                        "  Mean:   {:g}\n"
                        "  Mode:   {:g} ({:d} times)\n")

    print( summary_template.format( *summary_values ) )

# Summarize each variable when enabled, otherwise warn that this analysis is
# unfinished.
#
# NOTE(review): The column names below are singular ("input radius", "be flag")
#               unlike the per-particle array columns used elsewhere in this
#               notebook ("input be radii", "be statuses").  Presumably these
#               need updating before enabling this path - confirm against the
#               DataFrame's columns.
#
if life_cycle_flag:
    print_variable_summary( particles_df["particle id"],        "Particle ID" )
    print_variable_summary( particles_df["be flag"],            "BE Flag" )
    print_variable_summary( particles_df["input radius"],       "Input Radius (m)" )
    print_variable_summary( particles_df["output radius"],      "Output Radius (m)" )
    print_variable_summary( particles_df["input temperature"],  "Input Temperature (K)" )
    print_variable_summary( particles_df["output temperature"], "Output Temperature (K)" )
    print_variable_summary( particles_df["time"],               "Simulation Time (s)" )
    print_variable_summary( particles_df["integration time"],   "Delta t (s)" )
    print_variable_summary( particles_df["salinity"],           "Salt Mass (kg)" )
    print_variable_summary( particles_df["air density"],        "Air Density (rhoa)" )
    print_variable_summary( particles_df["air temperature"],    "Air Temperature (K)" )
    print_variable_summary( particles_df["relative humidity"],  "RH (%)" )
    print_variable_summary( particles_df["processor"],          "MPI Rank" )
else:
    warnings.warn( "Life cycle analysis isn't complete!  Finish me!" )

# Reviewing Particle Event Counts From `history.nc`

In [None]:
# Open the NTLP history file read-only.  The dataset is left open as the
# following cell reads variables from it.
history_nc = nc.Dataset( ntlp_history_path, mode="r" )

# Plot the particle history recorded in the file.
droplet_approximation.plot_particle_history( history_nc )

In [None]:
# Demonstrate that we can account for every particle injected and destroyed.
# No deep insight here, just that the history file correctly tracks each of
# the key particle events.

fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

number_particles     = history_nc.variables["tnumpart"][:]
number_new_particles = history_nc.variables["tot_reintro"][:]
number_destroyed     = history_nc.variables["tnum_destroy"][:]

# Running particle count implied by the injection and destruction events, and
# how far it is from the count reported by the history file.  Both are computed
# once and shared by the plots and the check below.
net_particles         = np.cumsum( number_new_particles - number_destroyed )
accounting_difference = net_particles - number_particles

ax_h.plot( net_particles,
           ".",
           label="New + Destroyed" )
ax_h.plot( number_particles,
           label="tnumpart" )
ax_h.plot( accounting_difference,
           label="Difference" )
ax_h.set_title( "Total particles in simulation: {:d}".format(
    int( number_new_particles.sum() ) ) )
ax_h.set_xlabel( "Time(s)" )
ax_h.set_ylabel( "Count" )
ax_h.legend()

# A discrepancy in either direction is a difference, so test for non-zero
# values rather than only positive ones.
print( "Difference between aggregate and cumulative sum?  {}".format(
    np.any( accounting_difference != 0 )
    ) )

# Understanding a Simulation's Particles

In [None]:
# Show the characteristics of a handful of random particles over their lifetime.

number_particles = 10

# Sample positions without replacement so the same particle isn't plotted twice,
# and never request more particles than exist.
number_sampled_particles = min( number_particles, len( particles_df ) )
particle_indices         = np.sort( np.random.choice( len( particles_df ),
                                                      size=number_sampled_particles,
                                                      replace=False ) )

droplet_approximation.plot_particles( particles_df.iloc[particle_indices], time_range=[-np.inf, 3000] )

In [None]:
# Plot long-lived particles, if they exist.

number_particles            = 10
minimum_number_observations = 3000

selection_mask = (particles_df["number observations"] > minimum_number_observations)
long_particle_ids = particles_df[selection_mask].index

if len( long_particle_ids ) > 0:
    # Sampling without replacement fails when asked for more particles than are
    # available, so plot every long-lived particle when there are only a few.
    number_selected_particles = min( number_particles, len( long_particle_ids ) )

    particle_ids = np.random.choice( long_particle_ids,
                                     size=number_selected_particles,
                                     replace=False )
    droplet_approximation.plot_particles( particles_df.loc[particle_ids] )
else:
    print( "No particles had more than {:d} observation{:s}.".format(
        minimum_number_observations,
        "" if minimum_number_observations == 1 else "s" ) )

In [None]:
# Use the following to measure percentages of the population having a characteristic
# in a range of interest.
#
# Counts the observations, across all particles, whose air density is in
# [1.22, 1.25) and expresses that as a percentage of all observations.  Change
# the column name and bounds to examine other characteristics.
particles_df["air densities"].apply( lambda x: ((x>=1.22) & (x<1.25)).sum() ).sum() / particles_df["number observations"].sum() * 100

In [None]:
# Show the log-scale distribution of input BE radii throughout the simulation.
# We look at the log-distribution so we see all sizes instead of only the
# larger particles.
#
# NOTE: We don't visualize the output BE radii as those are identical except for
#       particles that fail BE when injected.  Since only a small percentage (~5%)
#       fail BE and retain their injection radius we skip plotting them as
#       it merely shows a difference on the outlier line at the left side of
#       the plot.
#
#       By construction all particles' observations are constructed so that
#       they contain an input and an output so output-only observations
#       (the last for each particle) have already been discarded.  This means
#       that only the now-last observation's output BE radii are omitted which
#       likely can be ignored (at least this is true for smaller simulations
#       with O(100K) particles).
#
# Gather every particle's input BE radii into a single array.
input_radii_concatenated = np.concatenate( list( particles_df["input be radii"] ) )

radius_title = ("{:s}\n"
                "All Particles ({:d} Observations)".format(
    simulation_name,
    len( input_radii_concatenated ) ))

fig_h, ax_h = plt.subplots( figsize=(6, 6) )

# Histogram of log10( radius ) with log-scaled counts.
log10_radii = np.log10( input_radii_concatenated )
ax_h.hist( log10_radii, bins=100, log=True )
ax_h.minorticks_on()
ax_h.set_title( radius_title )
ax_h.set_xlabel( "log10( Radius ) (m)" )
ax_h.set_ylabel( "Counts (log-scale)" )

In [None]:
# Show the log-scale distribution of input temperatures throughout the simulation.
#
# NOTE: We don't visualize the output temperatures as those are identical except for
#       particles that fail BE when injected.  Since only a small percentage (~5%)
#       fail BE and retain their injection temperature we skip plotting them.
#
#       By construction all particles' observations are constructed so that
#       they contain an input and an output so output-only observations
#       (the last for each particle) have already been discarded.  This means
#       that only the now-last observation's output temperatures are omitted which
#       likely can be ignored (at least this is true for smaller simulations
#       with O(100K) particles).
#

# Gather every particle's input BE temperatures into a single array.
input_temperatures_concatenated = np.concatenate( list( particles_df["input be temperatures"] ) )

temperature_title = ("{:s}\n"
                     "All Particles ({:d} Observations)".format(
    simulation_name,
    len( input_temperatures_concatenated ) ))

fig_h, ax_h = plt.subplots( figsize=(6, 6) )

# Histogram of the temperatures with log-scaled counts.
ax_h.hist( input_temperatures_concatenated, bins=1000, log=True )
ax_h.minorticks_on()
ax_h.set_title( temperature_title )
ax_h.set_xlabel( "Input Temperature (K)" )

# Disabled: restricts the X axis to a fixed temperature window.
if False:
    ax_h.set_xlim( 281, 293 )
ax_h.set_ylabel( "Counts (log-scale)" )

In [None]:
# Show the log-scale distribution of particles' salt masses throughout
# the simulation.  The histogram is taken over log10() of the salt masses.
salt_masses_concatenated = np.concatenate( list( particles_df["salt masses"] ) )

salt_mass_title = ("{:s}\n"
                   "All Particles ({:d} Observations)".format(
    simulation_name,
    len( salt_masses_concatenated ) ))

fig_h, ax_h = plt.subplots( figsize=(6, 6) )

log10_salt_masses = np.log10( salt_masses_concatenated )
ax_h.hist( log10_salt_masses, bins=100, log=True )
ax_h.minorticks_on()
ax_h.set_title( salt_mass_title )
ax_h.set_xlabel( "log10( Salt Mass ) (kg)" )
ax_h.set_ylabel( "Counts (log-scale)" )

In [None]:
# Histogram, with log-scaled counts, of the air temperatures seen by all
# particles over the whole simulation.
air_temperatures_concatenated = np.concatenate( list( particles_df["air temperatures"] ) )

air_temperature_title = ("{:s}\n"
                         "All Particles ({:d} Observations)".format(
    simulation_name,
    len( air_temperatures_concatenated ) ))

fig_h, ax_h = plt.subplots( figsize=(6, 6) )

ax_h.hist( air_temperatures_concatenated, bins=100, log=True )
ax_h.minorticks_on()
ax_h.set_title( air_temperature_title )
ax_h.set_xlabel( "Air Temperature (K)" )
ax_h.set_ylabel( "Counts (log-scale)" )

In [None]:
# Histogram of the relative humidities experienced by all particles over the
# whole simulation.  Counts are log-scaled so the distribution isn't dominated
# by the mean relative humidity.
rh_concatenated = np.concatenate( list( particles_df["relative humidities"] ) )

rh_title = ("{:s}\n"
            "All Particles ({:d} Observations)".format(
    simulation_name,
    len( rh_concatenated ) ))

fig_h, ax_h = plt.subplots( figsize=(6, 6) )

# Relative humidities are plotted as percentages.
rh_percentages = rh_concatenated * 100.0
ax_h.hist( rh_percentages, bins=100, log=True )
ax_h.minorticks_on()
ax_h.set_title( rh_title )
ax_h.set_xlabel( "Relative Humidity (%)" )
ax_h.set_ylabel( "Counts (log-scale)" )

In [None]:
# Histogram, with log-scaled counts, of the air densities experienced by all
# particles over the whole simulation.
air_densities_concatenated = np.concatenate( list( particles_df["air densities"] ) )

air_density_title = ("{:s}\n"
                     "All Particles ({:d} Observations)".format(
    simulation_name,
    len( air_densities_concatenated ) ))

fig_h, ax_h = plt.subplots( figsize=(6, 6) )

ax_h.hist( air_densities_concatenated, bins=100, log=True )
ax_h.minorticks_on()
ax_h.set_title( air_density_title )
ax_h.set_xlabel( "Air Density (kg/m^3)" )
ax_h.set_ylabel( "Counts (log-scale)" )

In [None]:
# Histogram, with log-scaled counts, of the integration times used for all
# particles over the whole simulation.
dts_concatenated = np.concatenate( list( particles_df["integration times"] ) )

dt_title = ("{:s}\n"
            "All Particles ({:d} Observations)".format(
    simulation_name,
    len( dts_concatenated ) ))

fig_h, ax_h = plt.subplots( figsize=(6, 6) )

ax_h.hist( dts_concatenated, bins=100, log=True )
ax_h.minorticks_on()
ax_h.set_title( dt_title )
ax_h.set_xlabel( "Integration Times (s)" )
ax_h.set_ylabel( "Counts (log-scale)" )

In [None]:
# Compare the observations per particle of particles destroyed during the
# simulation (left) against those still alive at its end (right).  The panels
# share both axes so they can be compared directly.
fig_h, ax_h = plt.subplots( 1, 2, figsize=(10, 6), sharex=True, sharey=True )

# Particles seen at the last observed time are treated as survivors.
simulation_end          = particles_df["death time"].max()
complete_particles_mask = (particles_df["death time"] < simulation_end)

destroyed_observation_counts = particles_df["number observations"][complete_particles_mask]
surviving_observation_counts = particles_df["number observations"][~complete_particles_mask]

fig_h.suptitle( "{:s}".format(
    simulation_name ) )

# Left panel: destroyed particles.
destroyed_title = ("{:.1f} Observations/Particle\n"
                   "{:d} Destroyed Particles".format(
                    destroyed_observation_counts.mean(),
                    complete_particles_mask.sum() ))

ax_h[0].hist( destroyed_observation_counts, bins=1000 )
ax_h[0].minorticks_on()
ax_h[0].set_title( destroyed_title )
ax_h[0].set_xlabel( "Number of Observations" )
ax_h[0].set_ylabel( "Counts" )

# Right panel: surviving particles.
surviving_title = ("{:.1f} Observations/Particle\n"
                   "{:d} Surviving Particles".format(
                    surviving_observation_counts.mean(),
                    (~complete_particles_mask).sum() ))

ax_h[1].hist( surviving_observation_counts, bins=1000 )
ax_h[1].minorticks_on()
ax_h[1].set_title( surviving_title )
ax_h[1].set_xlabel( "Number of Observations" )
ax_h[1].set_ylabel( "Counts" )

#fig_h.tight_layout()

# Cold Particle Analysis
NTLP's backward Euler (BE) process can generate cold particles.  The following cells aim to
understand when these occur for:

- Assessing their impact when MLP is used
- Attempting to fix BE so cold particles aren't generated

In [None]:
def cold_particles_with_be_failure( particle_series, threshold=None ):
    """
    Classifies how a particle's first cold observation relates to its first
    backward Euler (BE) failure.

    NOTE: This should only be called on particles that have a cold observation!
          An IndexError is raised when no observation is below the threshold.

    Takes 2 arguments:

      particle_series - Mapping (e.g. one row of the particles DataFrame) that
                        provides the "input be temperatures" and "be statuses"
                        arrays for a single particle.
      threshold       - Optional temperature below which an observation is
                        considered cold.  If omitted, defaults to None which
                        uses the notebook-level cold_threshold at call time.

    Returns 1 value:

      status - Integer classification:

                 0 - No BE failures
                 1 - First BE failure doesn't match the first cold observation
                 2 - First BE failure coincides with the first cold observation

    """

    # Look the notebook-level threshold up lazily so that existing callers,
    # which only supply the particle, continue to work unchanged.
    if threshold is None:
        threshold = cold_threshold

    cold_indices       = np.where( np.asarray( particle_series["input be temperatures"] ) < threshold )[0]
    be_failure_indices = np.where( np.asarray( particle_series["be statuses"] ) > 0 )[0]

    # Intentionally raises IndexError for particles without a cold observation.
    cold_particle_index = cold_indices[0]

    if be_failure_indices.size == 0:
        return 0

    if be_failure_indices[0] != cold_particle_index:
        return 1

    return 2

# Temperature cutoff, in Kelvin, below which we declare the particle "cold".
cold_threshold = 280

def _cold_observations( particle_series ):
    """
    Flags the observations whose BE input temperature is below cold_threshold.
    """
    return particle_series["input be temperatures"] < cold_threshold

def _first_cold_observation_index( particle_series ):
    """
    Locates the first observation flagged as cold.  Only valid for particles
    with a cold observation.
    """
    return np.where( _cold_observations( particle_series ) )[0][0]

def _first_cold_observation_temperature( particle_series ):
    """
    Looks up the BE input temperature at the first cold observation.
    """
    return particle_series["input be temperatures"][_first_cold_observation_index( particle_series )]

# Particles that are considered "cold", i.e. those with any cold observation.
cold_particles_mask        = particles_df.apply( lambda x: np.any( _cold_observations( x ) ), axis=1 )

# Select the cold particles once and derive every per-particle statistic
# from that selection.
_cold_particles_rows       = particles_df[cold_particles_mask]
cold_particles_ids         = _cold_particles_rows.index

# Number of cold observations for each particle.
cold_particles_counts      = _cold_particles_rows.apply( lambda x: _cold_observations( x ).sum(), axis=1 )

# First location of each cold observation.  For the initial dataset,
# all cold particles occurred once.
cold_particles_location    = _cold_particles_rows.apply( _first_cold_observation_index, axis=1 )

# Temperature of the first cold observation for each particle.
cold_particles_temperature = _cold_particles_rows.apply( _first_cold_observation_temperature, axis=1 )

# How each cold particle's first cold observation relates to its first
# BE failure.
cold_particles_be_status   = _cold_particles_rows.apply( cold_particles_with_be_failure, axis=1 )

In [None]:
# Plot where in each cold particle's history its first cold observation
# occurs, grouped by how that observation relates to the particle's first
# BE failure.
fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

number_cold_particles = cold_particles_mask.sum()

# Legend labels, positioned by the status codes that
# cold_particles_with_be_failure() returns:
#
#   0 - No BE failures
#   1 - First BE failure doesn't match the first cold observation
#   2 - First BE failure coincides with the first cold observation
#
failure_modes = ["No BE failure",
                 "BE failure doesn't match",
                 "BE and cold match"]

for color_be_index, failure_mode in enumerate( failure_modes ):
    mask = (cold_particles_be_status == color_be_index)
    ax_h.scatter( particles_df[cold_particles_mask][mask].index,
                  cold_particles_location[mask],
                  label=failure_mode )

ax_h.set_title( "{:d} Cold Particles ({:.2f}%)\n"
                "Temperature < {:d}K".format(
                    number_cold_particles,
                    number_cold_particles / len( particles_df ) * 100,
                    cold_threshold
    ) )
ax_h.set_ylabel( "Timestep Seen" )
ax_h.set_xlabel( "Particle Number" )
ax_h.legend()

In [None]:
# Show the temperature of each cold particle's first cold observation,
# one marker per particle.
cold_particle_numbers = particles_df[cold_particles_mask].index

fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

ax_h.plot( cold_particle_numbers, cold_particles_temperature, "." )

ax_h.set_xlabel( "Particle Number" )
ax_h.set_ylabel( "Temperature (K)" )
ax_h.set_title( "Cold Particles Temperature" )

In [None]:
# Interactively look at how cold observations occur.  Previous temperature
# is reasonable (as expected) and successive temperature recovers (as expected).
# Implies that something with the previous observation's environment causes
# BE to kind of succeed/mostly fail.
%matplotlib notebook

fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

# Position, within the cold particles, of the particle to inspect.  Change
# this to look at a different particle; it must be smaller than the number
# of cold particles.
cold_particle_index = 103
cold_particle_id    = cold_particles_location.index[cold_particle_index]
cold_particle       = particles_df.loc[cold_particle_id]

# Plot against the time elapsed since the particle's first observation,
# accumulated from its integration times.
ax_h.plot( np.cumsum( cold_particle["integration times"] ) - cold_particle["integration times"][0],
           cold_particle["input be temperatures"] )
ax_h.set_title( "Particle {:d} (Cold Index {:d})\n"
                .format(
    cold_particle_id,
    cold_particle_index
    ))
ax_h.set_ylabel( "Particle Temperature (K)" )

# The x-axis is accumulated integration time rather than an observation count.
ax_h.set_xlabel( "Time (s)" )

In [None]:
# Overlay the temperature history of every cold particle to interactively
# look at how cold observations occur.  Previous temperature is reasonable
# (as expected) and successive temperature recovers (as expected).
# Implies that something with the previous observation's environment causes
# BE to kind of succeed/mostly fail.
%matplotlib notebook

fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

ax_h.set_title( "{:d} Cold Particles (Temperature < {:d}K)".format(
    len( cold_particles_ids ),
    cold_threshold
    ))
ax_h.set_xlabel( "Timestep" )
ax_h.set_ylabel( "Particle Temperature (K)" )

# One series of markers per cold particle, drawn against the observation's
# position in the particle's history.  Each series is labeled with its
# particle's identifier so a legend can be enabled below.
for cold_particle_id in cold_particles_ids:
    cold_particle = particles_df.loc[cold_particle_id]

    ax_h.plot( cold_particle["input be temperatures"],
               ".",
               label="{:d}".format( cold_particle.name ) )

# Uncomment to identify individual particles.
#ax_h.legend()

In [None]:
# Table describing the observation that led to each cold particle's first
# cold observation.  Rows are the cold particles, starting with the location
# of the first cold observation.
cold_particles_df = pd.DataFrame( cold_particles_location, columns=["cold_observation_index"] )

# Columns filled in per-particle afterward, along with their intended data
# types.  Includes "previous be status" so that every column that gets
# written is declared up front.
#
# NOTE(review): Assigning an empty Series presumably yields columns of missing
#               values, in which case pandas cannot honor the integer types
#               until the columns are filled - confirm.
#
cold_particles_columns = {
    "observation index":     np.int32,
    "integration time":      np.float32,
    "be status":             np.int32,
    "previous be status":    np.int32,
    "input be radius":       np.float32,
    "output be radius":      np.float32,
    "input be temperature":  np.float32,
    "output be temperature": np.float32,
    "salt mass":             np.float32,
    "air temperature":       np.float32,
    "relative humidity":     np.float32,
    "air density":           np.float32,
}

for column_name, column_dtype in cold_particles_columns.items():
    cold_particles_df[column_name] = pd.Series( dtype=column_dtype )

In [None]:
# Record the observation immediately preceding each particle's first cold
# observation, plus the BE status of the observation before that.

# Maps each column in cold_particles_df to the per-observation array in
# particles_df that it is sampled from.
previous_observation_columns = {
    "integration time":      "integration times",
    "be status":             "be statuses",
    "input be radius":       "input be radii",
    "output be radius":      "output be radii",
    "input be temperature":  "input be temperatures",
    "output be temperature": "output be temperatures",
    "salt mass":             "salt masses",
    "air temperature":       "air temperatures",
    "relative humidity":     "relative humidities",
    "air density":           "air densities",
}

# Guarantee the column exists even if no particle has a value for it.
if "previous be status" not in cold_particles_df.columns:
    cold_particles_df["previous be status"] = np.nan

for cold_particle_id in cold_particles_ids:
    observation_index = cold_particles_location[cold_particle_id]
    previous_index    = observation_index - 1

    # A particle whose very first observation is cold has no preceding
    # observation.  A negative index would silently wrap around to the
    # particle's *last* observation, so leave its values missing instead.
    if previous_index < 0:
        continue

    cold_particles_df.at[cold_particle_id, "observation index"] = previous_index

    for cold_column, particles_column in previous_observation_columns.items():
        cold_particles_df.at[cold_particle_id, cold_column] = particles_df.at[cold_particle_id, particles_column][previous_index]

    # Likewise, only look one observation further back when it exists.
    if previous_index >= 1:
        cold_particles_df.at[cold_particle_id, "previous be status"] = particles_df.at[cold_particle_id, "be statuses"][previous_index - 1]

In [None]:
# Histogram the conditions recorded in cold_particles_df so we can look for
# what the cold particles have in common.
fig_h, ax_h = plt.subplots( 4, 2, figsize=(10, 12) )

number_bins = 25

# First row: integration time and BE status (for the recorded observation
# and the one before it).
ax_h[0][0].hist( cold_particles_df["integration time"], bins=number_bins )
ax_h[0][0].set_title( "Integration Time" )
ax_h[0][0].set_xlabel( "Time (s)" )
ax_h[0][1].hist( cold_particles_df["be status"], bins=number_bins, label="Cold Particle" )
ax_h[0][1].hist( cold_particles_df["previous be status"], bins=number_bins, label="Previous Timestep" )
ax_h[0][1].set_title( "BE Status" )
ax_h[0][1].set_xlabel( "BE Failure Mode" )
ax_h[0][1].legend()

# Second row: BE input and output radii.  These are histogrammed in log10
# space, so the axes are labeled accordingly.
#
# XXX: why is input radii busted?
ax_h[1][0].hist( np.log10( cold_particles_df["input be radius"] ), bins=number_bins )
ax_h[1][0].set_title( "Input Radius" )
ax_h[1][0].set_xlabel( "Size (log10 m)" )
ax_h[1][1].hist( np.log10( cold_particles_df["output be radius"] ), bins=number_bins )
ax_h[1][1].set_title( "Output Radius" )
ax_h[1][1].set_xlabel( "Size (log10 m)" )

# Third row: BE input and output temperatures.
ax_h[2][0].hist( cold_particles_df["input be temperature"], bins=number_bins )
ax_h[2][0].set_title( "Input Temperature" )
ax_h[2][0].set_xlabel( "Temperature (K)" )
ax_h[2][1].hist( cold_particles_df["output be temperature"], bins=number_bins )
ax_h[2][1].set_title( "Output Temperature" )
ax_h[2][1].set_xlabel( "Temperature (K)" )

# Fourth row: the particle's environment.  Relative humidity is scaled from
# a fraction to a percentage.
ax_h[3][0].hist( cold_particles_df["air temperature"], bins=number_bins )
ax_h[3][0].set_title( "Air Temperature" )
ax_h[3][0].set_xlabel( "Temperature (K)" )
ax_h[3][1].hist( cold_particles_df["relative humidity"] * 100, bins=number_bins )
ax_h[3][1].set_title( "Relative Humidity" )
ax_h[3][1].set_xlabel( "Percentage (%)" )

fig_h.tight_layout()

# Resampling Particles Onto A Common Timeline
Comparing multiple particles is easier when they have the same number of observations
as this allows us to take averages, ratios, etc.  

In [None]:
# Resampling particles onto a common timeline ("Particle Life").
# This lets us see common shapes in the particle's states since
# they're now interpolated onto a fixed number of points in time.

import functools
from scipy.interpolate import interp1d

# The pi chamber development dataset's particles have ~1500 observations on average.
# We pick a number of points that retains most features while not ballooning
# our memory footprint.
number_points = 1000

def resample_particle_observations( particle_series, metric, number_points ):
    """
    Linearly interpolates one of a particle's metrics onto a normalized
    [0, 1] timeline with a fixed number of evenly spaced points.

    The timeline is built from the particle's accumulated integration times,
    shifted so the first observation is at 0 and scaled so the last is at 1.

    Takes 3 arguments:

      particle_series - Mapping (e.g. one row of the particles DataFrame) that
                        provides the "integration times" array as well as the
                        array named by metric.
      metric          - Name of the per-observation array to resample.
      number_points   - Number of evenly spaced points, spanning [0, 1]
                        inclusive, to interpolate onto.

    Returns 1 value:

      resampled - NumPy array, of length number_points, holding the
                  interpolated metric.

    """

    integration_times = np.asarray( particle_series["integration times"] )
    observations      = np.asarray( particle_series[metric] )

    # Time elapsed since the first observation.
    elapsed_times = np.cumsum( integration_times ) - integration_times[0]
    lifetime      = elapsed_times[-1]

    # Particles with a single observation, or otherwise no elapsed time,
    # cannot be normalized without dividing by zero.  Hold their first
    # observation constant across the timeline.
    if lifetime <= 0:
        return np.full( number_points, observations[0], dtype=np.float64 )

    # NOTE: np.interp() expects increasing sample locations, so this assumes
    #       non-negative integration times - confirm against the tracer.
    return np.interp( np.linspace( 0.0, 1.0, number_points ),
                      elapsed_times / lifetime,
                      observations )

# Only resample particles destroyed before the simulation ended, i.e. those
# whose death time precedes the latest death time in the dataset.
simulation_end          = particles_df["death time"].max()
complete_particles_mask = (particles_df["death time"] < simulation_end)

def _make_resampler( metric ):
    """
    Builds a single-argument resampler for the named metric that interpolates
    onto number_points points.
    """
    return functools.partial( resample_particle_observations,
                              metric=metric,
                              number_points=number_points )

resample_output_radii        = _make_resampler( "output be radii" )
resample_output_temperatures = _make_resampler( "output be temperatures" )
resample_salt_masses         = _make_resampler( "salt masses" )
resample_air_temperatures    = _make_resampler( "air temperatures" )
resample_relative_humidities = _make_resampler( "relative humidities" )
resample_air_densities       = _make_resampler( "air densities" )

def _resample_complete_particles( resampler ):
    """
    Applies the resampler to each complete particle and stacks the results
    into an array with one row per particle.
    """
    return np.vstack( particles_df[complete_particles_mask].apply( resampler, axis=1 ).tolist() )

radii_resampled          = _resample_complete_particles( resample_output_radii )
particle_temps_resampled = _resample_complete_particles( resample_output_temperatures )
salt_masses_resampled    = _resample_complete_particles( resample_salt_masses )
air_temps_resampled      = _resample_complete_particles( resample_air_temperatures )
rh_resampled             = _resample_complete_particles( resample_relative_humidities )
air_densities_resampled  = _resample_complete_particles( resample_air_densities )

In [None]:
# Demonstrate that resampling works as expected against a random particle.  Plot the resampled
# metric against the original.
complete_particles_df = particles_df[complete_particles_mask]

particle_index = np.random.randint( 0, len( complete_particles_df ) )
particle       = complete_particles_df.iloc[particle_index]

# Particles are resampled onto np.linspace( 0.0, 1.0, number_points ), which
# has number_points - 1 intervals.  Match that spacing so that
# np.arange( number_points ) * dt lands on the resampled points.
dt = 1 / max( number_points - 1, 1 )

resampled_timeline = np.linspace( 0.0, 1.0, number_points )

# NOTE(review): The observations are normalized by death time minus birth
#               time while the resampling normalizes by the accumulated
#               integration times.  The two curves only overlay when those
#               agree - confirm.
observed_timeline = ((np.cumsum( particle["integration times"] ) - particle["integration times"][0]) /
                     (particle["death time"] - particle["birth time"]))

fig_h, ax_h = plt.subplots( 1, 1, figsize=(10, 6) )

fig_h.suptitle( "Particle {:d} (Index {:d})".format(
    particle.name,
    particle_index ) )

ax_h.plot( observed_timeline,
           particle["relative humidities"],
           label="Observations" )
ax_h.plot( resampled_timeline,
           rh_resampled[particle_index, :],
           ".",
           label="Resampling" )

# Both curves are on a normalized timeline rather than in seconds.
ax_h.set_xlabel( "Interpolated Time (fraction of particle life)" )
ax_h.set_ylabel( "Relative Humidity (fractional)" )
ax_h.legend( loc="upper left" )

In [None]:
# Summarize the resampled particle radii across all completed particles at
# each point along the normalized timeline.
fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

# Each row was resampled onto evenly spaced points spanning [0, 1]
# inclusive, so plot against exactly that grid.
particle_life = np.linspace( 0.0, 1.0, radii_resampled.shape[1] )

radii_extrema = ( ("Minimum", radii_resampled.min( axis=0 )),
                  ("Mean",    radii_resampled.mean( axis=0 )),
                  ("Median",  np.median( radii_resampled, axis=0 )),
                  ("Maximum", radii_resampled.max( axis=0 )) )

for extremum_label, extremum_values in radii_extrema:
    ax_h.plot( particle_life, extremum_values, label=extremum_label )

ax_h.set_yscale( "log" )
ax_h.set_title( "Particle Radii Extrema\n"
                "{:d} Completed Particles".format( complete_particles_mask.sum() ) )

# The timeline is normalized rather than in seconds.
ax_h.set_xlabel( "Interpolated Time (fraction of particle life)" )
ax_h.set_ylabel( "Size (m)" )
ax_h.minorticks_on()
ax_h.legend()

In [None]:
# Summarize the resampled particle temperatures across all completed
# particles at each point along the normalized timeline.
fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

# Each row was resampled onto evenly spaced points spanning [0, 1]
# inclusive, so plot against exactly that grid.
particle_life = np.linspace( 0.0, 1.0, particle_temps_resampled.shape[1] )

particle_temps_extrema = ( ("Minimum", particle_temps_resampled.min( axis=0 )),
                           ("Mean",    particle_temps_resampled.mean( axis=0 )),
                           ("Median",  np.median( particle_temps_resampled, axis=0 )),
                           ("Maximum", particle_temps_resampled.max( axis=0 )) )

for extremum_label, extremum_values in particle_temps_extrema:
    ax_h.plot( particle_life, extremum_values, label=extremum_label )

ax_h.set_yscale( "log" )
ax_h.set_title( "Particle Temperatures Extrema\n"
                "{:d} Completed Particles".format( complete_particles_mask.sum() ) )

# The timeline is normalized rather than in seconds.
ax_h.set_xlabel( "Interpolated Time (fraction of particle life)" )
ax_h.set_ylabel( "Temperature (K)" )
ax_h.minorticks_on()
ax_h.legend()

In [None]:
# Summarize the resampled salt masses across all completed particles at
# each point along the normalized timeline.
fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

# Each row was resampled onto evenly spaced points spanning [0, 1]
# inclusive, so plot against exactly that grid.
particle_life = np.linspace( 0.0, 1.0, salt_masses_resampled.shape[1] )

salt_masses_extrema = ( ("Minimum", salt_masses_resampled.min( axis=0 )),
                        ("Mean",    salt_masses_resampled.mean( axis=0 )),
                        ("Median",  np.median( salt_masses_resampled, axis=0 )),
                        ("Maximum", salt_masses_resampled.max( axis=0 )) )

for extremum_label, extremum_values in salt_masses_extrema:
    ax_h.plot( particle_life, extremum_values, label=extremum_label )

ax_h.set_yscale( "log" )
ax_h.set_title( "Salt Masses Extrema\n"
                "{:d} Completed Particles".format( complete_particles_mask.sum() ) )

# The timeline is normalized rather than in seconds.
ax_h.set_xlabel( "Interpolated Time (fraction of particle life)" )
ax_h.set_ylabel( "Mass (kg)" )
ax_h.minorticks_on()
ax_h.legend()

In [None]:
# Summarize the resampled air temperatures across all completed particles
# at each point along the normalized timeline.
fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

# Each row was resampled onto evenly spaced points spanning [0, 1]
# inclusive, so plot against exactly that grid.
particle_life = np.linspace( 0.0, 1.0, air_temps_resampled.shape[1] )

air_temps_extrema = ( ("Minimum", air_temps_resampled.min( axis=0 )),
                      ("Mean",    air_temps_resampled.mean( axis=0 )),
                      ("Median",  np.median( air_temps_resampled, axis=0 )),
                      ("Maximum", air_temps_resampled.max( axis=0 )) )

for extremum_label, extremum_values in air_temps_extrema:
    ax_h.plot( particle_life, extremum_values, label=extremum_label )

ax_h.set_yscale( "log" )
ax_h.set_title( "Air Temperatures Extrema\n"
                "{:d} Completed Particles".format( complete_particles_mask.sum() ) )

# The timeline is normalized rather than in seconds.
ax_h.set_xlabel( "Interpolated Time (fraction of particle life)" )
ax_h.set_ylabel( "Temperature (K)" )
ax_h.minorticks_on()
ax_h.legend()

In [None]:
# Summarize the resampled relative humidities across all completed
# particles at each point along the normalized timeline.
fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

# Each row was resampled onto evenly spaced points spanning [0, 1]
# inclusive, so plot against exactly that grid.
particle_life = np.linspace( 0.0, 1.0, rh_resampled.shape[1] )

rh_extrema = ( ("Minimum", rh_resampled.min( axis=0 )),
               ("Mean",    rh_resampled.mean( axis=0 )),
               ("Median",  np.median( rh_resampled, axis=0 )),
               ("Maximum", rh_resampled.max( axis=0 )) )

for extremum_label, extremum_values in rh_extrema:
    ax_h.plot( particle_life, extremum_values, label=extremum_label )

ax_h.set_title( "Relative Humidity Extrema\n"
                "{:d} Completed Particles".format( complete_particles_mask.sum() ) )

# The timeline is normalized rather than in seconds.
ax_h.set_xlabel( "Interpolated Time (fraction of particle life)" )
ax_h.set_ylabel( "Relative Humidity (fractional)" )
ax_h.minorticks_on()
ax_h.legend()

In [None]:
# Summarize the resampled air densities across all completed particles at
# each point along the normalized timeline.
fig_h, ax_h = plt.subplots( 1, 1, figsize=(6, 6) )

# Each row was resampled onto evenly spaced points spanning [0, 1]
# inclusive, so plot against exactly that grid.
particle_life = np.linspace( 0.0, 1.0, air_densities_resampled.shape[1] )

air_densities_extrema = ( ("Minimum", air_densities_resampled.min( axis=0 )),
                          ("Mean",    air_densities_resampled.mean( axis=0 )),
                          ("Median",  np.median( air_densities_resampled, axis=0 )),
                          ("Maximum", air_densities_resampled.max( axis=0 )) )

for extremum_label, extremum_values in air_densities_extrema:
    ax_h.plot( particle_life, extremum_values, label=extremum_label )

ax_h.set_title( "Air Density Extrema\n"
                "{:d} Completed Particles".format( complete_particles_mask.sum() ) )

# The timeline is normalized rather than in seconds.
ax_h.set_xlabel( "Interpolated Time (fraction of particle life)" )

# Same units as the air density histogram earlier in the notebook.
ax_h.set_ylabel( "Air Density (kg/m^3)" )
ax_h.minorticks_on()
ax_h.legend()

# Open Analysis Questions to Answer
- When does BE fail?
  - Does it happen multiple times to a particle?
  - How often in the data set?
- Why are there super cold particles (~210 K)?