In [None]:
import glob
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize._numdiff import approx_derivative
import torch
from torch.nn import L1Loss

import droplet_approximation

# ODE Exploration
This notebook graphs the radius and temperature derivatives of droplets in various conditions. It has two sections:
1. Trace ODE Exploration: pulls droplets from a selected NTLP trace. Graphs the droplet radius, temperature, derivative of radius with respect to time, and derivative of temperature with respect to time. These quantities are calculated both for BE and for a model using direct inference.
2. Domain ODE Exploration: samples random droplets from the "boxed" domain for the droplet model. Graphs the distribution of both analytical dydt and autograd dydt. Additionally charts both analytical dydt and autograd dydt as a function of radius for both variables.

## Overall Setup

In [None]:
# Do not edit settings here, edit in the cell below instead.
#
# These are defaults only.  The names match what the later cells actually read
# (model_load_path, custom_parameter_ranges) so that removing an override from
# the settings cell falls back to "no model" / "no custom ranges" rather than
# raising a NameError.
model_load_path         = None
custom_parameter_ranges = {}

# Parameter ranges as currently reported by the package.  Later cells overlay
# the custom ranges onto this mapping.
parameter_ranges = droplet_approximation.get_parameter_ranges()

# Get current commit SHA.  It is appended to figure titles, so strip the
# trailing newline that git emits and close the pipe once it has been read.
# Fall back to a placeholder when git produces no output (e.g. when run
# outside of a checkout).
with os.popen( "git rev-parse HEAD" ) as git_pipe:
    current_SHA = git_pipe.read().strip() or "unknown"


In [None]:
# Checkpoint of the trained model to evaluate.  Set to None to skip every
# model-dependent plot.
model_load_path = "../models/box-narrow_uniform_high_res-mlp_4layer-b=1024-lr=1e-3_halvingschedule-l2reg=1e-6_checkpoint.pt"

# (lower, upper) bounds that replace the package defaults for each parameter.
#
# NOTE(review): the radius, salt_mass, and time bounds look like log10 values;
#               confirm against droplet_approximation before changing them.
custom_parameter_ranges = dict(
    radius=(-6.75, -3.00),
    temperature=(284.0, 300.0),
    salt_mass=(-17.66, -17.65),
    air_temperature=(284.0, 300.0),
    relative_humidity=(0.98, 1.11),
    rhoa=(0.99, 1.01),
    time=(-1.4, -0.75),
)

In [None]:

# Overlay the custom parameter ranges and set them for the remainder of the
# notebook.
parameter_ranges.update( custom_parameter_ranges )

droplet_approximation.set_parameter_ranges( parameter_ranges )

if model_load_path is None:
    # No checkpoint requested.  Define every model-related name anyway so that
    # later cells can test evaluate_model without tripping over an undefined
    # variable.
    evaluate_model = False

    model      = None
    model_name = None
else:
    evaluate_model = True
    model = droplet_approximation.ResidualNet()
    droplet_approximation.load_model_checkpoint( model_load_path, model )

    # Human-readable name for figure titles: the checkpoint's file name without
    # its directory or final extension, with separators turned into spaces.
    # Only the last extension is removed so that names containing a decimal
    # point (e.g. "lr=0.001") are not cut short at the first ".".
    checkpoint_stem = os.path.splitext( os.path.basename( model_load_path ) )[0]
    model_name      = checkpoint_stem.replace( "_", " " ).replace( "-", " " )

## Trace ODE Exploration

In [None]:
# Path to the top-level simulations/ data directory.
simulations_data_root = "/afs/crc.nd.edu/group/RichterLab/droplet_approximation/data/simulations"

# Name of the simulation we're investigating.
#
# NOTE: This must be one of the keys of simulation_directory_names below!
#
simulation_name = "Pi Chamber - 1-way Coupling, RH~103%"
#simulation_name = "Pi Chamber - 2-way Coupling"
#simulation_name = "cfog"
#simulation_name = "Fatima"
#simulation_name = "Spray"

# Number of processes to use.  Zero means "use every available core".
number_processes = 0

# Use a large fanout degree for our raw particle files directory hierarchy.
#
# NOTE: This must match how the particle directories were constructed.  Do *not* change
#       this unless you've updated the common datasets as well.
#
dirs_per_level   = 256

# Simulation display name -> directory name beneath simulations_data_root.
simulation_directory_names = {
    "Pi Chamber - 1-way Coupling, RH~103%": "pi_chamber-rh103-1way",
    "Pi Chamber - 2-way Coupling":          "pi_chamber-2way",
    "cfog":                                 "cfog",
    "Fatima":                               "fatima",
    "Spray":                                "spray",
}

# Without a data root there is nothing to look at, so stop here.
if simulations_data_root is None:
    raise ValueError( "Must set simulations_data_root to run this notebook!" )

# Replace the "use everything" sentinel with the actual core count.
if number_processes == 0:
    number_processes = os.cpu_count()

# Resolve the selected simulation to its directory name.
if simulation_name not in simulation_directory_names:
    raise ValueError( "Unknown simulation_name!" )
simulation_directory_name = simulation_directory_names[simulation_name]

# Top-level directory of this simulation.
simulation_root = "/".join( [simulations_data_root, simulation_directory_name] )

# Path to the top of the raw particle files directory hierarchy and its index.
particles_root       = "/".join( [simulation_root, "particles"] )
particles_index_path = "/".join( [particles_root, "particles.index"] )

# The NTLP particle traces live directly in the simulation's top-level
# directory.
ntlp_trace_root = simulation_root

# Path to the NTLP history NetCDF file.
ntlp_history_path = "/".join( [simulation_root, "history.nc"] )

# List of NTLP particle trace paths.
ntlp_trace_paths = glob.glob( os.path.join( ntlp_trace_root, "be_dump_*.data" ) )

# Summarize what was resolved.
#
# NOTE(review): "Top-level" reports the directory *name* rather than
#               simulation_root; confirm that this is intended.
summary_template = ( "Investigating '{:s}'.  Key directories are:\n"
                     "\n"
                     "    Top-level:         {:s}\n"
                     "    Particles:         {:s}\n"
                     "    Particles index:   {:s}\n"
                     "    NTLP traces:       {:s}\n"
                     "    NTLP history.nc:   {:s}\n"
                     "\n"
                     "{:d} NTLP trace{:s} found." )

trace_count   = len( ntlp_trace_paths )
plural_suffix = "" if trace_count == 1 else "s"

print( summary_template.format( simulation_name,
                                simulation_directory_name,
                                particles_root,
                                particles_index_path,
                                ntlp_trace_root,
                                ntlp_history_path,
                                trace_count,
                                plural_suffix ) )


In [None]:
# Read the particles index as a flat array of 32-bit integer particle ids.
particle_id_dtype   = np.dtype( np.int32 )
unique_particle_ids = np.fromfile( particles_index_path, dtype=particle_id_dtype )

### Particle Selection
Select which particles from the traces to graph

In [None]:
# Select whatever particles you want here - these were selected arbitrarily.
# The choice is made in two steps: restrict to the first 1000 ids in the index,
# then take a small slice of those to graph.
candidate_particle_ids = unique_particle_ids[:1000]
target_particle_ids    = candidate_particle_ids[2:5]

In [None]:

# Whether to use the batch reader instead of the plain one.
parallel_read_flag = True

# Both readers are called with the same arguments, so pick the function first
# and call it once.
#
# NOTE(review): number_processes is computed earlier but is not passed to
#               either reader; confirm whether the batch reader should get it.
if parallel_read_flag:
    read_particles = droplet_approximation.batch_read_particles_data
else:
    read_particles = droplet_approximation.read_particles_data

particles_df = read_particles( particles_root,
                               target_particle_ids,
                               dirs_per_level )


In [None]:
def _signed_log10( values ):
    """
    Returns sign( values ) * log10( |values| ), elementwise.

    This is the "Signed Log" quantity plotted for the time derivatives below.
    Zero inputs produce NaN (0 * -inf) along with a NumPy divide warning.
    """
    return np.sign( values ) * np.log10( np.abs( values ) )

# Chain-rule factors that convert the model's autograd derivatives into
# physical derivatives.  They are derived from the parameter ranges that were
# set for this notebook rather than hard-coded, so they stay correct when the
# custom ranges are edited.
radius_log_low, radius_log_high   = parameter_ranges["radius"]
temperature_low, temperature_high = parameter_ranges["temperature"]

radius_log_half_range = (radius_log_high - radius_log_low) / 2.0

# NOTE(review): the radius factor uses half of its range while the temperature
#               factor uses its full range.  If scale_droplet_parameters() maps
#               the temperature output onto its range the same way it maps the
#               log radius, this should be halved as well - confirm against the
#               package before trusting absolute dT/dt magnitudes.
temperature_range = temperature_high - temperature_low

# Evaluation mode does not change between particles, so set it once.
if evaluate_model:
    model.eval()

for particle_id in target_particle_ids:
    p_df = particles_df.loc[particle_id]

    # One row per time step.  Column order: radius, temperature, salt mass,
    # air temperature, relative humidity, air density, integration time.
    particle_parameters = np.stack( p_df[[
        "input be radii",
        "input be temperatures",
        "salt masses",
        "air temperatures",
        "relative humidities",
        "air densities",
        "integration times"
    ]].to_numpy(), axis=-1 )

    # Start time of each step: running sum of the integration times, beginning
    # at zero.
    times = np.cumsum( np.insert( particle_parameters[:, -1], 0, 0 ))[:-1]

    # Analytical derivatives at every step.  The first two columns are the
    # state, the remaining columns (less the integration time) are the
    # background parameters.
    dydt_data = np.array( [droplet_approximation.dydt( 0, particle_step[:2], particle_step[2:-1] )
                           for particle_step in particle_parameters] )

    # TODO add reference value from Greg's push
    title_string = "Trajectory and Derivatives for Particle {:d} from trace of {:s}".format( particle_id, simulation_name )
    if evaluate_model:
        title_string += "\n Against MLP {:s}".format( model_name )
    title_string += "\n SHA: {:s}".format( current_SHA )

    fig, ax_h = plt.subplots( 3, 2, figsize=(12,8) )
    fig.suptitle( title_string )

    ax_h[0][0].set_title( "Droplet Radius vs. Time" )
    ax_h[0][0].set_xlabel( "Time (s)" )
    ax_h[0][0].set_ylabel( "Radius (m)" )
    ax_h[0][0].plot( times, particle_parameters[:, 0], label="NTLP Radius" )
    ax_h[0][0].set_yscale( "log" )
    ax_h[0][0].minorticks_on()

    ax_h[0][1].set_title( "Droplet Temperature vs. Time" )
    ax_h[0][1].set_xlabel( "Time (s)" )
    ax_h[0][1].set_ylabel( "Temperature (K)" )
    ax_h[0][1].plot( times, particle_parameters[:, 1], label="NTLP Temperature" )
    ax_h[0][1].minorticks_on()

    ax_h[1][0].set_title( "Droplet Signed Log dr/dt vs. Time" )
    ax_h[1][0].set_xlabel( "Time (s)" )
    ax_h[1][0].set_ylabel( "Signed Log dr/dt (m/s)" )
    ax_h[1][0].plot( times, _signed_log10( dydt_data[:, 0] ),
                     label="Analytic Signed Log dr/dt" )
    ax_h[1][0].minorticks_on()

    ax_h[1][1].set_title( "Droplet Signed Log dT/dt vs. Time" )
    ax_h[1][1].set_xlabel( "Time (s)" )
    ax_h[1][1].set_ylabel( "Signed Log dT/dt (K/s)" )
    ax_h[1][1].plot( times, _signed_log10( dydt_data[:, 1] ),
                     label="Analytic Signed Log dT/dt" )
    ax_h[1][1].minorticks_on()

    ax_h[2][0].set_title( "Droplet RH vs. Time" )
    ax_h[2][0].set_xlabel( "Time (s)" )
    ax_h[2][0].set_ylabel( "Relative Humidity (%)" )
    ax_h[2][0].plot( times, 100*particle_parameters[:, 4] )
    ax_h[2][0].minorticks_on()

    ax_h[2][1].set_title( "Droplet Delta Temperature vs. Time" )
    ax_h[2][1].set_xlabel( "Time (s)" )
    ax_h[2][1].set_ylabel( "Air - Particle Temperature (K)" )
    ax_h[2][1].plot( times, particle_parameters[:, 3] - particle_parameters[:, 1] )
    ax_h[2][1].minorticks_on()

    if evaluate_model:
        # Model inputs: the normalized droplet parameters with the raw
        # integration time appended as the last column.  Gradients are tracked
        # for the whole tensor, but only the time column is used below.
        normalized_inputs = droplet_approximation.normalize_droplet_parameters( particle_parameters[:, :-1] )
        integration_times = np.reshape( particle_parameters[:, -1], (-1, 1) )
        tensor_droplets   = torch.from_numpy(
            np.hstack( [normalized_inputs, integration_times] ).astype( "float32" ) ).requires_grad_( True )

        # Calculate outputs. Scale them for graphing/chain rule calculations
        model_outputs        = model( tensor_droplets )
        scaled_model_outputs = droplet_approximation.scale_droplet_parameters( model_outputs.detach().numpy() )

        # Each row's output depends only on that row's input, so seeding the
        # backward pass with ones yields the per-row derivative.  The last
        # column of the input gradient is the derivative with respect to time.
        # The graph is retained after the first call because the second call
        # reuses it.
        row_seeds  = torch.ones_like( model_outputs[:, -1] )
        model_drdt = torch.autograd.grad( model_outputs[:, 0],
                                          tensor_droplets,
                                          grad_outputs=row_seeds,
                                          retain_graph=True )[0][:, -1].detach().numpy()
        model_dTdt = torch.autograd.grad( model_outputs[:, 1],
                                          tensor_droplets,
                                          grad_outputs=row_seeds )[0][:, -1].detach().numpy()

        # Scale dydt according to chain rule: r = 10**(log r), so
        # dr/dt = r * ln( 10 ) * d(log r)/dt.
        model_drdt *= scaled_model_outputs[:, 0] * np.log( 10.0 ) * radius_log_half_range
        model_dTdt *= temperature_range

        ax_h[0][0].plot( times, scaled_model_outputs[:, 0], label="MLP Radius" )
        ax_h[0][1].plot( times, scaled_model_outputs[:, 1], label="MLP Temperature" )
        ax_h[1][0].plot( times, _signed_log10( model_drdt ), label="MLP autograd Signed Log dr/dt" )
        ax_h[1][1].plot( times, _signed_log10( model_dTdt ), label="MLP autograd Signed Log dT/dt" )

    ax_h[0][0].legend()
    ax_h[0][1].legend()
    ax_h[1][0].legend()
    ax_h[1][1].legend()

    fig.tight_layout()

## Domain ODE Exploration

In [None]:
domain_name      = "Uncoupled NTLP Pi Chamber Domain"
droplet_count    = 50000
integration_time = 0.10

# Draw every droplet parameter uniformly on [-1, 1) and convert the draws into
# droplet parameters with the package's scaling routine.
normalized_droplet_parameters = np.random.uniform( -1, 1.0, (droplet_count, 6) )
droplet_parameters            = droplet_approximation.scale_droplet_parameters( normalized_droplet_parameters )

# Replace the sampled particle temperature (column 1) with the air temperature
# (column 3) plus a uniform offset of +-3K.
temperature_offsets      = np.random.uniform( -3.0, 3.0, droplet_count )
droplet_parameters[:, 1] = droplet_parameters[:, 3] + temperature_offsets

In [None]:
# Evaluate the analytical derivatives for every sampled droplet, then scatter
# log10 |dr/dt| against log10 |dT/dt|.
dydt_data = np.array( [
    droplet_approximation.dydt( 0, droplet_state[:2], droplet_state[2:] )
    for droplet_state in droplet_parameters
] )

log_abs_analytical_drdt = np.log10( np.abs( dydt_data[:, 0] ) )
log_abs_analytical_dTdt = np.log10( np.abs( dydt_data[:, 1] ) )

plt.figure()
plt.title( "Scatter Plot of Droplet Log Absolute Value Analytical dydt for {:s}\n SHA: {:s}".format( domain_name, current_SHA ) )
plt.xlabel( "Log Absolute Value Analytical drdt" )
plt.ylabel( "Log Absolute Value Analytical dTdt" )
plt.minorticks_on()
plt.grid( color="black", alpha=0.1 )
plt.scatter( log_abs_analytical_drdt, log_abs_analytical_dTdt, s=3, alpha=0.5 )

In [None]:
if not evaluate_model:
    raise RuntimeError( "No model provided. The remaining analysis requires a model." )

model.eval()

# Model inputs: the normalized droplet parameters with the (constant)
# integration time appended as the last column.  Gradients are tracked for the
# whole tensor, but only the time column is used below.
normalized_inputs = droplet_approximation.normalize_droplet_parameters( droplet_parameters )
integration_times = np.reshape( np.repeat( integration_time, droplet_count ), (-1, 1) )
tensor_droplets   = torch.from_numpy(
    np.hstack( [normalized_inputs, integration_times] ).astype( "float32" ) ).requires_grad_( True )

# Calculate outputs. Scale them for graphing/chain rule calculations
model_outputs        = model( tensor_droplets )
scaled_model_outputs = droplet_approximation.scale_droplet_parameters( model_outputs.detach().numpy() )

# Each row's output depends only on that row's input, so seeding the backward
# pass with ones yields the per-row derivative.  The last column of the input
# gradient is the derivative with respect to time.  The graph is retained
# after the first call because the second call reuses it.
row_seeds  = torch.ones_like( model_outputs[:, -1] )
model_drdt = torch.autograd.grad( model_outputs[:, 0],
                                  tensor_droplets,
                                  grad_outputs=row_seeds,
                                  retain_graph=True )[0][:, -1].detach().numpy()
model_dTdt = torch.autograd.grad( model_outputs[:, 1],
                                  tensor_droplets,
                                  grad_outputs=row_seeds )[0][:, -1].detach().numpy()

# Chain-rule factors, derived from the parameter ranges that were set for this
# notebook rather than hard-coded so they track edits to the custom ranges.
radius_log_low, radius_log_high   = parameter_ranges["radius"]
temperature_low, temperature_high = parameter_ranges["temperature"]

radius_log_half_range = (radius_log_high - radius_log_low) / 2.0

# NOTE(review): the radius factor uses half of its range while the temperature
#               factor uses its full range.  If scale_droplet_parameters() maps
#               the temperature output onto its range the same way it maps the
#               log radius, this should be halved as well - confirm against the
#               package before trusting absolute dT/dt magnitudes.
temperature_range = temperature_high - temperature_low

# Scale dydt according to chain rule: r = 10**(log r), so
# dr/dt = r * ln( 10 ) * d(log r)/dt.
model_drdt *= scaled_model_outputs[:, 0] * np.log( 10.0 ) * radius_log_half_range
model_dTdt *= temperature_range

In [None]:
# Re-evaluate the analytical derivatives at the model's *outputs*: the state
# (radius, temperature) comes from the scaled model output while the
# background parameters come from the original droplet.  The autograd
# derivatives are taken at the model outputs, so this is the state at which
# the two can be compared.
dydt_data = np.array( [
    droplet_approximation.dydt( 0, model_output[:2], droplet_inputs[2:] )
    for droplet_inputs, model_output in zip( droplet_parameters, scaled_model_outputs )
] )

In [None]:
# Boolean selection of which droplets to show.  Swap in the commented-out
# alternative to focus on a sub-region of the domain and see where the model
# agrees or disagrees with the analytical solution.

#mask = droplet_parameters[:, 0] > 1.0e-6
mask = np.ones( droplet_count, dtype=bool )

# log10 magnitudes of both derivative estimates for the selected droplets.
log_abs_analytical_drdt = np.log10( np.abs( dydt_data[mask, 0] ) )
log_abs_analytical_dTdt = np.log10( np.abs( dydt_data[mask, 1] ) )
log_abs_model_drdt      = np.log10( np.abs( model_drdt[mask] ) )
log_abs_model_dTdt      = np.log10( np.abs( model_dTdt[mask] ) )

plt.figure()
title_string = (
    "Scatter Plot of Droplet Log Absolute Value Analytical dydt for {:s}\n".format( domain_name )
    + "Evaluated at Model Output for {:s}\n SHA: {:s}".format( model_name, current_SHA )
)
plt.title( title_string )
plt.xlabel( "Log Absolute Value drdt" )
plt.ylabel( "Log Absolute Value dTdt" )
plt.minorticks_on()
plt.grid( color="black", alpha=0.1 )

plt.scatter( log_abs_analytical_drdt, log_abs_analytical_dTdt, s=3, alpha=0.5, label="Analytical Log Absolute Value dydt" )
plt.scatter( log_abs_model_drdt, log_abs_model_dTdt, color='r', s=3, alpha=0.5, label="MLP autograd Log Absolute Value dydt" )

plt.legend()

In [None]:
# Analytical derivatives, one column each.
analytical_drdt = dydt_data[:, 0]
analytical_dTdt = dydt_data[:, 1]

# Signed error of the autograd derivatives on a linear scale.
drdt_error = model_drdt - analytical_drdt
dTdt_error = model_dTdt - analytical_dTdt

# Error in order of magnitude: difference of the log10 absolute values.
log_drdt_error = np.log10( np.abs( model_drdt ) ) - np.log10( np.abs( analytical_drdt ) )
log_dTdt_error = np.log10( np.abs( model_dTdt ) ) - np.log10( np.abs( analytical_dTdt ) )

In [None]:
# Mask for what part of the domain to consider.
# Allows digging into where the model matches/deviates
# from the analytical solution.

#mask = droplet_parameters[:, 0] > 1.0e-6
mask = np.full( droplet_count, True )

# Scatter the error in log10 |dr/dt| against the error in log10 |dT/dt|
# (MLP autograd minus analytical), colored by the log10 of the model's output
# radius.
fig = plt.figure()
title_string = "Error in Model vs. Analytical Log Absolute Value dy/dt for {:s}. Colored by log Radius.\n".format( domain_name )
title_string += "Evaluated at Model Output for {:s}\n SHA: {:s}".format( model_name, current_SHA )
plt.title( title_string )
# Both axes show errors, not the analytical values themselves.
plt.xlabel( "Error in Log Absolute Value drdt (MLP autograd - Analytical)" )
plt.ylabel( "Error in Log Absolute Value dTdt (MLP autograd - Analytical)" )
plt.minorticks_on()
plt.grid( color="black", alpha=0.1 )
# The color array must be masked like x and y, otherwise any mask that drops
# droplets makes the array sizes disagree.
plt.scatter( log_drdt_error[mask], log_dTdt_error[mask], s=3, alpha=0.5, c=np.log10( scaled_model_outputs[mask, 0] ), cmap="viridis" )

ax = fig.axes[0]
plt.colorbar( label="log Radius", ax=ax )

In [None]:
# Mask for what part of the domain to consider.
# Allows digging into where the model matches/deviates
# from the analytical solution.

#mask = droplet_parameters[:, 0] > 1.0e-6
mask = np.full( droplet_count, True )

# Graph the analytical dr/dt vs. log Radius.  x, y, and the color array are
# all masked so that they stay the same size when the mask drops droplets.
fig = plt.figure()
title_string = "Analytical dr/dt vs. log Radius for {:s}. Colored by RH.\n".format( domain_name )
title_string += "Evaluated at Model Output for {:s}\n SHA: {:s}".format( model_name, current_SHA )
plt.title( title_string )
plt.xlabel( "Log Radius (m)" )
plt.ylabel( "Analytical dr/dt (m/s)" )
#plt.ylim((-5,5))
plt.scatter( np.log10( scaled_model_outputs[mask, 0] ), dydt_data[mask, 0], c=droplet_parameters[mask,4], cmap="viridis", s=3, alpha=0.6 )
plt.minorticks_on()
plt.grid( color="black", alpha=0.1 )

ax = fig.axes[0]
plt.colorbar( label="RH", ax=ax )

# Graph the model autograd dr/dt vs. log Radius, masked the same way.
fig = plt.figure()
title_string = "Model autograd dr/dt vs. log Radius for {:s}. Colored by RH.\n".format( domain_name )
title_string += "Evaluated at Model Output for {:s}\n SHA: {:s}".format( model_name, current_SHA )
plt.title( title_string )
plt.xlabel( "Log Radius (m)" )
plt.ylabel( "Model autograd dr/dt (m/s)" )
#plt.ylim((-5, 5))
plt.scatter( np.log10( scaled_model_outputs[mask, 0] ), model_drdt[mask], c=droplet_parameters[mask,4], cmap="viridis", s=3, alpha=0.6 )
plt.minorticks_on()
plt.grid( color="black", alpha=0.1 )

ax = fig.axes[0]
plt.colorbar( label="RH", ax=ax )

In [None]:
# There is no cell to graph the linear-scale temperature ranges because they're so enormous that the graph would be useless.

In [None]:
# Mask for what part of the domain to consider.
# Allows digging into where the model matches/deviates
# from the analytical solution.

#mask = droplet_parameters[:, 0] > 1.0e-6
mask = np.full( droplet_count, True )

# Graph the analytical log10 |dr/dt| vs. log Radius.  x, y, and the color
# array are all masked so that they stay the same size when the mask drops
# droplets.
fig = plt.figure()
title_string = "Analytical Log Absolute Value dr/dt vs. log Radius for {:s}. Colored by RH.\n".format( domain_name )
title_string += "Evaluated at Model Output for {:s}\n SHA: {:s}".format( model_name, current_SHA )
plt.title( title_string )
plt.xlabel( "Log Radius (m)" )
plt.ylabel( "Analytical Log Absolute Value dr/dt (m/s)" )
#plt.ylim((-5,5))
plt.scatter( np.log10( scaled_model_outputs[mask, 0] ), np.log10( np.abs( dydt_data[mask, 0] ) ), c=droplet_parameters[mask,4], cmap="viridis", s=3, alpha=0.6 )
plt.minorticks_on()
plt.grid( color="black", alpha=0.1 )

ax = fig.axes[0]
plt.colorbar( label="RH", ax=ax )

# Graph the model autograd log10 |dr/dt| vs. log Radius, masked the same way.
fig = plt.figure()
title_string = "Model autograd Log Absolute Value dr/dt vs. log Radius for {:s}. Colored by RH.\n".format( domain_name )
title_string += "Evaluated at Model Output for {:s}\n SHA: {:s}".format( model_name, current_SHA )
plt.title( title_string )
plt.xlabel( "Log Radius (m)" )
plt.ylabel( "Model autograd Log Absolute Value dr/dt (m/s)" )
#plt.ylim((-5, 5))
plt.scatter( np.log10( scaled_model_outputs[mask, 0] ), np.log10( np.abs( model_drdt[mask] ) ), c=droplet_parameters[mask,4], cmap="viridis", s=3, alpha=0.6 )
plt.minorticks_on()
plt.grid( color="black", alpha=0.1 )

ax = fig.axes[0]
plt.colorbar( label="RH", ax=ax )

In [None]:
# Mask for what part of the domain to consider.
# Allows digging into where the model matches/deviates
# from the analytical solution.

#mask = droplet_parameters[:, 0] > 1.0e-6
mask = np.full( droplet_count, True )

# Graph the analytical log10 |dT/dt| vs. log Radius.  Both x and y are masked
# so that they stay the same size when the mask drops droplets.
fig = plt.figure()
title_string = "Analytical Log Absolute Value dT/dt vs. log Radius for {:s}. \n".format( domain_name )
title_string += "Evaluated at Model Output for {:s}\n SHA: {:s}".format( model_name, current_SHA )
plt.title( title_string )
plt.xlabel( "Log Radius (m)" )
plt.ylabel( "Analytical Log Absolute Value dT/dt (K/s)" )
plt.ylim( (-5,5) )
plt.scatter( np.log10( scaled_model_outputs[mask, 0] ), np.log10( np.abs( dydt_data[mask, 1] ) ), s=3, alpha=0.6 )
plt.minorticks_on()
plt.grid( color="black", alpha=0.1 )

# Graph the model autograd log10 |dT/dt| vs. log Radius, masked the same way.
fig = plt.figure()
title_string = "Model autograd Log Absolute Value dT/dt vs. log Radius for {:s}. \n".format( domain_name )
title_string += "Evaluated at Model Output for {:s}\n SHA: {:s}".format( model_name, current_SHA )
plt.title( title_string )
plt.xlabel( "Log Radius (m)" )
plt.ylabel( "Model autograd Log Absolute Value dT/dt (K/s)" )
plt.ylim( (-5, 5) )
plt.scatter( np.log10( scaled_model_outputs[mask, 0] ), np.log10( np.abs( model_dTdt[mask] ) ), s=3, alpha=0.6 )
plt.minorticks_on()
plt.grid( color="black", alpha=0.1 )