In [None]:
import os
import multiprocessing

# Limit numpy to 1 thread so that
# we can parallelize the error analysis
# properly (one worker process per core instead of
# several threads per process competing for cores).
# These are set before numpy is imported below; the
# threading backends typically read them at load time.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"

import matplotlib.pyplot as plt
import numpy as np
import torch
import pandas as pd

from droplet_approximation import *

# Likewise limit pytorch to 1 thread
torch.set_num_threads( 1 )

# set_num_interop_threads() may only be called once per process and raises
# RuntimeError afterwards, so guard it to keep this cell re-runnable without
# restarting the kernel.
try:
    torch.set_num_interop_threads( 1 )
except RuntimeError:
    if torch.get_num_interop_threads() != 1:
        print( "Warning: could not limit pytorch inter-op threads to 1 "
               f"(currently {torch.get_num_interop_threads()}); restart the kernel to apply it." )


RuntimeError: Error: cannot set number of interop threads after parallel work has started or set_num_interop_threads called

In [None]:
# ---------------------------------------------------------------------------
# Models under test
# ---------------------------------------------------------------------------

# Checkpoint paths, one entry per model to evaluate.  The entry below is
# "Model Box Uncoupled 400M l1 Residual 14 Epochs" in the group drive.
model_load_paths = [
    "../models/network_box_uncoupled_400M_l1_residual_epoch_14.pth",
]

# Parameter ranges that go with
# "Model Box Uncoupled 400M l1 Residual 14 Epochs" in the group drive.
parameter_ranges = dict(
    radius=( -6.75, -3.00 ),
    relative_humidity=( 0.98, 1.11 ),
)

# Commit for each model, in the same order as model_load_paths
commit_SHAs = [
    "369bacc0ba5c7367f17ec71707a1df64afd1b6f5",
]

# ---------------------------------------------------------------------------
# Test data
# ---------------------------------------------------------------------------

# Point this at wherever the testing data is stored
particles_root = "../data/particles"
dirs_per_level = 256

# Fraction of the data to load: the notebook reads
# subset_fraction / dirs_per_level of the overall dataset.
subset_fraction = 1

# ---------------------------------------------------------------------------
# Analysis options
# ---------------------------------------------------------------------------

# True -> graph/analyze with iterative inference, False -> direct inference
iterative = True

# CUSUM parameters that work well for iterative inference
# (could probably be dialed in more)
cusum_error_tolerance = np.array( ( 0.02, 0.10 ) )
cusum_error_threshold = np.array( ( 0.08, 0.40 ) )

# CUSUM parameters that work well for non-iterative inference
# (could probably be dialed in more)
#cusum_error_tolerance = np.array( [ 0.005, 0.05 ] )
#cusum_error_threshold = np.array( [ 0.02, 0.20 ] )

norm = standard_norm

# Number of processes/batches used for the analysis.  Leaving
# number_processes at 0 makes the notebook fall back to
# multiprocessing.cpu_count() - 1.
number_processes = 0
number_batches   = 1

# Number of worst-scoring particles to graph
number_graphs = 3

# Explicit x-y-z limits for the deviation cluster graph.  Some deviations
# sit in very far flung parts of parameter space, and without fixed limits
# the rest get smooshed together on the graph.
deviation_graph_x_range = ( 0.0, 2.0 )
deviation_graph_y_range = ( -6.75, -3.50 )
deviation_graph_z_range = ( 1.0, 1.07 )

In [None]:
# A configured value of 0 means "choose automatically": use every core but
# one.  Clamp to at least one worker so a single-core machine does not end
# up asking for zero processes.
if number_processes == 0:
    number_processes = max( 1, multiprocessing.cpu_count() - 1 )

NameError: name 'multiprocessing' is not defined

In [None]:
# This is to make it work with the current model. Remove if using a new model
# (passes the parameter_ranges dict from the configuration cell to the
# droplet_approximation package; presumably needed because this model was
# trained with non-default ranges -- confirm before removing).
set_parameter_ranges( parameter_ranges )

In [None]:
model_count = len( model_load_paths )

# Later cells index commit_SHAs by model, so fail here rather than after the
# (expensive) scoring step if the configuration lists are out of sync.
if len( commit_SHAs ) < model_count:
    raise ValueError( f"Expected a commit SHA for each of the {model_count} models, got {len( commit_SHAs )}" )

# Display name for each model: the checkpoint's file name up to its first ".",
# with underscores replaced by spaces.
model_names = [ path.split( "/" )[-1].split( "." )[0].replace( "_", " " ) for path in model_load_paths ]

models = [ ResidualNet() for _ in range( model_count ) ]

# All inference in this notebook runs on "cpu", so map the stored tensors
# there explicitly; otherwise a checkpoint saved from a GPU fails to load on
# a machine without one.
for i in range( model_count ):
    models[i].load_state_dict( torch.load( model_load_paths[i], map_location="cpu" ) )

In [None]:
# Extract subset_fraction / dirs_per_level of the particles.
#
# The index file is read from particles_root and the bucketing uses
# dirs_per_level, so the configuration cell is the only place to edit.
# With the defaults this is identical to the previous hard-coded
# "../data/particles/particles.index" and 256.
# NOTE(review): assumes the divisor of the particle id is also dirs_per_level
# -- confirm against the on-disk layout used by read_particles_data().
ids_index    = np.fromfile( os.path.join( particles_root, "particles.index" ), dtype=np.int32 )
filtered_ids = ids_index[ ( ( ids_index // dirs_per_level ) % dirs_per_level < subset_fraction ) ]

df = read_particles_data( particles_root, filtered_ids, dirs_per_level )

In [None]:
# Build one ScoringReport per loaded model, pairing each model with its
# display name.  Every report is evaluated on "cpu" over the same particle
# subset and with the same CUSUM/analysis settings from the configuration cell.
score_reports = [
    ScoringReport(
        particles_root,
        filtered_ids,
        dirs_per_level,
        model,
        model_name,
        "cpu",
        cusum_error_tolerance=cusum_error_tolerance,
        cusum_error_threshold=cusum_error_threshold,
        iterative=iterative,
        norm=norm,
        number_processes=number_processes,
        number_batches=number_batches,
        max_clusters=7,
        parameter_ranges=parameter_ranges,
    )
    for model, model_name in zip( models, model_names )
]


In [None]:
# Draw the 3D deviation cluster graph for every model's report and clamp its
# axes to the limits chosen in the configuration cell.
for score_report in score_reports:
    # We set precision to 2 because otherwise everything is labeled
    # with very long decimals. We can fix this more thoroughly later
    with np.printoptions( precision=2 ):
        fig,ax = score_report.plot_deviations(label_centers=False)

    fig.set_size_inches( ( 8,12 ) )
    # NOTE(review): deviation_graph_x_range is applied to the y axis and
    # deviation_graph_y_range to the x axis.  Either the names in the
    # configuration cell or these two calls are swapped -- confirm against
    # the axis order used by plot_deviations() before changing either.
    ax.set_ylim3d( deviation_graph_x_range ) 
    ax.set_xlim3d( deviation_graph_y_range ) 
    ax.set_zlim3d( deviation_graph_z_range ) 

In [None]:
# Overall NRMSE of the first model's report.  Index the score_reports list
# (not the loop variable left over from the plotting cell above), and spell
# the attribute the same way as per_particle_nrmse.
print( score_reports[0].net_nrmse )

In [None]:
# Attach the first score report's per-particle NRMSE to the DataFrame so the
# worst particles can be selected from it later.  The dict view is turned
# into a list so that pandas receives a plain sequence.
# NOTE(review): values are assigned by position, which assumes the report's
# dictionary is ordered like the rows of df -- confirm against ScoringReport.
df["nrmse"] = list( score_reports[0].per_particle_nrmse.values() )

In [None]:
from matplotlib import colors


# For each particle, each model will yield 3 figures.
# The first will be the figure against BDF
# The second will be the figure against BE (with deviations highlighted)
# The third will be the figure's CUSUM analysis (calculated within the notebook)

# One color per deviation cluster.  The score reports are built with
# max_clusters=7, so seven colors are listed; with only six, an index past
# the end of the list is clipped to the last color and two clusters would be
# drawn identically.
# NOTE(review): assumes cluster ids run from 0 to max_clusters - 1 -- confirm.
colormap = colors.ListedColormap( ["red", "blue", "green", "orange", "black", "yellow", "purple"] )

# Graph the number_graphs particles with the largest NRMSE.  For every
# particle and every model this produces three figures: model vs. BDF,
# model vs. BE (with detected deviations marked), and a CUSUM overview.
for particle_df_index in df.nlargest( n=number_graphs, columns="nrmse" ).index:
    # Row of per-particle time series.  Its index value is what the score
    # reports' deviation_particle_ids are compared against below, so it is
    # also used as the particle's identifier in the figure titles.
    particle_df      = df.loc[particle_df_index]
    input_parameters = np.stack( particle_df[[
        "input radii",
        "input temperatures",
        "salt masses",
        "air temperatures",
        "relative humidities",
        "air densities",
        "integration times"
    ]].to_numpy(), axis=-1 )

    # Keep only the samples selected by be_success_mask; the elapsed time is
    # the running sum of the integration times (last column).
    mask             = be_success_mask( input_parameters[:, 0] )
    input_parameters = input_parameters[mask]
    times            = np.cumsum( input_parameters[:, -1] )

    if iterative:
        model_outputs = [ do_iterative_inference(
                                input_parameters[:, :-1],
                                times,
                                models[model_index],
                                "cpu"
                            ) for model_index in range( model_count ) ]
    else:
        # Direct inference: drop the final prediction and prepend the first
        # input radius/temperature so the output lines up with the inputs.
        model_outputs = [ np.insert(
                                    do_inference(
                                        input_parameters[:, :-1],
                                        input_parameters[:, -1],
                                        models[model_index],
                                        "cpu"
                                    )[:-1, :],
                                    0,
                                    input_parameters[0, :2],
                                    axis=0
                                ) for model_index in range( model_count ) ]

    bdf_output = do_iterative_bdf(
        input_parameters[:, :-1],
        times
    )
    # The first two input columns (radii, temperatures) serve as the BE reference
    be_output  = input_parameters[:, :2]

    normed_model_outputs = [ norm( model_output ) for model_output in model_outputs ]
    normed_bdf_output    = norm( bdf_output )
    normed_be_output     = norm( be_output )

    for model_index in range( model_count ):
        fig_h_bdf, ax_h_bdf = analyze_model_particle_performance(
            times,
            bdf_output,
            model_outputs[model_index],
            norm
        )

        fig_h_bdf.suptitle( f"Droplet trajectory overview for particle {particle_df_index} on model {model_names[model_index]} vs. BDF\n SHA: {commit_SHAs[model_index]}" )

        fig_h_be, ax_h_be = analyze_model_particle_performance(
            times,
            be_output,
            model_outputs[model_index],
            norm
        )

        fig_h_be.suptitle( f"Droplet trajectory overview for particle {particle_df_index} on model {model_names[model_index]} vs. BE\n SHA: {commit_SHAs[model_index]}" )

        fig_h_cusum, ax_h_cusum = plt.subplots( 2, 2, figsize=(9,8) )
        fig_h_cusum.suptitle( f"Droplet trajectory overview part 2 for particle {particle_df_index} on model { model_names[model_index]}\n SHA: {commit_SHAs[model_index] }" )

        model_cusum = np.abs( calculate_cusum( ( normed_be_output - normed_model_outputs[model_index] ).T, cusum_error_tolerance ) )

        ax_h_cusum[0][0].set_title( "Radius CUSUM chart" )
        ax_h_cusum[0][0].plot( times, model_cusum[0].T, label=["positive radius cusum", "negative radius cusum"] )
        ax_h_cusum[0][0].set_xlabel( "time (s)" )
        ax_h_cusum[0][0].axhline( y=cusum_error_threshold[0], color="red", linewidth=1, linestyle="--", label="cusum divergence threshold" )
        ax_h_cusum[0][0].set_ylabel( "CUSUM" )

        # The configured relative humidity range is ( 0.98, 1.11 ), i.e. the
        # values are not percentages, so the axis label carries no "%" unit.
        ax_h_cusum[0][1].plot( times, particle_df["relative humidities"][mask] )
        ax_h_cusum[0][1].set_title( "RH versus time for Particle " + str( particle_df_index ) )
        ax_h_cusum[0][1].set_xlabel( "time (s)" )
        ax_h_cusum[0][1].set_ylabel( "Relative Humidity" )

        ax_h_cusum[1][0].plot( times, particle_df["air temperatures"][mask] - particle_df["input temperatures"][mask] )
        ax_h_cusum[1][0].set_title( "Temperature Difference for Particle " + str( particle_df_index ) )
        ax_h_cusum[1][0].set_xlabel( "time (s)" )
        ax_h_cusum[1][0].set_ylabel( "Air - Input Temperature (K)" )

        ax_h_cusum[1][1].plot( times, particle_df["air temperatures"][mask] )
        ax_h_cusum[1][1].set_title( "Air Temperature for Particle " + str( particle_df_index ) )
        ax_h_cusum[1][1].set_xlabel( "time (s)" )
        ax_h_cusum[1][1].set_ylabel( "Air Temperature (K)" )

        # The four panels of the BE figure and of the CUSUM figure; every
        # deviation is marked on all eight of them.
        be_panel_axes    = [ ax_h_be[row][column] for row in range( 2 ) for column in range( 2 ) ]
        cusum_panel_axes = [ ax_h_cusum[row][column] for row in range( 2 ) for column in range( 2 ) ]

        model_score_report = score_reports[model_index]

        for deviation_index in np.where( model_score_report.deviation_particle_ids == particle_df_index )[0]:
            deviation_parameter = model_score_report.deviation_parameters[deviation_index]
            deviation_time      = model_score_report.deviation_times[deviation_index]
            deviation_cluster   = model_score_report.deviation_clusters[deviation_index]

            # Must be a plain string: a trailing comma here would make it a
            # one-element tuple and the legend would show the tuple's repr.
            line_label = f"{deviation_parameter.name.lower()} deviation, cluster {deviation_cluster}"

            for deviation_axis in be_panel_axes + cusum_panel_axes:
                deviation_axis.axvline( x=deviation_time, linewidth=1, linestyle="--", label=line_label, color=colormap( deviation_cluster ) )

        # Only draw a legend where there is something to list; panels without
        # labelled artists would otherwise trigger a matplotlib warning.
        for legend_axis in cusum_panel_axes + be_panel_axes:
            if legend_axis.get_legend_handles_labels()[0]:
                legend_axis.legend()

        fig_h_cusum.tight_layout()

        # plt.show() rather than Figure.show(): the latter only warns on the
        # non-GUI inline backend used by notebooks.
        plt.show()
