### Generate the Official cb-WoFS Explainability Graphics 

In [1]:
# Box-and-whisker plot for each of the top 5 predictors,
# with a pink line for a given example.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from display_names import to_display_name, to_units
import sys
sys.path.insert(0, '/home/monte.flora/python_packages/WoF_post')
sys.path.insert(0, '/home/monte.flora/python_packages/wofs_ml_severe')

from wofs_ml_severe.io.load_ml_models import load_ml_model
from wofs_ml_severe.io.io import load_ml_data
from wofs.post.utils import load_yaml
import matplotlib.ticker as ticker

lookup_file: /home/monte.flora/python_packages/WoF_post/wofs/data/psadilookup.dat
lookup_file: /home/monte.flora/python_packages/WoF_post/wofs/data/psadilookup.dat


In [2]:
import json
import math
import os

class cbWoFSExplainabilityGraphics:
    """Create per-feature box-and-whisker background graphics.

    The graphics are drawn from the training examples whose target is
    positive. For every feature passed to `create`, the x-axis limits and the
    number of tick-label decimals are recorded in `max_min_val_dict` so they
    can be written out with `save_json`.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Training features, one column per feature.
    y_train : array-like
        Target values aligned row-by-row with `X_train`; rows with a value
        greater than zero are kept.
    """

    # Directory that `save` and `save_json` write into (created on demand).
    output_dir = 'new_graphics'

    def __init__(self, X_train, y_train):
        # Only keep the examples that are matched to reports (target > 0).
        inds = np.where(np.asarray(y_train) > 0)[0]

        # NOTE: rescaling of the mid-/low-level lapse-rate features (dividing
        # by -2.7 and -3.0) existed here only as commented-out code and is
        # not applied.
        X_train_subset = X_train.iloc[inds, :].reset_index(drop=True)

        self.X_train = X_train_subset
        self.features = X_train_subset.columns

        # feature name -> {'max_val', 'min_val', 'round_int'}; filled by `create`.
        self.max_min_val_dict = {}

    def _round(self, value, mode):
        """Round an axis limit to a tidy value.

        Parameters
        ----------
        value : float
            The raw limit (e.g. a percentile of the feature).
        mode : str
            'upper' snaps large values toward +infinity; anything else snaps
            toward -infinity.

        Returns
        -------
        tuple
            `(rounded_value, round_int)` where `round_int` is the number of
            decimals chosen from the order of magnitude of `value`.

        Notes
        -----
        * Magnitudes below 10 are rounded (to nearest) to `round_int`
          decimals, so small limits such as 0.034 are no longer flattened
          to 0.0.
        * Magnitudes of 10 or more are snapped to a multiple of 5 in the
          direction given by `mode`. This now also holds for negative
          values, where the previous mirrored rounding moved the limit the
          wrong way.
        """
        if value == 0.0:
            return 0.0, 0

        magnitude = abs(value)

        # Order of magnitude. int() truncates toward zero, so every
        # magnitude in (0.1, 10) maps to 0.
        oom = int(math.log10(magnitude))

        # Mutually exclusive branches: the original non-exclusive `if` chain
        # let `oom < 0` overwrite the `oom == -1` choice.
        if oom > 1:
            round_int = 0
        elif oom >= 0:
            round_int = 1
        elif oom == -1:
            round_int = 2
        else:
            round_int = 3

        rounded = round(value, round_int)
        if magnitude < 10:
            return rounded, round_int

        # Round to a multiple of 5 for |value| >= 10. ceil/floor act on the
        # signed value, so the direction is correct for negatives as well.
        snap = math.ceil if mode == 'upper' else math.floor
        return snap(rounded / 5) * 5, round_int

    def create(self, feature):
        """Create box-and-whisker graphic for a single feature.

        The x-axis limits and tick-label precision used for the figure are
        stored in `self.max_min_val_dict[feature]`.

        Returns
        -------
        tuple
            The matplotlib `(figure, axes)` pair.
        """
        units = to_units(feature)
        pretty_name = to_display_name(feature)

        # Drop missing values once so the box statistics are computed on the
        # same finite data as the NaN-ignoring percentile limits below.
        values = self.X_train[feature].dropna().to_numpy()

        fig, ax = plt.subplots(dpi=192, nrows=1,
                               figsize=(800 / 192, 100 / 192))

        # Despine and only leave the bottom side.
        for side in ('top', 'right', 'left'):
            ax.spines[side].set_visible(False)

        # Create the box-and-whiskers; whiskers sit at these percentiles.
        whis = [0.1, 99.9]
        box_plot = ax.boxplot(x=values, vert=False,
                              whis=whis, patch_artist=True,
                              widths=0.3, showfliers=False)

        # Create a title for the feature name.
        ax.annotate(f'{pretty_name} ({units})', xy=(1.0, 1.15),
                    xycoords='axes fraction', fontsize=6, ha='right',
                    color='k', fontweight='bold')

        # Remove y tick labels.
        ax.set_yticks([])
        ax.tick_params(axis='x', labelsize=9, size=8)

        min_val, _ = self._round(np.nanpercentile(values, whis[0]), 'lower')
        max_val, round_int = self._round(np.nanpercentile(values, whis[-1]), 'upper')

        self.max_min_val_dict[feature] = {'max_val': max_val,
                                          'min_val': min_val,
                                          'round_int': round_int}

        # Identify pretty tick ranges.
        ax.set_xlim(min_val, max_val)

        # Set the tick locator for the x-axis.
        ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=7))

        rng = list(ax.get_xticks())
        levels = [f"{v:.{round_int}f}" for v in rng]

        # Blank the first and last tick labels.
        if levels:
            levels[0] = ''
            levels[-1] = ''
        ax.set_xticks(rng)
        ax.set_xticklabels(labels=levels)

        # set_xticks expands the view to include every tick, and the locator
        # may return ticks just outside the requested limits. Re-apply the
        # limits so the rendered axis matches what is stored for `save_json`.
        ax.set_xlim(min_val, max_val)

        # Fill with colors.
        color = 'xkcd:medium blue'
        for patch in box_plot['boxes']:
            patch.set_facecolor(color)
        for line in box_plot['medians']:
            line.set_color('k')

        return fig, ax

    def save(self, fig, feature, target):
        """Write `fig` as a PNG into `output_dir` and close it.

        The file name is built from the lower-cased feature name (spaces
        replaced by underscores) and the target.
        """
        os.makedirs(self.output_dir, exist_ok=True)
        fname = f"{feature.lower().replace(' ', '_')}_{target}_explainability_background.png"
        # Save the figure that was passed in rather than pyplot's current one.
        fig.savefig(os.path.join(self.output_dir, fname),
                    format="png", dpi=192, bbox_inches="tight")
        plt.close(fig)

    def save_json(self, target):
        """Dump the recorded per-feature limits to `min_max_vals_<target>.json`."""
        os.makedirs(self.output_dir, exist_ok=True)
        path = os.path.join(self.output_dir, f"min_max_vals_{target}.json")
        with open(path, "w") as outfile:
            json.dump(self.max_min_val_dict, outfile)

### Create the Explainability Graphic Per Feature For Each Hazard

1. Only using the severe wind dataset (`wind_severe_0km`); the other targets are currently commented out.

In [4]:
import joblib

# Load the saved logistic-regression artifact for the wind_severe_0km target
# (second-hour, realtime), as given by the file name.
# NOTE: joblib.load unpickles the file, so only load trusted artifacts.
# NOTE(review): presumably a dict given the variable name -- confirm its keys.
model_dict = joblib.load(
    '/work/mflora/ML_DATA/NEW_ML_MODELS/'
    'LogisticRegression_wind_severe_0km_None_second_hour_realtime.joblib'
)

In [3]:
import sys
sys.path.insert(0, '/home/monte.flora/python_packages/wofs_ml_severe')
sys.path.insert(0, '/home/monte.flora/python_packages/WoF_post')
from wofs_ml_severe.io.io import load_ml_data

# Targets to generate graphics for. Only the severe-wind target is active;
# the trailing comment lists other target names that are switched off.
targets = ['wind_severe_0km']#, 'hail_severe_0km', 'hail_severe_0km', 'all_severe', 'all_sig_severe']

for target in targets:
    print(target.upper())

    # Load the first-hour training data for this target.
    X_train, y_train, metadata = load_ml_data(
        target,
        lead_time='first_hour',
        mode=None,
        baseline=False,
        return_only_df=False,
        load_reduced=True,
        base_path='/work/mflora/ML_DATA/DATA',
    )

    # Features are cast to float before the graphics helper subsets them.
    explainer = cbWoFSExplainabilityGraphics(X_train.astype(float), y_train)

    # One background graphic per feature column, written straight to disk.
    for feature in X_train.columns:
        fig, _ = explainer.create(feature)
        explainer.save(fig, feature, target)

    # Persist the axis limits collected while creating the graphics.
    explainer.save_json(target)

WIND_SEVERE_0KM
Only keeping warm season cases for the official training!


### Create the top 5 predictor global panel 