## Evaluating the Models

### Primary Goal: Evaluate the ML and BL models

In this notebook, I'll provide a brief tutorial on how to evaluate the final machine learning (ML) and baseline (BL) models. 

In [1]:
# Import packages 
import pandas as pd
import numpy as np
from os.path import join
import joblib
import matplotlib.pyplot as plt

# We add the github package to our system path so we can import python scripts for that repo. 
import sys
#sys.path.append('/home/monte.flora/python_packages/2to6_hr_severe_wx/')
sys.path.append('/home/samuel.varga/projects/2to6_hr_severe_wx/')
sys.path.append('/home/samuel.varga/python_packages/ml_workflow/')
sys.path.append('/home/samuel.varga/python_packages/VargaPy/')
from main.io import load_ml_data, load_bl_data
from main.verification import plot_verification 
from VargaPy.MlUtils import All_Severe, Drop_Unwanted_Variables

In [2]:
# Configuration variables (you'll need to change these based on where you store your data).
# Both values are substituted into directory names below.
FRAMEWORK = 'POTVIN'
TIMESCALE = '0to3'

# Root directory of the datasets for the chosen timescale and framework.
data_path = f'/work/samuel.varga/data/{TIMESCALE}_hr_severe_wx/{FRAMEWORK}'

<div class="alert alert-block alert-danger"> <b>CAUTION:</b> We are evaluating the models, so <code>mode</code> must be set to 'test' in <code>load_ml_data</code> and <code>load_bl_data</code>. </div>

### Step 1. Load the ML and Baseline Testing Datasets

In [8]:
# --- Target selection ---------------------------------------------------
hazard = 'tornado'   # Use tornado to get 0-2UH Baseline (author's note)
scale = '36km'       # one of 9km, 18km, 36km
target_col = f'{hazard}_severe__{scale}'

# mode must be 'test' so that the testing dataset is the one loaded.
mode = 'test'

All = True           # flag: use the all-severe target instead of a single hazard
SigSevere = False    # forwarded unchanged to All_Severe

# --- ML predictors and targets -------------------------------------------
if not All:
    # Single-hazard target identified by target_col.
    X, y, metadata = load_ml_data(base_path=data_path,
                                  mode=mode,
                                  target_col=target_col)
else:
    # All-severe target; scale[:-2] drops the trailing two characters
    # ('km') so only the numeric part of the scale is passed on.
    X, y, metadata = All_Severe(base_path=data_path,
                                mode=mode,
                                target_scale=scale[:-2],
                                FRAMEWORK=FRAMEWORK,
                                TIMESCALE=TIMESCALE,
                                SigSevere=SigSevere)

# --- Baseline predictors ---------------------------------------------------
# Load the testing dataset for the baseline model. The baseline and ML models
# share the same target values, so the baseline targets returned here are
# discarded rather than initialized separately.
bl_df, _, _ = load_bl_data(base_path=data_path,
                           mode=mode,
                           target_col=target_col,
                           TIMESCALE=TIMESCALE,
                           Big=False)

# Column of bl_df that feeds the baseline model, keyed by '<hazard>_severe'.
# Author's open question: "Need to change this?"
bl_col = {
    'hail_severe': 'hailcast__nmep_>1_25_45km',
    'wind_severe': 'ws_80__nmep_>40_45km',
    'tornado_severe': 'uh_2to5_instant__nmep_>150_27km',
}

# Author's scratch notes, kept verbatim (meaning not documented in this
# notebook -- TODO confirm what the value pairs refer to):
##2-6:
#36: 100, 45
#18: 100, 27
#9: 125, 27
#(66309 ,31127)||(15220,6082)

# Get the X input into the baseline model: the part of target_col before the
# double underscore ('<hazard>_severe') selects the baseline column.
bl_key = target_col.split('__')[0]
X_bl = bl_df[bl_col[bl_key]]

35388
63121
66309


In [19]:
# Filter the predictor columns with the project helper. Note that X is
# overwritten, so re-running this cell operates on the already-filtered X.
# The second return value is kept as ts_suff.
X, ts_suff = Drop_Unwanted_Variables(
    X,
    original=False,
    training_scale=False,
    intrastormOnly=False,
    envOnly=False,
)

Using new variables- dropping old 90th percentile
(655392, 174)
all


### Step 2. Load the ML and Baseline models. 

In [24]:
# Root directory of the saved models (change to match where yours are stored).
#base_path = f'/work/mflora/ML_2TO6HR/models/'
base_path = f'/work/samuel.varga/projects/{TIMESCALE}_hr_severe_wx/{FRAMEWORK}'
model_ind = 2  # index that appears in the all-severe "control" model file names


def _ml_model_file(name):
    """Return the path of the saved ML model of type `name`.

    `name` is the model tag used in the file name ('hist', 'logistic' or
    'random'). The path follows the `All` flag, so every ML model that is
    compared was saved for the same target: the all-severe control models
    when `All` is true, otherwise the models for `hazard`.
    """
    if All:
        return join(base_path, f'mlModels/{scale}/Varga_all_{name}_all_{scale}_control_{model_ind}.joblib')
    return join(base_path, f'mlModels/{scale}/{hazard}/Varga_all_{name}_{hazard}_{scale}.joblib')


# NOTE: an earlier, disabled variant loaded the model from
# 'mlModels/isonly/Varga_all_hist_{hazard}_{scale}.joblib' instead.
# NOTE: joblib.load unpickles the file, so only load model files you trust.

# Load the first ML model (Hist).
# When the ML model is saved by the CalibratedPipelineHyperOptCV package,
# additional metadata is stored with it; we need the model and the features.
ml_data = joblib.load(_ml_model_file('hist'))

# Load the baseline model that matches the chosen target.
if All:
    bl_file = f'blModels/all_baseline_model_{scale}.joblib'
else:
    bl_file = f'blModels/{target_col.split("_")[0]}_baseline_model_{scale}.joblib'
bl_model = joblib.load(join(base_path, bl_file))

ml_model = ml_data['model']
# Columns the Hist model was saved with; used below to order the X input.
features = ml_data['X'].columns

ml_models = [ml_model]

# Add the Logistic and Random models to the list. They are loaded from the
# same directory layout as the Hist model, so the single-hazard case no longer
# mixes a hazard-specific Hist model with all-severe Logistic/Random models.
for name in ['logistic', 'random']:
    ml_data = joblib.load(_ml_model_file(name))
    ml_model = ml_data['model']
    ml_models.append(ml_model)

names = ['Hist', 'Logistic', 'Random']
estimators = list(zip(names, ml_models))

# Make sure the X input has the features in the correct order.
X = X[features]

## Step 2a. Load HGBT only and baseline

### Step 3. Evaluate the ML and Baseline models using Reliability, Performance, and ROC Diagrams

In [None]:
# Uncomment and modify the filename below to save the figure:
#plt.savefig(f'{target_col}.png')

# The baseline model is passed as a single (label, model) pair.
baseline_estimators = [('BL', bl_model)]

# X_bl and y are pandas objects, so `.values` is used to hand
# plot_verification plain numpy arrays.
plot_verification(
    estimators=estimators,
    baseline_estimators=baseline_estimators,
    X=X,
    X_baseline=X_bl.values,
    y=y.values,
    n_boot=10,
    SRSPlot=False,
)

#150 27
#75 9

  vertices = concatenate([
  vertices = concatenate([
  vertices = concatenate([
  vertices = concatenate([
  vertices = concatenate([
  vertices = concatenate([
  vertices = concatenate([
  vertices = concatenate([


In [29]:
# Mean of the target values, expressed as a percentage.
mean_target_pct = 100 * np.mean(y)
print(mean_target_pct)

0.5720240710902788
