# Bootstrap Evaluation (Diatom)

## Importing

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy

import os
import lzma
import dill

from tqdm import tqdm

import cmocean.cm as cm
import salishsea_tools.viz_tools as sa_vi

from sklearn.utils import resample


## Plotting (regions)

In [2]:
def plot_box(ax, corn, colour):
    """Outline a rectangular region on ``ax``.

    Parameters
    ----------
    ax : object with a matplotlib-style ``plot`` method
        Axes the outline is drawn on.
    corn : indexable of four numbers
        ``corn[0]`` / ``corn[1]`` are the two vertical (y) limits and
        ``corn[2]`` / ``corn[3]`` the two horizontal (x) limits.
    colour : matplotlib colour
        Colour of the outline.
    """
    y_first, y_second = corn[0], corn[1]
    x_first, x_second = corn[2], corn[3]

    # Visit the four corners and return to the start so the outline is closed.
    xs = [x_first, x_second, x_second, x_first, x_first]
    ys = [y_first, y_first, y_second, y_second, y_first]

    ax.plot(xs, ys, '-', color=colour)
    

## Plotting (histograms)

In [3]:
def plot_hist(name1, variable1, name2, variable2, title, boxnames):
    """Overlay per-region histograms of two metric sets and summarise them.

    One subplot is drawn per entry of ``boxnames``; column ``j`` of each
    variable is drawn as a step histogram on subplot ``j``.

    Parameters
    ----------
    name1, name2 : str
        Legend labels for the first and second metric set.
    variable1, variable2 : 2-D array supporting ``[:, j]`` indexing
        Metric values; column ``j`` belongs to ``boxnames[j]``.
        (Rows are presumably bootstrap replicates -- TODO confirm against
        the code that produced the metric files.)
    title : str
        Figure-level title.
    boxnames : sequence of str
        Region names, used as subplot titles. Must not be empty.

    Returns
    -------
    tuple of numpy.ndarray
        ``(stats1, stats2)``: object arrays with one
        ``scipy.stats.describe`` record per region, in ``boxnames`` order.
    """
    n_boxes = len(boxnames)

    # squeeze=False keeps the axes container two-dimensional, so a single
    # region no longer comes back as a bare Axes that cannot be indexed.
    fig, axs = plt.subplots(1, n_boxes, figsize=(20, 6), layout='tight', squeeze=False)
    axs = axs[0]

    stats1 = []
    stats2 = []

    for j in range(n_boxes):
        # Only the first subplot carries labels (and the y-axis label), so the
        # figure legend shows each series exactly once.
        is_first = j == 0

        for label, variable, stats in ((name1, variable1, stats1),
                                       (name2, variable2, stats2)):
            column = variable[:, j]
            axs[j].hist(column, histtype='step', label=label if is_first else None)
            stats.append(scipy.stats.describe(column))

        if is_first:
            axs[j].set_ylabel('Frequency')

        axs[j].set_title(boxnames[j])

    # Figure-level decorations are added once, after all subplots exist;
    # calling fig.legend inside the loop stacked a new legend per region.
    fig.suptitle(title)
    fig.legend(ncol=2)

    stats1 = np.array(stats1, dtype=object)
    stats2 = np.array(stats2, dtype=object)

    return (stats1, stats2)

## Initiation

In [4]:
# Variable being evaluated, together with its unit and category labels.
name, units, category = 'Flagellate', '[mmol m-2]', 'Concentrations'

# Datasets opened for the evaluation; `ds` is later indexed by `name`.
ds = xr.open_dataset(
    '/data/ibougoudis/MOAD/files/integrated_original.nc'
)
ds2 = xr.open_dataset(
    '/data/ibougoudis/MOAD/files/external_inputs.nc'
)


## Regions

In [None]:
# Background map: slice 0 along the leading dimension of the selected variable
# (presumably the first time step -- confirm against the dataset).
fig, ax = plt.subplots(1, 1, figsize=(5, 9))

# Work on a copy: `cm.deep` is the shared cmocean colormap object, and
# set_bad() on it would change every later use of `cm.deep` in this kernel.
mycmap = cm.deep.copy()
mycmap.set_bad('grey')
ax.pcolormesh(ds[name][0], cmap=mycmap)
sa_vi.set_aspect(ax)

# Each box is [y_start, y_stop, x_start, x_stop] in grid indices.
SoG_north = [650, 730, 100, 200]
SoG_center = [450, 550, 200, 300]
Fraser_plume = [380, 460, 260, 330]
SoG_south = [320, 380, 280, 350]
Haro_Boundary = [290, 350, 210, 280]
JdF_west = [250, 425, 25, 125]
JdF_east = [200, 290, 150, 260]
PS_all = [0, 200, 80, 320]
PS_main = [20, 150, 200, 280]

# The three lists below are parallel: same order, one entry per region.
boxnames = ['SoG_north','SoG_center','Fraser_plume','SoG_south', 'Haro_Boundary', 'JdF_west', 'JdF_east', 'PS_all', 'PS_main']
boxes = [SoG_north,SoG_center,Fraser_plume,SoG_south,Haro_Boundary,JdF_west,JdF_east,PS_all,PS_main]
box_colours = ['g', 'b', 'm', 'k', 'm', 'c', 'w', 'm', 'r']

for box, colour in zip(boxes, box_colours):
    plot_box(ax, box, colour)

# Labels are matched to the box outlines in the order they were drawn.
fig.legend(boxnames)

# Region-index map: NaN outside every box, otherwise the position of the box
# in `boxes`. Where boxes overlap, the later box in the list wins.
regions0 = np.full((len(ds.y), len(ds.x)), np.nan)

for i, (y_start, y_stop, x_start, x_stop) in enumerate(boxes):
    regions0[y_start:y_stop, x_start:x_stop] = i

regions0 = xr.DataArray(regions0, dims=['y', 'x'])

# # Low resolution
# temp = []

# for i in boxes:
#     temp.append([x//5 for x in i])

# boxes = temp

## Loading

In [11]:
def _read_metrics(file_path):
    """Return the object stored in the dill/pickle file at ``file_path``."""
    # NOTE: dill, like pickle, can run arbitrary code while loading;
    # only point this at trusted result files.
    with open(file_path, 'rb') as handle:
        return dill.load(handle)


# First run being compared
path = '/data/ibougoudis/MOAD/files/results/' + name + '/func_reg_boxes_s1_boot/'
train1 = _read_metrics(path + 'train_metrics.pkl')
test1 = _read_metrics(path + 'test_metrics.pkl')
name1 = 'original'

# Second run being compared
path = '/data/ibougoudis/MOAD/files/results/' + name + '/func_reg_boxes_s3_boot/'
train2 = _read_metrics(path + 'train_metrics.pkl')
test2 = _read_metrics(path + 'test_metrics.pkl')
name2 = 'original (Diatom)'

## Histograms

In [None]:
def _compare(metrics1, metrics2, index, title):
    """Histogram the metric stored at ``index`` for both runs, per region.

    Returns the two summary-statistics arrays produced by ``plot_hist``.
    """
    return plot_hist(name1, metrics1[index], name2, metrics2[index], title, boxnames)


# Training metrics
r_train1, r_train2 = _compare(
    train1, train2, 0, 'Correlation Coefficient (Training)')
rms_train1, rms_train2 = _compare(
    train1, train2, 1, 'Root Mean Square Error (Training)')
r_train_season1, r_train_season2 = _compare(
    train1, train2, 3, 'Correlation Coefficient (Training, no seasonality)')

# Testing metrics
r_test1, r_test2 = _compare(
    test1, test2, 0, 'Correlation Coefficient (Testing)')
rms_test1, rms_test2 = _compare(
    test1, test2, 1, 'Root Mean Square Error (Testing)')
r_test_season1, r_test_season2 = _compare(
    test1, test2, 3, 'Correlation Coefficient (Testing, no seasonality)')
spatial_error_test1, spatial_error_test2 = _compare(
    test1, test2, 5, 'Spatial Error (Testing)')


In [None]:
# Column 2 of each scipy.stats.describe record is the mean, so the table
# shows the mean training correlation per region: one row per run.
temp = (
    pd.DataFrame(
        [r_train1[:, 2], r_train2[:, 2]],
        columns=boxnames,
        index=[name1, name2],
    )
    .style
    .set_caption('Correlation Coefficient (Training)')
)
display(temp)

In [None]:
# Column 2 of each scipy.stats.describe record is the mean, so the table
# shows the mean testing correlation per region: one row per run.
temp = (
    pd.DataFrame(
        [r_test1[:, 2], r_test2[:, 2]],
        columns=boxnames,
        index=[name1, name2],
    )
    .style
    .set_caption('Correlation Coefficient (Testing)')
)
display(temp)