In [1]:
!pwd

/Users/jerrylin/Desktop/OfflineWork/hypersampling


# Import packages

In [2]:
import numpy as np
import pandas as pd
import xarray as xr
from scipy import stats
import scipy
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.pylab as plb
from scipy.stats import ranksums
import os
import pickle
from scipy.stats import norm
from scipy.stats import spearmanr
from scipy.stats import mannwhitneyu
from scipy.stats import gaussian_kde
from scipy.signal import argrelextrema

from tqdm import tqdm

from preprocessing_functions import *

# Load data

In [3]:
# Matplotlib colour for each experiment configuration, keyed by its short id.
color_dict = dict(
    standard='blue',
    specific='lime',
    nomemory='red',
    nowind='green',
    noozone='purple',
    nocoszrs='orange',
    mae='pink',
    nodropout='silver',
    multiclimate='gold',
)

# Same palette keyed by human-readable names (note: 'specific' keeps its short id here).
color_dict2 = dict([
    ('standard', 'blue'),
    ('specific', 'lime'),
    ('no memory', 'red'),
    ('no wind', 'green'),
    ('no ozone', 'purple'),
    ('no zenith angle', 'orange'),
    ('MAE', 'pink'),
    ('no dropout', 'silver'),
    ('multiclimate', 'gold'),
])

# Display label for each configuration id (used for legends).
label_dict = dict(
    standard='standard',
    specific='specific humidity',
    nomemory='no memory',
    nowind='no wind',
    noozone='no ozone',
    nocoszrs='no zenith angle',
    mae='MAE',
    nodropout='no dropout',
    multiclimate='multiclimate',
)

# Display labels in configuration order.
config_names = list(label_dict.values())

# Reference offline RMSE values for three baselines (average, martingale, MLR);
# they are drawn as horizontal dashed lines in the offline figures.
average_heating_rmse = 4.567616448322416e-05
average_moistening_rmse = 2.8896629675143484e-05
martingale_heating_rmse = 3.1405422356850924e-05
martingale_moistening_rmse = 2.9224563847088255e-05
mlr_heating_rmse = 2.600916471955618e-05
mlr_moistening_rmse = 2.229119069533864e-05

def get_model_info(config_name, base_dir='.'):
    """Load the saved results for one experiment configuration.

    Three pickle files are read from ``<base_dir>/<config_name>/``:
    ``<config_name>_df.pandas.pkl`` (via ``pd.read_pickle``),
    ``prognostic_T.pkl`` and ``prognostic_Q.pkl`` (via ``pickle.load``).

    Parameters
    ----------
    config_name : str
        Name of the configuration; it is both the sub-directory name and the
        prefix of the pandas pickle.
    base_dir : str, optional
        Directory that contains the per-configuration sub-directories.
        Defaults to the current working directory, which is what the
        notebook relied on implicitly before.

    Returns
    -------
    tuple
        ``(model_info, prognostic_T, prognostic_Q)`` exactly as stored in the
        three files.

    Raises
    ------
    FileNotFoundError
        If any of the three files is missing.
    """
    config_dir = os.path.join(base_dir, config_name)
    model_info = pd.read_pickle(os.path.join(config_dir, f'{config_name}_df.pandas.pkl'))

    # NOTE: unpickling executes arbitrary code; only point this at trusted files.
    with open(os.path.join(config_dir, 'prognostic_T.pkl'), 'rb') as f:
        prognostic_T = pickle.load(f)
    with open(os.path.join(config_dir, 'prognostic_Q.pkl'), 'rb') as f:
        prognostic_Q = pickle.load(f)
    return model_info, prognostic_T, prognostic_Q

def get_drift_col(prognostic_diffs, model_info, name, threshold):
    """Build a boolean Series flagging entries whose values exceed ``threshold``.

    ``prognostic_diffs`` is iterated and indexed by the items it yields; for
    each one the result is True when at least one element of
    ``prognostic_diffs[item]`` is greater than ``threshold``. The flags are
    returned in iteration order as a Series called ``name`` that carries
    ``model_info.index`` as its index.
    """
    exceeded_flags = []
    for key in prognostic_diffs:
        n_over = sum(prognostic_diffs[key] > threshold)
        exceeded_flags.append(n_over != 0)
    return pd.Series(exceeded_flags, index = model_info.index, name = name)

# Load, for each of the nine configurations, the model summary table and the two
# prognostic pickles returned by get_model_info. The individual variable names are
# referenced directly by later plotting cells, so they are kept explicit.
standard_model_info, standard_prognostic_T, standard_prognostic_Q = get_model_info('standard')
specific_model_info, specific_prognostic_T, specific_prognostic_Q = get_model_info('specific')
nomemory_model_info, nomemory_prognostic_T, nomemory_prognostic_Q = get_model_info('nomemory')
nowind_model_info, nowind_prognostic_T, nowind_prognostic_Q = get_model_info('nowind')
noozone_model_info, noozone_prognostic_T, noozone_prognostic_Q = get_model_info('noozone')
nocoszrs_model_info, nocoszrs_prognostic_T, nocoszrs_prognostic_Q = get_model_info('nocoszrs')
mae_model_info, mae_prognostic_T, mae_prognostic_Q = get_model_info('mae')
nodropout_model_info, nodropout_prognostic_T, nodropout_prognostic_Q = get_model_info('nodropout')
multiclimate_model_info, multiclimate_prognostic_T, multiclimate_prognostic_Q = get_model_info('multiclimate')

# Full model table of every configuration, keyed by configuration id.
config_dict = dict(
    standard=standard_model_info,
    specific=specific_model_info,
    nomemory=nomemory_model_info,
    nowind=nowind_model_info,
    noozone=noozone_model_info,
    nocoszrs=nocoszrs_model_info,
    mae=mae_model_info,
    nodropout=nodropout_model_info,
    multiclimate=multiclimate_model_info,
)

# Rows with num_months == 12 only. Copies are taken so that tagging them below
# does not modify the tables held in config_dict.
survived_dict = {
    name: info[info['num_months'] == 12].copy()
    for name, info in config_dict.items()
}

# Tag every surviving row with its configuration id.
for config_name, survivors in survived_dict.items():
    survivors.loc[:, 'config_name'] = config_name

# Stack the surviving models of every configuration into one table, in the
# insertion order of survived_dict (standard, specific, nomemory, ...).
combined_df = pd.concat(list(survived_dict.values()))

# Pooled online errors across all configurations.
# Fix: the 'specific' entry previously contributed its 'offline_moistening' column
# to both pooled series (copy-paste slip); every configuration now contributes
# the matching online column.
online_temperature_overall = pd.concat(
    [survivors['online_temperature'] for survivors in survived_dict.values()],
    axis = 0,
)

online_moisture_overall = pd.concat(
    [survivors['online_moisture'] for survivors in survived_dict.values()],
    axis = 0,
)

def get_top_models(config_name, n=5):
    """Return the indices of the best surviving models of one configuration.

    The ``n`` rows with the smallest 'online_temperature' and the ``n`` rows
    with the smallest 'online_moisture' in ``survived_dict[config_name]`` are
    selected, and the union of their index labels is returned. The size of
    that union is printed as a side effect.

    Parameters
    ----------
    config_name : str
        Key into ``survived_dict``.
    n : int, optional
        Number of rows taken per metric (default 5, the previous fixed value).

    Returns
    -------
    set
        Index labels that are in the top ``n`` for at least one metric.
    """
    survivors = survived_dict[config_name]
    top_models = set()
    for metric in ('online_temperature', 'online_moisture'):
        # sort_values is ascending, so head(n) holds the n lowest errors
        top_models.update(survivors.sort_values(metric).head(n).index)
    print(len(top_models))
    return top_models


In [4]:
get_top_models('standard')

9


{5, 14, 17, 26, 27, 38, 46, 50, 155}

In [5]:
get_top_models('specific')

8


{5, 11, 16, 48, 53, 92, 118, 150}

In [6]:
get_top_models('nomemory')

8


{7, 12, 16, 22, 25, 48, 96, 98}

In [7]:
get_top_models('nowind')

8


{1, 54, 59, 100, 101, 105, 108, 113}

In [8]:
get_top_models('noozone')

9


{2, 10, 22, 46, 51, 65, 66, 95, 109}

In [9]:
get_top_models('nocoszrs')

8


{4, 14, 16, 34, 50, 61, 77, 120}

In [10]:
get_top_models('mae')

6


{2, 7, 8, 17, 18, 19}

In [11]:
get_top_models('nodropout')

10


{5, 28, 152, 158, 204, 217, 269, 278, 301, 317}

In [12]:
get_top_models('multiclimate')

9


{13, 24, 43, 50, 59, 70, 81, 98, 108}

In [21]:
standard_model_info['offline_heating'].describe()['std']**2

4.697489023621918e-13

In [28]:
standard_model_info['offline_heating'].describe()

count    3.300000e+02
mean     2.233679e-05
std      6.853823e-07
min      2.096917e-05
25%      2.180873e-05
50%      2.224929e-05
75%      2.282469e-05
max      2.447762e-05
Name: offline_heating, dtype: float64

In [20]:
standard_model_info['offline_moistening'].describe()['std']**2

1.8422675403501213e-13

In [29]:
standard_model_info['offline_moistening'].describe()

count    3.300000e+02
mean     2.040827e-05
std      4.292164e-07
min      1.944032e-05
25%      2.008001e-05
50%      2.041133e-05
75%      2.073622e-05
max      2.161173e-05
Name: offline_moistening, dtype: float64

In [19]:
nodropout_model_info['offline_heating'].describe()['std']**2

1.0700628951486035e-13

In [41]:
multiclimate_model_info['offline_heating'].describe()

count    3.300000e+02
mean     2.232633e-05
std      7.615547e-07
min      2.087760e-05
25%      2.173532e-05
50%      2.227286e-05
75%      2.283992e-05
max      2.470938e-05
Name: offline_heating, dtype: float64

In [30]:
nodropout_model_info['offline_heating'].describe()

count    3.300000e+02
mean     2.145106e-05
std      3.271182e-07
min      2.080738e-05
25%      2.125139e-05
50%      2.140811e-05
75%      2.158145e-05
max      2.394297e-05
Name: offline_heating, dtype: float64

In [23]:
nodropout_model_info['offline_moistening'].describe()['std']**2

4.608093124020497e-14

In [53]:
mae_model_info['offline_moistening'].describe()

count    3.300000e+02
mean     2.029807e-05
std      6.122576e-07
min      1.908866e-05
25%      1.985188e-05
50%      2.030701e-05
75%      2.077047e-05
max      2.193616e-05
Name: offline_moistening, dtype: float64

In [31]:
nodropout_model_info['offline_moistening'].describe()

count    3.300000e+02
mean     1.977750e-05
std      2.146647e-07
min      1.934995e-05
25%      1.964927e-05
50%      1.975770e-05
75%      1.986903e-05
max      2.134857e-05
Name: offline_moistening, dtype: float64

In [22]:
# Fractional reduction in the variance of offline heating RMSE going from the
# 'standard' to the 'nodropout' configuration. The literals are pasted from the
# std**2 outputs of the earlier cells (standard first, nodropout second).
(4.697489023621918e-13 - 1.0700628951486035e-13)/4.697489023621918e-13

0.7722053442237636

In [24]:
# Fractional reduction in the variance of offline moistening RMSE going from the
# 'standard' to the 'nodropout' configuration. The literals are pasted from the
# std**2 outputs of the earlier cells (standard first, nodropout second).
(1.8422675403501213e-13 - 4.608093124020497e-14)/1.8422675403501213e-13

0.7498684081930503

In [25]:
np.var([.1,.2,.3,.4,.5,.6,.7,.8,.9,1])

0.02

In [27]:
np.var([.1,.11,.12,.13,.14,.15,.16,.17,.18,1])

0.067164

# Offline story

In [None]:
# Offline RMSE of every model, one curve per configuration.
# Left column: heating, right column: moistening.
# Top row: 'standard' against the configurations with an input removed;
# bottom row: 'standard' against the remaining variants.
fig, axs = plt.subplots(2, 2, figsize=(10, 10))
title_fontsize = 12.1

# Configurations drawn in each row of panels.
panel_configs = [
    ['standard', 'nowind', 'noozone', 'nocoszrs'],
    ['standard', 'specific', 'mae', 'nodropout', 'multiclimate'],
]

# One entry per column: (dataframe column, panel title, y label, y limits).
metrics = [
    ('offline_heating', 'Offline Heating Root Mean Squared Error', 'K/s', (2e-5, 2.5e-5)),
    ('offline_moistening', 'Offline Moistening Root Mean Squared Error', 'g/kg', (1.8e-5, 2.4e-5)),
]

for col, (metric, title, ylabel, ylim) in enumerate(metrics):
    for row, configs in enumerate(panel_configs):
        panel = axs[row, col]
        for config in configs:
            values = config_dict[config][metric]
            # x is the row position; use the actual length instead of a hard-coded 330
            panel.plot(np.arange(len(values)), values,
                       color = color_dict[config], label = label_dict[config])
        panel.set_ylabel(ylabel)
        panel.set_ylim(*ylim)
        panel.grid(True, which="both", ls="--")

    axs[0, col].yaxis.get_offset_text().set_size(8)
    axs[0, col].set_title(title, fontsize = title_fontsize)
    axs[-1, col].set_xlabel('model rank', fontsize = 12.5)

# Legends only on the right-hand panels; the curve labels are the display names.
for row in range(len(panel_configs)):
    axs[row, 1].legend(loc='upper right', fontsize = 11.3)

sublabels = ['a', 'b', 'c', 'd']
for i, ax in enumerate(axs.flat):
    ax.text(-0.1, 1.05, sublabels[i], transform=ax.transAxes,
            fontsize=14, va='top', ha='right')

plt.tight_layout()
plt.show()

In [None]:
# Offline RMSE of every model, one curve per configuration.
# Left column: heating, right column: moistening.
# Row 0: all configurations plus the three baseline RMSE levels;
# rows 1-2: zoomed comparisons of 'standard' against two subsets of configurations.
fig, axs = plt.subplots(3, 2, figsize=(10, 15))
title_fontsize = 12.1

# Configurations drawn in each row of panels.
panel_configs = [
    ['standard', 'specific', 'nomemory', 'nowind', 'noozone', 'nocoszrs', 'mae', 'nodropout', 'multiclimate'],
    ['standard', 'nowind', 'noozone', 'nocoszrs'],
    ['standard', 'specific', 'mae', 'nodropout', 'multiclimate'],
]

# Legend name and colour of the baseline lines, in drawing order.
baseline_styles = [('average', '#8B4513'), ('martingale', '#FF00FF'), ('MLR', '#00FFFF')]

# One entry per column of panels.
metrics = [
    dict(column = 'offline_heating', kind = 'heating',
         title = 'Offline Heating Root Mean Squared Error', ylabel = 'K/s',
         ylims = [(1e-5, 5e-5), (2e-5, 2.5e-5), (2e-5, 2.5e-5)],
         baselines = [average_heating_rmse, martingale_heating_rmse, mlr_heating_rmse]),
    dict(column = 'offline_moistening', kind = 'moistening',
         title = 'Offline Moistening Root Mean Squared Error', ylabel = 'g/kg',
         ylims = [(1e-5, 5e-5), (1.8e-5, 2.4e-5), (1.8e-5, 2.4e-5)],
         baselines = [average_moistening_rmse, martingale_moistening_rmse, mlr_moistening_rmse]),
]

for col, metric in enumerate(metrics):
    for row, configs in enumerate(panel_configs):
        panel = axs[row, col]
        for config in configs:
            values = config_dict[config][metric['column']]
            # x is the row position; use the actual length instead of a hard-coded 330
            panel.plot(np.arange(len(values)), values,
                       color = color_dict[config], label = label_dict[config])
        panel.set_ylabel(metric['ylabel'])
        panel.set_ylim(*metric['ylims'][row])
        panel.grid(True, which="both", ls="--")

    top_panel = axs[0, col]
    top_panel.yaxis.get_offset_text().set_size(8)
    top_panel.set_title(metric['title'], fontsize = title_fontsize)
    axs[-1, col].set_xlabel('model rank', fontsize = 12.5)

    # Baselines go on after the curves so they are drawn on top of them.
    for (baseline_name, baseline_color), level in zip(baseline_styles, metric['baselines']):
        top_panel.axhline(y = level, color = baseline_color, linestyle='--',
                          label = f"{baseline_name} {metric['kind']} RMSE")

# Top-right legend: baselines first, then the configurations, with short names.
top_right_lines = axs[0, 1].get_lines()
n_configs = len(panel_configs[0])
config_handles = list(top_right_lines[:n_configs])
baseline_handles = list(top_right_lines[n_configs:])
axs[0, 1].legend(baseline_handles + config_handles,
                 [baseline_name for baseline_name, _ in baseline_styles]
                 + [label_dict[config] for config in panel_configs[0]],
                 loc='upper right', ncol = 2, fontsize = 11.3)

# Remaining right-hand panels: the curve labels are already the display names.
for row in (1, 2):
    axs[row, 1].legend(loc='upper right', fontsize = 11.3)

sublabels = ['a', 'b', 'c', 'd', 'e', 'f']
for i, ax in enumerate(axs.flat):
    ax.text(-0.1, 1.05, sublabels[i], transform=ax.transAxes,
            fontsize=14, va='top', ha='right')

plt.tight_layout()
plt.show()

In [None]:
# Quick single-axes comparison of offline heating RMSE for a handful of configurations.
for config in ('standard', 'nowind', 'noozone', 'nocoszrs', 'multiclimate'):
    plt.plot(np.arange(330), config_dict[config]['offline_heating'],
             color = color_dict[config], label = label_dict[config])
plt.legend()

# Online story

In [None]:
# Histograms of online temperature RMSE for the surviving models: each panel
# overlays 'standard' with one other configuration on shared bins and axes.
fig, axs = plt.subplots(2, 4, figsize=(14, 8))
alpha = .5
# 50 shared bin edges between 0 and 10
bins = np.linspace(0, 10, 50)

# One comparison configuration per panel, in row-major panel order.
compared_configs = ['specific', 'nomemory', 'nowind', 'noozone',
                    'nocoszrs', 'mae', 'nodropout', 'multiclimate']

for panel, compared in zip(axs.flat, compared_configs):
    for config in ('standard', compared):
        panel.hist(survived_dict[config]['online_temperature'], bins = bins,
                   label = label_dict[config], color = color_dict[config], alpha = alpha)
    panel.set_ylim(0, 30)
    panel.set_xlim(0, 10)
    panel.legend()
    panel.grid(True, which="both", ls="--")

# Axis labels only on the outer panels: y on the left column, x on the bottom row.
for panel in axs[:, 0]:
    panel.set_ylabel('count')
for panel in axs[1, :]:
    panel.set_xlabel('Online Temperature RMSE (K)')

sublabels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
for i, ax in enumerate(axs.flat):
    ax.text(-0.13, 1.05, sublabels[i], transform=ax.transAxes,
            fontsize=13, va='top', ha='right')

# Show the plot
plt.show()

In [None]:
# Histograms of online moisture RMSE for the surviving models: each panel
# overlays 'standard' with one other configuration on shared bins and axes.
fig, axs = plt.subplots(2, 4, figsize=(14, 8))
alpha = .5
# 50 shared bin edges between 0 and 2
bins = np.linspace(0, 2, 50)

# One comparison configuration per panel, in row-major panel order.
compared_configs = ['specific', 'nomemory', 'nowind', 'noozone',
                    'nocoszrs', 'mae', 'nodropout', 'multiclimate']

for panel, compared in zip(axs.flat, compared_configs):
    for config in ('standard', compared):
        panel.hist(survived_dict[config]['online_moisture'], bins = bins,
                   label = label_dict[config], color = color_dict[config], alpha = alpha)
    panel.set_ylim(0, 30)
    panel.set_xlim(0, 2)
    panel.legend()
    panel.grid(True, which="both", ls="--")

# Axis labels only on the outer panels: y on the left column, x on the bottom row.
for panel in axs[:, 0]:
    panel.set_ylabel('count')
for panel in axs[1, :]:
    panel.set_xlabel('Online Moisture RMSE (g/kg)')

sublabels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
for i, ax in enumerate(axs.flat):
    ax.text(-0.13, 1.05, sublabels[i], transform=ax.transAxes,
            fontsize=13, va='top', ha='right')

# Show the plot
plt.show()

# Hypothesis tests

In [None]:
# Difference in median online temperature RMSE between the surviving 'noozone' and
# 'standard' models; later cells use it as the location shift for the
# Mann-Whitney comparison and the bootstrap power estimate.
loc_shift = np.median(survived_dict['noozone']['online_temperature']) - np.median(survived_dict['standard']['online_temperature'])

In [None]:
mannwhitneyu(survived_dict['standard']['online_temperature'], survived_dict['standard']['online_temperature'] + loc_shift)

In [None]:
# Analytic sample-size estimate: (z_0.80 + z_0.95)^2 / (6 * (p - 0.5)^2) with p = 8363/(141*141).
# NOTE(review): this looks like a Noether-type formula for the Mann-Whitney test
# (power 0.8, one-sided 5% level, equal group sizes), with p presumably U/(n1*n2)
# taken from the mannwhitneyu cell above -- TODO confirm the origin of 8363 and 141.
((norm.ppf(.8) + norm.ppf(.95))**2)/(6*(((8363/(141*141))-.5)**2))

In [None]:
8363/(141*141)

In [None]:
164*(330/141)

In [None]:
len(survived_dict['standard']['online_temperature'])

In [None]:
8363/(141*141)

In [None]:
mannwhitneyu(survived_dict['standard']['online_temperature'], survived_dict['noozone']['online_temperature'])

In [None]:
mannwhitneyu(survived_dict['noozone']['online_temperature'], survived_dict['standard']['online_temperature'])

In [None]:
len(survived_dict['noozone']['online_temperature'])

In [None]:
((norm.ppf(.8) + norm.ppf(.95))**2)/(6*(((10489/(141*185))-.5)**2))

In [None]:
((norm.ppf(.8) + norm.ppf(.95))**2)/(6*(((15596/(141*185))-.5)**2))

In [None]:
# The left operand was missing here, which made the cell a SyntaxError.
# Restored by analogy with the 164*(330/141) cells: 108 is the sample-size
# expression two cells above (p = 15596/(141*185)) rounded up, scaled by 330/185.
# NOTE(review): confirm 108 is the intended operand.
108*(330/185)

In [None]:
164*(330/141)

In [None]:
def getpower(baseline_arr, loc_shift, sample_size, num_iter = 10000, alpha = 0.05, random_state = None):
    """Bootstrap estimate of the power of the Wilcoxon rank-sum test against a location shift.

    Each iteration draws ``2 * sample_size`` values with replacement from
    ``baseline_arr``, adds ``loc_shift`` to the second half, and compares the two
    halves with ``scipy.stats.ranksums``.

    Parameters
    ----------
    baseline_arr : pandas.Series
        Observations to resample from (must provide ``.sample``).
    loc_shift : float
        Shift added to the second half of every bootstrap draw.
    sample_size : int
        Number of observations per group.
    num_iter : int, default 10000
        Number of bootstrap iterations.
    alpha : float, default 0.05
        Significance level; an iteration counts as a rejection when p < alpha.
    random_state : int, numpy.random.RandomState or None, default None
        Seed or generator for reproducible draws. ``None`` keeps the original
        behaviour of drawing from NumPy's global random state.

    Returns
    -------
    float
        Fraction of iterations in which the test rejected, i.e. the estimated power.
    """
    # Build one generator up front: passing the same integer seed to every
    # ``.sample`` call would return the identical draw in every iteration.
    if isinstance(random_state, (int, np.integer)):
        rng = np.random.RandomState(random_state)
    else:
        rng = random_state

    num_reject = 0
    for _ in range(num_iter):
        # Work on a plain array so the shift does not depend on pandas slice
        # assignment semantics (index alignment, integer -> float upcasting).
        draw = baseline_arr.sample(sample_size*2, replace = True, random_state = rng).to_numpy()
        unshifted = draw[:sample_size]
        shifted = draw[sample_size:] + loc_shift
        _, p_value = ranksums(unshifted, shifted)
        if p_value < alpha:
            num_reject += 1
    return num_reject/num_iter

In [None]:
loc_shift

In [None]:
# Estimated power of the rank-sum test for per-group sample sizes 10, 20, ..., 500,
# resampling from the 'standard' online temperature errors with shift `loc_shift`.
samplesize_arr = np.arange(10, 510, 10)
power_arr = np.array([
    getpower(survived_dict['standard']['online_temperature'], loc_shift, x)
    for x in samplesize_arr
])

In [None]:
330/141

In [None]:
# Power curve. The x values are the per-group sample sizes rescaled by 330/141.
# NOTE(review): presumably 330 is the number of runs launched per configuration and 141
# the number of surviving 'standard' runs, so the axis is in launched runs -- TODO confirm.
plt.plot(samplesize_arr*(330/141), power_arr)
plt.xlabel('sample size')
plt.ylabel('power')
plt.title('Sample size estimate')
# add grid lines
plt.grid(True, which="both", ls="--")
# add vertical line where line crosses .8 on y-axis
# NOTE(review): the guide positions (490, .76, .41) are hard-coded rather than derived from
# power_arr, so they will not follow the curve if the bootstrap is re-run.
plt.axvline(490, ymin = 0, ymax = .76, color='r', linestyle='--')
plt.axhline(.8, xmin = 0, xmax = .41, color = 'r', linestyle= '--')

In [None]:
def permutation_test(df1, df2, var, quantile = .50, num_permutations = 10000, random_state = None):
    """Two-sided permutation test for a difference in a quantile of ``var`` between two groups.

    The test statistic is ``df1[var].quantile(quantile) - df2[var].quantile(quantile)``.
    The pooled values are shuffled ``num_permutations`` times, split into groups of
    the original sizes, and the statistic is recomputed. The p-value is twice the
    fraction of permuted statistics at least as extreme as the observed one in the
    direction of the observed sign, capped at 1.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two groups; both must contain column ``var``.
    var : str
        Column to compare.
    quantile : float, default 0.5
        Quantile to compare (0.5 is the median).
    num_permutations : int, default 10000
        Number of random relabelings.
    random_state : int, numpy.random.RandomState or None, default None
        Seed or generator for reproducible shuffles. ``None`` uses NumPy's global
        random state, as the original implementation did through ``DataFrame.sample``.

    Returns
    -------
    float
        Two-sided p-value in [0, 1], or NaN when the observed statistic is NaN.
    """
    n1 = len(df1)
    observed_diff = df1[var].quantile(quantile) - df2[var].quantile(quantile)
    if pd.isna(observed_diff):
        # No usable statistic; do not report a spurious p-value of 0.
        return float('nan')

    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # Only the tested column is needed, so shuffle it rather than the whole DataFrame.
    pooled = pd.concat([df1[var], df2[var]], axis = 0).to_numpy()
    diffs = np.empty(num_permutations, dtype = float)
    for i in range(num_permutations):
        shuffled = pd.Series(rng.permutation(pooled))
        # Same pandas quantile routine as for observed_diff, so ties compare exactly.
        diffs[i] = shuffled.iloc[:n1].quantile(quantile) - shuffled.iloc[n1:].quantile(quantile)

    # Count ties with the observed statistic in BOTH tails. The lower tail used a
    # strict '<' before, which dropped ties and could yield p = 0 for a negative
    # observed difference.
    if observed_diff >= 0:
        num_extreme = np.count_nonzero(diffs >= observed_diff)
    else:
        num_extreme = np.count_nonzero(diffs <= observed_diff)
    return min(1.0, 2*num_extreme/num_permutations)

In [None]:
getpower(survived_dict['standard']['online_temperature'], loc_shift, 200)

In [None]:
permutation_test(survived_dict['noozone'], survived_dict['standard'], var = 'online_temperature')

In [None]:
def get_stability_pvalue(p1, p2, n1 = 330, n2 = 330):
    """Two-sided p-value of the pooled two-proportion z-test.

    Parameters
    ----------
    p1, p2 : float
        Observed proportions in the two groups.
    n1, n2 : int, default 330
        Number of trials behind ``p1`` and ``p2``.

    Returns
    -------
    float
        Two-sided p-value. Returns 1.0 when the pooled variance is zero (both
        proportions 0 or both 1), where the z statistic is undefined and the
        original expression evaluated to NaN.
    """
    p = (p1 * n1 + p2 * n2) / (n1 + n2)
    pooled_variance = p * (1 - p) * (1 / n1 + 1 / n2)
    if pooled_variance <= 0:
        # Identical degenerate proportions: no evidence of a difference.
        return 1.0
    zstatistic = (p1 - p2) / np.sqrt(pooled_variance)
    # The survival function of |z| covers both signs and keeps precision for large
    # |z|, where 1 - cdf(z) rounds to exactly 0.
    return float(2 * stats.norm.sf(abs(zstatistic)))

# Survival-rate (stability) p-value of every non-baseline configuration against 'standard'.
# Both rates use the same denominator of 330.
stability_pvals = [
    get_stability_pvalue(len(survived_dict[config])/330, len(survived_dict['standard'])/330)
    for config in ('specific', 'nomemory', 'nowind', 'noozone', 'nocoszrs', 'mae', 'nodropout', 'multiclimate')
]

# NOTE(review): permutation_test is also defined in an earlier cell; this later
# definition is the one in effect for the calls below. Consider keeping a single copy.
def permutation_test(df1, df2, var, quantile = .50, num_permutations = 10000, random_state = None):
    """Two-sided permutation test for a difference in a quantile of ``var`` between two groups.

    The test statistic is ``df1[var].quantile(quantile) - df2[var].quantile(quantile)``.
    The pooled values are shuffled ``num_permutations`` times, split into groups of
    the original sizes, and the statistic is recomputed. The p-value is twice the
    fraction of permuted statistics at least as extreme as the observed one in the
    direction of the observed sign, capped at 1.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        The two groups; both must contain column ``var``.
    var : str
        Column to compare.
    quantile : float, default 0.5
        Quantile to compare (0.5 is the median).
    num_permutations : int, default 10000
        Number of random relabelings.
    random_state : int, numpy.random.RandomState or None, default None
        Seed or generator for reproducible shuffles. ``None`` uses NumPy's global
        random state, as the original implementation did through ``DataFrame.sample``.

    Returns
    -------
    float
        Two-sided p-value in [0, 1], or NaN when the observed statistic is NaN.
    """
    n1 = len(df1)
    observed_diff = df1[var].quantile(quantile) - df2[var].quantile(quantile)
    if pd.isna(observed_diff):
        # No usable statistic; do not report a spurious p-value of 0.
        return float('nan')

    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # Only the tested column is needed, so shuffle it rather than the whole DataFrame.
    pooled = pd.concat([df1[var], df2[var]], axis = 0).to_numpy()
    diffs = np.empty(num_permutations, dtype = float)
    for i in range(num_permutations):
        shuffled = pd.Series(rng.permutation(pooled))
        # Same pandas quantile routine as for observed_diff, so ties compare exactly.
        diffs[i] = shuffled.iloc[:n1].quantile(quantile) - shuffled.iloc[n1:].quantile(quantile)

    # Count ties with the observed statistic in BOTH tails. The lower tail used a
    # strict '<' before, which dropped ties and could yield p = 0 for a negative
    # observed difference.
    if observed_diff >= 0:
        num_extreme = np.count_nonzero(diffs >= observed_diff)
    else:
        num_extreme = np.count_nonzero(diffs <= observed_diff)
    return min(1.0, 2*num_extreme/num_permutations)

# Permutation-test p-values (difference in medians against 'standard') for the online
# temperature error, one per non-baseline configuration.
temperature_pvals = [
    permutation_test(survived_dict[config], survived_dict['standard'], var = 'online_temperature')
    for config in ('specific', 'nomemory', 'nowind', 'noozone', 'nocoszrs', 'mae', 'nodropout', 'multiclimate')
]

# Same comparison for the online moisture error.
moisture_pvals = [
    permutation_test(survived_dict[config], survived_dict['standard'], var = 'online_moisture')
    for config in ('specific', 'nomemory', 'nowind', 'noozone', 'nocoszrs', 'mae', 'nodropout', 'multiclimate')
]

# Pool each metric over all nine configurations. Every pooled series is built from
# the same configuration order, so row i of an offline series and row i of the
# matching online series belong to the same run -- required by the Spearman
# correlations computed from these series.
_pooled_configs = ['standard', 'specific', 'nomemory', 'nowind', 'noozone',
                   'nocoszrs', 'mae', 'nodropout', 'multiclimate']

offline_heating_overall = pd.concat(
    [survived_dict[_config]['offline_heating'] for _config in _pooled_configs], axis = 0)

offline_moistening_overall = pd.concat(
    [survived_dict[_config]['offline_moistening'] for _config in _pooled_configs], axis = 0)

# Fix: the 'specific' entry previously contributed 'offline_moistening' here
# (copy-paste slip), mixing an offline metric into the pooled online temperature error.
online_temperature_overall = pd.concat(
    [survived_dict[_config]['online_temperature'] for _config in _pooled_configs], axis = 0)

# Fix: same slip for the pooled online moisture error.
online_moisture_overall = pd.concat(
    [survived_dict[_config]['online_moisture'] for _config in _pooled_configs], axis = 0)

# Spearman rank correlation between offline and online error, pooled over all configurations
# (T: offline heating vs. online temperature; Q: offline moistening vs. online moisture).
overall_T_corr, overall_T_pval = spearmanr(offline_heating_overall, online_temperature_overall)
overall_Q_corr, overall_Q_pval = spearmanr(offline_moistening_overall, online_moisture_overall)

# Per-configuration Spearman correlation: offline heating vs. online temperature.
standard_T_corr, standard_T_pval = spearmanr(survived_dict['standard']['offline_heating'], survived_dict['standard']['online_temperature'])
specific_T_corr, specific_T_pval = spearmanr(survived_dict['specific']['offline_heating'], survived_dict['specific']['online_temperature'])
nomemory_T_corr, nomemory_T_pval = spearmanr(survived_dict['nomemory']['offline_heating'], survived_dict['nomemory']['online_temperature'])
nowind_T_corr, nowind_T_pval = spearmanr(survived_dict['nowind']['offline_heating'], survived_dict['nowind']['online_temperature'])
noozone_T_corr, noozone_T_pval = spearmanr(survived_dict['noozone']['offline_heating'], survived_dict['noozone']['online_temperature'])
nocoszrs_T_corr, nocoszrs_T_pval = spearmanr(survived_dict['nocoszrs']['offline_heating'], survived_dict['nocoszrs']['online_temperature'])
mae_T_corr, mae_T_pval = spearmanr(survived_dict['mae']['offline_heating'], survived_dict['mae']['online_temperature'])
nodropout_T_corr, nodropout_T_pval = spearmanr(survived_dict['nodropout']['offline_heating'], survived_dict['nodropout']['online_temperature'])
multiclimate_T_corr, multiclimate_T_pval = spearmanr(survived_dict['multiclimate']['offline_heating'], survived_dict['multiclimate']['online_temperature'])

# Per-configuration Spearman correlation: offline moistening vs. online moisture.
standard_Q_corr, standard_Q_pval = spearmanr(survived_dict['standard']['offline_moistening'], survived_dict['standard']['online_moisture'])
specific_Q_corr, specific_Q_pval = spearmanr(survived_dict['specific']['offline_moistening'], survived_dict['specific']['online_moisture'])
nomemory_Q_corr, nomemory_Q_pval = spearmanr(survived_dict['nomemory']['offline_moistening'], survived_dict['nomemory']['online_moisture'])
nowind_Q_corr, nowind_Q_pval = spearmanr(survived_dict['nowind']['offline_moistening'], survived_dict['nowind']['online_moisture'])
noozone_Q_corr, noozone_Q_pval = spearmanr(survived_dict['noozone']['offline_moistening'], survived_dict['noozone']['online_moisture'])
nocoszrs_Q_corr, nocoszrs_Q_pval = spearmanr(survived_dict['nocoszrs']['offline_moistening'], survived_dict['nocoszrs']['online_moisture'])
mae_Q_corr, mae_Q_pval = spearmanr(survived_dict['mae']['offline_moistening'], survived_dict['mae']['online_moisture'])
nodropout_Q_corr, nodropout_Q_pval = spearmanr(survived_dict['nodropout']['offline_moistening'], survived_dict['nodropout']['online_moisture'])
multiclimate_Q_corr, multiclimate_Q_pval = spearmanr(survived_dict['multiclimate']['offline_moistening'], survived_dict['multiclimate']['online_moisture'])

# Correlation p-values in configuration order: standard, specific, ..., multiclimate.
offlinevsonline_T_pvals = [standard_T_pval, specific_T_pval, nomemory_T_pval, nowind_T_pval, noozone_T_pval, nocoszrs_T_pval, mae_T_pval, nodropout_T_pval, multiclimate_T_pval]

offlinevsonline_Q_pvals = [standard_Q_pval, specific_Q_pval, nomemory_Q_pval, nowind_Q_pval, noozone_Q_pval, nocoszrs_Q_pval, mae_Q_pval, nodropout_Q_pval, multiclimate_Q_pval]

# Nine T p-values, nine Q p-values, then the two pooled p-values.
offlinevsonline_pvals = offlinevsonline_T_pvals + offlinevsonline_Q_pvals + [overall_T_pval, overall_Q_pval]

# All p-values in one list; the order must match the labels in `hypothesis_tests`,
# because the two lists are zipped together when sorted.
pvals = stability_pvals + temperature_pvals + moisture_pvals + offlinevsonline_pvals

hypothesis_tests = ['specific humidity (stability)', 'no memory (stability)', 'no wind (stability)', 'no ozone (stability)', 'no zenith angle (stability)', 'MAE (stability)', 'no dropout (stability)', 'multiclimate (stability)', \
                    'specific humidity (temperature)', 'no memory (temperature)', 'no wind (temperature)', 'no ozone (temperature)', 'no zenith angle (temperature)', 'MAE (temperature)', 'no dropout (temperature)', 'multiclimate (temperature)', \
                    'specific humidity (moisture)', 'no memory (moisture)', 'no wind (moisture)', 'no ozone (moisture)', 'no zenith angle (moisture)', 'MAE (moisture)', 'no dropout (moisture)', 'multiclimate (moisture)', \
                    'offline vs. online T (standard)', 'offline vs. online T (specific)', 'offline vs. online T (no memory)', 'offline vs. online T (no wind)', 'offline vs. online T (no ozone)', 'offline vs. online T (no zenith angle)', 'offline vs. online T (MAE)', 'offline vs. online T (no dropout)', 'offline vs. online T (multiclimate)', \
                    'offline vs. online Q (standard)', 'offline vs. online Q (specific)', 'offline vs. online Q (no memory)', 'offline vs. online Q (no wind)', 'offline vs. online Q (no ozone)', 'offline vs. online Q (no zenith angle)', 'offline vs. online Q (MAE)', 'offline vs. online Q (no dropout)', 'offline vs. online Q (multiclimate)', \
                    'offline vs. online T (overall)', 'offline vs. online Q (overall)']
# Benjamini-Hochberg setup: the threshold for the p-value of rank k (1-based) is
# alpha_global * k / num_tests; p-values and their labels are sorted together, ascending.
alpha_global = .05
num_tests = len(pvals)
alpha_scaled = [alpha_global*rank/num_tests for rank in range(1, num_tests + 1)]
sorted_pvals, sorted_tests = zip(*sorted(zip(pvals, hypothesis_tests)))

In [None]:
print('hello world')

# Mann-Whitney U tests: online temperature (T)

In [None]:
mannwhitneyu(survived_dict['specific']['online_temperature'], survived_dict['standard']['online_temperature'])

In [None]:
mannwhitneyu(survived_dict['nomemory']['online_temperature'], survived_dict['standard']['online_temperature'])

In [None]:
mannwhitneyu(survived_dict['nowind']['online_temperature'], survived_dict['standard']['online_temperature'])

In [None]:
mannwhitneyu(survived_dict['noozone']['online_temperature'], survived_dict['standard']['online_temperature'])

In [None]:
mannwhitneyu(survived_dict['nocoszrs']['online_temperature'], survived_dict['standard']['online_temperature'])

In [None]:
mannwhitneyu(survived_dict['mae']['online_temperature'], survived_dict['standard']['online_temperature'])

In [None]:
mannwhitneyu(survived_dict['nodropout']['online_temperature'], survived_dict['standard']['online_temperature'])

In [None]:
mannwhitneyu(survived_dict['multiclimate']['online_temperature'], survived_dict['standard']['online_temperature'])

# Mann-Whitney U tests: online moisture (Q)

In [None]:
mannwhitneyu(survived_dict['specific']['online_moisture'], survived_dict['standard']['online_moisture'])

In [None]:
mannwhitneyu(survived_dict['nomemory']['online_moisture'], survived_dict['standard']['online_moisture'])

In [None]:
mannwhitneyu(survived_dict['nowind']['online_moisture'], survived_dict['standard']['online_moisture'])

In [None]:
mannwhitneyu(survived_dict['noozone']['online_moisture'], survived_dict['standard']['online_moisture'])

In [None]:
mannwhitneyu(survived_dict['nocoszrs']['online_moisture'], survived_dict['standard']['online_moisture'])

In [None]:
# NOTE(review): these two lists are also computed in an earlier cell, before `pvals` is
# assembled. Re-running them here draws fresh permutations, so the values shown afterwards
# can differ from the ones already stored in `pvals`.
temperature_pvals = [
    permutation_test(survived_dict[config], survived_dict['standard'], var = 'online_temperature')
    for config in ('specific', 'nomemory', 'nowind', 'noozone', 'nocoszrs', 'mae', 'nodropout', 'multiclimate')
]

moisture_pvals = [
    permutation_test(survived_dict[config], survived_dict['standard'], var = 'online_moisture')
    for config in ('specific', 'nomemory', 'nowind', 'noozone', 'nocoszrs', 'mae', 'nodropout', 'multiclimate')
]

In [None]:
len(survived_dict['noozone'])

In [None]:
185/2

In [None]:
len(survived_dict['standard'])

In [None]:
survived_dict['noozone'].sample(92, replace = True)

In [None]:
permutation_test(survived_dict['noozone'], survived_dict['standard'], var = 'online_temperature')

In [None]:
len(survived_dict['noozone'])

In [None]:
len(survived_dict['standard'])

In [None]:
temperature_pvals

In [None]:
# Benjamini-Hochberg plot: sorted p-values against their rank, together with the
# rank-scaled threshold line (grey, dashed) and the unscaled alpha level (black, dotted).
plt.figure(figsize=(17, 5))
x_range = range(1, num_tests + 1)
plt.plot(x_range, alpha_scaled, color = 'grey', linestyle = '--')
plt.plot(x_range, [alpha_global for x in range(num_tests)], color = 'black', linestyle = ':')
plt.ylim(0, 1)
plt.xlabel('p-value rank')
plt.ylabel('p-value')

# BH cutoff: the largest rank whose p-value lies below its scaled threshold (0 if none).
# Every test at or below that rank is rejected, even when its own p-value sits above its
# own threshold. Those points were previously skipped by both branches of the loop and
# never drawn.
bh_cutoff_rank = max((rank for rank, pval in zip(x_range, sorted_pvals)
                      if pval < alpha_scaled[rank - 1]), default = 0)
crossed = bh_cutoff_rank > 0

# Plot from the highest rank down (same drawing and legend order as before):
# 'x' for tests that are not rejected, '.' for rejected ones.
handles = []
for rank, pval, test in reversed(list(zip(x_range, sorted_pvals, sorted_tests))):
    if rank > bh_cutoff_rank:
        handle = plt.scatter(rank, pval, s = 30, marker = 'x', label = test)
    else:
        handle = plt.scatter(rank, pval, s = 60, marker = '.', label = test)
    handles.append(handle)

# Separator between rejected and non-rejected tests, derived from the data rather than
# hard-coded, so it stays correct when the p-values are recomputed.
if crossed:
    plt.axvline(x = bh_cutoff_rank + 0.5, linestyle = '--', linewidth = .6, color = 'black')
plt.legend(loc = 'upper left', handles = handles, ncol = 3, fontsize = 8.8)
plt.title('Hypothesis Tests (using Benjamini-Hochberg procedure)')

# Online story

In [None]:
# Survival rate per configuration, one colored bar each.
x_labels = ['standard', 'specific', 'nomemory', 'nowind', 'noozone', 'nocoszrs', 'mae', 'nodropout', 'multiclimate']
adj_labels = [label_dict[x] for x in x_labels]
# Bars that receive a star. NOTE(review): presumably the configurations whose survival
# rate differs significantly from 'standard'; the indices are hard-coded -- TODO confirm.
subset_indices = [1, 2, 4, 5, 6, 7, 8]
# Fraction of runs that survived; 330 is the same denominator used for the stability z-test.
survival_rates = [len(survived_dict[x])/330 for x in x_labels]
sig_values = [survival_rates[i] for i in subset_indices]
colors = [color_dict[x] for x in x_labels]

# Creating a bar chart with different colors for each bar and adding a legend
plt.figure(figsize=(10, 6))
bars = plt.bar(adj_labels, survival_rates, color=colors, width = .5)
plt.xlabel('Configuration')
# The plotted values and the axis run from 0 to 1, so the label must not claim percent.
plt.ylabel('Survival Rate (fraction of runs)')
plt.title('Survival rate by configuration')
plt.xticks(rotation=45)  # Rotating the labels for better visibility
delta = .05
ylim_min = 0
ylim_max = 1
plt.yticks(np.arange(ylim_min, ylim_max + delta, delta))
plt.ylim(ylim_min, ylim_max)
plt.grid(True, axis = 'y')

# Adding stars just above the selected bars (bar i sits at x position i)
for i, value in zip(subset_indices, sig_values):
    plt.text(i, value + .01, '*', fontsize = 15, ha='center', color='black')

# Adding a legend
plt.legend(bars, adj_labels)
plt.show()

In [None]:
survival_rates

In [None]:
# Median online temperature RMSE per configuration, one colored bar each.
x_labels = ['standard', 'specific', 'nomemory', 'nowind', 'noozone', 'nocoszrs', 'mae', 'nodropout', 'multiclimate']
adj_labels = [label_dict[x] for x in x_labels]
colors = [color_dict[x] for x in x_labels]
online_temp_rmse_quantiles = [survived_dict[x]['online_temperature'].quantile(.50) for x in x_labels]

# Positions and heights of the bars that get a star
subset_indices = [2, 4, 6, 7, 8]
sig_values = [online_temp_rmse_quantiles[i] for i in subset_indices]

# Axis range and tick spacing
ylim_min = 0
ylim_max = 6
delta = .5

plt.figure(figsize=(10, 6))
bars = plt.bar(adj_labels, online_temp_rmse_quantiles, color=colors, width = .5)
plt.xlabel('Configuration')
plt.ylabel('K')
plt.title('Median Online Temperature RMSE by configuration')
plt.xticks(rotation=45)  # rotated so the configuration names do not overlap
plt.yticks(np.arange(ylim_min, ylim_max + delta, delta))
plt.ylim(ylim_min, ylim_max)
plt.grid(True, axis = 'y')

# Star just above each selected bar
for i, value in zip(subset_indices, sig_values):
    plt.text(i, value + .01, '*', fontsize = 15, ha='center', color='black')

# Legend mapping bar colors to configuration names
plt.legend(bars, adj_labels)
plt.show()

In [None]:
online_temp_rmse_quantiles

In [None]:
# Median online moisture RMSE per configuration, one colored bar each.
x_labels = ['standard', 'specific', 'nomemory', 'nowind', 'noozone', 'nocoszrs', 'mae', 'nodropout', 'multiclimate']
adj_labels = [label_dict[x] for x in x_labels]
colors = [color_dict[x] for x in x_labels]
online_hum_rmse_quantiles = [survived_dict[x]['online_moisture'].quantile(.50) for x in x_labels]

# Positions and heights of the bars that get a star
subset_indices = [2, 6, 7, 8]
sig_values = [online_hum_rmse_quantiles[i] for i in subset_indices]

# Axis range and tick spacing
ylim_min = 0
ylim_max = 1
delta = .1

plt.figure(figsize=(10, 6))
bars = plt.bar(adj_labels, online_hum_rmse_quantiles, color=colors, width = .5)
plt.xlabel('Configuration')
plt.ylabel('g/kg')
plt.title('Median Online Moisture RMSE by configuration')
plt.xticks(rotation=45)  # rotated so the configuration names do not overlap
plt.yticks(np.arange(ylim_min, ylim_max + delta, delta))
plt.ylim(ylim_min, ylim_max)
plt.grid(True, axis = 'y', which = 'both')

# Star just above each selected bar
for i, value in zip(subset_indices, sig_values):
    plt.text(i, value + .01, '*', fontsize = 15, ha='center', color='black')

# Legend mapping bar colors to configuration names
plt.legend(bars, adj_labels)
plt.show()

In [None]:
online_hum_rmse_quantiles

# Offline vs. Online story

In [None]:
# Offline vs. online error for every surviving run, colored by configuration.
# Left panel: heating / temperature. Right panel: moistening / moisture.
dotsize = 3
alpha = .2
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

for _cfg in ('standard', 'specific', 'nomemory', 'nowind', 'noozone',
             'nocoszrs', 'mae', 'nodropout', 'multiclimate'):
    _runs = survived_dict[_cfg]
    _marker_style = dict(color = color_dict[_cfg], label = label_dict[_cfg], s = dotsize, alpha = alpha)
    ax1.scatter(_runs['offline_heating'], _runs['online_temperature'], **_marker_style)
    ax2.scatter(_runs['offline_moistening'], _runs['online_moisture'], **_marker_style)

ax1.set_xlabel('offline heating RMSE (K/s)', fontsize = 12)
ax1.set_ylabel('online temperature RMSE (K)', fontsize = 12)
ax1.set_yscale('log')

ax2.set_xlabel('offline moistening RMSE (g/kg/s)', fontsize = 12)
ax2.set_ylabel('online moisture RMSE (g/kg)', fontsize = 12)
ax2.set_yscale('log')
# One legend is enough: both panels share the same colors and labels.
ax2.legend()

fig.suptitle('Offline vs. Online Error, all configurations', fontsize = 14)
plt.tight_layout()

In [None]:
def _box_summary(values):
    """Compute the box-plot statistics of one pandas Series.

    Returns a dict with the quartiles ('q25', 'q50', 'q75'), the whisker end
    points ('whisker_low', 'whisker_high': the 1.5*IQR fences clipped to the
    observed minimum / maximum) and 'outliers', a boolean Series that is True
    where a value lies strictly outside the fences.
    """
    q25 = values.quantile(.25)
    q50 = values.quantile(.5)
    q75 = values.quantile(.75)
    iqr = q75 - q25
    low_fence = q25 - 1.5*iqr
    high_fence = q75 + 1.5*iqr
    return {
        'q25': q25,
        'q50': q50,
        'q75': q75,
        'whisker_low': max(values.min(), low_fence),
        'whisker_high': min(values.max(), high_fence),
        'outliers': (values < low_fence) | (values > high_fence),
    }


def _draw_2d_box(ax, runs, x_col, y_col, color, label = None):
    """Draw one two-dimensional box plot of runs[x_col] vs. runs[y_col] on `ax`.

    The rectangle spans the inter-quartile range of both columns, the two
    straight lines are the whiskers drawn through the medians, and rows that
    are outliers in either column are scattered as small dots.
    Returns the Rectangle patch so the caller can use it as a legend handle.
    """
    x_stats = _box_summary(runs[x_col])
    y_stats = _box_summary(runs[y_col])
    # A row is an outlier when it falls outside the fences of either column.
    flagged = runs[x_stats['outliers'] | y_stats['outliers']]

    # Only forward `label` when one was given so unlabeled patches keep
    # matplotlib's default label.
    patch_kwargs = {} if label is None else {'label': label}
    box = patches.Rectangle(
        (x_stats['q25'], y_stats['q25']),       # bottom left corner
        x_stats['q75'] - x_stats['q25'],        # width
        y_stats['q75'] - y_stats['q25'],        # height
        linewidth=1, edgecolor = color, facecolor = 'none', **patch_kwargs)

    # Vertical whisker at the x median, horizontal whisker at the y median.
    ax.plot([x_stats['q50'], x_stats['q50']], [y_stats['whisker_low'], y_stats['whisker_high']], color = color, linewidth=1)
    ax.plot([x_stats['whisker_low'], x_stats['whisker_high']], [y_stats['q50'], y_stats['q50']], color=color, linewidth=1)
    ax.scatter(flagged[x_col], flagged[y_col], color = color, s = 2, alpha = .4)
    ax.add_patch(box)
    return box


def make_boxplots(model_dicts):
    """Plot offline-vs-online error as 2-D box plots, one box per configuration.

    Parameters
    ----------
    model_dicts : mapping
        Maps a configuration key to a DataFrame with the columns 'num_months',
        'offline_heating', 'online_temperature', 'offline_moistening' and
        'online_moisture'. Every key must also exist in the module-level
        `label_dict` and `color_dict`.

    Only rows with num_months == 12 are used. The left panel shows heating /
    temperature, the right panel moistening / moisture; the legend is attached
    to the right panel. The figure is displayed with plt.show(); nothing is
    returned.
    """
    # Create a new figure and axis
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    handles_Q = []
    for config in model_dicts:
        model_info = model_dicts[config]
        label = label_dict[config]
        color = color_dict[config]

        # Filter once per configuration instead of once per statistic.
        full_year = model_info[model_info['num_months']==12]

        _draw_2d_box(ax1, full_year, 'offline_heating', 'online_temperature', color)
        square_Q = _draw_2d_box(ax2, full_year, 'offline_moistening', 'online_moisture', color, label = label)
        handles_Q.append(square_Q)

    # Set the limits of the plot
    ax1.set_xlim(1.9e-5, 3e-5)
    ax1.set_ylim(.9, 50)
    ax1.set_xlabel('offline heating RMSE (K/s)')
    ax1.set_ylabel('online temperature RMSE (K)')
    ax2.set_xlim(1.8e-5, 2.4e-5)
    ax2.set_ylim(1e-1, 10)
    ax2.set_xlabel('offline moistening RMSE (g/kg/s)')
    ax2.set_ylabel('online moisture RMSE (g/kg)')

    # Add grid
    ax1.grid(True, which = 'both')
    ax1.set_yscale('log')
    ax2.grid(True)
    ax2.set_yscale('log')
    ax2.legend(handles = handles_Q, fontsize = 8)

    # Show the plot
    fig.suptitle('Offline vs. Online Error, all configurations', fontsize = 14)
    plt.show()

In [None]:
# Draw the offline-vs-online 2-D box plots for every configuration in config_dict.
make_boxplots(config_dict)

In [None]:
# Configurations pooled into the "overall" series below.
# NOTE(review): 'specific' is not part of this pool even though it gets its own
# per-configuration correlation further down -- confirm the exclusion is intended.
_overall_configs = ['standard', 'nomemory', 'nowind', 'noozone',
                    'nocoszrs', 'mae', 'nodropout', 'multiclimate']


def _pool_column(column):
    """Concatenate one column of survived_dict row-wise across _overall_configs."""
    return pd.concat([survived_dict[key][column] for key in _overall_configs], axis = 0)


def _config_spearman(config_key, offline_col, online_col):
    """Run spearmanr on two columns of survived_dict[config_key]; unpacks to (corr, pval)."""
    runs = survived_dict[config_key]
    return spearmanr(runs[offline_col], runs[online_col])


offline_heating_overall = _pool_column('offline_heating')
offline_moistening_overall = _pool_column('offline_moistening')
online_temperature_overall = _pool_column('online_temperature')
online_moisture_overall = _pool_column('online_moisture')

# Rank correlation between offline and online error over the pooled runs.
overall_T_corr, overall_T_pval = spearmanr(offline_heating_overall, online_temperature_overall)
overall_Q_corr, overall_Q_pval = spearmanr(offline_moistening_overall, online_moisture_overall)

# Per-configuration correlations: offline heating vs. online temperature.
# 'specific' was previously missing here although its result is printed in the
# cell that reports these values.
standard_T_corr, standard_T_pval = _config_spearman('standard', 'offline_heating', 'online_temperature')
specific_T_corr, specific_T_pval = _config_spearman('specific', 'offline_heating', 'online_temperature')
nomemory_T_corr, nomemory_T_pval = _config_spearman('nomemory', 'offline_heating', 'online_temperature')
nowind_T_corr, nowind_T_pval = _config_spearman('nowind', 'offline_heating', 'online_temperature')
noozone_T_corr, noozone_T_pval = _config_spearman('noozone', 'offline_heating', 'online_temperature')
nocoszrs_T_corr, nocoszrs_T_pval = _config_spearman('nocoszrs', 'offline_heating', 'online_temperature')
mae_T_corr, mae_T_pval = _config_spearman('mae', 'offline_heating', 'online_temperature')
nodropout_T_corr, nodropout_T_pval = _config_spearman('nodropout', 'offline_heating', 'online_temperature')
multiclimate_T_corr, multiclimate_T_pval = _config_spearman('multiclimate', 'offline_heating', 'online_temperature')

# Per-configuration correlations: offline moistening vs. online moisture.
standard_Q_corr, standard_Q_pval = _config_spearman('standard', 'offline_moistening', 'online_moisture')
specific_Q_corr, specific_Q_pval = _config_spearman('specific', 'offline_moistening', 'online_moisture')
nomemory_Q_corr, nomemory_Q_pval = _config_spearman('nomemory', 'offline_moistening', 'online_moisture')
nowind_Q_corr, nowind_Q_pval = _config_spearman('nowind', 'offline_moistening', 'online_moisture')
noozone_Q_corr, noozone_Q_pval = _config_spearman('noozone', 'offline_moistening', 'online_moisture')
nocoszrs_Q_corr, nocoszrs_Q_pval = _config_spearman('nocoszrs', 'offline_moistening', 'online_moisture')
mae_Q_corr, mae_Q_pval = _config_spearman('mae', 'offline_moistening', 'online_moisture')
nodropout_Q_corr, nodropout_Q_pval = _config_spearman('nodropout', 'offline_moistening', 'online_moisture')
multiclimate_Q_corr, multiclimate_Q_pval = _config_spearman('multiclimate', 'offline_moistening', 'online_moisture')

In [None]:
# Temperature results, one "corr pval" line per configuration.
for _corr, _pval in (
    (standard_T_corr, standard_T_pval),
    (specific_T_corr, specific_T_pval),
    (nomemory_T_corr, nomemory_T_pval),
    (nowind_T_corr, nowind_T_pval),
    (noozone_T_corr, noozone_T_pval),
    (nocoszrs_T_corr, nocoszrs_T_pval),
    (mae_T_corr, mae_T_pval),
    (nodropout_T_corr, nodropout_T_pval),
    (multiclimate_T_corr, multiclimate_T_pval),
):
    print(_corr, _pval)

In [None]:
# Moisture results, one "corr pval" line per configuration.
for _corr, _pval in (
    (standard_Q_corr, standard_Q_pval),
    (specific_Q_corr, specific_Q_pval),
    (nomemory_Q_corr, nomemory_Q_pval),
    (nowind_Q_corr, nowind_Q_pval),
    (noozone_Q_corr, noozone_Q_pval),
    (nocoszrs_Q_corr, nocoszrs_Q_pval),
    (mae_Q_corr, mae_Q_pval),
    (nodropout_Q_corr, nodropout_Q_pval),
    (multiclimate_Q_corr, multiclimate_Q_pval),
):
    print(_corr, _pval)

In [None]:
# Pooled results: temperature first, then moisture.
for _corr, _pval in ((overall_T_corr, overall_T_pval), (overall_Q_corr, overall_Q_pval)):
    print(_corr, _pval)

In [None]:
# def plot_kde_with_mode(series, label, color):
#     # sns.kdeplot(series, bw_adjust=0.5, fill=True, color=color, alpha=0.5, label=f'KDE of {label}')
#     # Compute KDE
#     kde = sns.kdeplot(series).get_lines()[-1].get_data()
#     x = kde[0]
#     y = kde[1]
#     # Find mode
#     mode_index = argrelextrema(y, np.greater)[0]
#     mode = x[mode_index][y[mode_index].argmax()]  # Highest peak
#     plt.axvline(x=mode, color=color, linestyle='--', label=f'Mode of {label}: {mode:.2f}')
#     plt.legend()
#     return mode
# plot_kde_with_mode(standard_model_info[standard_model_info['num_months']==12]['online_temperature'], 'standard', color_dict['standard'])

# Zonal mean eval

In [None]:
# Reference run: list its *.h1.0000* files via `ls` and open them as one dataset.
sp_path = '/ocean/projects/atm200007p/jlin96/longSPrun_clean/trim_dir/trimmed/'
_sp_listing = os.popen(f"ls {sp_path}*.h1.0000*").read()
sp_list = _sp_listing.splitlines()
sp_data = xr.open_mfdataset(sp_list)

# Means over time and longitude; the humidity field is multiplied by 1000.
sp_temp = sp_data['NNTBSP'].mean(dim = ['time', 'lon'])
sp_hum = sp_data['NNQBSP'].mean(dim = ['time', 'lon'])*1000

# X: sin(latitude) repeated over 30 rows. The Y produced by meshgrid is a
# placeholder that is replaced right below.
_sin_lat = np.sin(sp_data["lat"]*np.pi/180)
X, Y = np.meshgrid(_sin_lat, np.array(range(30)))

# Y: pressure computed as P0*hyam + hybm*NNPSBSP, averaged over lon and time.
pressure_grid = sp_data['P0']*sp_data['hyam'] + sp_data['hybm']*sp_data['NNPSBSP']
Y = pressure_grid.mean(dim = ['lon', 'time']).values

In [None]:
def plot_zonal_diff(config, model_rank, var = 'NNTBSP'):
    """Plot the time/longitude-mean difference between one coupled run and the reference.

    Parameters
    ----------
    config : str
        Configuration name; used to build the results directory path.
    model_rank : int or str
        Model number; zero-padded to three characters in the path.
    var : {'NNTBSP', 'NNQBSP'}
        Field to compare. 'NNTBSP' is compared against the module-level
        `sp_temp`; 'NNQBSP' is multiplied by 1000 and compared against
        `sp_hum`.

    Raises
    ------
    ValueError
        If `var` is not one of the two supported names.

    The plot is drawn on the module-level `X` / `Y` grids with a fixed colour
    range of -5..5 and shown with plt.show(); nothing is returned.
    """
    if var not in ('NNTBSP', 'NNQBSP'):
        # Fail before touching the filesystem. Previously an unsupported name
        # only surfaced later as an UnboundLocalError on `nn_var`.
        raise ValueError(f"var must be 'NNTBSP' or 'NNQBSP', got {var!r}")

    model_rank = str(model_rank).zfill(3)
    nn_path = '/ocean/projects/atm200007p/jlin96/nnspreadtesting_good/{}/coupled_results/{}_model_{}/'.format(config, config, model_rank)
    nn_list = os.popen(" ".join(["ls", nn_path + "*.h1.0000*"])).read().splitlines()
    nn_data = xr.open_mfdataset(nn_list)

    # Both variables share the same symmetric colour range.
    vmin, vmax = -5, 5
    if var == 'NNTBSP':
        nn_var = nn_data['NNTBSP'].mean(dim = ['time', 'lon'])
        sp_var = sp_temp
        var_title = 'Zonal mean temperature difference in (K)'
        cmap = 'bwr'
    else:
        nn_var = nn_data['NNQBSP'].mean(dim = ['time', 'lon'])*1000
        sp_var = sp_hum
        var_title = 'Zonal mean humidity difference in (g/kg)'
        cmap = 'BrBG'

    plt.pcolormesh(X, Y, nn_var - sp_var, cmap = cmap, vmin = vmin, vmax = vmax)
    # Raw strings: '\c' is not a valid escape sequence, so the non-raw form
    # triggers an invalid-escape warning. The resulting text is unchanged.
    plt.xticks([np.sin(-50/180*np.pi), 0, np.sin(50/180*np.pi)], [r'50$^\circ$S', r'0$^\circ$', r'50$^\circ$N'])
    plt.xlabel('latitude')
    plt.ylabel('pressure')
    plt.title(var_title)
    plt.gca().invert_yaxis()
    plt.colorbar()
    plt.show()

def showplots(config, model_info, var = "NNTBSP"):
    """Call plot_zonal_diff for the best 12-month runs of one configuration.

    Parameters
    ----------
    config : str
        Configuration name, passed through to plot_zonal_diff.
    model_info : pandas.DataFrame
        Needs the columns 'num_months' and 'online_temperature' /
        'online_moisture'; its index values are passed as the model rank.
    var : {'NNTBSP', 'NNQBSP'}
        Selects the ranking column ('online_temperature' or
        'online_moisture') and the variable that is plotted.

    Raises
    ------
    ValueError
        If `var` is not one of the two supported names (previously this left
        `ranks` unassigned and failed with an UnboundLocalError).
    """
    metric_by_var = {'NNTBSP': 'online_temperature', 'NNQBSP': 'online_moisture'}
    if var not in metric_by_var:
        raise ValueError(f"var must be 'NNTBSP' or 'NNQBSP', got {var!r}")

    full_year = model_info[model_info['num_months']==12]
    # Ascending sort + head(): the (up to) five smallest values of the metric.
    ranks = full_year[metric_by_var[var]].sort_values().head().index
    for rank in ranks:
        plot_zonal_diff(config, rank, var)

In [None]:
# Default var ('NNTBSP'): plots the (up to) five 12-month 'specific' runs with
# the smallest online_temperature.
showplots('specific', specific_model_info)

# Dropout analysis

In [None]:
def _dropout_spearman(config_key, offline_col, online_col):
    """spearmanr between two columns of survived_dict[config_key]; unpacks to (corr, pval)."""
    runs = survived_dict[config_key]
    return spearmanr(runs[offline_col], runs[online_col])


# Offline heating vs. online temperature, per configuration.
_T_cols = ('offline_heating', 'online_temperature')
standard_dropout_T_corr, standard_dropout_T_pval = _dropout_spearman('standard', *_T_cols)
specific_dropout_T_corr, specific_dropout_T_pval = _dropout_spearman('specific', *_T_cols)
nomemory_dropout_T_corr, nomemory_dropout_T_pval = _dropout_spearman('nomemory', *_T_cols)
nowind_dropout_T_corr, nowind_dropout_T_pval = _dropout_spearman('nowind', *_T_cols)
noozone_dropout_T_corr, noozone_dropout_T_pval = _dropout_spearman('noozone', *_T_cols)
nocoszrs_dropout_T_corr, nocoszrs_dropout_T_pval = _dropout_spearman('nocoszrs', *_T_cols)
mae_dropout_T_corr, mae_dropout_T_pval = _dropout_spearman('mae', *_T_cols)
multiclimate_dropout_T_corr, multiclimate_dropout_T_pval = _dropout_spearman('multiclimate', *_T_cols)

# Offline moistening vs. online moisture, per configuration.
_Q_cols = ('offline_moistening', 'online_moisture')
standard_dropout_Q_corr, standard_dropout_Q_pval = _dropout_spearman('standard', *_Q_cols)
specific_dropout_Q_corr, specific_dropout_Q_pval = _dropout_spearman('specific', *_Q_cols)
nomemory_dropout_Q_corr, nomemory_dropout_Q_pval = _dropout_spearman('nomemory', *_Q_cols)
nowind_dropout_Q_corr, nowind_dropout_Q_pval = _dropout_spearman('nowind', *_Q_cols)
noozone_dropout_Q_corr, noozone_dropout_Q_pval = _dropout_spearman('noozone', *_Q_cols)
nocoszrs_dropout_Q_corr, nocoszrs_dropout_Q_pval = _dropout_spearman('nocoszrs', *_Q_cols)
mae_dropout_Q_corr, mae_dropout_Q_pval = _dropout_spearman('mae', *_Q_cols)
multiclimate_dropout_Q_corr, multiclimate_dropout_Q_pval = _dropout_spearman('multiclimate', *_Q_cols)

In [None]:
# Temperature results of the dropout analysis, one "corr pval" line each.
for _corr, _pval in (
    (standard_dropout_T_corr, standard_dropout_T_pval),
    (specific_dropout_T_corr, specific_dropout_T_pval),
    (nomemory_dropout_T_corr, nomemory_dropout_T_pval),
    (nowind_dropout_T_corr, nowind_dropout_T_pval),
    (noozone_dropout_T_corr, noozone_dropout_T_pval),
    (nocoszrs_dropout_T_corr, nocoszrs_dropout_T_pval),
    (mae_dropout_T_corr, mae_dropout_T_pval),
    (multiclimate_dropout_T_corr, multiclimate_dropout_T_pval),
):
    print(_corr, _pval)

In [None]:
# Moisture results of the dropout analysis, one "corr pval" line per
# configuration, in the same order as the temperature cell.
# 'specific' is computed alongside the others but was missing from this
# report; it is now printed second, matching the temperature listing.
print(standard_dropout_Q_corr, standard_dropout_Q_pval)
print(specific_dropout_Q_corr, specific_dropout_Q_pval)
print(nomemory_dropout_Q_corr, nomemory_dropout_Q_pval)
print(nowind_dropout_Q_corr, nowind_dropout_Q_pval)
print(noozone_dropout_Q_corr, noozone_dropout_Q_pval)
print(nocoszrs_dropout_Q_corr, nocoszrs_dropout_Q_pval)
print(mae_dropout_Q_corr, mae_dropout_Q_pval)
print(multiclimate_dropout_Q_corr, multiclimate_dropout_Q_pval)

# Residual analysis

In [None]:
from tensorflow import keras

In [None]:
# Coupling folders of the two configurations compared in this section.
standard_path = '/ocean/projects/atm200007p/jlin96/nnspreadtesting_good/standard/coupling_folder/'
mae_path = '/ocean/projects/atm200007p/jlin96/nnspreadtesting_good/mae/coupling_folder/'

standard_model_path = f'{standard_path}h5_models/'
mae_model_path = f'{mae_path}h5_models/'

# Normalisation files are read from the standard configuration's folder.
norm_path = f'{standard_path}norm_files/'

# Each vector gets a leading length-1 axis so it broadcasts across rows.
inp_sub = np.loadtxt(f'{norm_path}inp_sub.txt')[np.newaxis, :]
inp_div = np.loadtxt(f'{norm_path}inp_div.txt')[np.newaxis, :]
out_scale = np.loadtxt(f'{norm_path}out_scale.txt')[np.newaxis, :]

# Models 001..005 of each configuration, loaded without compiling.
standard_models = [
    keras.models.load_model(f'{standard_model_path}standard_model_{i:03d}.h5', compile = False)
    for i in range(1, 6)
]
mae_models = [
    keras.models.load_model(f'{mae_model_path}mae_model_{i:03d}.h5', compile = False)
    for i in range(1, 6)
]

In [None]:
data_path = "/ocean/projects/atm200007p/jlin96/longSPrun_clean/"
sp_data = load_data(month = 9, year = 1, data_path = data_path)

# Saved offline-evaluation arrays of the standard configuration.
_test_dir = 'standard/offline_evaluation/testing_data/'
sp_data_test_input = np.load(_test_dir + 'test_input.npy')
sp_data_test_target = np.load(_test_dir + 'test_target.npy')

# Split the targets along axis 1: the first 30 entries vs. everything after.
heating_true, moistening_true = sp_data_test_target[:,:30,:,:], sp_data_test_target[:,30:,:,:]
num_timesteps = 336

In [None]:
def reshape_input(nn_input, num_features = 175):
    """Flatten a 4-D array into a (num_features, num_samples) matrix.

    Axis 1 of `nn_input` is the feature axis. The other three axes are merged
    into one sample axis in Fortran order, i.e. for an input of shape
    (A, F, B, C) the element [a, f, b, c] ends up at [f, a + A*(b + B*c)].

    Parameters
    ----------
    nn_input : numpy.ndarray
        4-D array whose axis 1 has length `num_features`.
    num_features : int, default 175
        Expected length of the feature axis (formerly hard-coded).

    Returns
    -------
    numpy.ndarray of shape (num_features, num_samples). The shape is also
    printed.

    Raises
    ------
    ValueError
        If axis 1 of `nn_input` does not have length `num_features`. Without
        this check a mismatched but divisible size would be reshaped into a
        silently scrambled matrix.
    """
    if nn_input.shape[1] != num_features:
        raise ValueError(
            f'expected {num_features} features on axis 1, got {nn_input.shape[1]}')
    nn_input = nn_input.transpose(1,0,2,3)
    ans = nn_input.ravel(order = 'F').reshape(num_features,-1,order = 'F')
    print(ans.shape)
    return ans

def reverse_reshape(reshaped_arr, original_shape, num_outputs = 60):
    '''
    Undo the flattening done by reshape_input for an output matrix.

    reshaped_arr should be num_samples x features for this function to work properly,
    with features == num_outputs and num_samples ==
    original_shape[0] * original_shape[2] * original_shape[3].

    original_shape is the 4-D shape to restore; only its entries 0, 2 and 3
    are read, axis 1 of the result always has length num_outputs
    (default 60, formerly hard-coded).

    Returns an array of shape
    (original_shape[0], num_outputs, original_shape[2], original_shape[3]);
    the shape is also printed.
    '''
    arr = reshaped_arr.transpose().reshape(num_outputs, original_shape[0], original_shape[2], original_shape[3], order='F')
    ans = arr.transpose(1,0,2,3)
    print(ans.shape)
    return ans

# Samples-by-features view of the test inputs, shifted by inp_sub and divided by inp_div.
_flat_test_input = reshape_input(sp_data_test_input).T
reshaped_input = (_flat_test_input - inp_sub) / inp_div

In [None]:
def get_diffs(nn_model):
    """Return (heating_diff, moistening_diff): prediction minus target for one model.

    The model is evaluated on the module-level `reshaped_input`, its output is
    divided by `out_scale`, and five zero columns are inserted after column 30
    before the result is reshaped to the layout of `sp_data_test_target`.
    The two returned arrays are entries 0:30 and 30:60 along axis 1 of the
    difference.
    """
    scaled_back = nn_model.predict(reshaped_input)/out_scale

    # Columns 0..29, then a block of five zeros, then the remaining columns.
    zero_gap = np.zeros((scaled_back.shape[0], 5))
    padded = np.concatenate((scaled_back[:,0:30], zero_gap, scaled_back[:,30:]), axis = 1)

    residual = reverse_reshape(padded, sp_data_test_target.shape) - sp_data_test_target
    return residual[:,0:30,:,:], residual[:,30:60,:,:]

In [None]:
# Prediction-minus-target residuals of the first loaded MAE model.
heating_diff, moistening_diff = get_diffs(mae_models[0])

In [None]:
# Sanity check: display the shape of the heating residual array.
heating_diff.shape

In [None]:
# Histogram (100 bins) of the heating residuals at index 0 of axis 1;
# the trailing semicolon suppresses the returned-value repr.
plt.hist(heating_diff[:,0,:,:].flatten(), bins = 100);

In [None]:
# Draw 4000 residuals (without replacement) from index 0 of axis 1.
_level0_residuals = heating_diff[:,0,:,:].flatten()
sample = np.random.choice(_level0_residuals, 4000, replace=False)

# Probability plot of the sample against the Laplace distribution.
stats.probplot(sample, dist=stats.laplace, plot=plt)
plt.title('QQ Residuals Plot against Laplace distribution')

In [None]:
# Fresh random draw of 4000 residuals (without replacement) from index 0 of axis 1.
sample = np.random.choice(heating_diff[:,0,:,:].flatten(), 4000, replace=False)

# Create QQ plot against the normal distribution
stats.probplot(sample, dist=stats.norm, plot=plt)
plt.title('QQ Residuals Plot against Normal distribution')

In [None]:
# Reference case: 1000 synthetic draws from a Laplace(0, 1) distribution.
data = np.random.laplace(loc=0, scale=1, size=1000)

# QQ plot of the synthetic draws against the normal distribution.
# The plot previously used `sample` from the preceding cell, which left
# `data` unused and merely reproduced that cell's figure.
stats.probplot(data, dist=stats.norm, plot=plt)
plt.title('QQ Plot against normal distribution')

In [None]:
# QQ plot of every heating residual against the normal distribution.
# The title used to say "Laplace" while dist is stats.norm; it now names the
# distribution that is actually plotted.
# NOTE(review): if a Laplace comparison was intended, switch dist to
# stats.laplace and restore the old title instead.
stats.probplot(heating_diff.flatten(), dist=stats.norm, plot=plt)
plt.title('QQ Plot against normal distribution')