### Evaluate final U-Net models on CESM and FOCI slp data

In [1]:
import os
import sys
sys.path.append('../reconstruct_missing_data')

from pathlib import Path
from json import dump, load

import numpy as np
import xarray as xr
from matplotlib import pyplot as plt

from data_loading import (
    find_data_files, 
    load_data_set, 
    get_anomalies, 
    clone_data, 
    create_missing_mask, 
    split_and_scale_data,
    area_mean_weighted,
    spatial_mask,
)
from models import build_unet_4conv
from indices import (
    southern_annular_mode_zonal_mean,
    north_atlantic_oscillation_station,
    north_pacific,
)

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Lambda, concatenate, Conv1D, Conv2D, MaxPool2D, UpSampling2D, BatchNormalization, LeakyReLU
from tensorflow.keras.optimizers import Adam, SGD
import tensorflow.keras.initializers as tfi
import tensorflow.keras.regularizers as tfr
from tensorflow.keras.utils import plot_model

# Suppress Tensorflow warnings: restrict TensorFlow's (v1-compat) Python logger to
# messages of severity ERROR, so INFO and WARNING output is hidden.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [2]:
# Set working directory, according to working directory in scripts.
# Relative paths used later in this notebook (e.g. the paths to the final models)
# are resolved against this directory.
# NOTE: the absolute path is hard-coded and has to be adapted when running on another system.
os.chdir('/gxfs_work1/geomar/smomw511')

In [29]:
## Set paths to final models:

# Directory holding the results of all final experiments:
results_dir = 'GitGeomar/marco-landt-hayen/reconstruct_missing_data_results'

# Data source of the models to evaluate. 'CESM' is active here;
# set to 'FOCI' to evaluate the corresponding FOCI models instead.
model_source = 'CESM'

# Tags distinguishing the final experiments (order defines the model index used later on):
final_experiments = (
    'fixed_discrete_factor_1',
    'variable_discrete_factor_1',
    'variable_discrete_factor_2',
    'variable_discrete_factor_3',
    'optimal_discrete_factor_1',
)

# Assemble one path per final experiment:
paths_to_final_models = [
    f'{results_dir}/unet_4conv_slp_{model_source}_{experiment}_final'
    for experiment in final_experiments
]

In [45]:
# Reload the stored evaluation results of all final models into common arrays.
#
# The number of training and validation samples, lat and lon, and the number of missing value
# rates are inferred from the first experiment, since it has augmentation factor 1.
first_model_dir = Path(paths_to_final_models[0])

# Load each file only once and keep just its shape:
train_loss_per_sample_shape = np.load(first_model_dir / 'train_loss_per_sample_all.npy').shape
train_loss_map_shape = np.load(first_model_dir / 'train_loss_map_all.npy').shape

n_train = train_loss_per_sample_shape[-1]
n_val = np.load(first_model_dir / 'val_loss_per_sample_all.npy').shape[-1]
lat = train_loss_map_shape[1]
lon = train_loss_map_shape[2]
n_missing = train_loss_per_sample_shape[0]

# Initialize storage for parameters (one entry per model):
feature_all = []
feature_short_all = []
source_all = []
mask_type_all = []
missing_type_all = []
augmentation_factor_all = []
missing_values_all = []

# Initialize storage for reloaded results, for all models and missing rates.
# Dimensions: (model, missing value rate, sample) or (model, missing value rate, lat, lon).
n_models = len(paths_to_final_models)
train_loss_per_sample_all = np.zeros((n_models, n_missing, n_train))
val_loss_per_sample_all = np.zeros((n_models, n_missing, n_val))
train_loss_map_all = np.zeros((n_models, n_missing, lat, lon))
val_loss_map_all = np.zeros((n_models, n_missing, lat, lon))
SAM_train_pred_all = np.zeros((n_models, n_missing, n_train))
SAM_val_pred_all = np.zeros((n_models, n_missing, n_val))
SAM_train_target_all = np.zeros((n_models, n_missing, n_train))
SAM_val_target_all = np.zeros((n_models, n_missing, n_val))
NAO_train_pred_all = np.zeros((n_models, n_missing, n_train))
NAO_val_pred_all = np.zeros((n_models, n_missing, n_val))
NAO_train_target_all = np.zeros((n_models, n_missing, n_train))
NAO_val_target_all = np.zeros((n_models, n_missing, n_val))
NP_train_pred_all = np.zeros((n_models, n_missing, n_train))
NP_val_pred_all = np.zeros((n_models, n_missing, n_val))
NP_train_target_all = np.zeros((n_models, n_missing, n_train))
NP_val_target_all = np.zeros((n_models, n_missing, n_val))

# Per-sample results to reload, as (storage array, file name, split).
# The split selects whether n_train or n_val samples are kept.
per_sample_results = (
    (train_loss_per_sample_all, 'train_loss_per_sample_all.npy', 'train'),
    (val_loss_per_sample_all, 'val_loss_per_sample_all.npy', 'val'),
    (SAM_train_pred_all, 'SAM_train_pred_all.npy', 'train'),
    (SAM_val_pred_all, 'SAM_val_pred_all.npy', 'val'),
    (SAM_train_target_all, 'SAM_train_target_all.npy', 'train'),
    (SAM_val_target_all, 'SAM_val_target_all.npy', 'val'),
    (NAO_train_pred_all, 'NAO_train_pred_all.npy', 'train'),
    (NAO_val_pred_all, 'NAO_val_pred_all.npy', 'val'),
    (NAO_train_target_all, 'NAO_train_target_all.npy', 'train'),
    (NAO_val_target_all, 'NAO_val_target_all.npy', 'val'),
    (NP_train_pred_all, 'NP_train_pred_all.npy', 'train'),
    (NP_val_pred_all, 'NP_val_pred_all.npy', 'val'),
    (NP_train_target_all, 'NP_train_target_all.npy', 'train'),
    (NP_val_target_all, 'NP_val_target_all.npy', 'val'),
)

# Loop over final models:
for i, path_to_final_model in enumerate(paths_to_final_models):

    # Get directory of this final model:
    model_dir = Path(path_to_final_model)

    # Reload parameters for this experiment:
    with open(model_dir / 'parameters.json', 'r') as f:
        parameters = load(f)

    # Store parameters:
    feature_all.append(parameters['feature'])
    feature_short_all.append(parameters['feature_short'])
    source_all.append(parameters['source'])
    mask_type_all.append(parameters['mask_type'])
    missing_type_all.append(parameters['missing_type'])
    augmentation_factor_all.append(parameters['augmentation_factor'])
    missing_values_all.append(parameters['missing_values'])

    ## Reload results.
    ## Take augmentation factor into account: If data is used n times, only store every n-th sample.
    ## Like this, we have equal dimensions for all augmentation factors and include each target only once.

    # Get step size from augmentation factor:
    step = augmentation_factor_all[-1]

    # Get number of missing value rates of this model.
    # NOTE: this rebinds the global n_missing, so after the loop it holds the number of
    # missing value rates of the last model, not the one used to allocate the storage.
    n_missing = len(missing_values_all[-1])

    # Positions of the samples to keep (every step-th sample), built once per model:
    sample_index = {
        'train': np.arange(0, n_train * step, step),
        'val': np.arange(0, n_val * step, step),
    }

    ## Reload results and filter:
    # Only the first n_missing rows are filled. For a model with fewer missing value rates
    # than the first one (the optimal model), the remaining rows keep their initial zeros.
    # And take each sample only once.
    for storage, file_name, split in per_sample_results:
        storage[i, :n_missing] = np.load(model_dir / file_name)[:, sample_index[split]]

    # Loss maps have no sample dimension, hence are stored without subsampling:
    train_loss_map_all[i, :n_missing] = np.load(model_dir / 'train_loss_map_all.npy')
    val_loss_map_all[i, :n_missing] = np.load(model_dir / 'val_loss_map_all.npy')
