### Evaluate the cross-validation models

This notebook throws the following error when it is run inside a Slurm job:

*QStandardPaths: XDG_RUNTIME_DIR not set, defaulting to '/tmp/runtime-b309170'
qt.qpa.screen: QXcbConnection: Could not connect to display mlogin103:31.0
Could not connect to any X display.*

-> The error is raised inside `save_figure`, i.e. when the figures are created.

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
import gc

#Import sklearn before tensorflow (static Thread-local storage)
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K
from tensorflow.errors import ResourceExhaustedError
import tensorflow as tf

# Add path with my_classes to sys.path
path = '/pf/b/b309170'
sys.path.insert(0, path + '/workspace_icon-ml/cloud_cover_parameterization/')

import my_classes
from my_classes import write_infofile

In [3]:
# Make only the first GPU that TensorFlow detects visible to this process
physical_devices = tf.config.list_physical_devices(device_type='GPU')
tf.config.set_visible_devices(devices=physical_devices[0], device_type='GPU')

In [4]:
# Won't run on a CPU node
try:
    # Prevents crashes of the code
    physical_devices = tf.config.list_physical_devices('GPU')
    tf.config.set_visible_devices(physical_devices[0], 'GPU')
    # Allow the growth of memory Tensorflow allocates (limits memory usage overall)
    # (The loop used to iterate over an undefined name, so memory growth was never actually enabled)
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, True)
except (IndexError, RuntimeError, ValueError) as err:
    # IndexError: no GPU was found (CPU node)
    # RuntimeError/ValueError: raised by TensorFlow, e.g. if the runtime has already been initialized
    print('GPU configuration skipped: %s' % err)

In [5]:
# Cloud Cover or Cloud Area?
output_var = 'clc' # Set output_var to one of {'clc', 'cl_area'}
# QUBICC only or QUBICC+NARVAL training data?
qubicc_only = True
# Do we evaluate a model trained on all data?
all_data_model = False

# Root folders of the code/model directory and of the preprocessed data
path_base = os.path.join(path, 'workspace_icon-ml/cloud_cover_parameterization/grid_column_based_QUBICC_R02B05')
path_data = os.path.join(path, 'my_work/icon-ml_data/cloud_cover_parameterization/grid_column_based_QUBICC_R02B05/based_on_var_interpolated_data')

# Long name of the output variable; it is part of the folder and file names below
if output_var == 'clc':
    full_output_var_name = 'cloud_cover'
elif output_var == 'cl_area':
    full_output_var_name = 'cloud_area'
else:
    # Fail right here instead of with a NameError further down
    raise ValueError("output_var must be one of {'clc', 'cl_area'}, got %r" % output_var)

# The models and figures live in a folder that depends on the training data that was used
if qubicc_only:
    output_folder = '%s_R2B5_QUBICC'%full_output_var_name
else:
    output_folder = '%s_R2B5_QUBICC+NARVAL'%full_output_var_name
path_model = os.path.join(path_base, 'saved_models', output_folder)
path_figures = os.path.join(path_base, 'figures', output_folder)
narval_output_file = '%s_output_narval.npy'%full_output_var_name
qubicc_output_file = '%s_output_qubicc.npy'%full_output_var_name

#### Load models

In [6]:
# File names of the three cross-validation models (one per fold)
fold_1, fold_2, fold_3 = ('cross_validation_column_based_fold_%d.h5'%k for k in (1, 2, 3))

# Load the saved models from path_model in the order fold 1, fold 2, fold 3
model_fold_1, model_fold_2, model_fold_3 = (
    load_model(os.path.join(path_model, fold_file)) for fold_file in (fold_1, fold_2, fold_3)
)

#### Load data

In [7]:
# Stack the NARVAL samples on top of the (transposed) QUBICC samples along the sample axis.
# The loaded arrays are not bound to names, so only the concatenated arrays stay in memory.
input_data = np.concatenate(
    [np.load(path_data + '/cloud_cover_input_narval.npy'),
     np.load(path_data + '/cloud_cover_input_qubicc.npy').T],
    axis=0,
)
output_data = np.concatenate(
    [np.load(os.path.join(path_data, narval_output_file)),
     np.load(os.path.join(path_data, qubicc_output_file)).T],
    axis=0,
)

In [8]:
# Number of NARVAL samples (= number of leading rows of input_data/output_data that stem from NARVAL).
# Memory-mapping the file gives us its shape without reading the whole array a second time.
samples_narval = np.load(os.path.join(path_data, narval_output_file), mmap_mode='r').shape[0]

In [9]:
# Rows of input_data are samples, columns are features
samples_total, no_of_features = input_data.shape

#### Remove columns that were constant in at least one of the training folds

In [10]:
# Indices of the input columns that have to be dropped
remove_fields = [*range(27, 33), *range(135, 138)]
# The indices above refer to the full set of 163 input features
assert no_of_features == 163
input_data = np.delete(input_data, remove_fields, axis=1)
no_of_features -= len(remove_fields)

#### Define cross-validation folds to recreate training and validation data

In [11]:
def set_training_validation_folds(samples_total, samples_narval):
    '''
        Create the sample indices of the three cross-validation folds.

        The samples with indices in [samples_narval, samples_total) are divided into six consecutive
        blocks of equal length. Fold i validates on blocks i and i+3 and trains on all remaining
        indices of [samples_narval, samples_total). Samples before samples_narval are in no fold.

        samples_total: Total number of samples
        samples_narval: Number of leading samples that are skipped
        Returns: (training_folds, validation_folds), two lists holding three index arrays each
    '''
    training_folds = []
    validation_folds = []
    two_week_incr = (samples_total-samples_narval)//6

    for i in range(3):
        # Note that this is a temporal split since time was the first dimension in the original tensor
        first_start = samples_narval + two_week_incr*i
        first_stop = samples_narval + two_week_incr*(i+1)
        second_start = samples_narval + two_week_incr*(i+3)
        second_stop = samples_narval + two_week_incr*(i+4)

        first_incr = np.arange(first_start, first_stop)
        second_incr = np.arange(second_start, second_stop)

        print(second_incr)

        validation_folds.append(np.append(first_incr, second_incr))
        # The training indices are everything around the two validation blocks. Building them from
        # ranges yields the same sorted array as np.setdiff1d, but without sorting the full index array.
        training_folds.append(np.concatenate((np.arange(samples_narval, first_start),
                                              np.arange(first_stop, second_start),
                                              np.arange(second_stop, samples_total))))

    return training_folds, validation_folds

# We have to skip the NARVAL data if we do qubicc_only; otherwise the folds start at the first sample
training_folds, validation_folds = set_training_validation_folds(
    samples_total, samples_narval if qubicc_only else 0)

[ 91933934  91933935  91933936 ... 120025759 120025760 120025761]
[120025762 120025763 120025764 ... 148117587 148117588 148117589]
[148117590 148117591 148117592 ... 176209415 176209416 176209417]


#### The data will need to be scaled according to the training folds

In [12]:
# One scaler object that is re-fitted on the training fold of every model further below
scaler = StandardScaler()

#### Useful functions to plot results

In [13]:
def mean_clc_per_vertical_layer(model, input_data, output_data, batch_size=2**20):
    '''
        Model prediction and the Ground Truth

        Computes the mean of the ground truth and the mean of the model predictions (limited to
        the range [0, 100]) for each of the 27 output columns (vertical layers).

        model: Model providing predict_on_batch
        input_data: Scaled input samples, one sample per row
        output_data: Ground truth belonging to input_data
        batch_size: Number of samples that are passed to predict_on_batch at once
        Returns: (list of mean predictions per layer, list of mean ground truth values per layer)
        Raises: ValueError if input_data contains no samples

        Note: All samples are evaluated now. Previously the trailing samples that did not fill a whole
        batch were left out of the mean prediction (and inputs smaller than one batch caused an error).
    '''
    no_samples = input_data.shape[0]
    if no_samples == 0:
        raise ValueError('input_data must contain at least one sample')

    # output_var means of the data
    clc_data_mean = [np.mean(output_data[:, i], dtype=np.float64) for i in range(27)]

    # Predicted output_var means
    # Curiously it works best if we use predict_on_batch on small subsets of the data instead of predict(..., batch_size=...)
    # (predict with a large batch size is faster in principle, but it can run into oom problems)
    batch_predictions = []
    for start in range(0, no_samples, batch_size):
        batch_predictions.append(model.predict_on_batch(input_data[start:start+batch_size]))
        K.clear_session()
        gc.collect()

    # Concatenate only once instead of growing the array batch by batch
    predictions = np.concatenate(batch_predictions, axis=0)
    pred_adj = np.minimum(np.maximum(predictions, 0), 100)

    return list(np.mean(pred_adj, axis=0, dtype=np.float64)), clc_data_mean

In [14]:
def save_figure(fig_name, fig_title, model_predictions, valid_means=None, all_data_model=False):
    '''
        Plot the mean prediction per vertical layer for every model and save the figure as a pdf
        in path_figures.

        Note that this figure truly is a different performance measure than the validation error.
        The reason is that the mean can in principle be good even when the model is really bad.

        fig_name: Name of the pdf-file (without the file extension)
        fig_title: Title of the plot
        model_predictions: Array of length 3 or 4, covers predictions from all three folds for a given TL setup
        valid_means: Array of length 3 or 4, covers validation means from all three folds for a given TL setup.
                     If None, only the model predictions are drawn.
        all_data_model: Whether a fourth model that was trained on all data is included

        NOTE(review): The note at the top of this notebook reports a 'Could not connect to any X display'
        error from this function inside slurm jobs -- presumably a non-interactive matplotlib backend
        has to be selected there; confirm.
    '''
    # Vertical layers
    layers = np.linspace(5, 31, 27)
    fig = plt.figure(figsize=(11,7))
    # For model
    ax = fig.add_subplot(111, xlabel='Mean %s'%output_var, ylabel='Vertical layer', title=fig_title)

    prediction_labels = ['Model Fold 1 Predictions', 'Model Fold 2 Predictions', 'Model Fold 3 Predictions']
    truth_labels = ['Fold 1 Truth', 'Fold 2 Truth', 'Fold 3 Truth']
    if all_data_model:
        prediction_labels.append('Model All Data Predictions')
        truth_labels.append('Truth')

    has_truth = valid_means is not None
    # If the three folds share the same ground truth, it is drawn only once
    shared_truth = has_truth and valid_means[0] == valid_means[1] == valid_means[2]

    if shared_truth:
        for prediction in model_predictions:
            ax.plot(prediction, layers)
        ax.plot(valid_means[0], layers, 'black')
        legend_labels = prediction_labels + ['Truth']
    else:
        # One color per model; the fourth one is needed if the all-data model is included
        colors = ['g', 'b', 'r', 'k']
        for i, prediction in enumerate(model_predictions):
            ax.plot(prediction, layers, colors[i])
            if has_truth:
                # Ground truth in the same color as the corresponding model, but dashed
                ax.plot(valid_means[i], layers, '%s--'%colors[i])
        if has_truth:
            # The lines were added as prediction/truth pairs, so the labels have to alternate as well
            legend_labels = [label for pair in zip(prediction_labels, truth_labels) for label in pair]
        else:
            legend_labels = prediction_labels

    # The uppermost vertical layer has the smallest index
    ax.invert_yaxis()
    ax.legend(legend_labels)

    fig.savefig(os.path.join(path_figures, fig_name+'.pdf'))

#### Evaluate the models on the data

Add training and validation losses to the text files. <br>
Print results per vertical layer (respective validation set/NARVAL/QUBICC)

In [14]:
# Filled with one entry per fold: the losses and, for every data set, the mean ground truth
# and the mean model prediction per vertical layer
train_losses, valid_losses = [], []
valid_means, valid_model_predictions = [], []
narval_means, narval_model_predictions = [], []
qubicc_means, qubicc_model_predictions = [], []
qubicc_month_0, qubicc_model_pred_month_0 = [], []
qubicc_month_1, qubicc_model_pred_month_1 = [], []
qubicc_month_2, qubicc_model_pred_month_2 = [], []


def _layer_means_with_fallback(model, inputs, outputs, oom_message):
    '''
        Calls mean_clc_per_vertical_layer with its default batch size. If that exhausts the
        resources, oom_message is printed and the call is repeated once with batch_size=2**15.
    '''
    try:
        return mean_clc_per_vertical_layer(model, inputs, outputs)
    except(ResourceExhaustedError):
        print(oom_message)
        return mean_clc_per_vertical_layer(model, inputs, outputs, batch_size=2**15)


# The QUBICC samples are split into three chunks of equal size (one per month)
qubicc_month = (samples_total - samples_narval)//3

for i in range(3):
    filename = 'cross_validation_column_based_fold_%d'%(i+1)
    # Choose appropriate model for this fold
    model = (model_fold_1, model_fold_2, model_fold_3)[i]

    # Standardize according to the fold
    scaler.fit(input_data[training_folds[i]])

    # Load the data for the respective fold
    input_train = scaler.transform(input_data[training_folds[i]])
    input_valid = scaler.transform(input_data[validation_folds[i]])
    output_train = output_data[training_folds[i]]
    output_valid = output_data[validation_folds[i]]

    ## Training and validation losses
    train_loss = model.evaluate(input_train, output_train, verbose=2, batch_size=10**5)
    valid_loss = model.evaluate(input_valid, output_valid, verbose=2, batch_size=10**5)

    # Clear up some memory
    del input_train, output_train
    gc.collect()

    train_losses.append(train_loss)
    valid_losses.append(valid_loss)

    # Append both losses to the text file that accompanies the model
    with open(os.path.join(path_model, filename+'.txt'), 'a') as file:
        file.write('Unbounded training loss: %.4f\n'%(train_loss)
                   + 'Unbounded validation loss: %.4f\n'%(valid_loss))

    ## Compute mean cloud cover per vertical layer
    # On the respective validation sets (QUBICC and NARVAL)
    clc_pred_mean, clc_data_mean = _layer_means_with_fallback(model, input_valid, output_valid,
                                                              'Resource Exhausted Qubicc')
    valid_means.append(clc_data_mean)
    valid_model_predictions.append(clc_pred_mean)

    # Clear up some memory
    del input_valid, output_valid
    gc.collect()

    # For NARVAL
    input_narval = scaler.transform(input_data[:samples_narval])
    output_narval = output_data[:samples_narval]
    clc_pred_mean, clc_data_mean = _layer_means_with_fallback(model, input_narval, output_narval,
                                                              'Resource Exhausted Narval')
    narval_means.append(clc_data_mean)
    narval_model_predictions.append(clc_pred_mean)

    # Clear up some memory
    del input_narval, output_narval
    gc.collect()

    # For QUBICC
    input_qubicc = scaler.transform(input_data[samples_narval:])
    output_qubicc = output_data[samples_narval:]
    clc_pred_mean, clc_data_mean = _layer_means_with_fallback(model, input_qubicc, output_qubicc,
                                                              'Resource Exhausted Qubicc')
    qubicc_means.append(clc_data_mean)
    qubicc_model_predictions.append(clc_pred_mean)

    # Clear up some memory
    del input_qubicc, output_qubicc
    gc.collect()

    # QUBICC months
    for month in range(3):
        first_ind = samples_narval + month*qubicc_month
        last_ind = samples_narval + (month+1)*qubicc_month
        input_qubicc = scaler.transform(input_data[first_ind:last_ind])
        output_qubicc = output_data[first_ind:last_ind]
        clc_pred_mean, clc_data_mean = _layer_means_with_fallback(model, input_qubicc, output_qubicc,
                                                                  'Resource Exhausted Qubicc')
        # Store the result in the lists of the respective month
        if month == 0:
            qubicc_month_0.append(clc_data_mean)
            qubicc_model_pred_month_0.append(clc_pred_mean)
        elif month == 1:
            qubicc_month_1.append(clc_data_mean)
            qubicc_model_pred_month_1.append(clc_pred_mean)
        else:
            qubicc_month_2.append(clc_data_mean)
            qubicc_model_pred_month_2.append(clc_pred_mean)

    # Clear up some memory
    del input_qubicc, output_qubicc
    gc.collect()

In [15]:
# Plot results: one figure per data set, each showing all models
# Qubicc months (I checked below that the order is hc2, then hc3, then hc4.)
for fig_name, fig_title, predictions, means in (
    ('cross_validation_validation_means', 'Column-based models on the respective validation sets',
     valid_model_predictions, valid_means),
    ('cross_validation_narval', 'Column-based models on the NARVAL data',
     narval_model_predictions, narval_means),
    ('cross_validation_qubicc', 'Column-based models on the QUBICC data',
     qubicc_model_predictions, qubicc_means),
    ('cross_validation_qubicc_hc2', 'Column-based models on the QUBICC data, November 2004',
     qubicc_model_pred_month_0, qubicc_month_0),
    ('cross_validation_qubicc_hc3', 'Column-based models on the QUBICC data, April 2005',
     qubicc_model_pred_month_1, qubicc_month_1),
    ('cross_validation_qubicc_hc4', 'Column-based models on the QUBICC data, November 2005',
     qubicc_model_pred_month_2, qubicc_month_2),
):
    save_figure(fig_name, fig_title, predictions, means, all_data_model)

In [16]:
# In case we want to reproduce the plots without running everything again:
# Every section consists of a header, followed by the data means and the model predictions
figure_values = (
    ('On validation sets\n', valid_means, valid_model_predictions),
    ('\n\nNARVAL data\n', narval_means, narval_model_predictions),
    ('\n\nQubicc data\n', qubicc_means, qubicc_model_predictions),
    ('\n\nQubicc data, November 2004\n', qubicc_month_0, qubicc_model_pred_month_0),
    ('\n\nQubicc data, April 2005\n', qubicc_month_1, qubicc_model_pred_month_1),
    ('\n\nQubicc data, November 2005\n', qubicc_month_2, qubicc_model_pred_month_2),
)
with open(os.path.join(path_figures, 'values_for_figures.txt'), 'w') as file:
    for header, means, predictions in figure_values:
        file.write(header + str(means) + str(predictions))

In [17]:
# The QUBICC data is loaded in the order that I would expect (hc2, then hc3, then hc4)
# NOTE(review): `xr` is not imported in the visible cells -- presumably `import xarray as xr`; confirm.

# Use a dedicated name for this folder: overwriting the global `path` would make the import and
# configuration cells at the top resolve the wrong directories if they are run again afterwards
path_qubicc = '/pf/b/b309170/my_work/QUBICC/data_var_vertinterp_R02B05/'
resolution = 'R02B05'

# Order of experiments
DS = xr.open_mfdataset(path_qubicc+'hus/*'+resolution+'.nc', combine='by_coords')
# Print the time stamps at the start of each third of the time axis
no_timesteps = len(DS.time)
for k in range(3):
    print(DS.time[k*no_timesteps//3])

#### Compute bounded losses

We also save the scaling parameters for the fold-based models as we haven't done that yet.

In [18]:
# Takes long!
def compute_bounded_loss(model, input_data, output_data, batch_size=2**20):
    '''
        Mean squared error of the model after its predictions were limited to the range [0, 100]

        model: Model providing predict_on_batch
        input_data: Scaled input samples, one sample per row
        output_data: Ground truth belonging to input_data
        batch_size: Number of samples that are passed to predict_on_batch at once
        Returns: The mean squared error over all entries (accumulated in float64)
        Raises: ValueError if input_data contains no samples
    '''
    no_samples = input_data.shape[0]
    if no_samples == 0:
        raise ValueError('input_data must contain at least one sample')

    # Stepping through the start indices never creates an empty batch
    # (which used to happen whenever the number of samples was a multiple of batch_size)
    batch_predictions = []
    for start in range(0, no_samples, batch_size):
        batch_predictions.append(model.predict_on_batch(input_data[start:start+batch_size]))
        K.clear_session()
        gc.collect()

    # Concatenate only once instead of growing the array batch by batch
    predictions = np.concatenate(batch_predictions, axis=0)
    pred_adj = np.minimum(np.maximum(predictions, 0), 100)

    # Mean Squared Error
    return np.mean((pred_adj - output_data)**2, dtype=np.float64)

In [None]:
# Only used by the (commented out) code that writes the scaling parameters and the info-file
seed = 10

for i in range(3):
    filename = 'cross_validation_column_based_fold_%d'%(i+1)
    # Choose appropriate model for this fold
    model = (model_fold_1, model_fold_2, model_fold_3)[i]

    # Standardize according to the fold
    scaler.fit(input_data[training_folds[i]])

#     # We save the scaling parameters in a file [only once]
#     seed_i = int(str(seed) + str(i))
#     with open(path_model+'/scaler_%d.txt'%seed_i, 'a') as file:
#         file.write('Standard Scaler mean values:\n')
#         file.write(str(scaler.mean_))
#         file.write('\nStandard Scaler standard deviation:\n')
#         file.write(str(np.sqrt(scaler.var_)))

#     # Define remove_fields
#     remove_fields = [27, 28, 29, 30, 31, 32, 135, 136, 137]

#     # Taken from preprocessing_narval
#     input_variables = []
#     variables = ['qv', 'qc', 'qi', 'temp', 'pres', 'zg']
#     for el in variables:
#         for i in range(21, 48):
#             input_variables.append(el+'_%d'%i)
#     input_variables.append('fr_land')

#     in_and_out_variables = input_variables.copy()
#     variables = [output_var]
#     for el in variables:
#         for i in range(21, 48):
#             in_and_out_variables.append(el+'_%d'%i)

#     in_and_out_variables = np.delete(in_and_out_variables, remove_fields)
#     input_variables = np.delete(input_variables, remove_fields)

#     # Write the accompanying info-file [only once]
#     with open(os.path.join(path_model, filename + '.txt'), 'a') as file:
#         write_infofile(file, str(in_and_out_variables), str(input_variables), path_model, path_data, seed_i)

    # Progress indicator: index of the fold that is being processed
    print(i)

    # Load the data for the respective fold
    input_train = scaler.transform(input_data[training_folds[i]])
    input_valid = scaler.transform(input_data[validation_folds[i]])
    output_train = output_data[training_folds[i]]
    output_valid = output_data[validation_folds[i]]

    # Losses of the model with its predictions limited to [0, 100]
    train_loss = compute_bounded_loss(model, input_train, output_train, batch_size=2**17)
    valid_loss = compute_bounded_loss(model, input_valid, output_valid, batch_size=2**17)

    # Append both losses to the text file that accompanies the model
    with open(os.path.join(path_model, filename+'.txt'), 'a') as file:
        file.write('Bounded training loss: %.4f\n'%(train_loss)
                   + 'Bounded validation loss: %.4f\n'%(valid_loss))

0
