We want to use the tuned Sundqvist scheme (tuned to the QUBICC R2B5 data) as a baseline for the performance of the QUBICC-trained NNs on the NARVAL data
(shown in q1_cell_based_qubicc_r2b5/source_code/cross_validation_evaluate.ipynb)

So we want to plot the R2 values of the Sundqvist scheme on the NARVAL R2B5 data and compute their average (for both cloud volume and cloud area fraction).

In [1]:
import os
import sys
import numpy as np

# Add path with my_classes to sys.path
sys.path.insert(0, '/home/b/b309170/workspace_icon-ml/iconml_clc/')

import my_classes
from my_classes import simple_sundqvist_scheme

# Target used for the initial data load: 'clc' selects the cloud_cover output file,
# 'cl_area' selects the cloud_area output file.
output_var = 'clc'

2022-04-21 14:53:44.266619: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
path_data = '/home/b/b309170/my_work/icon-ml_data/cloud_cover_parameterization/grid_cell_based_QUBICC_R02B05/based_on_var_interpolated_data'

# Map the short target name to the prefix used in the output file names
if output_var == 'clc':
    full_output_var_name = 'cloud_cover'
elif output_var == 'cl_area':
    full_output_var_name = 'cloud_area'
else:
    # Fail right here with a clear message instead of a NameError on the next statement
    raise ValueError("output_var must be 'clc' or 'cl_area', got %r" % (output_var,))

narval_output_file = '%s_output_narval.npy'%full_output_var_name

# Build both file paths the same way
input_data = np.load(os.path.join(path_data, 'cloud_cover_input_narval.npy'))
output_data = np.load(os.path.join(path_data, narval_output_file))

In [3]:
# Names of the input features.
# NOTE(review): presumably listed in the column order of input_data
# (qv -> column 0, temp -> 3, pres -> 4, fr_land -> 9) -- confirm against the preprocessing.
input_variables = np.array(['qv', 'qc', 'qi', 'temp', 'pres', 'u', 'v', 'zg', 'coriolis', 'fr_land'])

In [4]:
# Number of samples = length of the first axis of the input array
samples_total = len(input_data)

In [5]:
# Resolve column positions from the feature names instead of hard-coding them,
# so the extraction stays in sync with input_variables
feature_columns = {name: idx for idx, name in enumerate(input_variables)}

qv = input_data[:, feature_columns['qv']]
temp = input_data[:, feature_columns['temp']]
pres = input_data[:, feature_columns['pres']]
fr_land = input_data[:, feature_columns['fr_land']]

# ps is not a column of input_data; it is stored in its own file
ps = np.load(os.path.join(path_data, 'ps_input_narval.npy'))

In [6]:
# Quick sanity check: print the mean of temp, fr_land and ps (in that order)
for field in (temp, fr_land, ps):
    print(np.mean(field))

259.62268
0.31190652
100036.94075209455


In [7]:
# Slow cell: takes about 1400s, so close to 30 minutes.
# The scheme is evaluated one sample at a time; its entries will be in [0, 1],
# and we want the output to be in the [0, 100] range, hence the factor of 100.
sundqvist = 100*np.array([
    simple_sundqvist_scheme(qv[i], temp[i], pres[i], fr_land[i], ps=ps[i], tuned='manually_r2b5')
    for i in range(samples_total)
])

In [8]:
# Per-sample vertical layer values; used below to group samples by layer (v_layers == k).
# Path is built with os.path.join, consistent with the output-data loading.
v_layers = np.load(os.path.join(path_data, 'samples_vertical_layers_narval.npy'))

In [9]:
# Load the cloud_cover target explicitly, independent of the value of output_var.
# This rebinds output_data, so the cells that follow evaluate against cloud_cover.
full_output_var_name = 'cloud_cover'
narval_output_file = '%s_output_narval.npy'%full_output_var_name
output_data = np.load(os.path.join(path_data, narval_output_file))

In [10]:
# R2 of the Sundqvist scheme per vertical layer (k = 5, ..., 31) against the
# currently loaded output_data. Entry j of r2_values_clc belongs to layer k = 5 + j.
sundqvist_pred = np.asarray(sundqvist)  # no per-iteration copy
r2_values_clc = []
for k in range(5, 32):
    # Select the samples of this layer once and reuse the mask
    layer_mask = v_layers == k
    truth = output_data[layer_mask]
    pred = sundqvist_pred[layer_mask]
    # Accumulate both statistics in float64; np.var otherwise uses the input's own precision
    var = np.var(truth, dtype=np.float64)
    mse = np.mean((pred - truth)**2, dtype=np.float64)
    r2_value = 1 - mse/var
    r2_values_clc.append(r2_value)

In [11]:
# Switch the target to cloud_area.
# This rebinds output_data, so the cells that follow evaluate against cloud_area.
full_output_var_name = 'cloud_area'
narval_output_file = '%s_output_narval.npy'%full_output_var_name
output_data = np.load(os.path.join(path_data, narval_output_file))

In [12]:
# R2 of the Sundqvist scheme per vertical layer (k = 5, ..., 31) against the
# currently loaded output_data. Entry j of r2_values_cl_area belongs to layer k = 5 + j.
sundqvist_pred = np.asarray(sundqvist)  # no per-iteration copy
r2_values_cl_area = []
for k in range(5, 32):
    # Select the samples of this layer once and reuse the mask
    layer_mask = v_layers == k
    truth = output_data[layer_mask]
    pred = sundqvist_pred[layer_mask]
    # Accumulate both statistics in float64; np.var otherwise uses the input's own precision
    var = np.var(truth, dtype=np.float64)
    mse = np.mean((pred - truth)**2, dtype=np.float64)
    r2_value = 1 - mse/var
    r2_values_cl_area.append(r2_value)

In [21]:
# Mean R2 over layers 7..31: entry j belongs to layer k = 5 + j, so [2:] skips layers 5 and 6
np.mean(r2_values_clc[2:])

-1.598695366870946

In [22]:
# Mean R2 over layers 7..31: entry j belongs to layer k = 5 + j, so [2:] skips layers 5 and 6
np.mean(r2_values_cl_area[2:])

-0.8400229853675434

In [19]:
# Mean R2 over layers 7..30: [2:-1] skips layers 5 and 6 and also drops the last layer (k = 31)
np.mean(r2_values_clc[2:-1])

-0.5420558510265896

In [20]:
# Mean R2 over layers 7..30: [2:-1] skips layers 5 and 6 and also drops the last layer (k = 31)
np.mean(r2_values_cl_area[2:-1])

0.2923658213192281