In this notebook I diagnose cloud presence in the DYAMOND output: an entry (time, height, cell) is marked as cloudy whenever

qi + qc > 1e-6

Code is okay so far.

In [1]:
# Needs 480GB
import os
import gc
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Define all paths. os.listdir() and xarray do not expand '~' themselves,
# so the home directory is resolved explicitly.
path = os.path.expanduser('~/bd1179_work/DYAMOND')
path_clc = os.path.expanduser('~/bd1179_work/DYAMOND/clc_data')
path_content = os.listdir(path)

# An entry counts as cloudy whenever qc + qi exceeds this value
threshold = 1e-6

# Number of trailing entries of the height axis to keep (original note: up to a height of 21km)
n_levels = 60

# Iterate over qc files. Do not process coarse-grained qc and qi files.
files = [name for name in path_content if 'tot_qc_dia' in name and 'Z.nc' in name]

# Which file to load
for file in files:
    print(file)

    # Do not overwrite: skip the file if its time stamp already shows up in path_clc.
    # The directory is listed once per file instead of once per directory entry.
    time_stamp = file.split('_')[-1]
    if any(time_stamp in name for name in os.listdir(path_clc)):
        continue

    # The qi file shares the name of the qc file except for the variable tag
    input_file_qc = os.path.join(path, file)
    input_file_qi = os.path.join(path, file.replace('tot_qc_dia', 'tot_qi_dia'))

    # Skip (instead of aborting the whole loop) if the companion qi file is missing
    if not os.path.exists(input_file_qi):
        print('Missing qi file: %s' % input_file_qi)
        continue

    # The context manager closes both files before the originals are removed below
    with xr.open_dataset(input_file_qc) as ds_qc, xr.open_dataset(input_file_qi) as ds_qi:
        # Slice lazily BEFORE loading, so the discarded levels are never read into memory.
        # (.values[:, -60:] would load everything and keep the full array alive via the view.)
        da_qc = ds_qc['param212.1.0'][:, -n_levels:]
        da_qi = ds_qi['param213.1.0'][:, -n_levels:]

        (TIME, VERT, HOR) = da_qc.shape

        # Skip problematic files for now. Shapes are metadata, so nothing is loaded yet.
        if da_qi.shape != (TIME, VERT, HOR):
            print('ds_qc shape: %s'%str((TIME, VERT, HOR)))
            print('ds_qi shape: %s'%str(da_qi.shape))
            continue

        # Extract coordinates; load them so they stay usable after the files are closed
        time_coo = ds_qi['time'].load()
        height_coo = ds_qi['height'][-n_levels:].load()

        # Original note: holding both fields up to 21km requires about 322 Gigabytes
        qc = da_qc.values
        qi = da_qi.values

    # Allocate the output directly as int8 (bool results are stored as 0/1),
    # which avoids a second full-size copy from a later bool -> int8 conversion.
    clc_out = np.empty((TIME, VERT, HOR), dtype=np.int8)

    # Loop over temporal dimension so that qc + qi doesn't have to be computed at once
    # (otherwise we run into another OOM error)
    for t in range(TIME):
        clc_out[t] = (qc[t] + qi[t] > threshold)

    # Clean memory
    del qc, qi
    gc.collect()

    clc_new_da = xr.DataArray(clc_out, coords={'time': time_coo, 'height': height_coo},
                              dims=['time', 'height', 'cells'], name='clc')

    # Save it in a new file.
    # NOTE(review): the file is written into `path`, while the overwrite check above looks
    # in `path_clc` -- presumably the outputs are moved there afterwards; confirm.
    output_file = os.path.join(path, file.replace('tot_qc_dia', 'clc'))
    clc_new_da.to_netcdf(output_file)

    # Remove original qi and qc files. This is only reached if to_netcdf did not raise.
    os.remove(input_file_qc)
    os.remove(input_file_qi)

    # Clean memory
    del clc_new_da, clc_out
    gc.collect()

nwp_R2B10_lkm1007_atm_3d_tot_qc_dia_ml_20160831T000000Z.nc


### TESTING
As a first test, I compared the means and maxima with those of the linearly interpolated data to see if they are close. <br>
As a second test, I compare the vertically interpolated cloud cover at an arbitrary data point:

In [None]:
# input_file = xr.open_dataset('clc_dei4_NARVALI_2013122600_cloud_DOM01_ML_0023.nc')
# output_file = xr.open_dataset('int_var_clc_dei4_NARVALI_2013122600_cloud_DOM01_ML_0023.nc')

# input_clc = input_file.clc.values
# output_clc = output_file.clc.values

# input_clc_mean = np.mean(input_clc[0], axis=1)
# output_clc_mean = np.mean(output_clc[0], axis=1)

# input_clc_max = np.max(input_clc[0], axis=1)
# output_clc_max = np.max(output_clc[0], axis=1)

In [None]:
# fig = plt.figure(figsize=(11,3))

# ax_1 = fig.add_subplot(121)
# ax_1.plot(input_clc_mean, np.arange(len(input_clc_mean)))

# ax_2 = fig.add_subplot(122)
# ax_2.plot(output_clc_mean, np.arange(len(output_clc_mean)))

In [None]:
# fig = plt.figure(figsize=(11,3))

# ax_1 = fig.add_subplot(121)
# ax_1.plot(input_clc_max, np.arange(len(input_clc_max)))

# ax_2 = fig.add_subplot(122)
# ax_2.plot(output_clc_max, np.arange(len(output_clc_max)))

In [None]:
# m = 3000000

# fig = plt.figure(figsize=(11,3))

# ax_1 = fig.add_subplot(121)
# ax_1.plot(input_clc[0][:, m], np.arange(len(input_clc_max)))

# ax_2 = fig.add_subplot(122)
# ax_2.plot(output_clc[0][:, m], np.arange(len(output_clc_max)))

In [None]:
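# # NOTE(review): zh_lr, zh_hr and clc are not defined in any cell shown in this notebook;
# # this disabled check presumably belongs to a vertical-interpolation workflow -- confirm before re-enabling.
# # Arbitrary data point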
# k = 20
# l = 2071456

# z_u = zh_lr[k, l] #3647.9666
# z_l = zh_lr[k+1, l] #3069.0796
# np.where(zh_hr[:, l] <= z_u) #51 and above
# np.where(zh_hr[:, l] >= z_l) #52 and below
# # clc[0, 50, l] #83.6644
# # clc[0, 51, l] #78.92006
# # clc[0, 52, l] #74.58538

In [None]:
# clc_out[0,k,l] == np.max([clc[0, 50, l], clc[0, 51, l], clc[0, 52, l]])