## Compare effective droplet radii

In [None]:
import os
import netCDF4 as nc
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats
import scipy.optimize

Define the paths to the input data. The Cloudnet data must be downloaded from PANGAEA first!

In [None]:
# Locations of the input data below the user's home directory.
# expanduser() is used instead of os.getenv('HOME') so that an unset HOME
# variable does not end in a TypeError (None + str).
home_dir = os.path.expanduser('~')
# netCDF file with the TCWret retrieval results
fname_tcwret = os.path.join(home_dir, 'code_richter_et_al', 'data_TCWret', 'mixed_ice_shape.nc')
# Directory that holds the downloaded Cloudnet files
path_cnet = os.path.join(home_dir, 'code_richter_et_al', 'get_cloudnet')

Read the TCWret data and store them in a pandas DataFrame.

In [None]:
# Load the TCWret retrieval results into memory; the file is closed again
# as soon as the with-block is left.
with nc.Dataset(fname_tcwret) as f:
    seconds = f.variables['time_of_measurement'][:]
    r_ice = f.variables['ice_water_effective_droplet_radius'][:]
    r_liq = f.variables['liquid_water_effective_droplet_radius'][:]
    r_liq_err = f.variables['liquid_water_effective_droplet_radius_error'][:]
    red_chi_2 = f.variables['reduced_chi_2'][:]
    # Total cloud optical depth = liquid + ice contribution
    t_cw = f.variables['liquid_water_optical_depth'][:] + f.variables['ice_water_optical_depth'][:]
    dof = f.variables['degrees_of_freedom_of_signal'][:]
    type_ice = f.variables['ice_shape'][:]
    pwv = f.variables['precipitable_water_vapour'][:]

# The time axis is stored as seconds counted from 2017-05-01 00:00.
# Build all timestamps in one pass; growing an array with np.concatenate
# inside a loop copies the whole array on every step.
epoch = dt.datetime(2017, 5, 1)
time = np.array([epoch + dt.timedelta(seconds=int(sec)) for sec in seconds], dtype=object)

tcwret_raw = pd.DataFrame({
    'time': time,
    'pwv(cm)': pwv,
    'ri(um)': r_ice,
    'rl(um)': r_liq,
    'drl(um)': r_liq_err,
    'red_chi_2(1)': red_chi_2,
    'tcw(1)': t_cw,
    'dof(1)': dof,
    'type(1)': type_ice,
})

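Filter the data: keep only retrievals with a reduced chi-squared between 0 and 1, a total cloud optical depth between `tau_min` and `tau_max`, and a liquid radius smaller than the ice radius.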

In [None]:
# Accepted range of the total cloud optical depth
tau_max = 6.0
tau_min = 0.0

# Step 1: keep retrievals whose reduced chi-squared lies in [0, 1] and whose
# optical depth lies in [tau_min, tau_max].
chi2 = tcwret_raw['red_chi_2(1)']
tau = tcwret_raw['tcw(1)']
idx_conv = np.where((chi2 <= 1.0) & (chi2 >= 0.0))[0]
idx_tau = np.where((tau <= tau_max) & (tau >= tau_min))[0]
idx_valid = np.intersect1d(idx_conv, idx_tau)
tcwret = tcwret_raw.iloc[idx_valid]

# Step 2: keep only rows where the liquid radius is smaller than the ice
# radius. The comparison is vectorised and yields an integer position array,
# so .iloc no longer receives a float index that was grown element by element.
liquid_smaller = np.asarray(tcwret['rl(um)'] < tcwret['ri(um)'])
idx = np.flatnonzero(liquid_smaller)
counter = idx.size  # number of retrievals that survive step 2
tcwret = tcwret.iloc[idx]

Read the Cloudnet data. Only the retrieval status flags 0, 1, 3 and 4 are allowed.

In [None]:
# Status values of reff_Frisch that disqualify a profile
reff_st_invalid = [2]
cloudnet = {'time': [], 'rliq(um)': [], 'rliq_err(um)': []}
for file_ in sorted(os.listdir(path_cnet)):
    # Decide from the file name alone, before the file is opened.
    if ".nc" not in file_ or "cnet" not in file_:
        continue
    # The file name carries month and day only; the year is fixed to 2017.
    day_month = dt.datetime.strptime(file_, 'cnet_%m_%d.nc')
    day = dt.datetime(2017, day_month.month, day_month.day)

    # Read every variable completely ([:]) so the per-profile loop below works
    # on in-memory arrays instead of going back to the file on each step.
    with nc.Dataset(os.path.join(path_cnet, file_)) as f:
        time = f.variables['datetime'][:]
        lwp = f.variables['liquid_water_path_per_layer'][:]
        rliq = f.variables['reff_Frisch'][:]
        rliq_err = f.variables['reff_Frisch_error'][:]
        rliq_st = f.variables['reff_Frisch_status'][:]

    for time_idx in range(len(time)):
        # 'datetime' is multiplied by 3600 to obtain seconds since midnight,
        # i.e. it is treated as hours of the day.
        time_iter = day + dt.timedelta(seconds=int(np.round(time[time_idx]*3600)))
        # Skip the profile if any of its status values is an invalid one.
        reff_invalid = np.intersect1d(rliq_st[time_idx], np.array(reff_st_invalid))
        if reff_invalid.size != 0:
            continue
        # Only entries with a positive liquid water path enter the mean.
        idx_liq = np.where(lwp[time_idx] > 0.0)[0]
        if idx_liq.size == 0:
            continue
        try:
            rliq_sum = np.mean(rliq[time_idx, idx_liq])
            rliq_err_sum = np.mean(rliq_err[time_idx, idx_liq])
        except AttributeError:
            # NOTE(review): it is not evident which call raises AttributeError
            # here; the best-effort skip of the original code is kept.
            continue
        cloudnet['time'].append(time_iter)
        cloudnet['rliq(um)'].append(rliq_sum)
        cloudnet['rliq_err(um)'].append(rliq_err_sum)

cloudnet = pd.DataFrame(cloudnet)

# Drop rows whose mean radius or mean error is a masked value.
rliq_col = cloudnet['rliq(um)']
rliq_err_col = cloudnet['rliq_err(um)']
idx = np.array(
    [ii for ii in range(len(cloudnet))
     if not np.ma.is_masked(rliq_col.iloc[ii]) and not np.ma.is_masked(rliq_err_col.iloc[ii])],
    dtype=int)
cloudnet = cloudnet.iloc[idx]

Define the averaging time interval (in minutes)

In [None]:
# Length of one averaging window in minutes; the averaging loops convert it
# to seconds via np.timedelta64(delta*60, 's').
delta = 2

Average Cloudnet data

In [None]:
# Average the Cloudnet radii over consecutive windows of `delta` minutes.
rice_mean = []  # not filled in this cell
rliq_mean = []
rliq_err = []
time_mean = []
datetime_start = np.datetime64("2017-05-24T20:25:00")
datetime_stop = np.datetime64("2017-07-18T00:00:00")
window = np.timedelta64(delta*60, 's')

# Convert the columns once; they do not change while the windows are scanned.
cnet_time = np.array(cloudnet['time'])
cnet_rliq = np.array(cloudnet['rliq(um)'])
cnet_rliq_err = np.array(cloudnet['rliq_err(um)'])

datetime_iter = datetime_start
while datetime_iter < datetime_stop:
    # NOTE(review): both bounds are strict, so a sample lying exactly on a
    # window edge is assigned to no window. Kept as in the original analysis.
    idx = np.flatnonzero((cnet_time > datetime_iter) & (cnet_time < datetime_iter + window))
    if idx.size != 0:
        rliq_mean.append(np.mean(cnet_rliq[idx]))
        rliq_err.append(np.mean(cnet_rliq_err[idx]))
        # A window is labelled with its start time.
        time_mean.append(datetime_iter)
    datetime_iter += window

cloudnet_av = pd.DataFrame({'time': time_mean, 'rliq(um)': rliq_mean, 'rliq_err(um)': rliq_err})

Average TCWret data

In [None]:
# Average the TCWret results over the same windows of `delta` minutes.
# The following lists are not filled in this cell.
lwp_mean = []
iwp_mean = []
cwp_mean = []
lat_mean = []
lon_mean = []
# Lists that receive one value per non-empty window
rliq_mean = []
drliq_mean = []
rice_mean = []
time_mean = []
red_chi2_mean = []
tcw_mean = []
dof_mean = []
type_mean = []
pwv_mean = []
datetime_start = np.datetime64("2017-05-24T20:25:00")
datetime_stop = np.datetime64("2017-07-18T00:00:00")
window = np.timedelta64(delta*60, 's')

# Source column of `tcwret` -> list collecting its window means
targets = {
    'ri(um)': rice_mean,
    'rl(um)': rliq_mean,
    'drl(um)': drliq_mean,
    'red_chi_2(1)': red_chi2_mean,
    'dof(1)': dof_mean,
    'type(1)': type_mean,
    'tcw(1)': tcw_mean,
    'pwv(cm)': pwv_mean,
}
# Convert the columns once; they do not change while the windows are scanned.
tcwret_time = np.array(tcwret['time'])
tcwret_values = {column: np.array(tcwret[column]) for column in targets}

datetime_iter = datetime_start
while datetime_iter < datetime_stop:
    # NOTE(review): both bounds are strict, so a sample lying exactly on a
    # window edge is assigned to no window. Kept as in the original analysis.
    idx = np.flatnonzero((tcwret_time > datetime_iter) & (tcwret_time < datetime_iter + window))
    if idx.size != 0:
        for column, means in targets.items():
            means.append(np.mean(tcwret_values[column][idx]))
        # A window is labelled with its start time.
        time_mean.append(datetime_iter)
    datetime_iter += window

tcwret_av = pd.DataFrame({
    'time': time_mean,
    'pwv(cm)': pwv_mean,
    'tcw(1)': tcw_mean,
    'rice(um)': rice_mean,
    'dof': dof_mean,
    'drliq(um)': drliq_mean,
    'rliq(um)': rliq_mean,
    'red_chi_2': red_chi2_mean,
    'type': type_mean,
})

Calculate the Pearson correlation coefficient and its p-value, as well as the mean and standard deviation of the difference (TCWret − Cloudnet)

In [None]:
# Match the averaging windows that are present in both data sets. The match
# is identical for both statistics below, so it is determined only once.
intersect, idx_tcwret, idx_cloudnet = np.intersect1d(
    tcwret_av['time'], cloudnet_av['time'], return_indices=True)
rliq_tcwret = np.array(tcwret_av['rliq(um)'].iloc[idx_tcwret])
rliq_cloudnet = np.array(cloudnet_av['rliq(um)'].iloc[idx_cloudnet])

# --- All matched windows ---
xax = rliq_tcwret
yax = rliq_cloudnet
pearsonr, pval = scipy.stats.pearsonr(xax, yax)
difference = xax - yax  # TCWret minus Cloudnet

print("Data\t\t\tcor\tp-Value\tMean\tSD\tNumber")
print("rliq All\t\t{:.2f}\t{:.2f}\t{:.2f}\t{:.2f}\t{}".format(
    pearsonr, pval, np.mean(difference), np.std(difference), xax.size))

# --- Only windows whose mean precipitable water vapour is below 1 cm ---
cax = np.array(tcwret_av['pwv(cm)'].iloc[idx_tcwret])
idx = np.where(cax < 1.0)[0]
xax = rliq_tcwret[idx]
yax = rliq_cloudnet[idx]
pearsonr, pval = scipy.stats.pearsonr(xax, yax)
difference = xax - yax

print("rliq PWV < 1cm\t\t{:.2f}\t{:.2f}\t{:.2f}\t{:.2f}\t{}".format(
    pearsonr, pval, np.mean(difference), np.std(difference), xax.size))