In [1]:
# Merge the diag_ges and diag_anl files into pandas DataFrames
# so that it is easier to compare the DA effect on the same observations.
# Initial build: 2025/02/06

import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import netCDF4 as nc
import pandas as pd

In [20]:
# Experiment configuration read by the cells below.
# Directory that contains the diag netCDF files.
root = (
    '/scratch2/GFDL/gfdlscr/Chih-Chi.Hu/scrub/radstats/amsua'
    '/single_cycle_VarBC_sens_exp/varbc_sens_CTRL'
)

instr = 'amsua'           # instrument part of the diag file name
sat = 'n15'               # satellite part of the diag file name
gesanl = ['ges', 'anl']   # which diag files to load (diag_ges and diag_anl)
time = '2022070106'       # time stamp part of the diag file name (presumably YYYYMMDDHH)


In [59]:
# Open every requested diag file and turn the selected variables into a pandas DataFrame.
#   rr[exp] : open netCDF4 Dataset (deliberately left open; a later cell reads from it)
#   df[exp] : DataFrame restricted to rows with QC_Flag == 0
# with exp = '{instr}_{sat}_{ga}_{time}'.
rr = {}
df = {}

# DataFrame column -> diag variable, always read.
required_cols = {
    "channel":   'Channel_Index',
    "lat":       'Latitude',
    "lon":       'Longitude',
    "fwater":    'Water_Fraction',
    "qc":        'QC_Flag',
    "obs":       'Observation',
    "omf":       'Obs_Minus_Forecast_adjusted',
    "omfnbc":    'Obs_Minus_Forecast_unadjusted',
    "clw_obs":   'clw_obs',
    "clw_model": 'clw_guess_retrieval',
}

# DataFrame column -> diag variable, read only when the file provides it.
optional_cols = {
    "varbc_idx": 'VarBC_Data_Control_Index',
}

# Same, but stored via .tolist() so that each DataFrame cell holds one row's list of values
# (present only if the Jacobian information was saved).
optional_profile_cols = {
    "inner_pres": 'Inner_domain_Pressure',
    "inner_t":    'Inner_domain_Temperature',
    "inner_qv":   'Inner_domain_Water_Vapor',
    "inner_ql":   'Inner_domain_ql',
    "jac_t":      'Jacobian_Temperature',
    "jac_qv":     'Jacobian_Water_Vapor',
    "jac_ql":     'Jacobian_ql',
}

for ga in gesanl:
    fn  = f'diag_{instr}_{sat}_{ga}.{time}.nc4'
    exp = f'{instr}_{sat}_{ga}_{time}'
    rr[exp] = nc.Dataset(f"{root}/{fn}")
    ncvars = rr[exp].variables

    print(f'processing {fn}')

    # transform selected data into a pandas DataFrame:
    data = {col: ncvars[name][:] for col, name in required_cols.items()}
    # mean of observed and model cloud liquid water, computed from the arrays
    # already in memory instead of reading both variables from disk a second time
    data["clw_sym"] = 0.5*(data["clw_obs"] + data["clw_model"])

    # optional additional variables; each one is checked on its own so a file that
    # carries only some of them no longer raises KeyError
    for col, name in optional_cols.items():
        if name in ncvars:
            data[col] = ncvars[name][:]

    for col, name in optional_profile_cols.items():
        if name in ncvars:
            data[col] = ncvars[name][:].tolist()

    tmpdf = pd.DataFrame(data)

    # filtered criteria: keep only rows whose QC flag is 0
    df[exp] = tmpdf[tmpdf["qc"] == 0]

    del data


processing diag_amsua_n15_ges.2022070106.nc4
processing diag_amsua_n15_anl.2022070106.nc4


In [73]:
# merge the 'ges' and 'anl' dataframes:
# The DataFrame keys are built from the config values (instr, sat, time) instead of being
# hard-coded, so changing the configuration does not silently break this cell.
ges_key = f'{instr}_{sat}_ges_{time}'
anl_key = f'{instr}_{sat}_anl_{time}'

# Rows are paired on the columns listed in `on` (used here to identify the same observation
# in both files); every other column appears twice, suffixed with '_ges' and '_anl'.
# how='inner' keeps only rows that are present in both DataFrames.
merged_df = pd.merge(
    df[ges_key],
    df[anl_key],
    on=['channel', 'lon', 'lat', 'fwater', 'obs', 'clw_obs'],
    suffixes=('_ges', '_anl'),
    how='inner',
)

In [75]:
# Display the merged ges/anl table.
merged_df

Unnamed: 0,channel,lat,lon,fwater,qc_ges,obs,omf_ges,omfnbc_ges,clw_obs,clw_model_ges,...,clw_model_anl,clw_sym_anl,varbc_idx_anl,inner_pres_anl,inner_t_anl,inner_qv_anl,inner_ql_anl,jac_t_anl,jac_qv_anl,jac_ql_anl
0,7,-86.523399,12.049100,0.0,0.0,200.580002,-0.147026,0.268755,0.00000,0.000000,...,0.000000,0.000000,1.0,"[0.17350132763385773, 0.5105040073394775, 0.84...","[238.1976318359375, 238.1976318359375, 238.197...","[0.002979044336825609, 0.002979044336825609, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-2.0306746591813862e-05, 8.363609595107846e-0...","[-1.778946716513019e-05, -2.0567829778883606e-...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
1,8,-86.523399,12.049100,0.0,0.0,194.229996,-0.051529,-0.052543,0.00000,0.000000,...,0.000000,0.000000,1.0,"[0.17350132763385773, 0.5105040073394775, 0.84...","[238.1976318359375, 238.1976318359375, 238.197...","[0.002979044336825609, 0.002979044336825609, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-5.4995289247017354e-05, -1.0468756954651326e...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
2,9,-86.523399,12.049100,0.0,0.0,186.130005,-0.189665,-0.575939,0.00000,0.000000,...,0.000000,0.000000,1.0,"[0.17350132763385773, 0.5105040073394775, 0.84...","[238.1976318359375, 238.1976318359375, 238.197...","[0.002979044336825609, 0.002979044336825609, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-6.324283458525315e-05, 1.1510488548083231e-0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
3,10,-86.523399,12.049100,0.0,0.0,183.899994,-0.292668,-0.400714,0.00000,0.000000,...,0.000000,0.000000,1.0,"[0.17350132763385773, 0.5105040073394775, 0.84...","[238.1976318359375, 238.1976318359375, 238.197...","[0.002979044336825609, 0.002979044336825609, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.00011347726103849709, 8.110422641038895e-0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
4,12,-86.523399,12.049100,0.0,0.0,192.080002,-0.994280,-0.979846,0.00000,0.000000,...,0.000000,0.000000,1.0,"[0.17350132763385773, 0.5105040073394775, 0.84...","[238.1976318359375, 238.1976318359375, 238.197...","[0.002979044336825609, 0.002979044336825609, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0012523392215371132, 0.0031137862242758274,...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62784,9,71.140099,358.946106,1.0,0.0,225.679993,-0.402902,-0.927839,0.10164,0.063773,...,0.071362,0.086501,1.0,"[0.17350132763385773, 0.5105040073394775, 0.84...","[268.8710632324219, 268.8710632324219, 268.871...","[0.004104620777070522, 0.004104620777070522, 0...","[1.000999986899842e-06, 1.000999986899842e-06,...","[3.4546825190773234e-05, 4.4381544284988195e-0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[36.27200698852539, 36.27200698852539, 36.2720..."
62785,10,71.140099,358.946106,1.0,0.0,228.550003,-0.040937,-0.522324,0.10164,0.063773,...,0.071362,0.086501,1.0,"[0.17350132763385773, 0.5105040073394775, 0.84...","[268.8710632324219, 268.8710632324219, 268.871...","[0.004104620777070522, 0.004104620777070522, 0...","[1.000999986899842e-06, 1.000999986899842e-06,...","[0.00018528933287598193, 0.0001841580087784677...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[34.16233444213867, 34.16233444213867, 34.1623..."
62786,12,71.140099,358.946106,1.0,0.0,244.539993,0.183202,-0.276498,0.10164,0.063773,...,0.071362,0.086501,1.0,"[0.17350132763385773, 0.5105040073394775, 0.84...","[268.8710632324219, 268.8710632324219, 268.871...","[0.004104620777070522, 0.004104620777070522, 0...","[1.000999986899842e-06, 1.000999986899842e-06,...","[0.001516189775429666, 0.0031460311729460955, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[20.65129852294922, 20.65129852294922, 20.6512..."
62787,13,71.140099,358.946106,1.0,0.0,257.649994,0.739880,0.177494,0.10164,0.063773,...,0.071362,0.086501,1.0,"[0.17350132763385773, 0.5105040073394775, 0.84...","[268.8710632324219, 268.8710632324219, 268.871...","[0.004104620777070522, 0.004104620777070522, 0...","[1.000999986899842e-06, 1.000999986899842e-06,...","[0.006395251024514437, 0.016404809430241585, 0...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[9.720008850097656, 9.720008850097656, 9.72000..."


In [None]:
# Display the dict of QC-filtered DataFrames (keys look like '{instr}_{sat}_{ga}_{time}').
df

In [41]:
# Nested list of the 'Inner_domain_Pressure' variable, one inner list per row.
# It is read from the open Dataset of the last loaded diag file: the `data` dict built by
# the loading loop is deleted at the end of every iteration, and its 'inner_pres' entry is
# already a plain list, so `data['inner_pres'].tolist()` cannot run on a fresh kernel.
list_data = rr[f'{instr}_{sat}_{gesanl[-1]}_{time}'].variables['Inner_domain_Pressure'][:].tolist()

In [43]:
# Report the size of the nested list as (number of rows, length of the first row).
n_rows = len(list_data)
n_cols = len(list_data[0])
shape = (n_rows, n_cols)
print(shape)

(101385, 94)


In [None]:
# Peek at the first few rows only; echoing the full nested list would flood the notebook output.
list_data[:3]

In [None]:
chn = 1 # channel to be examined

# Per-experiment containers, keyed by the entries of `gesanl`.
#   *_use : values on the rows picked by the `use` selection below
#   *_all : values on every row of channel `chn`
lat_use={}; lon_use={}; obserr_use={}; inv_obserr_use={}; cloud_use={}; clouddiff_use={}; omf_use={}; omfnbc_use={};
lat_all={}; lon_all={}; qc_all={}; cld_rbc_idx_use={}; clw_obs_use={}; clw_model_use={}; clw_model_nobc_use={}; varbc_idx_use={};
bc_cnst_use={}; fwater_use={};

Jq_use={}; Jt_use={}; Jql_use={};
pres_use={};
ql_use={};

#norm_omf_use={}; norm_omf_all={};

for exp in gesanl:
    # The loading cell stores the open Datasets under '{instr}_{sat}_{ga}_{time}', not under
    # the bare 'ges'/'anl' labels; accept either form so the lookup does not raise KeyError.
    rr_key = exp if exp in rr else f'{instr}_{sat}_{exp}_{time}'
    ncvars = rr[rr_key].variables

    # --- variables that are always read ---
    ch         = ncvars['Channel_Index'][:]
    lat        = ncvars['Latitude'][:]
    lon        = ncvars['Longitude'][:]
    inv_obserr = ncvars['Inverse_Observation_Error'][:]
    obserr     = ncvars['Sym_Observation_Error'][:]
    clw_obs    = ncvars['clw_obs'][:]
    clw_model  = ncvars['clw_guess_retrieval'][:]
    omf        = ncvars['Obs_Minus_Forecast_adjusted'][:]
    omfnbc     = ncvars['Obs_Minus_Forecast_unadjusted'][:]
    fwater     = ncvars['Water_Fraction'][:]
    qc         = ncvars['QC_Flag'][:]
    bc_cnst    = ncvars['BC_Constant'][:]
    symcloud   = 0.5*(clw_obs + clw_model)
    diffcloud  = (clw_obs - clw_model)

    # --- optional variables: test for the variable that is actually read ---
    # (the guard for the "nobc" cloud field used to test 'clw_guess_retrieval', which is
    #  read unconditionally above, so files without the "_nobc" variable raised KeyError)
    has_nobc  = 'clw_guess_retrieval_nobc' in ncvars
    has_varbc = 'VarBC_Data_Control_Index' in ncvars
    has_ql    = 'Inner_domain_ql' in ncvars
    has_Jq    = 'Jacobian_Water_Vapor' in ncvars
    has_Jt    = 'Jacobian_Temperature' in ncvars
    has_Jql   = 'Jacobian_ql' in ncvars
    has_pres  = 'Inner_domain_Pressure' in ncvars

    if has_nobc:  clw_model_nobc = ncvars['clw_guess_retrieval_nobc'][:]
    if has_varbc: varbc_idx      = ncvars['VarBC_Data_Control_Index'][:]
    if has_ql:    ql             = ncvars['Inner_domain_ql'][:]
    if has_Jq:    Jq             = ncvars['Jacobian_Water_Vapor'][:]
    if has_Jt:    Jt             = ncvars['Jacobian_Temperature'][:]
    if has_Jql:   Jql            = ncvars['Jacobian_ql'][:]
    if has_pres:  pres           = ncvars['Inner_domain_Pressure'][:]

    # new added variable: VarBC_Cloud_Consistency_Index (2024/08/22)
    # old nc4 files do not have this variable
    #cld_rbc_idx = ncvars['Cloud_Displacement_Index'][:]

    # water-fraction screen; only referenced by the alternative selections listed below
    sea_screen  = (fwater>= 0.99)
    #sea_screen  = (fwater< 0.99)

    # --- row selection: QC flag 0 and channel `chn` ---
    use         = np.where( (qc == 0)&(ch==chn) )
    # alternative selections:
    #use         = np.where( ch==chn )
    #use         = np.where( (ch==chn)&sea_screen )
    #use         = np.where( (qc == 0)&(ch==chn)&sea_screen )
    #use         = np.where( (qc == 0)&(ch==chn)&sea_screen&(clw_model>0.3))
    #use         = np.where( (qc == 0)&(ch==chn)&sea_screen&(clw_model<0.05))
    #use         = np.where( (qc == 0)&(ch==chn)&sea_screen&(varbc_idx==1) )
    #use         = np.where( (qc == 0)&(ch==chn)&(cld_rbc_idx==1))
    #use         = np.where( (qc == 0)&(ch==chn)&(varbc_idx==1))

    lat_use[exp]        = lat[use]
    lon_use[exp]        = lon[use]
    obserr_use[exp]     = obserr[use]
    inv_obserr_use[exp] = inv_obserr[use]
    cloud_use[exp]      = symcloud[use]
    clw_obs_use[exp]    = clw_obs[use]
    clw_model_use[exp]  = clw_model[use]
    fwater_use[exp]     = fwater[use]
    clouddiff_use[exp]  = diffcloud[use]
    omf_use[exp]        = omf[use]
    omfnbc_use[exp]     = omfnbc[use]
    #cld_rbc_idx_use[exp] = cld_rbc_idx[use]

    if has_nobc:  clw_model_nobc_use[exp] = clw_model_nobc[use]
    if has_varbc: varbc_idx_use[exp]      = varbc_idx[use]

    # Row selection on the 2-D fields. Indexing with `use` directly keeps the row axis even
    # when exactly one row is selected, whereas np.squeeze(x[use,:]) dropped it in that case;
    # for two or more selected rows the result is the same.
    if has_ql:   ql_use[exp]   = ql[use]
    if has_pres: pres_use[exp] = pres[use]
    if has_Jq:   Jq_use[exp]   = Jq[use]
    if has_Jt:   Jt_use[exp]   = Jt[use]
    if has_Jql:  Jql_use[exp]  = Jql[use]

    # NOTE(review): this stores the first element of BC_Constant regardless of `chn` - confirm
    # that this is the intended value for the channel being examined.
    bc_cnst_use[exp]     = bc_cnst[0]

    # all rows of the examined channel, without the QC filter
    chn_rows            = np.where(ch==chn)
    qc_all[exp]         = qc[chn_rows]
    lat_all[exp]        = lat[chn_rows]
    lon_all[exp]        = lon[chn_rows]