In [1]:
# Calculate the (spatially averaged) total VarBC increment time series.
# Initially built: 2024/08/21
# Modified 2025/02/10: add the calculation of normalized VarBC increments
# Modified 2025/09/10:
# (1) add emissivity
# (2) "all-in-one" (no need to read the abias file)

# This notebook has been converted into a .py file (2025/09/10):
# /gpfs/f6/bil-coastal-gfdl/scratch/Chih-Chi.Hu/varbc_coeff/calculate_varbc_increment_temp.py 
# /gpfs/f6/bil-coastal-gfdl/scratch/Chih-Chi.Hu/varbc_coeff/calculate_varbc_increment_driver.sh

import netCDF4 as nc
import numpy as np
import os
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from datetime import datetime, timedelta


In [2]:
# ---- Run configuration -------------------------------------------------
# Experiments to process; each name is a sub-directory of root_dir.
exps = ['full_CTRL']
#exps=['full_CTRL_noinf']
#exps = ['full_clr_clr_and_cld_cld_0.05_varbcpred_sym_cld_nobc']
#exps= ['full_clr_clr_and_cld_cld_0.05']
#exps = ['full_clr_clr_and_cld_cld_0.05_varbcpred_sym_cld_clwnbc']

# Multi-experiment alternative (kept for reference):
#exps=['full_ctrl','full_only_clr_clr', 'full_clr_clr_and_cld_cld_0.05', \
#      'full_clr_clr_and_cld_cld_0.05_varbcpred_sym_cld_nobc', \
#      'full_clr_clr_and_cld_cld_0.05_varbcpred_obs_cld', \
#      'full_clr_clr_and_cld_cld_0.05_varbcpred_model_cld_nobc' ]

root_dir = '/gpfs/f6/bil-coastal-gfdl/scratch/Chih-Chi.Hu/scrub/radstats/'

# Instrument tag is '<sensor>_<platform>'; the sensor part picks the channels.
instr = 'amsua_n15'
#instr = 'amsua_n19'
#instr ='atms_n20'


def channels_for(instrument):
    """Return the list of channel numbers to process for `instrument`.

    Parameters
    ----------
    instrument : str
        Tag of the form '<sensor>_<platform>', e.g. 'amsua_n15'.

    Returns
    -------
    list of int
        Channel numbers for the sensor.

    Raises
    ------
    ValueError
        If the sensor part is neither 'amsua' nor 'atms'. Failing here keeps
        later cells from running with an undefined or stale channel list.
    """
    sensor = instrument.split("_")[0]
    if sensor == 'amsua':
        # NOTE(review): channels 11 and 14 are absent from this list -- confirm
        # this is intended for every AMSU-A platform, not only the current one.
        return [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15]
    if sensor == 'atms':
        return list(range(1, 23))  # channels 1..22
    raise ValueError(
        f"unsupported instrument {instrument!r}: expected an 'amsua_*' or 'atms_*' tag"
    )


chanls = channels_for(instr)

# Analysis times run from init_time to final_time (inclusive) every dt hours.
init_time  = datetime(2022, 6, 10, 0, 0, 0)  # Year, month, day, hour, minute, second
final_time = datetime(2022, 8, 10, 0, 0, 0)  # Year, month, day, hour, minute, second
dt         = 6 # in hours

In [3]:
# Time series of spatially averaged bias-correction statistics.
# Each dict maps f'{exp}_{instr}_ch{chanl}' to a list holding one value per
# analysis time, in the same order as datetime_str.
total_bc_inc = {}
total_bc_inc_sea = {}
total_bc_inc_land = {}

bc_global_offset = {}

bc_emiss = {}
bc_emiss_sea = {}
bc_emiss_land = {}

# Cloud-predictor series only get keys when the diag files contain
# 'BC_Cloud_Pred_Contrib'.
bc_cloud = {}
bc_cloud_sea = {}
bc_cloud_land = {}

obs_num_total = {}
obs_num_varbc = {}
datetime_str = []  # YYYYMMDDHH labels, filled while processing the first experiment

norminc = {}
norminc_sea = {}
norminc_land = {}

for e, exp in enumerate(exps):
    nc_dir=f'{root_dir}/{exp}/{instr.split("_")[0]}/'
    print(f'processing {nc_dir}...')

    current_time = init_time
    while current_time <= final_time:
        format_time = current_time.strftime("%Y%m%d%H")
        if e == 0:
            datetime_str.append(format_time)

        ncfile = f'{nc_dir}/diag_{instr}_anl.{format_time}.nc4'

        # Read everything into memory inside the `with` block so the file is
        # closed right away and handles do not pile up over the time series.
        with nc.Dataset(ncfile) as fn:
            omf       = fn.variables['Obs_Minus_Forecast_adjusted'][:]
            omfnbc    = fn.variables['Obs_Minus_Forecast_unadjusted'][:]
            inverr    = fn.variables['Inverse_Observation_Error'][:]
            clw_obs   = fn.variables['clw_obs'][:]              # read but not used below
            clw_model = fn.variables['clw_guess_retrieval'][:]  # read but not used below
            fwater    = fn.variables['Water_Fraction'][:]
            ch        = fn.variables['Channel_Index'][:]
            qc        = fn.variables['QC_Flag'][:]              # read but not used below
            varbc_ct  = fn.variables['VarBC_Data_Control_Index'][:]
            bccnst    = fn.variables['BC_Constant'][:]
            bcemiss   = fn.variables['BC_Emissivity'][:]
            # Optional variable: check once per file, not once per channel.
            has_cloud_bc = 'BC_Cloud_Pred_Contrib' in fn.variables
            if has_cloud_bc:
                bccloud = fn.variables['BC_Cloud_Pred_Contrib'][:]

        # Total bias-correction increment, and the same increment normalized
        # by the final obs error variance (inverr squared).
        total_inc = omfnbc - omf
        varbc_inc_norm = total_inc*(inverr**2)

        # (values, all-series, sea-series, land-series) for each statistic
        # that is averaged over all / sea / land observations.
        regional = [
            (total_inc, total_bc_inc, total_bc_inc_sea, total_bc_inc_land),
            (bcemiss, bc_emiss, bc_emiss_sea, bc_emiss_land),
            (varbc_inc_norm, norminc, norminc_sea, norminc_land),
        ]
        if has_cloud_bc:
            regional.append((bccloud, bc_cloud, bc_cloud_sea, bc_cloud_land))

        for chanl in chanls:
            # Observations of this channel with a positive inverse obs error.
            in_channel = (inverr > 0) & (ch == chanl)
            use      = np.where(in_channel)
            use_sea  = np.where(in_channel & (fwater >= 0.99))
            use_land = np.where(in_channel & (fwater < 0.99))

            # check if the observation goes into varbc minimization:
            use_varbc = np.where((varbc_ct == 1) & in_channel)

            key = f'{exp}_{instr}_ch{chanl}'

            # An empty selection makes np.mean emit a RuntimeWarning and the
            # stored value is then not a usable number (NaN or masked).
            for values, series_all, series_sea, series_land in regional:
                series_all.setdefault(key, []).append(np.mean(values[use]))
                series_sea.setdefault(key, []).append(np.mean(values[use_sea]))
                series_land.setdefault(key, []).append(np.mean(values[use_land]))

            bc_global_offset.setdefault(key, []).append(np.mean(bccnst[use]))

            obs_num_total.setdefault(key, []).append(len(use[0]))
            obs_num_varbc.setdefault(key, []).append(len(use_varbc[0]))

        # progress to the next time step
        current_time = current_time + timedelta(hours=dt)


processing /gpfs/f6/bil-coastal-gfdl/scratch/Chih-Chi.Hu/scrub/radstats//full_CTRL/amsua/...


  result = super().mean(axis=axis, dtype=dtype, **kwargs)[()]
  ret = um.true_divide(


In [None]:
# Quick look: per key and analysis time, the number of observations with
# inverr > 0 and VarBC_Data_Control_Index == 1 (displays the whole dict).
obs_num_varbc

In [4]:
# Save the time series into text files: one file per (experiment, channel,
# statistic). Each line is "<time> <channel> <instrument> <values...> ".
for exp in exps:
    #output_dir=f'/gpfs/f6/bil-coastal-gfdl/scratch/Chih-Chi.Hu/varbc_coeff/compiled_stats/{exp}'
    output_dir=f'/gpfs/f6/bil-coastal-gfdl/scratch/Chih-Chi.Hu/varbc_coeff/compiled_stats/{exp}_tmp'
    os.makedirs(output_dir, exist_ok=True)

    for chanl in chanls:
        key = f'{exp}_{instr}_ch{chanl}'
        prefix = f'{output_dir}/{instr}_ch{chanl}'

        # (output path, series whose values form the columns after the
        # time/channel/instrument prefix), in the order the files are written.
        outputs = [
            (f'{prefix}_total_bc', (total_bc_inc, total_bc_inc_sea, total_bc_inc_land)),
            (f'{prefix}_global_offset', (bc_global_offset,)),
            (f'{prefix}_emissivity', (bc_emiss, bc_emiss_sea, bc_emiss_land)),
        ]
        # Cloud series have this key only if the diag files carried
        # 'BC_Cloud_Pred_Contrib'. Testing the dict avoids relying on a
        # dataset handle left over from the cell that read the files.
        if key in bc_cloud:
            outputs.append((f'{prefix}_cloud', (bc_cloud, bc_cloud_sea, bc_cloud_land)))
        outputs.append((f'{prefix}_varbc_norminc', (norminc, norminc_sea, norminc_land)))
        outputs.append((f'{prefix}_obs_num', (obs_num_total, obs_num_varbc)))

        for output_fn, columns in outputs:
            with open(output_fn, 'w') as fh:
                for tt, stamp in enumerate(datetime_str):
                    fields = ' '.join(f'{series[key][tt]}' for series in columns)
                    # The space before the newline is part of the file format.
                    fh.write(f"{stamp} {chanl} {instr} {fields} \n")
                


In [None]:
# Quick look at the mean BC_Constant series. The dict defined in this notebook
# is bc_global_offset; a bare `global_offset` is undefined and raises NameError.
bc_global_offset

In [None]:
# Quick look at the per-channel observation counts. The dicts defined in this
# notebook are obs_num_total and obs_num_varbc; a bare `obs_num` is undefined.
obs_num_total