In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage
from skimage.measure import label
from tqdm import tqdm
import re
from multiprocess import Pool
import os
import warnings
# NOTE(review): catch_warnings() restores the previous warning filters when the
# with-block exits, so the "ignore" filter installed here is discarded
# immediately and this statement has no lasting effect on later cells.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=RuntimeWarning)

In [2]:
## precipitation dataset; get_sub_prcp cuts it into coarse latitude/longitude boxes
ds_prcp = xr.open_dataset("/scratch/nf33/hk25_LSP/UK/MC/pr_hourly.nc")
## reference area used to normalise area_frac and tot_mean_prcp
## NOTE(review): presumably 200 x 200 grid cells of 25 km**2 each -- confirm
domain_area = 200.0 * 200.0 * 25

In [6]:
## coarse dataset; its latitude/longitude coordinates define the analysis boxes
## and the coordinates of the saved output
ds_coarse = xr.open_dataset("/scratch/nf33/hk25_LSP/UK/MC_chunk/rh500_2deg.nc")


0.5488934655786771 mm/hr

In [18]:
## precipitation threshold: pixels at or above it are treated as convective in
## get_sub_prcp; the notebook output above reports this value in mm/hr
## NOTE(review): hard-coded copy of a computed value -- the cell that derived
## it is not part of this notebook as shown
pr_th = 0.5488934655786771

In [21]:
## centres of the coarse grid boxes
lat_centers = ds_coarse.latitude.values
lon_centers = ds_coarse.longitude.values

## box edges: every centre minus 1 degree, closed off by the last centre plus
## 1 degree, which gives one more edge than there are centres
lat_edges = np.concatenate([lat_centers - 1, lat_centers[-1:] + 1])
lon_edges = np.concatenate([lon_centers - 1, lon_centers[-1:] + 1])

In [22]:
def get_sub_prcp(args):
    """Compute per-timestep convective-object statistics for one coarse box.

    The box is bounded by ``lat_edges[i]..lat_edges[i+1]`` and
    ``lon_edges[j]..lon_edges[j+1]``.  For every time step of ``ds_prcp`` the
    ``pr`` field inside the box is multiplied by 3600, thresholded at
    ``pr_th`` and the connected regions at or above the threshold are labelled
    as objects.

    Parameters
    ----------
    args : tuple of (int, int)
        ``(i, j)`` indices into ``lat_edges`` and ``lon_edges``.

    Returns
    -------
    tuple of seven 1-D float arrays, each of length ``len(ds_prcp.time)``:
        num_obj_arr        number of labelled objects
        tot_area_arr       summed object area (pixel count * 25.0)
        mean_obj_area_arr  tot_area / number of objects
        area_frac_arr      tot_area / domain_area
        cvt_mean_prcp_arr  summed above-threshold precipitation / tot_area
        tot_mean_prcp_arr  summed above-threshold precipitation / domain_area
        cvt_tot_prcp_arr   summed above-threshold precipitation

    Time steps without any object keep the value 0 in every array.
    """
    i, j = args
    n_time = len(ds_prcp.time)
    ## the data is 5 km by 5 km so one pixel is 25 km**2
    pixel_area = 25.0

    num_obj_arr = np.zeros(n_time)
    ## array for total area
    tot_area_arr = np.zeros(n_time)
    ## array to store mean cell size
    mean_obj_area_arr = np.zeros(n_time)
    ## array to store convective area fraction
    area_frac_arr = np.zeros(n_time)
    ## mean precipitation over convective area
    cvt_mean_prcp_arr = np.zeros(n_time)
    ## mean precipitation normalised by domain_area
    ## NOTE(review): domain_area is a module-level constant, not the area of
    ## this box -- confirm that this normalisation is intended
    tot_mean_prcp_arr = np.zeros(n_time)
    ## total convective precipitation
    cvt_tot_prcp_arr = np.zeros(n_time)

    lat0, lat1 = lat_edges[i], lat_edges[i + 1]
    lon0, lon1 = lon_edges[j], lon_edges[j + 1]

    # extract fine data within that box
    sub = ds_prcp.sel(
        latitude=slice(lat0, lat1),
        longitude=slice(lon0, lon1),
    )
    pr_sub = sub["pr"]

    for its in range(n_time):
        ## NOTE(review): the factor 3600 presumably converts a per-second rate
        ## to a per-hour rate so it is comparable with pr_th -- confirm
        prcp = pr_sub.isel(time=its).values * 3600

        ## NaN compares False, so missing pixels count as background
        wet = prcp >= pr_th
        cvt_prcp = np.sum(prcp[wet])

        ## use scikit-image to label connected above-threshold regions;
        ## return_num gives the object count even when no background pixel exists
        label_arr, n_obj = label(wet.astype(np.uint8), return_num=True)
        if n_obj == 0:
            ## nothing above the threshold: leave this time step at zero
            continue

        ## individual object areas in one pass (entry 0 is the background);
        ## kept per object so a minimum-size filter could be applied here
        ind_obj_area = np.bincount(label_arr.ravel(), minlength=n_obj + 1)[1:] * pixel_area
        tot_area = np.sum(ind_obj_area)

        num_obj_arr[its] = n_obj
        tot_area_arr[its] = tot_area
        mean_obj_area_arr[its] = tot_area / n_obj
        area_frac_arr[its] = tot_area / domain_area

        cvt_mean_prcp_arr[its] = cvt_prcp / tot_area
        tot_mean_prcp_arr[its] = cvt_prcp / domain_area
        cvt_tot_prcp_arr[its] = cvt_prcp

    return num_obj_arr, tot_area_arr, mean_obj_area_arr, area_frac_arr, cvt_mean_prcp_arr, tot_mean_prcp_arr, cvt_tot_prcp_arr

In [25]:
## every statistic is stored on a (time, coarse latitude, coarse longitude) grid
grid_shape = (
    len(ds_prcp.time),
    ds_coarse.latitude.shape[0],
    ds_coarse.longitude.shape[0],
)

## number of objects
num_obj_3d = np.zeros(grid_shape)
## total object area
tot_area_3d = np.zeros(grid_shape)
## mean cell size
mean_obj_area_3d = np.zeros(grid_shape)
## convective area fraction
area_frac_3d = np.zeros(grid_shape)
## mean precipitation over convective area
cvt_mean_prcp_3d = np.zeros(grid_shape)
## mean precipitation over the entire radar scan
tot_mean_prcp_3d = np.zeros(grid_shape)
## total convective precipitation
cvt_tot_prcp_3d = np.zeros(grid_shape)

In [28]:
## one (i, j) task per coarse box, latitude index varying slowest
args_list = [
    (i, j)
    for i in range(len(lat_edges) - 1)
    for j in range(len(lon_edges) - 1)
]

## number of worker processes; tune to the machine running the notebook
max_pool = 10

## imap yields results in task order, so pool_outputs[k] belongs to args_list[k]
with Pool(processes=max_pool) as p:
    pool_outputs = list(
        tqdm(
            p.imap(get_sub_prcp, args_list),
            total=len(args_list),
            position=0,
            leave=True,
        )
    )

100%|██████████| 325/325 [12:14<00:00,  2.26s/it] 


In [14]:
## output variable name -> pre-allocated (time, latitude, longitude) grid,
## listed in the same order as the tuple returned by get_sub_prcp
stat_grids = {
    "num_obj": num_obj_3d,
    "tot_area": tot_area_3d,
    "mean_obj_area": mean_obj_area_3d,
    "area_frac": area_frac_3d,
    "cvt_mean_prcp": cvt_mean_prcp_3d,
    "tot_mean_prcp": tot_mean_prcp_3d,
    "cvt_tot_prcp": cvt_tot_prcp_3d,
}

## scatter each box's time series into its (latitude, longitude) column;
## pool_outputs[k] is the result for args_list[k] because imap keeps task order
for (i, j), box_stats in zip(args_list, pool_outputs):
    for grid, series in zip(stat_grids.values(), box_stats):
        grid[:, i, j] = series

## save to netcdf
out = xr.Dataset(
    {
        name: (("time", "latitude", "longitude"), grid)
        for name, grid in stat_grids.items()
    },
    coords={
        "time": ds_prcp.time.values,
        "latitude": ds_coarse.latitude.values,
        "longitude": ds_coarse.longitude.values,
    },
)
out.to_netcdf("/scratch/nf33/hk25_LSP/UK/MC_chunk/number_size_hourly.nc")
out.close()
ds_prcp.close()