In [5]:
import pandas as pd
import numpy as np
import os
import xarray as xr
import h5py
import glob

# Build one daily-mean IMERG NetCDF file per calendar day.
#
# For each day: read the half-hourly IMERG HDF5 files, stack them into a
# (time, lat, lon) array, write a temporary NetCDF file, then run the external
# cdo / regrid pipeline and move its result (out2.nc) into OUT_DIR.
import shutil
import subprocess
import time

# --- Configuration ---------------------------------------------------------
IMERG_DIR = '/work/DATA/Satellite/IMERG_HDF5/'          # half-hourly HDF5 input
OUT_DIR   = "/data92/DATA/Satellite/IMERG_daily/"       # final daily files
CDO       = "/home/kaichiht/miniconda3/envs/cdoenv/bin/cdo"
SLOTS_PER_DAY = 48            # 30-minute steps in one day
N_LAT, N_LON  = 1800, 3600    # grid size after transposing the HDF5 field

# Create a 30min datetime index and a daily datetime index.
# The 30-min index must end at 23:30 of the last day; ending it at
# "2023-08-31" (= 00:00) would leave the final day with a single time step.
# `dates` is no longer used by the loop below but is kept for other cells.
dates       = pd.date_range(start="2000-06-01 00:00", end="2023-08-31 23:30", freq="30min")
dates_daily = pd.date_range(start="2000-06-01", end="2023-08-31", freq="D")


def _run_step(cmd):
    """Run one external command (list of arguments).

    Returns True when the command exits with status 0. A failure is reported
    and returned as False instead of being silently ignored, so the caller
    can skip the remaining pipeline steps for that day.
    """
    try:
        result = subprocess.run(cmd)
    except OSError as err:  # e.g. executable not found
        print(f"Command could not be started: {' '.join(cmd)} ({err})")
        return False
    if result.returncode != 0:
        print(f"Command failed (exit {result.returncode}): {' '.join(cmd)}")
        return False
    return True


start_time = time.time()
# Iterate over the DAILY index (the original looped over the length of the
# 30-min index, which overruns dates_daily and raises IndexError).
# For a quick test on one day, iterate over dates_daily[:1] instead.
for day in dates_daily:
    day_str = day.strftime("%Y%m%d")

    # The 48 half-hour stamps of this day, built directly instead of scanning
    # the full multi-year 30-min index once per day.
    time_coord = pd.date_range(start=day, periods=SLOTS_PER_DAY, freq="30min")

    # Pre-fill with NaN so a missing half-hourly file is treated as missing
    # data, not as zero precipitation.
    precip_daily = np.full((SLOTS_PER_DAY, N_LAT, N_LON), np.nan)
    lat = lon = None  # reset per day so stale coordinates are never reused

    # Loop over each 30min time for this day
    for count, stamp in enumerate(time_coord):
        # Filename pattern; the trailing wildcard covers the end time/version.
        pattern = IMERG_DIR + f"3B-HHR.MS.MRG.3IMERG.{stamp:%Y%m%d}-S{stamp:%H%M}00-*"

        # glob returns matches in arbitrary order; sort so the choice of
        # "first match" is deterministic.
        file_list = sorted(glob.glob(pattern))
        if not file_list:
            print(f"File does not exist for pattern: {pattern}")
            continue  # Skip if no file found

        fpath = file_list[0]
        with h5py.File(fpath, 'r') as f:
            grid = f['Grid']
            # Transpose, then drop the length-1 axis so the field fits the
            # (N_LAT, N_LON) slot of the daily array.
            precip = np.squeeze(grid['precipitation'][:].T)
            if lat is None:
                # Coordinates are read once per day from the first file found.
                lat = grid['lat'][:]
                lon = grid['lon'][:]

        # Negative values are treated as missing.
        precip[precip < 0] = np.nan
        precip_daily[count, :, :] = precip

    if lat is None:
        # No input at all for this day: there are no coordinates to build a
        # dataset from, so skip instead of failing on undefined lat/lon.
        print(f"No IMERG files found for {day_str}; skipping this day")
        continue

    # Data shape is (time, lat, lon).
    da = xr.DataArray(
        precip_daily,
        coords={"time": time_coord, "lat": lat, "lon": lon},
        dims=["time", "lat", "lon"]
    )
    ds = da.to_dataset(name="precipitation")

    out_filename = f"IMERG_{day_str}.nc"
    try:
        # Temporary half-hourly file in the working directory.
        ds.to_netcdf(out_filename)

        # External pipeline; each step only runs if the previous one succeeded.
        pipeline_ok = (
            _run_step([CDO, "ydaymean", out_filename, "out.nc"])
            and _run_step([CDO, "setgrid,grid.txt", "out.nc", "temp.nc"])
            and _run_step(["sh", "regird.sh"])
        )

        if pipeline_ok and os.path.exists("out2.nc"):
            shutil.move("out2.nc", OUT_DIR + out_filename)
            print(f"Created daily NetCDF file: {out_filename}")
        else:
            print(f"Processing failed for {day_str}; no output written")
    finally:
        # Same cleanup as the original `rm *.nc`.
        # NOTE(review): this deletes EVERY .nc file in the working directory,
        # not only this script's intermediates — run from a dedicated scratch
        # directory, or narrow this once regird.sh's outputs are confirmed.
        for leftover in glob.glob("*.nc"):
            os.remove(leftover)

end_time = time.time()
print(f"Elapsed time: {end_time - start_time} seconds")

cdo    ydaymean: Processed 1 variable over 48 timesteps [1.40s 5011MB].
cdo    setgrid: Processed 1 variable over 1 timestep [0.07s 5011MB].


cdo    remapbil: Bilinear weights from lonlat (3600x1800) to lonlat (576x360) grid, with source mask (6450076)
Created daily NetCDF file: IMERG_20000601.nc
Elapsed time: 14.459201335906982 seconds


In [4]:
# Preview of the alternative destination path for the most recent daily file.
# Relies on `out_filename` already being defined by the processing cell.
"".join(("/data92/PeterChang/Paper2/IMERG_from_kai/", out_filename))

'/data92/PeterChang/Paper2/IMERG_from_kai/IMERG_20000601.nc'