In [2]:
# Cell 0: split a .nc file into 24-hour (datewise) chunks and write to out/ folder.
# If a day's data is missing, an empty netCDF (time length 0) is written for that date.

import os
from pathlib import Path
import numpy as np
import pandas as pd
import xarray as xr

def split_nc_by_day(file_path, out_dir="out", time_coord=None, date_format="%Y-%m-%d"):
    """
    Split a NetCDF file into daily files (one file per calendar date).
    If a date has no samples, a file is still written with the time dimension length 0.
    Args:
      file_path: path to input .nc file
      out_dir: directory where daily .nc files will be written (created if needed)
      time_coord: name of the time coordinate (auto-detected if None)
      date_format: filename date format, default "YYYY-MM-DD"
    """
    file_path = Path(file_path)
    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    ds = xr.open_dataset(file_path, decode_times=True, use_cftime=False)

    # detect time coordinate if not provided
    if time_coord is None:
        time_coord = None
        for name, coord in ds.coords.items():
            try:
                if np.issubdtype(coord.dtype, np.datetime64):
                    time_coord = name
                    break
            except Exception:
                pass
        if time_coord is None:
            # fallback: look for a coordinate literally named 'time'
            if "time" in ds.coords:
                time_coord = "time"
    if time_coord is None:
        ds.close()
        raise ValueError("Could not detect a datetime coordinate. Provide time_coord explicitly.")

    times = pd.to_datetime(ds[time_coord].values)  # pandas datetime index/array

    if len(times) == 0:
        ds.close()
        raise ValueError("Input dataset has zero length time coordinate; nothing to split.")

    start_day = pd.to_datetime(times.min()).normalize()
    end_day = pd.to_datetime(times.max()).normalize()

    for day in pd.date_range(start_day, end_day, freq="D"):
        # mask times for this calendar day: >= day and < next day
        next_day = day + pd.Timedelta(days=1)
        mask = (times >= day) & (times < next_day)

        if mask.any():
            indices = np.nonzero(mask)[0]
            out_ds = ds.isel({time_coord: indices})
        else:
            # create an "empty" dataset with time dimension zero-length
            # keep other coords/variables but select zero elements along time dimension
            out_ds = ds.isel({time_coord: []})

        out_path = out_dir / f"{day.strftime(date_format)}.nc"
        # Write file (overwrite if exists)
        out_ds.to_netcdf(out_path)
        print(f"Wrote {out_path} (records: {int(mask.sum())})")

    ds.close()

# Example usage:
# split_nc_by_day("input_file.nc", out_dir="out")

In [None]:
# Use forward slashes to avoid escaping issues on Windows paths
split_nc_by_day(
	"E:/RAWDATA/Weather Data/High Resolution Rapid Refresh (HRRR)/hrrr_20190111-20190131.nc",
	out_dir="out"
)

Example usage:
    time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
    ds = xr.open_dataset(decode_times=time_coder)

  ds = xr.open_dataset(file_path, decode_times=True, use_cftime=False)
Example usage:
    time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
    ds = xr.open_dataset(decode_times=time_coder)

  ds = xr.open_dataset(file_path, decode_times=True, use_cftime=False)


Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-11.nc (records: 24)
Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-12.nc (records: 24)
Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-13.nc (records: 24)
Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-14.nc (records: 24)
Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-15.nc (records: 24)
Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-16.nc (records: 24)
Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-17.nc (records: 24)
Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-18.nc (records: 24)
Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-19.nc (records: 24)
Wrote E:\RAWDATA\Weather Data\High Resolution Rapid Refresh (HRRR)\out\2019-01-20.nc (records: 24)
Wrote E:\R