In [11]:
import os
import xarray as xr

# Directory holding the regional per-month NetCDF tiles. The merged outputs of
# this cell are written into the same directory, so the grouping step below
# has to tolerate (and skip) files that are not input tiles.
data_dir = '../Data/ALAN'

# Every NetCDF file currently present in the directory
all_files = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith('.nc')]

# Group the tiles by the token that follows `Month-` in the file name
# (e.g. `..._Zc_Month-02_0S_20N_...nc` -> '02').
monthly_files = {}
for file_path in all_files:
    _, marker, tail = os.path.basename(file_path).partition('Month-')
    if not marker:
        # No `Month-` token: this is not an input tile (for instance a
        # previously written `combined_month_*` / `global_month_*` output).
        # Skipping it keeps the cell re-runnable instead of raising IndexError.
        continue
    month = tail.split('_', 1)[0]
    monthly_files.setdefault(month, []).append(file_path)

# Chunking passed to Dask: `-1` means "one chunk spanning the whole `time`
# dimension"; dimensions not listed here are left to the default chunking.
chunk_size = {'time': -1}

# Merge the tiles of each month lazily and write one NetCDF file per month
for month, files in monthly_files.items():
    print(month)

    # The context manager closes every underlying file handle once the month
    # has been written, so handles do not pile up across iterations.
    with xr.open_mfdataset(files, combine='nested', parallel=True, chunks=chunk_size, compat='minimal') as combined:
        # Light compression (level 1) trades file size for faster writes
        encoding = {var: {"zlib": True, "complevel": 1} for var in combined.data_vars}

        # NOTE: the `risk_level = z_thresh >= 10` derivation is currently
        # disabled, so despite the `_risk_level` suffix the file contains every
        # data variable of the merged dataset unchanged.
        combined_file = os.path.join(data_dir, f"combined_month_{month}_risk_level.nc")
        combined.to_netcdf(combined_file, encoding=encoding, mode='w')

    print(f"Saved combined file for Month-{month} as {combined_file}")

# Optional: Close Dask distributed client if one is used
# client.close()
combine='by_coords',
            parallel=True,
            preprocess=preprocess,
            join='outer'  # Use outer join to merge different regions
        )

        # If needed, handle NaN values here
        # combined = combined.fillna(0)  # Example to fill NaNs with zero

        # Optimize writing by limiting compression level for speed
        encoding = {var: {"zlib": True, "complevel": 1} for var in combined.data_vars}

        # Save to NetCDF file
        combined_file = os.path.join(data_dir, f"combined_month_{month}_risk_level.nc")
        combined.to_netcdf(combined_file, encoding=encoding, mode='w')
        print(f"Saved combined file for Month-{month} as {combined_file}")
    
    except Exception as e:
        print(f"Failed to process month {month} due to error: {e}")


Processing month: 02
Failed to process month 02 due to error: NetCDF: Not a valid ID
Processing month: 12
Failed to process month 12 due to error: Cannot specify both coords='different' and compat='override'.
Processing month: 01
Failed to process month 01 due to error: NetCDF: Not a valid ID
Processing month: 08


HDF5-DIAG: Error detected in HDF5 (1.14.3) thread 2:
  #000: H5A.c line 2397 in H5Aexists(): can't synchronously check if attribute exists
    major: Attribute
    minor: Can't get value
  #001: H5A.c line 2364 in H5A__exists_api_common(): can't set object access arguments
    major: Attribute
    minor: Can't set value
  #002: H5VLint.c line 2634 in H5VL_setup_self_args(): invalid location identifier
    major: Invalid arguments to routine
    minor: Inappropriate type
  #003: H5VLint.c line 1733 in H5VL_vol_object(): invalid identifier
    major: Invalid arguments to routine
    minor: Inappropriate type
HDF5-DIAG: Error detected in HDF5 (1.14.3) thread 2:
  #000: H5A.c line 679 in H5Aopen_by_name(): unable to synchronously open attribute
    major: Attribute
    minor: Can't open object
  #001: H5A.c line 633 in H5A__open_by_name_api_common(): can't set object access arguments
    major: Attribute
    minor: Can't set value
  #002: H5VLint.c line 2677 in H5VL_setup_name_args(): inva

Failed to process month 08 due to error: Cannot specify both coords='different' and compat='override'.
Processing month: 03
Failed to process month 03 due to error: conflicting sizes for dimension 'lat': length 3600 on 'lat' and length 4800 on {'time': 'z_thresh', 'lat': 'z_thresh', 'lon': 'z_thresh'}
Processing month: 06
Failed to process month 06 due to error: Cannot specify both coords='different' and compat='override'.
Processing month: 05
Failed to process month 05 due to error: Cannot specify both coords='different' and compat='override'.
Processing month: 09
Failed to process month 09 due to error: Cannot specify both coords='different' and compat='override'.
Processing month: 07
Failed to process month 07 due to error: Cannot specify both coords='different' and compat='override'.
Processing month: 10
Failed to process month 10 due to error: Cannot specify both coords='different' and compat='override'.
Processing month: 11
Failed to process month 11 due to error: Cannot specify 

In [1]:
import numpy as np
import xarray as xr
import os

# Bounds and spacing of the global target grid (spacing is ~1/120 of a unit)
lon_min, lon_max = -174.995833, 179.995833
lat_min, lat_max = -60.0, 84.99583333
resolution = 0.00833333

# 1-D coordinate axes; the upper bound is exclusive (np.arange semantics)
lons = np.arange(lon_min, lon_max, resolution)
lats = np.arange(lat_min, lat_max, resolution)

# 2-D coordinate grids with shape (len(lats), len(lons)).
# `copy=False` returns broadcast *views* of `lons`/`lats` instead of two dense
# float64 arrays (several GB each at this resolution). Values and shapes are
# identical to the default; make an explicit copy before writing into them.
lon_grid, lat_grid = np.meshgrid(lons, lats, copy=False)


In [7]:
# Directory holding the regional tiles; the global mosaics produced below are
# written into this same directory.
data_dir = '../Data/ALAN'

# Every NetCDF file currently present in the directory
all_files = [os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith('.nc')]

# Group the tiles by the token that follows `Month-` in the file name
# (e.g. `..._Zc_Month-02_0S_20N_...nc` -> '02').
monthly_files = {}
for file_path in all_files:
    _, marker, tail = os.path.basename(file_path).partition('Month-')
    if not marker:
        # Not an input tile (e.g. a `global_month_XX.nc` written by an earlier
        # run of this cell); skipping it avoids an IndexError on re-run.
        continue
    month = tail.split('_', 1)[0]
    monthly_files.setdefault(month, []).append(file_path)

# Process each month's tiles and overlay them on the global grid
# (`lats` / `lons` are the 1-D axes defined in the grid-definition cell).
for month, files in monthly_files.items():
    print(f"Processing month: {month}")

    # Start from an all-NaN canvas; NaN marks "no tile has provided a value yet"
    global_grid = xr.DataArray(
        np.full((len(lats), len(lons)), np.nan),
        coords={'lat': lats, 'lon': lons},
        dims=['lat', 'lon']
    )

    for file in files:
        print(file)
        # Open the tile in a context manager so its file handle is released
        # as soon as the tile has been merged into the canvas.
        with xr.open_dataset(file) as ds:
            # Nearest-neighbour resampling of the tile onto the global axes
            ds_interp = ds.interp(lat=lats, lon=lons, method="nearest")

            # Fill only cells that are still NaN: the first tile that covers a
            # cell wins, later tiles never overwrite it.
            global_grid = xr.where(np.isnan(global_grid), ds_interp, global_grid)

    # After the first overlay `global_grid` carries the tile's data variables,
    # which is why `.data_vars` is available here.
    encoding = {var: {"zlib": True, "complevel": 4} for var in global_grid.data_vars}

    # Save the global grid to a NetCDF file with compression
    output_path = f"{data_dir}/global_month_{month}.nc"
    global_grid.to_netcdf(output_path, mode='w', encoding=encoding)
    print(f"Saved compressed global map for Month-{month} as {output_path}")

Processing month: 02
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_0S_20N_-120W_-60E_CAm.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_0S_30N_30W_60E_MidE_NInd.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_20S_85N_-32W_55E_EuropeMed.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_0S_30N_-26W_20E_NAfr.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_20S_85N_100W_180E_PacRim.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_0S_30N_60W_100E_NInd_FarE.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_-60S_0N_-90W_-30E_SAm.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_-40S_0N_0W_70E_SAfr.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_-10S_30N_100W_130E_FarE_Isl.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_-50S_0N_100W_180E_Oceania.nc
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month-02_20S_85N_-175W_-50E_NAm.nc
Saved compressed global map for Month-02 as ../Data/ALAN/global_month_02.nc
Processing month: 12
../Data/ALAN/In-water_clear-sky_ALAN_Zc_Month

In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import imageio
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# Open all monthly global mosaics as one lazily loaded dataset
combined = xr.open_mfdataset("../Data/ALAN/global_month_*.nc")

# Frames are rendered to disk first and stitched into a GIF afterwards
os.makedirs("../Data/frames", exist_ok=True)

# One PNG frame per time step
filenames = []
for i, time in enumerate(combined.time):
    print(time)

    # Map axes in a plain lon/lat (PlateCarree) projection on a navy background
    fig = plt.figure(figsize=(8, 6))
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.set_facecolor('navy')

    # Draw the z_thresh field of this time step, colour range fixed to 0..10
    frame = combined['z_thresh'].sel(time=time)
    frame.plot(ax=ax, vmin=0, vmax=10, cmap='cividis', transform=ccrs.PlateCarree())

    # Geographic context: coastlines, dotted country borders, labelled gridlines
    ax.coastlines()
    ax.add_feature(cfeature.BORDERS, linestyle=':')
    ax.gridlines(draw_labels=True)

    plt.title(f'Time: {str(time.values)}')

    # Write the frame (tight bounding box) and release the figure's memory
    filename = f"../Data/frames/frame_{i:03d}.png"
    fig.savefig(filename, bbox_inches='tight')
    filenames.append(filename)
    plt.close(fig)

# Assemble the frames, in rendering order, into an animated GIF
with imageio.get_writer('pollution_3_light.gif', mode='I', duration=0.5) as writer:
    for filename in filenames:
        image = imageio.imread(filename)
        writer.append_data(image)

# The PNG frames are only intermediates; delete them once the GIF exists
for filename in filenames:
    os.remove(filename)

<xarray.DataArray 'time' ()> Size: 8B
array('2019-01-01T00:00:00.000000000', dtype='datetime64[ns]')
Coordinates:
    time     datetime64[ns] 8B 2019-01-01


In [6]:
# Re-open the monthly global mosaics as one lazily evaluated dataset
combined = xr.open_mfdataset("../Data/ALAN/global_month_*.nc")

# Replace `z_thresh` with a boolean `risk_level` mask (True where z_thresh >= 10).
# The `.astype(int)` conversion is intentionally left disabled, so the mask stays bool.
# NOTE(review): NaN cells compare as False here — confirm that is the intended
# treatment of cells without data.
combined = combined.assign(risk_level=combined['z_thresh'] >= 10).drop_vars('z_thresh')
combined

Unnamed: 0,Array,Chunk
Bytes,8.28 GiB,11.05 MiB
Shape,"(17400, 42600, 12)","(2175, 5325, 1)"
Dask graph,768 chunks in 26 graph layers,768 chunks in 26 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 8.28 GiB 11.05 MiB Shape (17400, 42600, 12) (2175, 5325, 1) Dask graph 768 chunks in 26 graph layers Data type bool numpy.ndarray",12  42600  17400,

Unnamed: 0,Array,Chunk
Bytes,8.28 GiB,11.05 MiB
Shape,"(17400, 42600, 12)","(2175, 5325, 1)"
Dask graph,768 chunks in 26 graph layers,768 chunks in 26 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray


In [None]:
# NOTE(review): `test` is not defined in any cell visible in this notebook —
# confirm where it comes from; on a fresh kernel this cell may raise NameError.
test['z_thresh']