ERA5 Precipitation

- mean, median, std, min, max and total for precipitation (`tp`) values by year and year-month

- maps for both results


Yearly

In [None]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Yearly per-grid-cell precipitation statistics.
#
# For every NetCDF file in `folder_path`, reduce the `tp` variable over the
# `valid_time` dimension (mean, median, std, max, min, sum, all NaN-aware) and
# collect the results in `df_final`, indexed by (Year, latitude, longitude).

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\ERA5\Precipitation\1raw_year_1979_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted(f for f in os.listdir(folder_path) if f.endswith(".nc"))

# Fail early with a clear message instead of the opaque
# "No objects to concatenate" error that pd.concat raises on an empty list.
if not file_list:
    raise FileNotFoundError(f"No NetCDF (.nc) files found in {folder_path}")

# Initialize an empty list to store yearly DataFrames
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)

    # The context manager releases the file handle as soon as the data has
    # been read, so handles do not pile up across the loop.
    with xr.open_dataset(file_path) as ds:
        # The file is labelled with the year of its first time step.
        # NOTE(review): assumes every time step in the file belongs to that
        # same calendar year - confirm against how the files were produced.
        year = ds['valid_time'].dt.year.values[0]

        # Load the precipitation as a numpy array. The explicit transpose
        # guarantees the (valid_time, latitude, longitude) order that the
        # axis=0 reductions and the dimension labels below rely on; it raises
        # if `tp` has any other set of dimensions. No dask rechunking is done
        # because `.values` loads the whole array into memory anyway.
        precip_data = ds['tp'].transpose(
            'valid_time', 'latitude', 'longitude'
        ).values

        # Compute statistics over time using numpy's NaN-aware functions
        mean = np.nanmean(precip_data, axis=0)
        median = np.nanmedian(precip_data, axis=0)
        std = np.nanstd(precip_data, axis=0)
        max_ = np.nanmax(precip_data, axis=0)
        min_ = np.nanmin(precip_data, axis=0)
        total = np.nansum(precip_data, axis=0)  # Total over all time steps

        # Combine results into a new dataset on the file's own grid
        stats = xr.Dataset({
            'Mean': (['latitude', 'longitude'], mean),
            'Median': (['latitude', 'longitude'], median),
            'Std': (['latitude', 'longitude'], std),
            'Max': (['latitude', 'longitude'], max_),
            'Min': (['latitude', 'longitude'], min_),
            'Total_Precipitation': (['latitude', 'longitude'], total),
        }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

        # Convert to a flat DataFrame while the source file is still open
        stats_df = stats.to_dataframe().reset_index()

    # Tag the rows with the year and index them by (Year, latitude, longitude)
    stats_df['Year'] = year
    stats_df = stats_df.set_index(['Year', 'latitude', 'longitude'])

    # Append to list
    df_list.append(stats_df)

# Concatenate all years into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)


In [None]:
# Rebuild a gridded xarray Dataset from the yearly statistics table: the three
# index levels become the Dataset dimensions (Year, latitude, longitude).
yearly_table = df_final.reset_index()
yearly_table = yearly_table.set_index(['Year', 'latitude', 'longitude'])
df_final_xr = yearly_table.to_xarray()

# Write the Dataset to disk as NetCDF and report where it went
output_file_path = r"E:\IPMA\ERA5\Precipitation\yearly_precipitation_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")


Monthly

In [None]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Monthly per-grid-cell precipitation statistics.
#
# For every NetCDF file in `folder_path`, group the time steps of `tp` by the
# calendar (year, month) of `valid_time`, reduce each group over time (mean,
# median, std, max, min, sum, all NaN-aware) and collect the results in
# `df_final`, indexed by (Year, Month, latitude, longitude).

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\ERA5\Precipitation\1raw_month_1979_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted(f for f in os.listdir(folder_path) if f.endswith(".nc"))

# Fail early with a clear message instead of the opaque
# "No objects to concatenate" error that pd.concat raises on an empty list.
if not file_list:
    raise FileNotFoundError(f"No NetCDF (.nc) files found in {folder_path}")

# Initialize an empty list to store monthly DataFrames
df_list = []

# Process each file. A file may hold a single month or several months: the
# statistics are computed per calendar month found in the file, so each row is
# labelled with the month its data actually belongs to (previously the whole
# file was labelled with the month of its first time step only).
for file in file_list:
    file_path = os.path.join(folder_path, file)

    # The context manager releases the file handle as soon as the data has
    # been read, so handles do not pile up across the loop.
    with xr.open_dataset(file_path) as ds:
        # Calendar year and month of every time step
        years = ds['valid_time'].dt.year.values
        months = ds['valid_time'].dt.month.values

        # Load the precipitation as a numpy array. The explicit transpose
        # guarantees the (valid_time, latitude, longitude) order that the
        # axis=0 reductions and the dimension labels below rely on; it raises
        # if `tp` has any other set of dimensions. No dask rechunking is done
        # because `.values` loads the whole array into memory anyway.
        precip_data = ds['tp'].transpose(
            'valid_time', 'latitude', 'longitude'
        ).values

        # One pass per distinct (year, month) present in this file
        for year, month in sorted(set(zip(years.tolist(), months.tolist()))):
            # Boolean mask selecting the time steps of this calendar month
            in_month = (years == year) & (months == month)
            month_data = precip_data[in_month]

            # Calculate statistics over time using numpy's NaN-aware functions
            mean = np.nanmean(month_data, axis=0)
            median = np.nanmedian(month_data, axis=0)
            std = np.nanstd(month_data, axis=0)
            max_ = np.nanmax(month_data, axis=0)
            min_ = np.nanmin(month_data, axis=0)
            total = np.nansum(month_data, axis=0)  # Total for the month

            # Create a new xarray Dataset for the statistics
            stats = xr.Dataset({
                'Mean_Total_Precipitation': (['latitude', 'longitude'], mean),
                'Median_Total_Precipitation': (['latitude', 'longitude'], median),
                'Std_Total_Precipitation': (['latitude', 'longitude'], std),
                'Max_Total_Precipitation': (['latitude', 'longitude'], max_),
                'Min_Total_Precipitation': (['latitude', 'longitude'], min_),
                'Total_Precipitation': (['latitude', 'longitude'], total),
            }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

            # Convert to DataFrame, tag with the period and index the rows
            stats_df = stats.to_dataframe().reset_index()
            stats_df['Year'] = year
            stats_df['Month'] = month
            stats_df = stats_df.set_index(
                ['Year', 'Month', 'latitude', 'longitude']
            )

            # Append to list
            df_list.append(stats_df)

# Concatenate all months into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)


In [None]:
# Rebuild a gridded xarray Dataset from the monthly statistics table: the four
# index levels become the Dataset dimensions (Year, Month, latitude, longitude).
monthly_table = df_final.reset_index()
monthly_table = monthly_table.set_index(['Year', 'Month', 'latitude', 'longitude'])
df_final_xr = monthly_table.to_xarray()

# Write the Dataset to disk as NetCDF and report where it went
output_file_path = r"E:\IPMA\ERA5\Precipitation\monthly_precipitation_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")
