CAMS

- mean, median, std, min, max for PM10 values by year and year-month

- maps for both results


Yearly

In [1]:
import xarray as xr
import pandas as pd
import os
import numpy as np
from collections import defaultdict

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\CAMS\chem_singlvl\5compile_2003_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted([f for f in os.listdir(folder_path) if f.endswith(".nc")])

# Initialize an empty list to store yearly DataFrames
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)
    
    ds = xr.open_dataset(file_path)  # Load without specifying chunks
    ds = ds.chunk({'time': 500})  # Rechunk after loading 

    # Extract the year from the 'time' dimension
    year = ds['time'].dt.year.values[0]  # Extract the year from the first time step
    
    # Get the total pm10 data as a numpy array
    pm10_data = ds['pm10'].values  # Numpy array (all time steps)

    # Compute statistics using numpy's nan functions
    mean = np.nanmean(pm10_data, axis=0)
    median = np.nanmedian(pm10_data, axis=0)
    std = np.nanstd(pm10_data, axis=0)
    max_ = np.nanmax(pm10_data, axis=0)
    min_ = np.nanmin(pm10_data, axis=0)
    total = np.nansum(pm10_data, axis=0)
    
    # Combine results into a new dataset
    stats = xr.Dataset({
        'Mean': (['latitude', 'longitude'], mean),
        'Median': (['latitude', 'longitude'], median),
        'Std': (['latitude', 'longitude'], std),
        'Max': (['latitude', 'longitude'], max_),
        'Min': (['latitude', 'longitude'], min_),
        'Total': (['latitude', 'longitude'], total)
    }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

    # Convert to DataFrame and restructure
    stats_df = stats.to_dataframe().reset_index()
    stats_df['Year'] = year  # Add the year directly from time
    stats_df = stats_df.set_index(['Year', 'latitude', 'longitude'])
    
    # Append to list
    df_list.append(stats_df)

# Concatenate all years into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)


                              Mean     Median        Std         Max  \
Year latitude longitude                                                
2003 66.0     -12.00     20.295852  12.842081  22.223663  177.644418   
              -11.25     20.507330  12.934638  22.302052  169.945542   
              -10.50     20.570728  13.216716  22.064666  154.547792   
              -9.75      20.687042  13.316499  22.074636  174.438268   
              -9.00      20.743595  13.323997  21.937401  184.069829   
...                            ...        ...        ...         ...   
2024 34.5      33.00     15.479365  11.721056  14.352417  133.125809   
               33.75     15.329890  11.591623  14.241758  119.668472   
               34.50     15.801939  11.715542  14.993496  102.095894   
               35.25     23.081898  17.032959  20.566196  142.414825   
               36.00     47.364857  35.898323  33.837379  191.491043   

                                  Min          Total  
Year lat

In [2]:
# Convert the pandas DataFrame back to an xarray Dataset
df_final_xr = df_final.reset_index().set_index(['Year', 'latitude', 'longitude'])
df_final_xr = df_final_xr.to_xarray()

# Save the xarray Dataset to a NetCDF file
output_file_path = r"E:\IPMA\CAMS\chem_singlvl\yearly_pm10_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")


Dataset saved to: E:\IPMA\CAMS\chem_singlvl\yearly_pm10_stats.nc


In [3]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# File path
file_path = r"E:\IPMA\CAMS\chem_singlvl\yearly_pm10_stats.nc"
output_folder = r"E:\IPMA\CAMS\chem_singlvl\maps_year\pm10"
os.makedirs(output_folder, exist_ok=True)

# Load dataset
ds = xr.open_dataset(file_path)

# Variable to plot
var = 'Mean'
title = 'Mean Concentration (µg/m³)'

# Global color scale limits
vmin = ds[var].min().item()
vmax = ds[var].max().item()

# Colormap
cmap = 'Blues'

# Projection (assume data is in PlateCarree, common for lat/lon grids)
data_crs = ccrs.PlateCarree()

# Loop through years
for year in ds['Year'].values:
    fig, ax = plt.subplots(figsize=(8, 6),
                           subplot_kw={'projection': ccrs.PlateCarree()})

    # Select data for current year
    data = ds[var].sel(Year=year)

    # Plot the data
    im = ax.pcolormesh(ds['longitude'], ds['latitude'], data,
                       cmap=cmap, vmin=vmin, vmax=vmax,
                       shading='auto', transform=data_crs)

    # Add features (land, ocean, borders)
    ax.coastlines(resolution='10m')
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.LAND, facecolor='lightgray')
    ax.add_feature(cfeature.OCEAN, facecolor='lightblue')
    ax.set_extent([-12, 36, 34, 66], crs=data_crs)  # Adjust to the working area

    # Gridlines (bottom and left only)
    gl = ax.gridlines(draw_labels=True, linewidth=0)
    gl.top_labels = False
    gl.right_labels = False
    gl.bottom_labels = True
    gl.left_labels = True

    # Title and colorbar
    ax.set_title(f'{title} - {year}', fontsize=12)

    # Create a colorbar with more space
    cbar = fig.colorbar(im, ax=ax, orientation='horizontal', label='µg/m³', shrink=0.7, pad=0.1)
    
    # Adjust the colorbar's position
    cbar.ax.yaxis.set_ticks_position('right')  # Put colorbar ticks on the right side

    # Save the plot
    plt.tight_layout()
    plt.savefig(os.path.join(output_folder, f"mean_concentration_pm10_{year}.png"), dpi=150)
    plt.close(fig)

print("✅ Mean temperature maps with adjusted colorbar spacing generated!")


✅ Mean temperature maps with adjusted colorbar spacing generated!


Monthly

In [4]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\CAMS\chem_singlvl\4conversion_2003_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted([f for f in os.listdir(folder_path) if f.endswith(".nc")])

# Initialize an empty list to store monthly DataFrames
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)
    
    ds = xr.open_dataset(file_path)  # Load without specifying chunks
    ds = ds.chunk({'time': 500})  # Rechunk after loading 
    
    # Extract year and month from the 'time' dimension
    ds.coords['year'] = ds['time'].dt.year
    ds.coords['month'] = ds['time'].dt.month
    
    # Get the total pm10 data as a numpy array (time, latitude, longitude)
    pm10_data = ds['pm10'].values
    
    # Calculate statistics using numpy's nan functions along the 'time' dimension
    mean = np.nanmean(pm10_data, axis=0)
    median = np.nanmedian(pm10_data, axis=0)
    std = np.nanstd(pm10_data, axis=0)
    max_ = np.nanmax(pm10_data, axis=0)
    min_ = np.nanmin(pm10_data, axis=0)
    total = np.nansum(pm10_data, axis=0)
    
    # Create a new xarray Dataset for the statistics
    stats = xr.Dataset({
        'Mean': (['latitude', 'longitude'], mean),
        'Median': (['latitude', 'longitude'], median),
        'Std': (['latitude', 'longitude'], std),
        'Max': (['latitude', 'longitude'], max_),
        'Min': (['latitude', 'longitude'], min_),
        'Total': (['latitude', 'longitude'], total)
    }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})
    
    # Convert to DataFrame and restructure
    stats_df = stats.to_dataframe().reset_index()
    stats_df['Year'] = ds['year'].values[0]  # Add the year directly from time
    stats_df['Month'] = ds['month'].values[0]  # Add the month directly from time
    stats_df = stats_df.set_index(['Year', 'Month', 'latitude', 'longitude'])
    
    # Append to list
    df_list.append(stats_df)

# Concatenate all months into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)
df_final


                                    Mean     Median        Std         Max  \
Year Month latitude longitude                                                
2003 1     66.0     -12.00     19.548701  12.585088  21.477068  115.480764   
                    -11.25     20.100821  12.585088  22.348073  126.052238   
                    -10.50     20.722362  13.742916  23.231124  136.220989   
                    -9.75      21.284631  14.145639  23.579544  139.744813   
                    -9.00      21.881408  14.649042  23.886618  148.504034   
...                                  ...        ...        ...         ...   
2024 11    34.5      33.00     11.832513   7.933011  10.562286   58.037811   
                     33.75     14.390242   9.158271  13.488760   71.894760   
                     34.50     16.564747   9.806229  16.945272   83.909073   
                     35.25     22.121136  14.958431  19.844738   93.102242   
                     36.00     54.227848  47.056458  33.106899  

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Mean,Median,Std,Max,Min,Total
Year,Month,latitude,longitude,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2003,1,66.0,-12.00,19.548701,12.585088,21.477068,115.480764,0.503404,4848.077808
2003,1,66.0,-11.25,20.100821,12.585088,22.348073,126.052238,0.704765,4985.003561
2003,1,66.0,-10.50,20.722362,13.742916,23.231124,136.220989,1.107488,5139.145715
2003,1,66.0,-9.75,21.284631,14.145639,23.579544,139.744813,1.409530,5278.588486
2003,1,66.0,-9.00,21.881408,14.649042,23.886618,148.504034,1.510211,5426.589117
...,...,...,...,...,...,...,...,...,...
2024,11,34.5,33.00,11.832513,7.933011,10.562286,58.037811,0.266539,2839.802979
2024,11,34.5,33.75,14.390242,9.158271,13.488760,71.894760,0.170985,3453.657959
2024,11,34.5,34.50,16.564747,9.806229,16.945272,83.909073,0.002615,3975.539307
2024,11,34.5,35.25,22.121136,14.958431,19.844738,93.102242,0.425644,5309.072754


In [5]:
# Convert the pandas DataFrame back to an xarray Dataset
df_final_xr = df_final.reset_index().set_index(['Year', 'Month', 'latitude', 'longitude'])
df_final_xr = df_final_xr.to_xarray()

# Save the xarray Dataset to a NetCDF file
output_file_path = r"E:\IPMA\CAMS\chem_singlvl\monthly_pm10_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")

df_final_xr


Dataset saved to: E:\IPMA\CAMS\chem_singlvl\monthly_pm10_stats.nc


In [6]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# File path
file_path = r"E:\IPMA\CAMS\chem_singlvl\monthly_pm10_stats.nc"
output_folder = r"E:\IPMA\CAMS\chem_singlvl\maps_month\pm10"
os.makedirs(output_folder, exist_ok=True)

# Load dataset
ds = xr.open_dataset(file_path)

# Variable to plot
var = 'Mean'
title = 'Mean Concentration (µg/m³)'

# Global color scale limits
vmin = ds[var].min().item()
vmax = ds[var].max().item()

# Colormap
cmap = 'Blues'

# Coordinates
lat = ds['latitude']
lon = ds['longitude']

# Loop through all years and months
for year in ds['Year'].values:
    for month in ds['Month'].values:
        data = ds[var].sel(Year=year, Month=month)

        # Skip empty or all-NaN values
        if np.isnan(data).all():
            continue

        # Plotting
        fig = plt.figure(figsize=(7, 6))
        ax = plt.axes(projection=ccrs.PlateCarree())
        ax.set_extent([-12, 36, 34, 66], crs=ccrs.PlateCarree())

        im = ax.pcolormesh(lon, lat, data, cmap=cmap, vmin=vmin, vmax=vmax,
                           shading='auto', transform=ccrs.PlateCarree())

        ax.set_title(f'{title} - {year} / {month:02d}', fontsize=12)
        ax.coastlines(resolution='10m', linewidth=0.8)
        ax.add_feature(cfeature.BORDERS, linewidth=0.5)
        ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='none')

        # Gridlines (bottom and left only)
        gl = ax.gridlines(draw_labels=True, linewidth=0)
        gl.top_labels = False
        gl.right_labels = False
        gl.bottom_labels = True
        gl.left_labels = True

        # Colorbar
        cbar = plt.colorbar(im, ax=ax, orientation='horizontal', shrink=0.7, pad=0.1)
        cbar.set_label('µg/m³')
        cbar.ax.tick_params(labelsize=8)

        # Save figure
        plt.tight_layout()
        filename = f"mean_concentration_pm10_{year}{month:02d}.png"
        plt.savefig(os.path.join(output_folder, filename), dpi=150)
        plt.close(fig)

print("✅ Monthly temperature maps generated for all years and months!")


✅ Monthly temperature maps generated for all years and months!


In [7]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# File path
file_path = r"E:\IPMA\CAMS\chem_singlvl\monthly_pm10_stats.nc"
output_folder = r"E:\IPMA\CAMS\chem_singlvl\maps_month\pm10"
os.makedirs(output_folder, exist_ok=True)

# Load dataset
ds = xr.open_dataset(file_path)

# Variable to plot
var = 'Mean'
title = 'Mean Concentration (µg/m³)'

# Global color scale limits
vmin = ds[var].min().item()
vmax = ds[var].max().item()

# Colormap
cmap = 'Blues'

# Coordinates
lat = ds['latitude']
lon = ds['longitude']

# Loop through all years
for year in ds['Year'].values:
    fig, axs = plt.subplots(3, 4, figsize=(15, 10),
                            subplot_kw={'projection': ccrs.PlateCarree()})
    fig.suptitle(f'{title} - {year}', fontsize=16)

    for i, month in enumerate(ds['Month'].values):
        row, col = divmod(i, 4)
        ax = axs[row, col]

        # Extract data
        data = ds[var].sel(Year=year, Month=month)

        # Skip if no data
        if np.isnan(data).all():
            ax.set_visible(False)
            continue

        ax.set_extent([-12, 36, 34, 66], crs=ccrs.PlateCarree())

        im = ax.pcolormesh(lon, lat, data, cmap=cmap, vmin=vmin, vmax=vmax,
                           shading='auto', transform=ccrs.PlateCarree())

        ax.set_title(f'Month {month:02d}', fontsize=10)
        ax.coastlines(resolution='10m', linewidth=0.6)
        ax.add_feature(cfeature.BORDERS, linewidth=0.4)
        ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='none')

        # Gridlines (bottom and left only)
        gl = ax.gridlines(draw_labels=True, linewidth=0)
        gl.top_labels = False
        gl.right_labels = False
        gl.bottom_labels = row == 2
        gl.left_labels = col == 0

    # Add a single colorbar for all subplots
    cbar_ax = fig.add_axes([0.92, 0.25, 0.015, 0.5])
    cbar = fig.colorbar(im, cax=cbar_ax)
    cbar.set_label('µg/m³')

    # Save figure
    plt.tight_layout(rect=[0, 0, 0.9, 0.95])
    filename = f"mean_concentration_pm10_year_{year}.png"
    plt.savefig(os.path.join(output_folder, filename), dpi=150)
    plt.close(fig)

print("✅ Yearly temperature grids generated (3x4 months per year)!")


  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])


✅ Yearly temperature grids generated (3x4 months per year)!


Daily

In [1]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\CAMS\chem_singlvl\4conversion_2003_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted([f for f in os.listdir(folder_path) if f.endswith(".nc")])

# Initialize an empty list to store monthly DataFrames
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)
    
    ds = xr.open_dataset(file_path)  # Load without specifying chunks
    ds = ds.chunk({'time': 500})  # Rechunk after loading 

    # Group by day
    daily_grouped = ds.groupby('time.date')  # Group by each unique date
    
    # Calculate statistics using numpy's nan functions along the 'time' dimension
    for date, daily_data in daily_grouped:
        pm10_data = daily_data['pm10'].values
        
        mean = np.nanmean(pm10_data, axis=0)
        median = np.nanmedian(pm10_data, axis=0)
        std = np.nanstd(pm10_data, axis=0)
        max_ = np.nanmax(pm10_data, axis=0)
        min_ = np.nanmin(pm10_data, axis=0)
        total = np.nansum(pm10_data, axis=0)
    
        # Create a new xarray Dataset for the statistics
        stats = xr.Dataset({
            'Mean': (['latitude', 'longitude'], mean),
            'Median': (['latitude', 'longitude'], median),
            'Std': (['latitude', 'longitude'], std),
            'Max': (['latitude', 'longitude'], max_),
            'Min': (['latitude', 'longitude'], min_),
            'Total': (['latitude', 'longitude'], total)
        }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

        # Convert to DataFrame and restructure
        stats_df = stats.to_dataframe().reset_index()
        date_pd = pd.to_datetime(str(date))
        stats_df['Year'] = date_pd.year
        stats_df['Month'] = date_pd.month
        stats_df['Day'] = date_pd.day
        stats_df = stats_df.set_index(['Year', 'Month', 'Day', 'latitude', 'longitude'])
    
        # Append to list
        df_list.append(stats_df)
        
# Concatenate all months into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)
df_final


                                        Mean     Median        Std        Max  \
Year Month Day latitude longitude                                               
2003 1     1   66.0     -12.00      4.794918   3.976888   2.795561  10.772835   
                        -11.25      5.524853   4.379611   3.490020  12.081684   
                        -10.50      6.229618   4.631312   3.933459  12.786449   
                        -9.75       6.896628   4.631312   4.333950  13.994617   
                        -9.00       7.374861   5.789140   4.552880  14.699382   
...                                      ...        ...        ...        ...   
2024 11    30  34.5      33.00      5.754870   6.455139   2.253540   8.231381   
                         33.75      6.666454   7.218318   3.297368  10.541498   
                         34.50     10.310188  12.764701   4.004848  13.701992   
                         35.25     17.556248  18.695858   4.033917  22.087761   
                         36.

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Mean,Median,Std,Max,Min,Total
Year,Month,Day,latitude,longitude,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2003,1,1,66.0,-12.00,4.794918,3.976888,2.795561,10.772835,1.711572,38.359347
2003,1,1,66.0,-11.25,5.524853,4.379611,3.490020,12.081684,1.812253,44.198828
2003,1,1,66.0,-10.50,6.229618,4.631312,3.933459,12.786449,2.315656,49.836947
2003,1,1,66.0,-9.75,6.896628,4.631312,4.333950,13.994617,2.214975,55.173024
2003,1,1,66.0,-9.00,7.374861,5.789140,4.552880,14.699382,2.517018,58.998891
...,...,...,...,...,...,...,...,...,...,...
2024,11,30,34.5,33.00,5.754870,6.455139,2.253540,8.231381,2.563411,46.038963
2024,11,30,34.5,33.75,6.666454,7.218318,3.297368,10.541498,1.820695,53.331635
2024,11,30,34.5,34.50,10.310188,12.764701,4.004848,13.701992,3.035666,82.481506
2024,11,30,34.5,35.25,17.556248,18.695858,4.033917,22.087761,9.168048,140.449982


In [2]:
# Convert the pandas DataFrame to xarray directly
df_final_xr = xr.Dataset.from_dataframe(df_final)

# Save the xarray Dataset to a NetCDF file
output_file_path = r"E:\IPMA\CAMS\chem_singlvl\daily_pm10_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")

df_final_xr


Dataset saved to: E:\IPMA\CAMS\chem_singlvl\daily_pm10_stats.nc
