CAMS

- mean, median, std, min, max for O3 values by year and year-month

- maps for both results


Yearly

In [1]:
import xarray as xr
import pandas as pd
import os
import numpy as np
from collections import defaultdict

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\CAMS\chem_multlvl\5compile_2003_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted([f for f in os.listdir(folder_path) if f.endswith(".nc")])

# Initialize an empty list to store yearly DataFrames
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)
    
    ds = xr.open_dataset(file_path)  # Load without specifying chunks
    ds = ds.chunk({'time': 500})  # Rechunk after loading 

    # Extract the year from the 'time' dimension
    year = ds['time'].dt.year.values[0]  # Extract the year from the first time step
    
    # Get the total o3 data as a numpy array
    o3_data = ds['go3'].values  # Numpy array (all time steps)

    # Compute statistics using numpy's nan functions
    mean = np.nanmean(o3_data, axis=0)
    median = np.nanmedian(o3_data, axis=0)
    std = np.nanstd(o3_data, axis=0)
    max_ = np.nanmax(o3_data, axis=0)
    min_ = np.nanmin(o3_data, axis=0)
    total = np.nansum(o3_data, axis=0)
    
    # Combine results into a new dataset
    stats = xr.Dataset({
        'Mean': (['latitude', 'longitude'], mean),
        'Median': (['latitude', 'longitude'], median),
        'Std': (['latitude', 'longitude'], std),
        'Max': (['latitude', 'longitude'], max_),
        'Min': (['latitude', 'longitude'], min_),
        'Total': (['latitude', 'longitude'], total)
    }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

    # Convert to DataFrame and restructure
    stats_df = stats.to_dataframe().reset_index()
    stats_df['Year'] = year  # Add the year directly from time
    stats_df = stats_df.set_index(['Year', 'latitude', 'longitude'])
    
    # Append to list
    df_list.append(stats_df)

# Concatenate all years into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)


                                 Mean       Median           Std  \
Year latitude longitude                                            
2003 66.0     -12.00    -31516.619767  3625.113084  1.348125e+06   
              -11.25     -1498.350985  3661.240146  1.836055e+05   
              -10.50     35535.966247  3641.902913  1.653171e+06   
              -9.75     -12374.634084  3604.012202  6.246835e+05   
              -9.00       -894.340877  3561.052233  1.593578e+05   
...                               ...          ...           ...   
2024 34.5      33.00      1397.944336  1301.596924  3.729471e+02   
               33.75      1433.067505  1355.090088  4.046527e+02   
               34.50      1469.189697  1407.615479  4.091550e+02   
               35.25      1402.454834  1292.108154  4.363062e+02   
               36.00      1015.415100  1027.885498  5.469069e+02   

                                  Max           Min         Total  
Year latitude longitude                        

In [2]:
# Convert the pandas DataFrame back to an xarray Dataset
df_final_xr = df_final.reset_index().set_index(['Year', 'latitude', 'longitude'])
df_final_xr = df_final_xr.to_xarray()

# Save the xarray Dataset to a NetCDF file
output_file_path = r"E:\IPMA\CAMS\chem_multlvl\yearly_o3_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")


Dataset saved to: E:\IPMA\CAMS\chem_multlvl\yearly_o3_stats.nc


In [3]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# File path
file_path = r"E:\IPMA\CAMS\chem_multlvl\yearly_o3_stats.nc"
output_folder = r"E:\IPMA\CAMS\chem_multlvl\maps_year\o3"
os.makedirs(output_folder, exist_ok=True)

# Load dataset
ds = xr.open_dataset(file_path)

# Variable to plot
var = 'Mean'
title = 'Mean Concentration (µg/m³)'

# Global color scale limits
vmin = ds[var].min().item()
vmax = ds[var].max().item()

# Colormap
cmap = 'Blues'

# Projection (assume data is in PlateCarree, common for lat/lon grids)
data_crs = ccrs.PlateCarree()

# Loop through years
for year in ds['Year'].values:
    fig, ax = plt.subplots(figsize=(8, 6),
                           subplot_kw={'projection': ccrs.PlateCarree()})

    # Select data for current year
    data = ds[var].sel(Year=year)

    # Plot the data
    im = ax.pcolormesh(ds['longitude'], ds['latitude'], data,
                       cmap=cmap, vmin=vmin, vmax=vmax,
                       shading='auto', transform=data_crs)

    # Add features (land, ocean, borders)
    ax.coastlines(resolution='10m')
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.LAND, facecolor='lightgray')
    ax.add_feature(cfeature.OCEAN, facecolor='lightblue')
    ax.set_extent([-12, 36, 34, 66], crs=data_crs)  # Adjust to the working area

    # Gridlines (bottom and left only)
    gl = ax.gridlines(draw_labels=True, linewidth=0)
    gl.top_labels = False
    gl.right_labels = False
    gl.bottom_labels = True
    gl.left_labels = True

    # Title and colorbar
    ax.set_title(f'{title} - {year}', fontsize=12)

    # Create a colorbar with more space
    cbar = fig.colorbar(im, ax=ax, orientation='horizontal', label='µg/m³', shrink=0.7, pad=0.1)
    
    # Adjust the colorbar's position
    cbar.ax.yaxis.set_ticks_position('right')  # Put colorbar ticks on the right side

    # Save the plot
    plt.tight_layout()
    plt.savefig(os.path.join(output_folder, f"mean_concentration_o3_{year}.png"), dpi=150)
    plt.close(fig)

print("✅ Mean temperature maps with adjusted colorbar spacing generated!")


✅ Mean temperature maps with adjusted colorbar spacing generated!


Monthly

In [4]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\CAMS\chem_multlvl\4conversion_2003_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted([f for f in os.listdir(folder_path) if f.endswith(".nc")])

# Initialize an empty list to store monthly DataFrames
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)
    
    ds = xr.open_dataset(file_path)  # Load without specifying chunks
    ds = ds.chunk({'time': 500})  # Rechunk after loading 
    
    # Extract year and month from the 'time' dimension
    ds.coords['year'] = ds['time'].dt.year
    ds.coords['month'] = ds['time'].dt.month
    
    # Get the total o3 data as a numpy array (time, latitude, longitude)
    o3_data = ds['go3'].values
    
    # Calculate statistics using numpy's nan functions along the 'time' dimension
    mean = np.nanmean(o3_data, axis=0)
    median = np.nanmedian(o3_data, axis=0)
    std = np.nanstd(o3_data, axis=0)
    max_ = np.nanmax(o3_data, axis=0)
    min_ = np.nanmin(o3_data, axis=0)
    total = np.nansum(o3_data, axis=0)
    
    # Create a new xarray Dataset for the statistics
    stats = xr.Dataset({
        'Mean': (['latitude', 'longitude'], mean),
        'Median': (['latitude', 'longitude'], median),
        'Std': (['latitude', 'longitude'], std),
        'Max': (['latitude', 'longitude'], max_),
        'Min': (['latitude', 'longitude'], min_),
        'Total': (['latitude', 'longitude'], total)
    }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})
    
    # Convert to DataFrame and restructure
    stats_df = stats.to_dataframe().reset_index()
    stats_df['Year'] = ds['year'].values[0]  # Add the year directly from time
    stats_df['Month'] = ds['month'].values[0]  # Add the month directly from time
    stats_df = stats_df.set_index(['Year', 'Month', 'latitude', 'longitude'])
    
    # Append to list
    df_list.append(stats_df)

# Concatenate all months into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)
df_final


                                       Mean       Median            Std  \
Year Month latitude longitude                                             
2003 1     66.0     -12.00    -14558.086272  4655.352484  269357.073633   
                    -11.25      5497.181617  4968.389506   44455.657379   
                    -10.50    -11896.813376  5123.981096  232110.077934   
                    -9.75       6633.807053  5299.292957  210582.342661   
                    -9.00      -2718.978198  5389.346106   62847.177876   
...                                     ...          ...            ...   
2024 11    34.5      33.00      1440.207642  1387.380615     311.632874   
                     33.75      1574.616699  1489.688965     288.717468   
                     34.50      1604.420776  1573.358521     294.306641   
                     35.25      1520.326782  1521.407715     347.673035   
                     36.00       941.721313   966.588074     607.889771   

                        

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Mean,Median,Std,Max,Min,Total
Year,Month,latitude,longitude,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2003,1,66.0,-12.00,-14558.086272,4655.352484,269357.073633,2.930290e+05,-4.181505e+06,-3.610405e+06
2003,1,66.0,-11.25,5497.181617,4968.389506,44455.657379,4.117546e+05,-2.206364e+05,1.363301e+06
2003,1,66.0,-10.50,-11896.813376,5123.981096,232110.077934,4.835737e+05,-2.942663e+06,-2.950410e+06
2003,1,66.0,-9.75,6633.807053,5299.292957,210582.342661,2.830988e+06,-1.245663e+06,1.645184e+06
2003,1,66.0,-9.00,-2718.978198,5389.346106,62847.177876,3.482560e+05,-5.218807e+05,-6.743066e+05
...,...,...,...,...,...,...,...,...,...
2024,11,34.5,33.00,1440.207642,1387.380615,311.632874,2.786466e+03,9.099949e+02,3.456498e+05
2024,11,34.5,33.75,1574.616699,1489.688965,288.717468,2.864270e+03,1.155251e+03,3.779080e+05
2024,11,34.5,34.50,1604.420776,1573.358521,294.306641,2.743536e+03,9.115381e+02,3.850610e+05
2024,11,34.5,35.25,1520.326782,1521.407715,347.673035,2.708905e+03,5.574509e+02,3.648784e+05


In [5]:
# Convert the pandas DataFrame back to an xarray Dataset
df_final_xr = df_final.reset_index().set_index(['Year', 'Month', 'latitude', 'longitude'])
df_final_xr = df_final_xr.to_xarray()

# Save the xarray Dataset to a NetCDF file
output_file_path = r"E:\IPMA\CAMS\chem_multlvl\monthly_o3_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")

df_final_xr


Dataset saved to: E:\IPMA\CAMS\chem_multlvl\monthly_o3_stats.nc


In [6]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# File path
file_path = r"E:\IPMA\CAMS\chem_multlvl\monthly_o3_stats.nc"
output_folder = r"E:\IPMA\CAMS\chem_multlvl\maps_month\o3"
os.makedirs(output_folder, exist_ok=True)

# Load dataset
ds = xr.open_dataset(file_path)

# Variable to plot
var = 'Mean'
title = 'Mean Concentration (µg/m³)'

# Global color scale limits
vmin = ds[var].min().item()
vmax = ds[var].max().item()

# Colormap
cmap = 'Blues'

# Coordinates
lat = ds['latitude']
lon = ds['longitude']

# Loop through all years and months
for year in ds['Year'].values:
    for month in ds['Month'].values:
        data = ds[var].sel(Year=year, Month=month)

        # Skip empty or all-NaN values
        if np.isnan(data).all():
            continue

        # Plotting
        fig = plt.figure(figsize=(7, 6))
        ax = plt.axes(projection=ccrs.PlateCarree())
        ax.set_extent([-12, 36, 34, 66], crs=ccrs.PlateCarree())

        im = ax.pcolormesh(lon, lat, data, cmap=cmap, vmin=vmin, vmax=vmax,
                           shading='auto', transform=ccrs.PlateCarree())

        ax.set_title(f'{title} - {year} / {month:02d}', fontsize=12)
        ax.coastlines(resolution='10m', linewidth=0.8)
        ax.add_feature(cfeature.BORDERS, linewidth=0.5)
        ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='none')

        # Gridlines (bottom and left only)
        gl = ax.gridlines(draw_labels=True, linewidth=0)
        gl.top_labels = False
        gl.right_labels = False
        gl.bottom_labels = True
        gl.left_labels = True

        # Colorbar
        cbar = plt.colorbar(im, ax=ax, orientation='horizontal', shrink=0.7, pad=0.1)
        cbar.set_label('µg/m³')
        cbar.ax.tick_params(labelsize=8)

        # Save figure
        plt.tight_layout()
        filename = f"mean_concentration_o3_{year}{month:02d}.png"
        plt.savefig(os.path.join(output_folder, filename), dpi=150)
        plt.close(fig)

print("✅ Monthly temperature maps generated for all years and months!")


✅ Monthly temperature maps generated for all years and months!


In [7]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# File path
file_path = r"E:\IPMA\CAMS\chem_multlvl\monthly_o3_stats.nc"
output_folder = r"E:\IPMA\CAMS\chem_multlvl\maps_month\o3"
os.makedirs(output_folder, exist_ok=True)

# Load dataset
ds = xr.open_dataset(file_path)

# Variable to plot
var = 'Mean'
title = 'Mean Concentration (µg/m³)'

# Global color scale limits
vmin = ds[var].min().item()
vmax = ds[var].max().item()

# Colormap
cmap = 'Blues'

# Coordinates
lat = ds['latitude']
lon = ds['longitude']

# Loop through all years
for year in ds['Year'].values:
    fig, axs = plt.subplots(3, 4, figsize=(15, 10),
                            subplot_kw={'projection': ccrs.PlateCarree()})
    fig.suptitle(f'{title} - {year}', fontsize=16)

    for i, month in enumerate(ds['Month'].values):
        row, col = divmod(i, 4)
        ax = axs[row, col]

        # Extract data
        data = ds[var].sel(Year=year, Month=month)

        # Skip if no data
        if np.isnan(data).all():
            ax.set_visible(False)
            continue

        ax.set_extent([-12, 36, 34, 66], crs=ccrs.PlateCarree())

        im = ax.pcolormesh(lon, lat, data, cmap=cmap, vmin=vmin, vmax=vmax,
                           shading='auto', transform=ccrs.PlateCarree())

        ax.set_title(f'Month {month:02d}', fontsize=10)
        ax.coastlines(resolution='10m', linewidth=0.6)
        ax.add_feature(cfeature.BORDERS, linewidth=0.4)
        ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='none')

        # Gridlines (bottom and left only)
        gl = ax.gridlines(draw_labels=True, linewidth=0)
        gl.top_labels = False
        gl.right_labels = False
        gl.bottom_labels = row == 2
        gl.left_labels = col == 0

    # Add a single colorbar for all subplots
    cbar_ax = fig.add_axes([0.92, 0.25, 0.015, 0.5])
    cbar = fig.colorbar(im, cax=cbar_ax)
    cbar.set_label('µg/m³')

    # Save figure
    plt.tight_layout(rect=[0, 0, 0.9, 0.95])
    filename = f"mean_concentration_o3_year_{year}.png"
    plt.savefig(os.path.join(output_folder, filename), dpi=150)
    plt.close(fig)

print("✅ Yearly temperature grids generated (3x4 months per year)!")


  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])


✅ Yearly temperature grids generated (3x4 months per year)!
