CAMS

- mean, median, std, min, max for O3 values by year and year-month

- maps for both results


Yearly

In [1]:
import xarray as xr
import pandas as pd
import os
import numpy as np
from collections import defaultdict

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\CAMS\chem_multlvl\5compile_2003_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted([f for f in os.listdir(folder_path) if f.endswith(".nc")])

# Initialize an empty list to store yearly DataFrames
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)
    
    ds = xr.open_dataset(file_path)  # Load without specifying chunks
    ds = ds.chunk({'time': 500})  # Rechunk after loading 

    # Extract the year from the 'time' dimension
    year = ds['time'].dt.year.values[0]  # Extract the year from the first time step
    
    # Get the total o3 data as a numpy array
    o3_data = ds['go3'].values  # Numpy array (all time steps)

    # Compute statistics using numpy's nan functions
    mean = np.nanmean(o3_data, axis=0)
    median = np.nanmedian(o3_data, axis=0)
    std = np.nanstd(o3_data, axis=0)
    max_ = np.nanmax(o3_data, axis=0)
    min_ = np.nanmin(o3_data, axis=0)
    total = np.nansum(o3_data, axis=0)
    
    # Combine results into a new dataset
    stats = xr.Dataset({
        'Mean': (['latitude', 'longitude'], mean),
        'Median': (['latitude', 'longitude'], median),
        'Std': (['latitude', 'longitude'], std),
        'Max': (['latitude', 'longitude'], max_),
        'Min': (['latitude', 'longitude'], min_),
        'Total': (['latitude', 'longitude'], total)
    }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

    # Convert to DataFrame and restructure
    stats_df = stats.to_dataframe().reset_index()
    stats_df['Year'] = year  # Add the year directly from time
    stats_df = stats_df.set_index(['Year', 'latitude', 'longitude'])
    
    # Append to list
    df_list.append(stats_df)

# Concatenate all years into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)


                               Mean      Median        Std         Max  \
Year latitude longitude                                                  
2003 66.0     -12.00      68.674162   68.553379  12.907445  114.969981   
              -11.25      68.844949   68.555003  12.684363  113.666987   
              -10.50      69.057336   68.581005  12.513893  115.246693   
              -9.75       69.357552   68.835296  12.383167  116.911189   
              -9.00       69.673903   69.122738  12.282093  117.976491   
...                             ...         ...        ...         ...   
2024 34.5      33.00     100.239624  100.464249  13.039536  156.328644   
               33.75     101.207298  100.074951  11.074243  149.119873   
               34.50     101.405594  100.213654  11.764178  165.021500   
               35.25      96.709023   95.380074  14.481147  170.710251   
               36.00      75.126717   82.216850  37.390919  175.559692   

                               Min   

In [2]:
# Convert the pandas DataFrame back to an xarray Dataset
df_final_xr = df_final.reset_index().set_index(['Year', 'latitude', 'longitude'])
df_final_xr = df_final_xr.to_xarray()

# Save the xarray Dataset to a NetCDF file
output_file_path = r"E:\IPMA\CAMS\chem_multlvl\yearly_o3_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")


Dataset saved to: E:\IPMA\CAMS\chem_multlvl\yearly_o3_stats.nc


In [10]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# File path
file_path = r"E:\IPMA\CAMS\chem_multlvl\yearly_o3_stats.nc"
output_folder = r"E:\IPMA\CAMS\chem_multlvl\maps_year\o3"
os.makedirs(output_folder, exist_ok=True)

# Load dataset
ds = xr.open_dataset(file_path)

# Variable to plot
var = 'Mean'
title = 'Mean Concentration O3 (µg/m³)'

# Global color scale limits
vmin = ds[var].min().item()
vmax = ds[var].max().item()

# Colormap
cmap = 'Blues'

# Projection (assume data is in PlateCarree, common for lat/lon grids)
data_crs = ccrs.PlateCarree()

# Loop through years
for year in ds['Year'].values:
    fig, ax = plt.subplots(figsize=(8, 6),
                           subplot_kw={'projection': ccrs.PlateCarree()})

    # Select data for current year
    data = ds[var].sel(Year=year)

    # Plot the data
    im = ax.pcolormesh(ds['longitude'], ds['latitude'], data,
                       cmap=cmap, vmin=vmin, vmax=vmax,
                       shading='auto', transform=data_crs)

    # Add features (land, ocean, borders)
    ax.coastlines(resolution='10m')
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.LAND, facecolor='lightgray')
    ax.add_feature(cfeature.OCEAN, facecolor='lightblue')
    ax.set_extent([-12, 36, 34, 66], crs=data_crs)  # Adjust to the working area

    # Gridlines (bottom and left only)
    gl = ax.gridlines(draw_labels=True, linewidth=0)
    gl.top_labels = False
    gl.right_labels = False
    gl.bottom_labels = True
    gl.left_labels = True

    # Title and colorbar
    ax.set_title(f'{title} - {year}', fontsize=12)

    # Create a colorbar with more space
    cbar = fig.colorbar(im, ax=ax, orientation='horizontal', label='µg/m³', shrink=0.7, pad=0.1)
    
    # Adjust the colorbar's position
    cbar.ax.yaxis.set_ticks_position('right')  # Put colorbar ticks on the right side

    # Save the plot
    plt.tight_layout()
    plt.savefig(os.path.join(output_folder, f"mean_concentration_o3_{year}.png"), dpi=150)
    plt.close(fig)

print("✅ Mean temperature maps with adjusted colorbar spacing generated!")


✅ Mean temperature maps with adjusted colorbar spacing generated!


Monthly

In [4]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\CAMS\chem_multlvl\4conversion_2003_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted([f for f in os.listdir(folder_path) if f.endswith(".nc")])

# Initialize an empty list to store monthly DataFrames
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)
    
    ds = xr.open_dataset(file_path)  # Load without specifying chunks
    ds = ds.chunk({'time': 500})  # Rechunk after loading 
    
    # Extract year and month from the 'time' dimension
    ds.coords['year'] = ds['time'].dt.year
    ds.coords['month'] = ds['time'].dt.month
    
    # Get the total o3 data as a numpy array (time, latitude, longitude)
    o3_data = ds['go3'].values
    
    # Calculate statistics using numpy's nan functions along the 'time' dimension
    mean = np.nanmean(o3_data, axis=0)
    median = np.nanmedian(o3_data, axis=0)
    std = np.nanstd(o3_data, axis=0)
    max_ = np.nanmax(o3_data, axis=0)
    min_ = np.nanmin(o3_data, axis=0)
    total = np.nansum(o3_data, axis=0)
    
    # Create a new xarray Dataset for the statistics
    stats = xr.Dataset({
        'Mean': (['latitude', 'longitude'], mean),
        'Median': (['latitude', 'longitude'], median),
        'Std': (['latitude', 'longitude'], std),
        'Max': (['latitude', 'longitude'], max_),
        'Min': (['latitude', 'longitude'], min_),
        'Total': (['latitude', 'longitude'], total)
    }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})
    
    # Convert to DataFrame and restructure
    stats_df = stats.to_dataframe().reset_index()
    stats_df['Year'] = ds['year'].values[0]  # Add the year directly from time
    stats_df['Month'] = ds['month'].values[0]  # Add the month directly from time
    stats_df = stats_df.set_index(['Year', 'Month', 'latitude', 'longitude'])
    
    # Append to list
    df_list.append(stats_df)

# Concatenate all months into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)
df_final


                                    Mean     Median        Std         Max  \
Year Month latitude longitude                                                
2003 1     66.0     -12.00     75.236145  75.775327   8.782744   94.519331   
                    -11.25     75.552674  76.007124   8.550499   94.520955   
                    -10.50     75.894075  76.164500   8.252259   94.731257   
                    -9.75      76.241089  76.356457   7.886537   95.124397   
                    -9.00      76.476207  76.534825   7.630156   95.402252   
...                                  ...        ...        ...         ...   
2024 11    34.5      33.00     91.994461  93.310432  12.167921  119.380760   
                     33.75     99.085503  98.508888   7.897540  125.112083   
                     34.50     99.188461  97.974762   9.205626  131.790710   
                     35.25     92.865158  92.894096  12.291994  133.288147   
                     36.00     55.354137  59.655468  33.475647  

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Mean,Median,Std,Max,Min,Total
Year,Month,latitude,longitude,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2003,1,66.0,-12.00,75.236145,75.775327,8.782744,94.519331,50.609932,18658.564017
2003,1,66.0,-11.25,75.552674,76.007124,8.550499,94.520955,53.126433,18737.063171
2003,1,66.0,-10.50,75.894075,76.164500,8.252259,94.731257,55.474440,18821.730706
2003,1,66.0,-9.75,76.241089,76.356457,7.886537,95.124397,60.049833,18907.790081
2003,1,66.0,-9.00,76.476207,76.534825,7.630156,95.402252,60.577261,18966.099442
...,...,...,...,...,...,...,...,...,...
2024,11,34.5,33.00,91.994461,93.310432,12.167921,119.380760,62.600586,22078.669922
2024,11,34.5,33.75,99.085503,98.508888,7.897540,125.112083,79.010674,23780.521484
2024,11,34.5,34.50,99.188461,97.974762,9.205626,131.790710,71.100456,23805.230469
2024,11,34.5,35.25,92.865158,92.894096,12.291994,133.288147,44.531418,22287.638672


In [5]:
# Convert the pandas DataFrame back to an xarray Dataset
df_final_xr = df_final.reset_index().set_index(['Year', 'Month', 'latitude', 'longitude'])
df_final_xr = df_final_xr.to_xarray()

# Save the xarray Dataset to a NetCDF file
output_file_path = r"E:\IPMA\CAMS\chem_multlvl\monthly_o3_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")

df_final_xr


Dataset saved to: E:\IPMA\CAMS\chem_multlvl\monthly_o3_stats.nc


In [9]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# File path
file_path = r"E:\IPMA\CAMS\chem_multlvl\monthly_o3_stats.nc"
output_folder = r"E:\IPMA\CAMS\chem_multlvl\maps_month\o3"
os.makedirs(output_folder, exist_ok=True)

# Load dataset
ds = xr.open_dataset(file_path)

# Variable to plot
var = 'Mean'
title = 'Mean Concentration O3 (µg/m³)'

# Global color scale limits
vmin = ds[var].min().item()
vmax = ds[var].max().item()

# Colormap
cmap = 'Blues'

# Coordinates
lat = ds['latitude']
lon = ds['longitude']

# Loop through all years and months
for year in ds['Year'].values:
    for month in ds['Month'].values:
        data = ds[var].sel(Year=year, Month=month)

        # Skip empty or all-NaN values
        if np.isnan(data).all():
            continue

        # Plotting
        fig = plt.figure(figsize=(7, 6))
        ax = plt.axes(projection=ccrs.PlateCarree())
        ax.set_extent([-12, 36, 34, 66], crs=ccrs.PlateCarree())

        im = ax.pcolormesh(lon, lat, data, cmap=cmap, vmin=vmin, vmax=vmax,
                           shading='auto', transform=ccrs.PlateCarree())

        ax.set_title(f'{title} - {year} / {month:02d}', fontsize=12)
        ax.coastlines(resolution='10m', linewidth=0.8)
        ax.add_feature(cfeature.BORDERS, linewidth=0.5)
        ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='none')

        # Gridlines (bottom and left only)
        gl = ax.gridlines(draw_labels=True, linewidth=0)
        gl.top_labels = False
        gl.right_labels = False
        gl.bottom_labels = True
        gl.left_labels = True

        # Colorbar
        cbar = plt.colorbar(im, ax=ax, orientation='horizontal', shrink=0.7, pad=0.1)
        cbar.set_label('µg/m³')
        cbar.ax.tick_params(labelsize=8)

        # Save figure
        plt.tight_layout()
        filename = f"mean_concentration_o3_{year}{month:02d}.png"
        plt.savefig(os.path.join(output_folder, filename), dpi=150)
        plt.close(fig)

print("✅ Monthly temperature maps generated for all years and months!")


✅ Monthly temperature maps generated for all years and months!


In [8]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import cartopy.crs as ccrs
import cartopy.feature as cfeature

# File path
file_path = r"E:\IPMA\CAMS\chem_multlvl\monthly_o3_stats.nc"
output_folder = r"E:\IPMA\CAMS\chem_multlvl\maps_month\o3"
os.makedirs(output_folder, exist_ok=True)

# Load dataset
ds = xr.open_dataset(file_path)

# Variable to plot
var = 'Mean'
title = 'Mean Concentration O3 (µg/m³)'

# Global color scale limits
vmin = ds[var].min().item()
vmax = ds[var].max().item()

# Colormap
cmap = 'Blues'

# Coordinates
lat = ds['latitude']
lon = ds['longitude']

# Loop through all years
for year in ds['Year'].values:
    fig, axs = plt.subplots(3, 4, figsize=(15, 10),
                            subplot_kw={'projection': ccrs.PlateCarree()})
    fig.suptitle(f'{title} - {year}', fontsize=16)

    for i, month in enumerate(ds['Month'].values):
        row, col = divmod(i, 4)
        ax = axs[row, col]

        # Extract data
        data = ds[var].sel(Year=year, Month=month)

        # Skip if no data
        if np.isnan(data).all():
            ax.set_visible(False)
            continue

        ax.set_extent([-12, 36, 34, 66], crs=ccrs.PlateCarree())

        im = ax.pcolormesh(lon, lat, data, cmap=cmap, vmin=vmin, vmax=vmax,
                           shading='auto', transform=ccrs.PlateCarree())

        ax.set_title(f'Month {month:02d}', fontsize=10)
        ax.coastlines(resolution='10m', linewidth=0.6)
        ax.add_feature(cfeature.BORDERS, linewidth=0.4)
        ax.add_feature(cfeature.LAND, edgecolor='black', facecolor='none')

        # Gridlines (bottom and left only)
        gl = ax.gridlines(draw_labels=True, linewidth=0)
        gl.top_labels = False
        gl.right_labels = False
        gl.bottom_labels = row == 2
        gl.left_labels = col == 0

    # Add a single colorbar for all subplots
    cbar_ax = fig.add_axes([0.92, 0.25, 0.015, 0.5])
    cbar = fig.colorbar(im, cax=cbar_ax)
    cbar.set_label('µg/m³')

    # Save figure
    plt.tight_layout(rect=[0, 0, 0.9, 0.95])
    filename = f"mean_concentration_o3_year_{year}.png"
    plt.savefig(os.path.join(output_folder, filename), dpi=150)
    plt.close(fig)

print("✅ Yearly temperature grids generated (3x4 months per year)!")


  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
  plt.tight_layout(rect=[0, 0, 0.9, 0.95])


✅ Yearly temperature grids generated (3x4 months per year)!


Daily

In [1]:
import xarray as xr
import pandas as pd
import os
import numpy as np

# Define the folder containing NetCDF files
folder_path = r"E:\IPMA\CAMS\chem_multlvl\4conversion_2003_2024"

# Get a sorted list of all NetCDF files in the folder
file_list = sorted([f for f in os.listdir(folder_path) if f.endswith(".nc")])

# Initialize an empty list to store monthly DataFrames
df_list = []

# Process each file (each corresponding to a year)
for file in file_list:
    file_path = os.path.join(folder_path, file)
    
    ds = xr.open_dataset(file_path)  # Load without specifying chunks
    ds = ds.chunk({'time': 500})  # Rechunk after loading 

    # Group by day
    daily_grouped = ds.groupby('time.date')  # Group by each unique date
    
    # Calculate statistics using numpy's nan functions along the 'time' dimension
    for date, daily_data in daily_grouped:
        o3_data = daily_data['go3'].values
        
        mean = np.nanmean(o3_data, axis=0)
        median = np.nanmedian(o3_data, axis=0)
        std = np.nanstd(o3_data, axis=0)
        max_ = np.nanmax(o3_data, axis=0)
        min_ = np.nanmin(o3_data, axis=0)
        total = np.nansum(o3_data, axis=0)
    
        # Create a new xarray Dataset for the statistics
        stats = xr.Dataset({
            'Mean': (['latitude', 'longitude'], mean),
            'Median': (['latitude', 'longitude'], median),
            'Std': (['latitude', 'longitude'], std),
            'Max': (['latitude', 'longitude'], max_),
            'Min': (['latitude', 'longitude'], min_),
            'Total': (['latitude', 'longitude'], total)
        }, coords={'latitude': ds['latitude'], 'longitude': ds['longitude']})

        # Convert to DataFrame and restructure
        stats_df = stats.to_dataframe().reset_index()
        date_pd = pd.to_datetime(str(date))
        stats_df['Year'] = date_pd.year
        stats_df['Month'] = date_pd.month
        stats_df['Day'] = date_pd.day
        stats_df = stats_df.set_index(['Year', 'Month', 'Day', 'latitude', 'longitude'])
    
        # Append to list
        df_list.append(stats_df)
        
# Concatenate all months into a single DataFrame
df_final = pd.concat(df_list)

# Display the DataFrame
print(df_final)
df_final


                                         Mean      Median        Std  \
Year Month Day latitude longitude                                      
2003 1     1   66.0     -12.00      81.386744   81.651369   3.032321   
                        -11.25      80.857696   80.673358   3.362990   
                        -10.50      80.605792   80.528010   3.406749   
                        -9.75       80.482895   79.764018   3.270355   
                        -9.00       80.314404   79.264423   3.259029   
...                                       ...         ...        ...   
2024 11    30  34.5      33.00      89.997475   91.014740  12.772193   
                         33.75     102.739861  103.910065   5.248046   
                         34.50     102.956787  103.607613   6.041886   
                         35.25      98.439049   99.009918   8.149194   
                         36.00      54.661221   44.336761  27.460127   

                                          Max        Min       

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Mean,Median,Std,Max,Min,Total
Year,Month,Day,latitude,longitude,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2003,1,1,66.0,-12.00,81.386744,81.651369,3.032321,85.399057,77.353788,651.093953
2003,1,1,66.0,-11.25,80.857696,80.673358,3.362990,85.367410,76.284932,646.861570
2003,1,1,66.0,-10.50,80.605792,80.528010,3.406749,85.113089,76.190062,644.846339
2003,1,1,66.0,-9.75,80.482895,79.764018,3.270355,84.998933,75.872866,643.863164
2003,1,1,66.0,-9.00,80.314404,79.264423,3.259029,85.045502,75.547815,642.515234
...,...,...,...,...,...,...,...,...,...,...
2024,11,30,34.5,33.00,89.997475,91.014740,12.772193,106.637863,70.601181,719.979797
2024,11,30,34.5,33.75,102.739861,103.910065,5.248046,108.799355,94.750755,821.918884
2024,11,30,34.5,34.50,102.956787,103.607613,6.041886,110.423782,93.651962,823.654297
2024,11,30,34.5,35.25,98.439049,99.009918,8.149194,109.853424,85.014359,787.512390


In [2]:
# Convert the pandas DataFrame to xarray directly
df_final_xr = xr.Dataset.from_dataframe(df_final)

# Save the xarray Dataset to a NetCDF file
output_file_path = r"E:\IPMA\CAMS\chem_multlvl\daily_o3_stats.nc"
df_final_xr.to_netcdf(output_file_path)

print(f"Dataset saved to: {output_file_path}")

df_final_xr

Dataset saved to: E:\IPMA\CAMS\chem_multlvl\daily_o3_stats.nc
