In [None]:
import os
import glob
import xarray as xr
import pandas as pd

def _select_region(df, lat_min, lat_max, lon_min, lon_max):
    """
    Return a copy of the rows of `df` inside a latitude/longitude box.

    `df` must have 'latitude' and 'longitude' columns. Latitude bounds are
    inclusive. Longitude bounds may be given in either the [-180, 180] or the
    [0, 360] convention; both the bounds and the data are compared modulo 360,
    so a box that crosses the prime meridian (e.g. -10 .. 2) is handled as
    "lon >= 350 or lon <= 2". The longitude values in the returned frame are
    left untouched.
    """
    lat_mask = (df['latitude'] >= lat_min) & (df['latitude'] <= lat_max)

    # A box spanning the whole globe would collapse to a single meridian
    # after the modulo below, so treat it as "no longitude filter".
    if lon_max - lon_min >= 360:
        return df[lat_mask].copy()

    lon = df['longitude'] % 360
    west = lon_min % 360
    east = lon_max % 360
    if west <= east:
        # Ordinary box: one contiguous interval in [0, 360).
        lon_mask = (lon >= west) & (lon <= east)
    else:
        # Box wraps through 0 degrees: keep both ends of the [0, 360) range.
        lon_mask = (lon >= west) | (lon <= east)

    # Return a copy so callers can add/drop columns without pandas raising
    # SettingWithCopyWarning on a view of the unfiltered frame.
    return df[lat_mask & lon_mask].copy()


def process_era5_file(file_path, output_base_folder, lat_min=49, lat_max=61, lon_min=-10, lon_max=2):
    """
    Convert one ERA5 Zarr store into per-variable CSV files for a lat/lon box.

    For every data variable in the store, the data are flattened to a table
    (one row per coordinate combination), restricted to the requested
    bounding box, and written to
    `<output_base_folder>/<year>/<variable>_<YYYYMMDD>.csv`.
    If the table has a 'time' column it is replaced by a 'date' column
    holding only the calendar date.

    Parameters
    ----------
    file_path : str
        Path to the Zarr store (opened with consolidated metadata).
    output_base_folder : str
        Root output directory; a sub-folder named after the year is created
        on demand.
    lat_min, lat_max : float
        Inclusive latitude bounds. Defaults cover the UK region.
    lon_min, lon_max : float
        Inclusive longitude bounds (west, east). Either the [-180, 180] or
        the [0, 360] convention may be used; the defaults (-10 .. 2) select
        longitudes >= 350 or <= 2 on a [0, 360] grid.

    Notes
    -----
    The store's time coordinate is assumed to hold a single timestamp; it
    determines the year folder and the date in the file names.
    """
    # Use the dataset as a context manager so the store is closed even if
    # one of the conversions below raises.
    with xr.open_zarr(file_path, consolidated=True) as ds:
        # Extract the time coordinate (assuming it's a single timestamp)
        time_val = pd.to_datetime(ds.time.values.item())
        year_folder = os.path.join(output_base_folder, str(time_val.year))
        os.makedirs(year_folder, exist_ok=True)

        # Same for every variable in this file, so compute it once.
        date_tag = time_val.strftime('%Y%m%d')

        for var in ds.data_vars:
            # Flatten the variable to a table with its coordinates as columns
            df = ds[var].to_dataframe().reset_index()

            # Keep only the rows inside the requested bounding box
            df = _select_region(df, lat_min, lat_max, lon_min, lon_max)

            # Replace the timestamp column with a plain calendar date
            if 'time' in df.columns:
                df['date'] = pd.to_datetime(df['time']).dt.date
                df = df.drop(columns=['time'])

            csv_path = os.path.join(year_folder, f"{var}_{date_tag}.csv")
            df.to_csv(csv_path, index=False)
            print(f"Saved {csv_path}")


# --------------------------
# MAIN SCRIPT
# --------------------------

# Directory that holds the ERA5 .zarr stores
zarr_folder = r"C:\Users\brand\Downloads\ChaosBench\data\era5"
# Glob pattern for stores named like "era5_full_1.5deg_YYYYMMDD.zarr"
zarr_pattern = os.path.join(zarr_folder, "era5_full_1.5deg_*.zarr")

# Root directory under which the CSV output is written
output_base_folder = r"C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv"

# Collect every matching store, ordered by path
zarr_files = sorted(glob.glob(zarr_pattern))

# Convert the stores one at a time
for file_path in zarr_files:
    process_era5_file(file_path, output_base_folder)


Saved C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv\1979\q_19790101.csv
Saved C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv\1979\t_19790101.csv
Saved C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv\1979\u_19790101.csv
Saved C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv\1979\v_19790101.csv
Saved C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv\1979\w_19790101.csv
Saved C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv\1979\z_19790101.csv
Saved C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv\1979\q_19790102.csv
Saved C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv\1979\t_19790102.csv
Saved C:\Users\brand\OneDrive\Desktop\IC\Climate Change\Climate Chnage FINAL PJ\era5_csv\1979\u_19790102.csv
Saved C:\Users\bran