In [1]:
from datetime import datetime, timedelta, timezone
from pathlib import Path

import cdsapi

# Create the CDS API client. The retrieval loop later in this cell sends its
# requests through `c`.
# NOTE(review): the client presumably reads its credentials from the user's
# .cdsapirc file — confirm it is configured before running.
c = cdsapi.Client()

# Reference time in UTC. `datetime.utcnow()` is deprecated since Python 3.12,
# so a timezone-aware timestamp is built instead; only its calendar fields
# (year, month, day) are used below.
now_date = datetime.now(timezone.utc)

# The request window ends 5 days before the reference time.
# NOTE(review): presumably this offset allows for the dataset's publication
# delay — confirm the value against the data provider's documentation.
five_days_ago = now_date - timedelta(days=5)

# The window spans one week back from five_days_ago.
start_date = five_days_ago - timedelta(weeks=1)
end_date = five_days_ago

# Every calendar day from start_date to end_date with BOTH endpoints included,
# i.e. 8 days for a one-week span.
dates = [start_date + timedelta(days=x) for x in range((end_date - start_date).days + 1)]

# Group the zero-padded day strings by (year, month) so that a window crossing
# a month boundary can be fetched with one request per month.
dates_by_month = {}
for date in dates:
    year_month = (date.year, date.month)
    dates_by_month.setdefault(year_month, []).append(date.strftime("%d"))

# Print debug info: one line per request that will be issued.
for (year, month), days in dates_by_month.items():
    print(f"Requesting data for year: {year}, month: {month}, days: {days}")

# Make one request per (year, month) group collected in dates_by_month.

# Create the download directory up front: otherwise a missing 'data' folder
# only surfaces when the file is written, after the request has already been
# queued and processed.
output_dir = Path('data')
output_dir.mkdir(parents=True, exist_ok=True)

# All 24 hourly time steps, '00:00' through '23:00'.
hours = [f'{hour:02d}:00' for hour in range(24)]

for (year, month), days in dates_by_month.items():
    c.retrieve(
        'reanalysis-era5-pressure-levels',
        {
            'product_type': 'reanalysis',
            # NOTE(review): the newer CDS API reportedly expects 'data_format'
            # instead of 'format' — confirm when migrating the client.
            'format': 'netcdf',
            'variable': [
                'specific_humidity', 'temperature', 'u_component_of_wind',
                'v_component_of_wind', 'vertical_velocity',
            ],
            'pressure_level': '850',
            # Send year/month as strings (month zero-padded), matching the
            # string form already used for 'day' and 'time'.
            'year': str(year),
            'month': f'{month:02d}',
            'day': days,
            'time': hours,
            # Bounding box in CDS order: [north, west, south, east].
            'area': [
                -26.863281, 15.556641, -35.13787,
                33.266602,
            ],
        },
        # File name is unchanged: data/last_week_data_<year>_<month>.nc
        str(output_dir / f'last_week_data_{year}_{month}.nc'),
    )


Requesting data for year: 2024, month: 9, days: ['06', '07', '08', '09', '10', '11', '12', '13']


2024-09-18 08:26:00,781 INFO Welcome to the CDS.
As per our announcements on the Forum, this instance of CDS will be decommissioned on 26 September 2024 and will no longer be accessible from this date onwards.
Please update your cdsapi package to a version >=0.7.2, create an account on CDS-Beta and update your .cdsapirc file. We strongly recommend users to check our Guidelines at https://confluence.ecmwf.int/x/uINmFw
2024-09-18 08:26:00,783 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-pressure-levels
2024-09-18 08:26:01,060 INFO Request is queued


In [2]:
import xarray as xr

# NetCDF inputs to combine, in the order they are merged.
# (Separate per-year files for 2021 and 2022 were previously opened here as
# ds4/ds5; they are not part of the current merge.)
source_files = (
    "data_850/2018_850_SA.nc",
    "data_850/2019_850_SA.nc",
    "data_850/2020to2022.nc",
)

# Open all three files first, then pull each one fully into memory.
ds1, ds2, ds3 = (xr.open_dataset(path) for path in source_files)
for opened in (ds1, ds2, ds3):
    opened.load()

# Combine with `merge`. If the datasets should instead be appended along a
# single dimension, `concat` is the alternative:
# combined_ds = xr.concat([ds1, ds2, ds3], dim='time')  # replace 'time' with the relevant dimension
combined_ds = xr.merge([ds1, ds2, ds3])

# Saving is intentionally left disabled in this cell:
# combined_ds.to_netcdf("2020to2024.nc")


KeyboardInterrupt: 

In [9]:
# Open a previously saved NetCDF file and show its summary as the cell output.
# NOTE(review): no active code in the visible cells writes "2020to2024.nc" —
# confirm where this file comes from so the notebook can be re-run from scratch.
ds = xr.open_dataset("2020to2024.nc")
ds

In [14]:
def subset_data(dataset, coarsen = 1, lat_window=(1, 33), lon_window=(3, 67)):
    """Crop ``dataset`` to an index window and optionally subsample it.

    The selection is positional (``isel``) along the ``latitude`` and
    ``longitude`` dimensions. "Coarsening" here means keeping every
    ``coarsen``-th grid point inside the window; no averaging is performed.

    Parameters
    ----------
    dataset
        Object exposing ``isel(latitude=..., longitude=...)``.
    coarsen : int, default 1
        Stride applied to both dimensions. Values of 1 or less keep every
        point in the window.
    lat_window : tuple of (start, stop), default (1, 33)
        Half-open index range along ``latitude``.
    lon_window : tuple of (start, stop), default (3, 67)
        Half-open index range along ``longitude``.

    Returns
    -------
    Whatever ``dataset.isel`` returns for the computed slices.
    """
    # A step of None is what a two-argument slice uses, so coarsen <= 1
    # produces exactly slice(start, stop).
    step = coarsen if coarsen > 1 else None

    lat_slice = slice(lat_window[0], lat_window[1], step)
    lon_slice = slice(lon_window[0], lon_window[1], step)

    return dataset.isel(latitude=lat_slice, longitude=lon_slice)

# Keep every second grid point of `ds` inside the index window that
# subset_data selects.
ds_coarsened = subset_data(ds, coarsen=2)


In [None]:
# Write the merged dataset and the subsampled dataset to NetCDF files.
# NOTE(review): ds_coarsened is derived from `ds`, which is opened from
# "2020to2024.nc", yet it is saved under a "2018to2022" name — confirm which
# source dataset is intended before relying on this file.
combined_ds.to_netcdf("2018to2022.nc")
ds_coarsened.to_netcdf("2018to2022_coarsened.nc")