In [7]:
import os
import pandas as pd
import datetime
import cdsapi
from glob import glob
import xarray as xr

# Make sure a CDS API credentials file (~/.cdsapirc) exists before the client is created.
# NOTE(security): the API key below is a personal secret embedded in source code.
# It should be rotated and supplied outside the script (e.g. a pre-existing
# ~/.cdsapirc) instead of being committed with the notebook.
home_dir = os.path.expanduser("~")
cdsapirc_path = os.path.join(home_dir, ".cdsapirc")

api_content = """url: https://cds.climate.copernicus.eu/api/v2
key: 310677:f60edf5d-2ce1-4676-8873-1f76498caedd"""

# Only write the file when it is missing, so credentials the user has already
# configured are never clobbered by the fallback above.
if os.path.exists(cdsapirc_path):
    print(f"Using existing {cdsapirc_path}")
else:
    with open(cdsapirc_path, "w") as file:
        file.write(api_content)
    print(f"Created {cdsapirc_path}")

# create CDS API client
c = cdsapi.Client()

# check if the file is already present
def is_stub_already_present(dest_folder, stub):
    """
    Check whether a netcdf file with the given name already exists in a folder

    Args:
    - dest_folder (str): Folder to look in (with or without a trailing separator)
    - stub (str): File name to look for, e.g. 'ERA5_X_198207.nc'

    Returns:
    - bool: True if a '*.nc' file named `stub` exists in `dest_folder`
    """
    # os.path.join / basename keep this working regardless of a trailing
    # slash on dest_folder and of the platform's path separator.
    pattern = os.path.join(dest_folder, "*.nc")
    present = {os.path.basename(path) for path in glob(pattern)}
    if stub in present:
        print(f"Present: {stub}")
        return True
    return False

# download ERA5 data
def fetch_era5_data(name, nbox, year, month, filepath):
    """
    Retrieve one month of hourly ERA5 single-level data as a netcdf file

    Requests 10 m u/v wind, 2 m temperature and surface pressure for every
    day and every hour of the given month through the module-level CDS client.

    Args:
    - name (str): Name of the cyclone (not used by the request itself)
    - nbox (list): Bounding box passed as the request's 'area' field
    - year (int): Year
    - month (int): Month
    - filepath (str): Path the retrieved file is written to

    Returns:
    None
    """
    # Timestamp.days_in_month gives the month length directly and avoids the
    # 'M' frequency alias, which recent pandas versions deprecate.
    days_in_month = pd.Timestamp(year=year, month=month, day=1).days_in_month
    days = [f'{day:02d}' for day in range(1, days_in_month + 1)]
    # '00:00' ... '23:00'
    hours = [f'{hour:02d}:00' for hour in range(24)]

    c.retrieve(
        'reanalysis-era5-single-levels',
        {
            'product_type': 'reanalysis',
            'format': 'netcdf',
            'variable': [
                '10m_u_component_of_wind', '10m_v_component_of_wind',
                '2m_temperature', 'surface_pressure'
            ],
            'year': str(year),
            'month': f'{month:02d}',
            'day': days,
            'time': hours,
            'area': nbox,
        },
        filepath
    )

def download_era5(name, nbox, abbv, year, month, output_main_dir):
    """
    Download ERA5 data for a specific month and year

    The file is stored as '<output_main_dir>/<name>/ERA5_<ABBV>_<YYYYMM>.nc'.
    Nothing is downloaded if that file is already present.

    Args:
    - name (str): Name of the cyclone
    - nbox (list): List of coordinates for the bounding box
    - abbv (str): Abbreviation for the bounding box
    - year (int): Year
    - month (int): Month
    - output_main_dir (str): Output main directory

    Returns:
    None
    """
    dest_folder = f'{output_main_dir}/{name}/'
    os.makedirs(dest_folder, exist_ok=True)

    file_name = f'ERA5_{abbv.upper()}_{year}{month:02d}.nc'
    filepath = os.path.join(dest_folder, file_name)

    if is_stub_already_present(dest_folder, file_name):
        print(f'[{name}] - {year}-{month:02d} - Already downloaded, skipping.')
        return

    print(f'[{name}] - {year}-{month:02d} - Downloading file ... ')
    # Download under a temporary name and rename on success, so an interrupted
    # transfer never leaves a truncated '.nc' that would later be treated as
    # present (and picked up when the monthly files are merged).
    partial_path = filepath + '.part'
    fetch_era5_data(name, nbox, year, month, partial_path)
    os.replace(partial_path, filepath)
    print(f'[{name}] - {year}-{month:02d} - Downloaded.')

# merge netcdf files
def merge_nc_files(dest_folder, output_file):
    """
    Merge all netcdf files in a folder into a single file

    The '*.nc' files in `dest_folder` are opened in sorted name order,
    concatenated along the 'time' dimension and written to `output_file`.

    Args:
    - dest_folder (str): Destination folder (with or without a trailing separator)
    - output_file (str): Output file

    Returns:
    None

    Raises:
    - ValueError: If `dest_folder` contains no netcdf files
    """
    nc_files = sorted(glob(os.path.join(dest_folder, "*.nc")))
    if not nc_files:
        raise ValueError(f'No netcdf files found in {dest_folder}')

    datasets = []
    try:
        # Open inside the try block so files opened so far are still closed
        # if a later open, the concat or the write fails.
        for nc in nc_files:
            datasets.append(xr.open_dataset(nc))
        combined = xr.concat(datasets, dim='time')
        combined.to_netcdf(output_file)
    finally:
        for ds in datasets:
            ds.close()
    print(f'Merged files into {output_file}')


def get_nbox_from_wbox(wbox):
    """
    Reorder a west-first bounding box into a north-first bounding box

    Args:
    - wbox (list): Bounding box as (west_lon, south_lat, east_lon, north_lat)

    Returns:
    - nbox (list): Bounding box as [north_lat, west_lon, south_lat, east_lon]
    """
    lon_min, lat_min, lon_max, lat_max = wbox
    return [lat_max, lon_min, lat_min, lon_max]

# Region of interest, given as (west_lon, south_lat, east_lon, north_lat).
# Alternative box for Shanghai: (116.4737, 26.2304, 126.4737, 36.2304)
wbox = (116.0794, 18.9037, 126.0794, 28.9037)  # Taiwan
nbox = get_nbox_from_wbox(wbox)
abbrev = "FIXED_REGION"  # tag embedded in every output file name

# Load the cyclone table and discard rows that contain missing values
cyclones_path = "./list_of_typhoons_taiwan.xlsx"
df = pd.read_excel(cyclones_path).dropna()
print(df)

# Make sure the root output directory exists
output_main_dir = "./ERA5_single_level_taiwan"
os.makedirs(output_main_dir, exist_ok=True)


def _iter_months(start_date, end_date):
    """Yield (year, month) for each calendar month from start_date's month through end_date's month."""
    # Step from the first of the month: adding 31 days to day 1 always lands
    # in the *next* month. Stepping from the raw start day can overshoot,
    # e.g. Aug 31 + 31 days = Oct 1, which would skip September entirely.
    current = start_date.replace(day=1)
    while current <= end_date:
        yield current.year, current.month
        current = (current + datetime.timedelta(days=31)).replace(day=1)


def process_downloads(df, output_main_dir):
    """
    Process the downloads for all cyclones

    For every row, each calendar month between the cyclone's formation and
    dissipation dates is downloaded, the monthly files are merged into
    '<output_main_dir>/ERA5_<ABBREV>_<name>.nc', and a line is appended to
    '<output_main_dir>/ERA5_COMPLETE.txt'.

    Args:
    - df (pd.DataFrame): DataFrame containing the list of cyclones, with the
      columns "Typhoon number", "Form Date" and "Dissipated Date"
      (dates as 'dd-mm-YYYY' strings)
    - output_main_dir (str): Output main directory

    Returns:
    None
    """
    for _, row in df.iterrows():
        name = str(row["Typhoon number"])
        start_date = datetime.datetime.strptime(row["Form Date"], "%d-%m-%Y")
        end_date = datetime.datetime.strptime(row["Dissipated Date"], "%d-%m-%Y")

        # download data for each month spanned by the cyclone's lifetime
        for year, month in _iter_months(start_date, end_date):
            download_era5(name, nbox, abbrev, year, month, output_main_dir)

        # merge all files
        dest_folder = f'{output_main_dir}/{name}/'
        output_file = f'{output_main_dir}/ERA5_{abbrev.upper()}_{name}.nc'
        merge_nc_files(dest_folder, output_file)

        print(f'{name} - All downloads are finished and files are merged.')

        # keep a running log of completed cyclones
        with open(os.path.join(output_main_dir, "ERA5_COMPLETE.txt"), "a+") as file:
            file.write(f"{name}\t{datetime.datetime.now()}\n")

# Run the download-and-merge pipeline for every cyclone in the table loaded above.
process_downloads(df, output_main_dir)


2024-07-09 18:13:42,149 INFO Welcome to the CDS
2024-07-09 18:13:42,150 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2024-07-09 18:13:42,219 INFO Request is queued
2024-07-09 18:13:55,668 INFO Request is running
2024-07-09 18:18:00,665 INFO Request is completed
2024-07-09 18:18:00,688 INFO Downloading https://download-0003-clone.copernicus-climate.eu/cache-compute-0003/cache/data6/adaptor.mars.internal-1720545406.3972588-4101-4-50c8899b-dd29-4b76-9180-b7827fac36fc.nc to ./ERA5_single_level_taiwan/198209/ERA5_FIXED_REGION_198207.nc (9.5M)
2024-07-09 18:18:01,830 INFO Download rate 8.4M/s   
2024-07-09 18:18:01,943 INFO Welcome to the CDS
2024-07-09 18:18:01,944 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/reanalysis-era5-single-levels
2024-07-09 18:18:01,996 INFO Request is queued
2024-07-09 18:18:06,897 INFO Request is running
2024-07-09 18:22:20,332 INFO Request is completed
2024-07-09 18:22:20,3