In [None]:
from pydap.client import open_url
from datetime import datetime
import numpy as np
import pandas as pd
from datetime import timedelta
import xarray as xr
from scipy.interpolate import LinearNDInterpolator
from datetime import date
import os
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
def get_era_5(ds):
    """Flatten a gridded wind dataset into a long-format table.

    Parameters
    ----------
    ds : object exposing ``to_dataframe()``
        The resulting frame must carry a three-level index and the columns
        ``u10`` and ``v10``. The index levels are read positionally as
        (longitude, latitude, time) -- NOTE(review): this order is assumed,
        not checked; confirm it against the dataset's dimension order.

    Returns
    -------
    pandas.DataFrame
        One row per index entry that has no missing value, with columns
        ``sp_lon`` (negative longitudes shifted by +360), ``sp_lat``,
        ``hours_since_ref`` (hours elapsed since 1992-10-05 00:00) and the
        ``u10`` / ``v10`` values.
    """
    era_5_df = ds.to_dataframe().dropna()

    # Take the coordinates from the rows that are actually left after
    # dropna(). ``index.levels`` keeps every unique value (dropped rows
    # included) in the index's internal order, so rebuilding the grid from the
    # levels can yield the wrong length or pair coordinates with the wrong
    # data values.
    row_index = era_5_df.index
    lon = row_index.get_level_values(0).to_numpy()
    lat = row_index.get_level_values(1).to_numpy()
    timestamps = row_index.get_level_values(2)

    # Elapsed time since the reference epoch, expressed in (fractional) hours.
    start_time = pd.Timestamp(1992, 10, 5)
    hours_since_ref = ((timestamps - start_time) / pd.Timedelta(hours=1)).to_numpy()

    df = pd.DataFrame({'sp_lon': lon, 'sp_lat': lat, 'hours_since_ref': hours_since_ref,
                       'u10': era_5_df["u10"].to_numpy(),
                       'v10': era_5_df["v10"].to_numpy()})
    # Move negative longitudes onto the 0..360 convention.
    df['sp_lon'] = np.where(df['sp_lon'] < 0, df['sp_lon'] + 360, df['sp_lon'])
    return df

In [None]:
import cdsapi
import xarray as xr
from urllib.request import urlopen
warnings.filterwarnings('ignore')
# Download hourly ERA5 10 m wind components one month at a time and store each
# month as a flat CSV produced by get_era_5().
c = cdsapi.Client()
# dataset to read
dataset = 'reanalysis-era5-single-levels'
years = ['2017', '2018', '2019','2020', '2021', '2022']

# Request fields that never change are built once, outside the loops.
all_months = [f"{m:02d}" for m in range(1, 13)]
# The requested period runs from March 2017 through January 2022, so the first
# and last years are only partially requested; every other year is complete.
months_by_year = {'2017': all_months[2:], '2022': all_months[:1]}
days = [f"{d:02d}" for d in range(1, 32)]
hours = [f"{h:02d}:00" for h in range(24)]

# Make sure the output directory exists up front, so a finished download is
# never lost to a failing to_csv() call.
output_dir = "era_5"
os.makedirs(output_dir, exist_ok=True)

for year in tqdm(years):
    for month in months_by_year.get(year, all_months):
        params = {'product_type': 'reanalysis',
                  'variable': ['10m_u_component_of_wind', '10m_v_component_of_wind',],
                  'year': year,
                  'month': month,
                  'day': days,
                  'time': hours,
                  'area': [38, -180, -38,180,],
                  'format': 'netcdf',
                  }
        # retrieve() returns a result handle; its .location is the URL of the file
        fl = c.retrieve(dataset, params)
        # Read the whole payload first so the HTTP connection is released
        # before the conversion starts.
        with urlopen(fl.location) as f:
            payload = f.read()
        # The context manager closes the dataset once the table has been built.
        with xr.open_dataset(payload) as ds:
            df = get_era_5(ds)
        df.to_csv(f"{output_dir}/{year}_{month}.csv", index=False)
        # Drop the reference to the monthly table before the next download so
        # its memory can be reclaimed.
        df = pd.DataFrame()