In [5]:
from ecmwf.opendata import Client
import pdbufr
from datetime import datetime, timedelta
import os

In [3]:
# Requested forecast day. datetime.now() is the naive local time; the cells below
# only use its date part, formatted as YYYYMMDD, to request and name the track file.
start_date = datetime.now()
print(start_date)

2023-09-12 11:24:59.578245


In [6]:
# One-off download of the track file for start_date into track_data/{YYYYMMDD}.bufr.
# The same request is issued by download_tracks_forecast further down.
client = Client(source="azure")
# Alternative: address the Azure mirror by URL instead of by source name:
# client = Client("https://ai4edataeuwest.blob.core.windows.net/ecmwf", beta=True)
client.retrieve(
    date=int(start_date.strftime("%Y%m%d")),  # forecast start date as the integer YYYYMMDD
    time=0,
    stream="enfo",
    type="tf",
    step=240,
    target=f"track_data/{start_date.strftime('%Y%m%d')}.bufr",
);  # trailing semicolon keeps the call's return value out of the cell output

20230912000000-240h-enfo-tf.bufr:   0%|          | 0.00/568k [00:00<?, ?B/s]

In [7]:
# Decode the downloaded BUFR file for start_date into a DataFrame, keeping the storm
# identity, ensemble member, date/time, position and mean-sea-level pressure columns.
df_storms = pdbufr.read_bufr(f"track_data/{start_date.strftime('%Y%m%d')}.bufr",
        columns=("stormIdentifier", "longStormName", "ensembleMemberNumber", "year", "month", "day", "hour", "latitude", "longitude", "pressureReducedToMeanSeaLevel"))

In [8]:
# Preview the first rows of the decoded track table.
df_storms.head()

Unnamed: 0,stormIdentifier,longStormName,ensembleMemberNumber,year,month,day,hour,latitude,longitude,pressureReducedToMeanSeaLevel
0,13L,LEE,1,2023,9,12,0,23.7,-64.5,93400.0
1,13L,LEE,1,2023,9,12,0,23.9,-65.2,93600.0
2,13L,LEE,1,2023,9,12,0,24.0,-65.7,93300.0
3,13L,LEE,1,2023,9,12,0,24.4,-65.9,93700.0
4,13L,LEE,1,2023,9,12,0,24.7,-66.2,93100.0


In [16]:
def download_tracks_forecast(start_date, client=None):
    """
    Downloads the forecast of the tropical cyclone tracks from ECMWF's open data dataset azure
    and saves them in a bufr file "track_data/{YYYYMMDD of the forecast}.bufr".
    If the forecast for start_date cannot be retrieved (usually because today's forecast is
    not available yet), it falls back to the previous day's forecast.
    A file that is already on disk is never downloaded again.

    start_date: datetime object
        Day of the forecast of which the data want to be downloaded
    client: object with a retrieve(**request) method, optional
        Client used for the download. Defaults to Client(source="azure").

    Returns:
    start_date: datetime object
        Day of the forecast of which the data have been downloaded: either the requested
        day or the day before it.

    Raises:
        Whatever the client raises when the previous day's forecast cannot be retrieved
        either. KeyboardInterrupt / SystemExit are never swallowed.
    """
    def _target_for(day):
        # Path of the bufr file that holds the forecast started on `day`.
        return f"track_data/{day.strftime('%Y%m%d')}.bufr"

    # Check if the file already exists
    if os.path.exists(_target_for(start_date)):
        return start_date

    # Make sure the output folder exists before handing the target path to the client.
    os.makedirs("track_data", exist_ok=True)
    if client is None:
        client = Client(source="azure")

    def _fetch(day):
        # Single definition of the request, shared by the first attempt and the fallback.
        client.retrieve(
            date=int(day.strftime("%Y%m%d")),
            time=0,
            stream="enfo",
            type="tf",
            step=240,
            target=_target_for(day),
        )

    try:
        _fetch(start_date)
        return start_date
    # Usually early in the morning the forecast of the current day is not available.
    # Only Exception is caught, so interrupting the kernel still stops the download.
    except Exception:
        # The target did not exist before this attempt, so anything found there now is
        # a leftover of the failed download; remove it so that a later call does not
        # mistake it for a valid cached forecast.
        leftover = _target_for(start_date)
        if os.path.exists(leftover):
            os.remove(leftover)
        previous_day = start_date - timedelta(days=1)
        # Reuse yesterday's file when it has already been downloaded.
        if not os.path.exists(_target_for(previous_day)):
            _fetch(previous_day)
        return previous_day

def create_storms_df(start_date):
    """
    Creates a dataframe containing data of the cyclone tracks starting from the forecast file
    "track_data/{YYYYMMDD of the forecast}.bufr".

    start_date: datetime object
        Day of the forecast of the data selected by user

    Returns:
    df_storms: pandas DataFrame
        Dataframe containing the forecast data for the active cyclones: the columns read
        from the file plus "windSpeedAt10M" and "timePeriod". "timePeriod" counts 6, 12,
        18, ... along the rows of each (stormIdentifier, ensembleMemberNumber) track, in
        row order. Storms whose identifier number is 70 or higher are left out.
    """
    bufr_path = f"track_data/{start_date.strftime('%Y%m%d')}.bufr"
    # Load cyclone dataframe with Mean sea level pressure value
    df_storms = pdbufr.read_bufr(bufr_path,
        columns=("stormIdentifier", "longStormName", "ensembleMemberNumber", "year", "month", "day", "hour", "latitude", "longitude",
                 "pressureReducedToMeanSeaLevel"))
    # Load cyclone dataframe with Wind speed at 10m value
    df_wind = pdbufr.read_bufr(bufr_path,
        columns=("stormIdentifier", "longStormName", "ensembleMemberNumber", "latitude", "longitude",
                 "windSpeedAt10M"))
    # Add the Wind speed at 10m column to the storms dataframe. Rows are matched by index.
    # NOTE(review): assumes both reads yield the same rows in the same order - confirm.
    df_storms["windSpeedAt10M"] = df_wind.windSpeedAt10M
    # Add the timePeriod column: number the rows of every storm / ensemble member track
    # (0, 1, 2, ...) and turn that position into 6, 12, 18, ...
    # cumcount labels each row by its own group, so the values stay attached to the right
    # row even when the rows of one track are not contiguous in the file.
    position_in_track = df_storms.groupby(
        ["stormIdentifier", "ensembleMemberNumber"], sort=False
    ).cumcount()
    df_storms["timePeriod"] = 6 * (position_in_track + 1)
    # Storms with number 70 or higher are not real storms (according to what Fernando said).
    # Compare the leading number numerically: a plain string comparison against '70'
    # would also discard identifiers that are not zero-padded, such as '9L'.
    storm_number = (
        df_storms.stormIdentifier.astype(str)
        .str.extract(r"^\s*(\d+)", expand=False)
        .astype(float)
    )
    # Identifiers without a leading number give NaN, which compares False and is dropped.
    return df_storms[storm_number < 70]

In [17]:
# download_tracks_forecast returns the day it actually downloaded, which is the previous
# day when today's forecast is not available yet. Keep that date: the file for the
# originally requested day does not exist after a fallback, and the cells below build
# the file path from start_date.
start_date = download_tracks_forecast(start_date)
df_storms = create_storms_df(start_date)

In [18]:
# Preview the table built by create_storms_df (now with windSpeedAt10M and timePeriod).
df_storms.head()

Unnamed: 0,stormIdentifier,longStormName,ensembleMemberNumber,year,month,day,hour,latitude,longitude,pressureReducedToMeanSeaLevel,windSpeedAt10M,timePeriod
0,13L,LEE,1,2023,9,12,0,23.7,-64.5,93400.0,58.7,6
1,13L,LEE,1,2023,9,12,0,23.9,-65.2,93600.0,49.9,12
2,13L,LEE,1,2023,9,12,0,24.0,-65.7,93300.0,48.4,18
3,13L,LEE,1,2023,9,12,0,24.4,-65.9,93700.0,46.8,24
4,13L,LEE,1,2023,9,12,0,24.7,-66.2,93100.0,45.3,30


In [8]:
# Re-read the raw track file for start_date. This rebinds df_storms to the unfiltered
# table, without the windSpeedAt10M / timePeriod columns added by create_storms_df.
df_storms = pdbufr.read_bufr(f"track_data/{start_date.strftime('%Y%m%d')}.bufr",
        columns=("stormIdentifier", "longStormName", "ensembleMemberNumber", "year", "month", "day", "hour", "latitude", "longitude",
                 "pressureReducedToMeanSeaLevel"))

In [15]:
# List every storm identifier present in the file, then look at a single storm.
print(df_storms.stormIdentifier.unique())
# Scratch subset: only the rows whose identifier is '14L'.
prova = df_storms[df_storms.stormIdentifier == '14L']
prova.head()

['13L' '14L' '70W' '71W' '72W' '73W' '74W' '75W' '76W' '70E' '71E' '72E'
 '73E' '74E' '70L' '71L' '72L' '73L' '74L' '75L' '76L' '70C' '71C' '72C'
 '70A' '71A' '72A' '73A' '70B' '71B' '72B' '73B' '70S' '71S' '70U' '71U'
 '70P' '71P' '72P']


Unnamed: 0,stormIdentifier,longStormName,ensembleMemberNumber,year,month,day,hour,latitude,longitude,pressureReducedToMeanSeaLevel
1924,14L,MARGOT,1,2023,9,12,0,27.3,-39.8,99000.0
1925,14L,MARGOT,1,2023,9,12,0,28.9,-39.8,98700.0
1926,14L,MARGOT,1,2023,9,12,0,30.0,-39.8,98800.0
1927,14L,MARGOT,1,2023,9,12,0,31.5,-40.1,98500.0
1928,14L,MARGOT,1,2023,9,12,0,32.5,-40.8,98500.0


In [28]:
# Experiment: fetch one surface field from ECMWF's own server into test.grib.
client = Client("https://data.ecmwf.int/forecasts", beta=True) # ECMWF server (author's note: last five days only)
client.retrieve(
    date=0,  # NOTE(review): presumably "today" - confirm against the ecmwf-opendata docs
    time=0,
    step=12,
    stream="oper",
    type="fc",
    levtype="sfc",
    param='msl',
    target='test.grib'
)



20230724000000-12h-oper-fc.grib2:   0%|          | 0.00/228k [00:00<?, ?B/s]

In [29]:
# Experiment: fetch a "tf" file from the "oper" stream into data.bufr.
# Relies on `client` already existing in the kernel; no `date` is passed here.
client.retrieve(
    time=0,
    stream="oper",
    type="tf",
    step=240,
    target="data.bufr",
)

20230724000000-240h-oper-tf.bufr:   0%|          | 0.00/4.27k [00:00<?, ?B/s]

In [38]:
client = Client("https://ai4edataeuwest.blob.core.windows.net/ecmwf", beta=True) # azure: data from jan-2021 until now
# Experiment: fetch the "tf" file of a past date through the `client` bound on the line
# above (author's note there: the Azure mirror holds data from Jan 2021 until now).
client.retrieve(
    time=0,  # start time of the forecast: 0 or 12
    date = 20220605, # start date of the forecast (YYYYMMDD)
    stream="oper",
    type="tf",
    step=240, # step of the forecast
    target="nom_fitxer2.bufr",
)


20220605000000-240h-oper-tf.bufr:   0%|          | 0.00/1.32k [00:00<?, ?B/s]