In [9]:
import os
import numpy as np
import pandas as pd
import boto3
from botocore import UNSIGNED
from botocore.config import Config

cfs_dates = ['20190201','20190325','20190420','20190503','20190514','20190614']

cfs_aws_bucket = 'noaa-cfs-pds'

lead_time = 20

def downloadDirectoryFroms3(bucketName, remoteDirectoryName, endDate):
    s3_resource = boto3.resource('s3', config=Config(signature_version=UNSIGNED))
    bucket = s3_resource.Bucket(bucketName) 
    for obj in bucket.objects.filter(Prefix = remoteDirectoryName):
        if not os.path.exists(os.path.dirname(obj.key)):
            os.makedirs(os.path.dirname(obj.key))
            
        # Get the forecast date and time portion of the file name
        file_name = obj.key.partition('/')[-1] # Gets name of GRIB file
        fxst_time = file_name.partition('.')[0][-10:-1] # Gets 10 characters to the left of the first '.'
        fxst_time_dt = pd.to_datetime(fxst_time,format='%Y%m%d%H') # Converts this to a date time
        
        if(fxst_time_dt <= endDate): # Compare date time of current file to end date
            try:
                bucket.download_file(obj.key, obj.key) # save to same path
                print('True: downloaded '+obj.key)
            except botocore.exceptions.ClientError as e:
                if e.response['Error']['Code'] == "404":
                    print('File '+obj.key+' does not exist')
                else:
                    print('Could not download file '+obj.key)
        else:
            print('False: skipped '+obj.key)
            

In [10]:
cfs_dates_beg_dt = pd.to_datetime(cfs_dates,format='%Y%m%d')

cfs_dates_end_dt = cfs_dates_beg_dt + pd.Timedelta(days=lead_time)

for i in np.arange(cfs_dates_beg_dt.size):

    cfs_dir_name = 'cfs.'+cfs_dates[i]+'/06/6hrly_grib_01/'
    
    print('Downloading data from '+cfs_aws_bucket+'::'+cfs_dir_name)

    downloadDirectoryFroms3(cfs_aws_bucket, cfs_dir_name, cfs_dates_end_dt[i])


Downloading data from noaa-cfs-pds::cfs.20190201/06/6hrly_grib_01/
06/6hrly_grib_01/flxf2019020106.01.2019020106.grb2
201902010
True: downloaded cfs.20190201/06/6hrly_grib_01/flxf2019020106.01.2019020106.grb2
06/6hrly_grib_01/flxf2019020106.01.2019020106.grb2.idx
201902010
True: downloaded cfs.20190201/06/6hrly_grib_01/flxf2019020106.01.2019020106.grb2.idx
06/6hrly_grib_01/flxf2019020112.01.2019020106.grb2
201902011
True: downloaded cfs.20190201/06/6hrly_grib_01/flxf2019020112.01.2019020106.grb2
06/6hrly_grib_01/flxf2019020112.01.2019020106.grb2.idx
201902011
True: downloaded cfs.20190201/06/6hrly_grib_01/flxf2019020112.01.2019020106.grb2.idx
06/6hrly_grib_01/flxf2019020118.01.2019020106.grb2
201902011
True: downloaded cfs.20190201/06/6hrly_grib_01/flxf2019020118.01.2019020106.grb2
06/6hrly_grib_01/flxf2019020118.01.2019020106.grb2.idx
201902011
True: downloaded cfs.20190201/06/6hrly_grib_01/flxf2019020118.01.2019020106.grb2.idx
06/6hrly_grib_01/flxf2019020200.01.2019020106.grb2
201902

ValueError: time data '_01/ipvan' does not match format '%Y%m%d%H' (match)

In [3]:
cfs_dates_beg_dt[i]

Timestamp('2019-02-01 00:00:00')