In [1]:
import requests
import os
from time import sleep

## DOWNLOAD_FILE: performs request call and downloads file to given directory with retry logic.
def download_file(url, target_folder, max_attempts=3, retry_delay=1, timeout=30):
    """Download ``url`` into ``target_folder``, retrying on request errors.

    The file is named after the last path segment of ``url``. Data is streamed
    to a temporary ``<name>.part`` file and only moved to its final name once
    the whole body has been written, so an interrupted transfer never leaves a
    truncated file under the final filename.

    Args:
        url: Address of the file to fetch.
        target_folder: Directory to save into; created if it does not exist.
        max_attempts: How many times to try before giving up.
        retry_delay: Seconds to wait between failed attempts.
        timeout: Timeout in seconds passed to ``requests.get`` so a stalled
            connection fails (and is retried) instead of hanging forever.

    Returns:
        True if the file was downloaded, False if every attempt failed.

    Raises:
        ValueError: If no filename can be derived from ``url``.
        OSError: If the target folder or file cannot be written.
    """
    # Extract filename from URL
    filename = url.split('/')[-1]
    if not filename:
        # A URL ending in '/' has no last segment; fail before touching the network.
        raise ValueError(f"Cannot determine a filename from URL: {url!r}")

    # Make sure the target folder exists
    os.makedirs(target_folder, exist_ok=True)

    # Full path for saving the file, plus a scratch path used while streaming
    full_path = os.path.join(target_folder, filename)
    partial_path = full_path + '.part'

    for attempt in range(1, max_attempts + 1):
        try:
            # The context manager releases the connection even if streaming fails.
            with requests.get(url, stream=True, timeout=timeout) as response:
                # Raise an exception if the request was unsuccessful
                response.raise_for_status()

                # Write the content of the response in chunks to the scratch file
                with open(partial_path, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=8192):
                        file.write(chunk)

            # Only a complete download is promoted to the final filename.
            os.replace(partial_path, full_path)
            print(f"File downloaded: {full_path}")
            return True
        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt} failed: {e}")
            # Discard whatever was written before the failure.
            try:
                os.remove(partial_path)
            except FileNotFoundError:
                pass
            # No point in waiting after the final attempt.
            if attempt < max_attempts:
                sleep(retry_delay)

    print(f"Failed to download the file after {max_attempts} attempts.")
    return False

In [2]:
## file_downloads: Interface to the NOAA nClimGrid-Daily archive. Builds the URL for each of the 12 monthly files of the given year and downloads them.
def file_downloads(year):
    """Download the 12 monthly NOAA nClimGrid-Daily files for ``year``.

    Files are fetched from ``<web_dir>/<year>/ncdd-<year><MM>-grd-scaled.nc``
    and saved under ``downloaded_files/<year>``.

    Args:
        year: The year to download (int or str).
    """
    web_dir = 'https://www.ncei.noaa.gov/data/nclimgrid-daily/access/grids/'
    year = str(year)
    output_dir = 'downloaded_files/' + year

    start = 'ncdd-'
    end = '-grd-scaled.nc'
    for month in range(1, 13):
        # Months are zero-padded to two digits in the file name (01..12).
        filename = f"{start}{year}{month:02d}{end}"
        # The archive keeps each year's files in a sub-directory named after the year.
        filepath = '/' + year + '/' + filename
        print(filepath)
        # Strip the duplicate slash so the joined URL has exactly one separator.
        url = web_dir.rstrip('/') + filepath
        try:
            download_file(url, output_dir)
        except Exception as e:
            # Keep going with the remaining months if one file fails unexpectedly.
            print('error')
            print(e)

In [3]:
##Main function. Change how many years you want to download.
#WARNING: downloading all the data from 1951 on will take a lot of storage!
# Roughly 50 GB (~60 MB per file, 12 months per year, 72 years)
def main(start=2018, end=2023):
    """Download every monthly file for each year from ``start`` to ``end``.

    Args:
        start: First year to download (inclusive).
        end: Last year to download (inclusive). If ``end`` is before
            ``start`` nothing is downloaded.
    """
    for thisyear in range(start, end + 1):
        file_downloads(thisyear)

In [4]:
# Kick off the downloads for the year range configured in main().
main()

/2018/ncdd-201801-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201801-grd-scaled.nc
/2018/ncdd-201802-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201802-grd-scaled.nc
/2018/ncdd-201803-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201803-grd-scaled.nc
/2018/ncdd-201804-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201804-grd-scaled.nc
/2018/ncdd-201805-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201805-grd-scaled.nc
/2018/ncdd-201806-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201806-grd-scaled.nc
/2018/ncdd-201807-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201807-grd-scaled.nc
/2018/ncdd-201808-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201808-grd-scaled.nc
/2018/ncdd-201809-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201809-grd-scaled.nc
/2018/ncdd-201810-grd-scaled.nc
File downloaded: downloaded_files/2018\ncdd-201810-grd-scaled.nc
/2018/ncdd-201811-grd-scaled.n