# Parallel Downloads
In this notebook we speed up downloading a set of files by fetching them in parallel.

## Imports

In [None]:

import os
import sys
from urllib.request import urlretrieve

## Download Functions
Here we use `urllib` to download a file from a specified URL.

The download progress hook prints a progress line for each file while it downloads, so that progress can be followed.

In [None]:
def download_progress_hook(count, block_size, total_size):
    """
    Report hook to display a progress bar for downloading.

    Intended to be passed as ``reporthook`` to ``urlretrieve``.

    :param count: Current block number being downloaded.
    :param block_size: Size of each block (in bytes).
    :param total_size: Total size of the file (in bytes). A value of zero or
        less means the size is unknown (or the file is empty); in that case
        only the running byte count is shown.
    """
    downloaded_size = count * block_size

    # Without a usable total there is nothing to divide by and no way to tell
    # when the transfer is finished, so only report the bytes seen so far.
    if total_size <= 0:
        sys.stdout.write(f"\rDownloading: {downloaded_size} bytes")
        sys.stdout.flush()
        return

    # The last block is usually only partially filled, so count * block_size
    # can overshoot the real size; clamp it to keep the display truthful.
    downloaded_size = min(downloaded_size, total_size)
    percentage = downloaded_size * 100 / total_size

    # Create a simple progress bar
    progress_bar = f"\rDownloading: {percentage:.2f}% [{downloaded_size}/{total_size} bytes]"

    # Update the progress on the same line
    sys.stdout.write(progress_bar)
    sys.stdout.flush()

    # When download is complete
    if downloaded_size >= total_size:
        print("\nDownload complete!")
def download(url, file_name):
    """
    Download a single file, reporting progress while it is retrieved.

    :param url: URL of the file to fetch.
    :param file_name: Local path the file is written to. Its parent directory
        is created if it does not exist yet.
    :return: The value returned by ``urlretrieve`` (a ``(filename, headers)`` tuple).
    """
    # urlretrieve opens file_name for writing and fails when the directory is
    # missing; exist_ok keeps this safe when several downloads run at once.
    target_dir = os.path.dirname(file_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    print(file_name)
    return urlretrieve(url, file_name, reporthook=download_progress_hook)



## Create List of URLs
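We first create a list of URLs, one `omni_hro_1min` file per month between the first month of the first year and the last month of the last year.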

In [None]:
# NOTE(review): fromYear, toYear, monthFirstYear and monthLastYear are not
# defined in the visible cells -- presumably they come from a configuration
# cell; confirm they are set before this cell runs.
BASE_URL = 'https://cdaweb.gsfc.nasa.gov/sp_phys/data/omni/hro_1min/'

# Built-in range is enough here, so the cell does not depend on numpy.
years = range(fromYear, toYear + 1)
months = [f'{month:02d}' for month in range(1, 13)]

urls = []
for y in years:
    for m in months:
        # Skip months before the start month of the first year and
        # after the end month of the last year.
        before_start = y == fromYear and int(m) < monthFirstYear
        after_end = y == toYear and int(m) > monthLastYear
        if before_start or after_end:
            continue
        urls.append(f'{BASE_URL}{y}/omni_hro_1min_{y}{m}01_v01.cdf')


## Run Download
By using joblib's `Parallel` and `delayed` we queue each file download and then run several of them at the same time. The `threading` backend is used here, so the downloads overlap in threads rather than on separate cores.

In [None]:
from joblib import Parallel, delayed

# Pair each URL with its destination: the URL's last path component,
# placed under ./omni_tempfiles/.
download_args = [
    (link, './omni_tempfiles/' + link.rsplit('/', 1)[-1])
    for link in urls
]

# Queue one delayed download per (url, path) pair and run them on 12 threads.
tasks = (delayed(download)(link, target) for link, target in download_args)
Parallel(n_jobs=12, backend='threading')(tasks)

## Alternative
Here is an alternative using multiprocessing

In [None]:
from multiprocessing import Pool

# download() needs both a URL and a destination path, so build the pairs here
# and unpack each one with starmap (plain map would pass only the URL).
pool_args = [(url, './omni_tempfiles/' + url.split('/')[-1]) for url in urls]

# The with-block releases the worker processes once every download has returned.
with Pool(8) as p:
    results = p.starmap(download, pool_args)