# Download Springer Covid-19 Free Textbooks
### Imports

In [None]:
import concurrent.futures
from pathlib import Path
import urllib
import urllib.error
import urllib.request

import pandas as pd
import requests

### Variables

In [None]:
# Folder (relative to the working directory) that receives the downloads;
# it is created up front, including missing parents, if it does not exist.
des_dir = Path('download')
des_dir.mkdir(exist_ok=True, parents=True)

# Spreadsheet served by Springer Nature; the cells below read its
# 'OpenURL', 'Book Title' and 'Edition' columns.
df = pd.read_excel(
    'https://resource-cms.springernature.com/springer-cms/rest/v1/content/17858272/data'
)

### Functions

In [None]:
def scrape_redirect(url, timeout=30):
    """Return the URL that ``url`` finally resolves to after redirects.

    Args:
        url: Address to request with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so one stalled
            connection would block its worker thread forever.

    Returns:
        The ``url`` attribute of the response, i.e. the address of the
        last request made after following redirects.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    r = requests.get(url, timeout=timeout)
    return r.url

## Web Scrape and Download

In [None]:
# Resolve every OpenURL concurrently. `futures` maps each submitted task
# back to the URL it was started for, so results and failures can be
# attributed to the right book.
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = {}
    for open_url in df['OpenURL']:
        futures[executor.submit(scrape_redirect, open_url)] = open_url

    redirects = {}
    for done in concurrent.futures.as_completed(futures):
        url = futures[done]
        try:
            resolved = done.result()
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(f'Generated Exception: {url}, {e}')
        else:
            redirects[url] = resolved

# Landing page of every book. URLs whose lookup failed are missing from
# `redirects`, so `map` leaves NaN in those rows.
df['redirects'] = df['OpenURL'].map(redirects)

# The download links are derived from the landing page by swapping the
# 'book' part of the URL. regex=False states explicitly that this is a
# plain substring replacement, independent of the pandas default.
df['pdf_url'] = (
    df['redirects']
    .str.replace('book', 'content/pdf', regex=False)
    + '.pdf'
)

df['epub_url'] = (
    df['redirects']
    .str.replace('book', 'download/epub', regex=False)
    + '.epub'
)

# '[/:]' is a character class, so it must be applied as a regular
# expression. pandas >= 2.0 defaults to regex=False, which would search for
# the literal text '[/:]' and leave '/' and ':' in the file names.
df['file_name'] = (
    df.apply(lambda x: f"{x['Book Title']}_{x['Edition']}", axis=1)
    .str.replace('[/:]', '-', regex=True)
)

def _download(url, dest):
    """Download ``url`` to ``dest`` without leaving a partial file behind.

    The data is written to a sibling ``<name>.part`` file first and only
    renamed to ``dest`` once the transfer has finished. Files that already
    exist are skipped when downloads are queued, so a truncated file at
    ``dest`` would otherwise never be fetched again.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises; the partial file is
        removed before the exception is passed on.
    """
    part = dest.with_name(dest.name + '.part')
    try:
        urllib.request.urlretrieve(url, part)
    except Exception:
        try:
            part.unlink()
        except OSError:
            # Nothing was written (or it cannot be removed); the download
            # error below is the one worth reporting.
            pass
        raise
    part.replace(dest)


def _queue_downloads(executor, urls, names, ext):
    """Submit a download for every file that is not on disk yet.

    Args:
        executor: Executor that runs the downloads.
        urls: Iterable of source URLs.
        names: Iterable of file names (without extension), aligned with
            ``urls``.
        ext: File extension without the dot, e.g. ``'pdf'``.

    Returns:
        Dict mapping each submitted future to its destination path.
    """
    pending = {}
    queued = set()
    for url, name in zip(urls, names):
        dest = des_dir / f'{name}.{ext}'
        # Rows sharing a file name must not be written concurrently to the
        # same path, so each destination is queued at most once.
        if dest in queued or dest.is_file():
            continue
        queued.add(dest)
        pending[executor.submit(_download, url, dest)] = dest
    return pending


with concurrent.futures.ThreadPoolExecutor() as executor:
    pdf_futures = _queue_downloads(
        executor, df['pdf_url'], df['file_name'], 'pdf')
    for future in concurrent.futures.as_completed(pdf_futures):
        try:
            future.result()
        except Exception as e:
            print(f'Error: {pdf_futures[future]}, {e}')
    print('\n*** Completed pdf downloads ***\n')

    epub_futures = _queue_downloads(
        executor, df['epub_url'], df['file_name'], 'epub')
    for future in concurrent.futures.as_completed(epub_futures):
        try:
            future.result()
        except urllib.error.HTTPError:
            # An HTTP error on the epub link is reported as the title not
            # being offered in that format.
            print(f'Epub not available: {epub_futures[future]}')
        except Exception as e:
            print(f'Error: {epub_futures[future]}, {e}')
    print('\n*** Completed epub downloads ***\n')