### set up 

In [1]:
import aiohttp
import asyncio
import requests

from bs4 import BeautifulSoup
import re

from pathlib import Path

In [2]:
# Working directory for the downloads: ./data.
# When it already exists, the regular files directly inside it are deleted
# (sub-directories are left alone); otherwise it is created empty.
folder = Path(".") / "data"
if folder.exists():
    for entry in folder.iterdir():
        if not entry.is_file():
            continue
        entry.unlink()
else:
    folder.mkdir()

### define functions

In [18]:
async def fetch(session, url):
    """Request ``url`` through ``session`` and return the response body.

    Parameters
    ----------
    session
        Object whose ``get(url)`` returns an async context manager yielding
        a response (the notebook passes an ``aiohttp.ClientSession``).
    url : str
        Address to request.

    Returns
    -------
    The value of ``await response.read()`` (raw bytes for aiohttp).

    Raises
    ------
    Whatever ``response.raise_for_status()`` raises for an unsuccessful
    response; in that case nothing is printed and the body is not read.
    """
    async with session.get(url) as resp:
        # Check the status first so an error page is never returned as data.
        resp.raise_for_status()
        # Progress trace: one line per request that passed the status check.
        print(f"{url = }")
        body = await resp.read()
    return body

In [24]:
async def process_many(urls, dest=None):
    """Download every file referenced by ``urls`` concurrently.

    For each URL the base HTML page is requested first; the tokenised storage
    prefix (``file-examples.com/storage/<token>/``) is extracted from that
    page and the actual file is then downloaded from
    ``https://<prefix>2017/<parent dir of url>/<file name of url>`` and written
    to ``dest / <file name>``.

    Parameters
    ----------
    urls : iterable of str
        Page URLs whose last two path components are ``<dir>/<file name>``.
    dest : str or path-like, optional
        Directory that receives the files. Defaults to ``./data``. It is
        created when missing.

    Raises
    ------
    ValueError
        If a page contains no storage link.
    Exception
        Any error raised while fetching or saving. All downloads are allowed
        to finish first; every failure is printed and the first one (in
        ``urls`` order) is re-raised.
    """
    # Materialise once: the list is reused below to pair failures with URLs.
    urls = list(urls)
    location = (Path(".") / "data") if dest is None else Path(dest)
    location.mkdir(parents=True, exist_ok=True)

    async def process_one_file(session, url):
        """Resolve, download and save the single file behind ``url``."""
        # request the base HTML page
        url_path = Path(url)
        name = url_path.name
        second_last_part = url_path.parts[-2]
        response = await fetch(session, url)

        # find the resource URL
        soup = BeautifulSoup(response, 'html.parser')
        pattern = r'file-examples\.com/storage/\w+/'
        match = re.search(pattern, str(soup))
        if match is None:
            # re.search returns None on no match; fail with a clear message
            # instead of an AttributeError on ``None.group``.
            raise ValueError(f"no storage link matching {pattern!r} found in {url}")
        # The year segment is fixed to 2017, matching the URLs this notebook uses.
        resource_url = f"https://{match.group(0)}2017/{second_last_part}/{name}"

        # download the file
        data = await fetch(session, resource_url)

        # save the file
        file_path = location / name
        file_path.write_bytes(data)
        print(f"Saving {name}")

    async with aiohttp.ClientSession() as session:
        # return_exceptions=True keeps gather waiting for *all* downloads.
        # Without it the first failure propagates immediately, the session is
        # closed by the ``async with`` and the still-running tasks break.
        results = await asyncio.gather(
            *(process_one_file(session, url) for url in urls),
            return_exceptions=True,
        )

    failures = [
        (url, outcome)
        for url, outcome in zip(urls, results)
        if isinstance(outcome, BaseException)
    ]
    for url, error in failures:
        print(f"Failed {url}: {error!r}")
    if failures:
        # Preserve the original contract: a failed download still raises.
        raise failures[0][1]


### process

In [25]:
# Pages to process. Each URL ends in "<dir>/<file name>"; process_many reuses
# those two components to build the real download URL and the local file name.
urls = [
    "https://file-examples.com/wp-content/storage/2017/02/zip_2MB.zip",
    "https://file-examples.com/wp-content/storage/2017/10/file-sample_150kB.pdf",
    "https://file-examples.com/wp-content/storage/2017/02/file_example_JSON_1kb.json",
    "https://file-examples.com/wp-content/storage/2017/02/file_example_XML_24kb.xml",
    "https://file-examples.com/wp-content/storage/2017/02/index.html",
    "https://file-examples.com/wp-content/storage/2017/02/file_example_XLSX_5000.xlsx",
    "https://file-examples.com/wp-content/storage/2017/02/file_example_CSV_5000.csv",
]

# Top-level await: valid in a notebook cell, but in a plain script this call
# would need asyncio.run(process_many(urls)) instead.
await process_many(urls)

url = 'https://file-examples.com/wp-content/storage/2017/10/file-sample_150kB.pdf'
url = 'https://file-examples.com/wp-content/storage/2017/02/index.html'
url = 'https://file-examples.com/wp-content/storage/2017/02/file_example_JSON_1kb.json'
url = 'https://file-examples.com/wp-content/storage/2017/02/file_example_XML_24kb.xml'
url = 'https://file-examples.com/wp-content/storage/2017/02/zip_2MB.zip'
url = 'https://file-examples.com/wp-content/storage/2017/02/file_example_CSV_5000.csv'
url = 'https://file-examples.com/wp-content/storage/2017/02/file_example_XLSX_5000.xlsx'
url = 'https://file-examples.com/storage/fe0707c5116828d4b9ad356/2017/02/file_example_XML_24kb.xml'
Saving file_example_XML_24kb.xml
url = 'https://file-examples.com/storage/fe0707c5116828d4b9ad356/2017/02/index.html'
url = 'https://file-examples.com/storage/fe0707c5116828d4b9ad356/2017/02/file_example_JSON_1kb.json'
Saving file_example_JSON_1kb.json
url = 'https://file-examples.com/storage/fe0707c5116828d4b9ad356/201

### clean up

In [26]:
# Empty the ./data folder again: every regular file directly inside it is
# removed, while the folder itself (and any sub-directory) stays in place.
# Same logic as the set-up cell, so a missing folder is simply created.
folder = Path(".", "data")
if not folder.exists():
    folder.mkdir()
else:
    for leftover in filter(Path.is_file, folder.iterdir()):
        leftover.unlink()