## Asynchronous scraping

In [12]:
import aiohttp
import asyncio
import nest_asyncio
from bs4 import BeautifulSoup
import csv
import re

nest_asyncio.apply()

In [13]:
async def scrape_and_save_links(text):
    """Extract absolute links from an HTML document and append them to a CSV.

    Every ``<a>`` tag whose ``href`` starts with ``http`` contributes one
    single-column row to the file named ``csv_file`` in the current working
    directory. The file is opened in append mode, so rows from earlier calls
    are kept.

    Args:
        text: HTML markup to parse.
    """
    soup = BeautifulSoup(text, 'html.parser')
    anchors = soup.find_all('a', attrs={'href': re.compile('^http')})
    # The context manager closes the file even if writing fails, and the
    # explicit encoding keeps non-ASCII URLs from failing on platforms whose
    # default encoding cannot represent them.
    # There is no ``await`` in this body, so one call's rows are written
    # without another coroutine on the same event loop interleaving.
    with open('csv_file', 'a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file, delimiter=',')
        writer.writerows([anchor.get('href')] for anchor in anchors)

In [14]:
async def fetch(session, url):
    """Download one page and store the links it contains.

    The response body is read as text and handed to
    ``scrape_and_save_links``. Any failure is reported on stdout rather than
    raised, so one bad URL does not abort the other fetches running
    alongside it.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with an awaitable ``text()`` method
            (``scrape`` passes an ``aiohttp.ClientSession``).
        url: Address of the page to download.
    """
    try:
        async with session.get(url) as response:
            text = await response.text()
            # Await the coroutine directly; wrapping it in a task only to
            # await it on the next line adds nothing.
            await scrape_and_save_links(text)
    except Exception as e:
        # Deliberately best-effort. Include the URL and the exception's repr
        # so that failures with an empty message (e.g. timeouts) are still
        # identifiable in the output.
        print(f"Failed to scrape {url}: {e!r}")

In [15]:
async def scrape(urls):
    """Fetch every URL in ``urls`` concurrently over one shared HTTP session.

    Args:
        urls: Iterable of page addresses to download and scrape.
    """
    async with aiohttp.ClientSession() as session:
        # Build all the fetch coroutines first, then run them together.
        pending = [fetch(session, address) for address in urls]
        await asyncio.gather(*pending)

In [16]:
# Pages to scrape for outbound links.
urls = [
    'https://analytics.usa.gov/',
    'https://www.python.org/',
    'https://www.linkedin.com/',
]

# Run the whole scrape to completion on the event loop.
asyncio.run(scrape(urls))