[Reference](https://medium.com/@datajournal/speed-up-web-scraping-b045b9da0b1e)

In [1]:
import requests
from bs4 import BeautifulSoup
import csv

# Common URL prefix; the page number is appended as a path segment when each
# page URL is built (".../page/<n>/").
base_url = "https://example.com/products/page"
# Page numbers to request: 1 through 12 inclusive (range() excludes the stop).
pages = range(1, 13)  # Scrape 12 pages

def extract_data(page, timeout=10):
    """Fetch one listing page and return the products found on it.

    Args:
        page: Page number, appended to ``base_url`` to build the request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        A list of dicts with the keys ``"name"``, ``"price"`` and ``"url"``,
        one per element matching the ``.product`` selector.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = f"{base_url}/{page}/"
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of parsing them as "zero products".
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    return [
        {
            "name": product.find("h2").text.strip(),
            "price": product.find(class_="price").text.strip(),
            "url": product.find("a").get("href"),
        }
        for product in soup.select(".product")
    ]

def store_results(products):
    """Write the scraped products to ``products.csv``.

    The header row is taken from the keys of the first product, so every
    product is expected to carry the same keys.

    Args:
        products: List of dicts, one per product. An empty list is a no-op:
            there are no keys to build a header from, and any existing
            ``products.csv`` is left untouched rather than truncated.
    """
    if not products:
        return

    with open("products.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=list(products[0]))
        writer.writeheader()
        writer.writerows(products)

# Gather the products of every page, in page order, into one flat list.
all_products = []
for page in pages:
    all_products += extract_data(page)

# Persist the combined result as CSV.
store_results(all_products)

In [2]:
pip install aiohttp beautifulsoup4



In [4]:
import aiohttp
import asyncio
from bs4 import BeautifulSoup
import csv

# Common URL prefix; extract_data() appends the page number as a path segment
# (".../page/<n>/").
base_url = "https://example.com/products/page"
# Page numbers to request: 1 through 12 inclusive (range() excludes the stop).
pages = range(1, 13)  # Scrape 12 pages

async def extract_data(page, session):
    """Fetch one listing page through ``session`` and return its products.

    Args:
        page: Page number, appended to ``base_url`` to build the request URL.
        session: Object whose ``get(url)`` returns an async context manager
            yielding the response; ``main`` passes an
            ``aiohttp.ClientSession``.

    Returns:
        A list of dicts with the keys ``"name"``, ``"price"`` and ``"url"``,
        one per element matching the ``.product`` selector.

    Raises:
        aiohttp.ClientResponseError: If the server answers with a 4xx/5xx
            status.
    """
    url = f"{base_url}/{page}/"
    async with session.get(url) as response:
        # Fail loudly on error pages instead of parsing them as "zero products".
        response.raise_for_status()
        html = await response.text()

    # Parsing needs only the downloaded text, so it happens after the
    # response (and its connection) has been released.
    soup = BeautifulSoup(html, "html.parser")
    return [
        {
            "name": product.find("h2").text.strip(),
            "price": product.find(class_="price").text.strip(),
            "url": product.find("a").get("href"),
        }
        for product in soup.select(".product")
    ]

async def main():
    """Scrape every page in ``pages`` concurrently and save the products.

    One ``aiohttp.ClientSession`` is shared by all page fetches. The
    per-page result lists are concatenated in page order and handed to
    ``store_results``.
    """
    async with aiohttp.ClientSession() as session:
        # gather() returns results in the order the awaitables were passed.
        per_page = await asyncio.gather(
            *(extract_data(page, session) for page in pages)
        )

        # Merge the per-page lists into a single flat list.
        all_products = []
        for page_products in per_page:
            all_products.extend(page_products)
        store_results(all_products)

def store_results(products):
    """Save the scraped products as rows of ``products.csv``.

    Column names come from the keys of the first product, so every product
    is expected to carry the same keys.

    Args:
        products: List of dicts, one per product. When the list is empty the
            function returns without touching the file, because there are no
            keys to derive a header from and truncating an earlier result
            would lose data.
    """
    if not products:
        return

    with open("products.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=list(products[0]))
        writer.writeheader()
        writer.writerows(products)

# Run the asyncio event loop: asyncio.run() creates a new loop, runs main()
# to completion, and closes the loop.
# NOTE(review): asyncio.run() raises RuntimeError when an event loop is
# already running in the current thread (e.g. inside a Jupyter/IPython
# kernel); there `await main()` would be needed instead — confirm where this
# cell is executed.
asyncio.run(main())

In [5]:
import asyncio
from bs4 import BeautifulSoup
import aiohttp

# Upper bound on the number of requests in flight at the same time.
max_concurrency = 5  # Limit concurrent requests
# Shared by every extract_data() call; each call holds one slot while it
# fetches its page.
# NOTE(review): on Python < 3.10 asyncio primitives bind to an event loop when
# constructed, so a semaphore created at module level may not match the loop
# that asyncio.run() creates later — confirm the target Python version.
sem = asyncio.Semaphore(max_concurrency)

async def extract_data(page, session):
    """Fetch one listing page, at most ``max_concurrency`` at a time.

    Args:
        page: Page number, appended to ``base_url`` to build the request URL.
        session: Object whose ``get(url)`` returns an async context manager
            yielding the response (presumably an ``aiohttp.ClientSession``;
            verify against the caller).

    Returns:
        A list of dicts with the keys ``"name"``, ``"price"`` and ``"url"``,
        one per element matching the ``.product`` selector.

    Raises:
        aiohttp.ClientResponseError: If the server answers with a 4xx/5xx
            status.
    """
    url = f"{base_url}/{page}/"
    async with sem:  # This limits the number of concurrent requests
        async with session.get(url) as response:
            # Fail loudly on error pages instead of parsing them as
            # "zero products".
            response.raise_for_status()
            html = await response.text()

    # Parsing needs no network access, so it runs after the semaphore slot
    # and the connection have been released for the next request.
    soup = BeautifulSoup(html, "html.parser")
    return [
        {
            "name": product.find("h2").text.strip(),
            "price": product.find(class_="price").text.strip(),
            "url": product.find("a").get("href"),
        }
        for product in soup.select(".product")
    ]