In [6]:
import threading
import requests
from bs4 import BeautifulSoup

# Pages to download; every one lives under the same v0.2 docs root.
DOCS_BASE_URL = 'https://python.langchain.com/v0.2/docs'
urls = [
    f'{DOCS_BASE_URL}/{section}'
    for section in ('introduction', 'concepts', 'tutorials')
]

def fetch_content(url, timeout=30):
    """Download one page and print how many characters of text it contains.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the server before giving up; forwarded to
            ``requests.get``. Without a timeout a stalled server would keep
            the calling thread (and any ``join()`` on it) waiting forever.

    Returns:
        None. The character count is reported on stdout only.

    Raises:
        requests.exceptions.RequestException: if the request fails or the
            timeout expires.
    """
    # NOTE(review): the HTTP status is not checked, so an error page is
    # parsed and counted exactly like a successful one.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    # soup.text is the markup-stripped text, so the count excludes HTML tags.
    print(f'fetched {len(soup.text)} characters from {url} ')


# One thread per URL so the downloads overlap instead of running back to back.
threads = [
    threading.Thread(target=fetch_content, args=(page_url,))
    for page_url in urls
]
for worker in threads:
    worker.start()

# Wait for every download thread to finish before reporting completion.
for worker in threads:
    worker.join()

print("all web pages fetched")

fetched 4168 characters from https://python.langchain.com/v0.2/docs/concepts 
fetched 4168 characters from https://python.langchain.com/v0.2/docs/tutorials 
fetched 4168 characters from https://python.langchain.com/v0.2/docs/introduction 
all web pages fetched


In [7]:
import time
# Wall-clock start for the whole cell. It is taken before the remaining
# imports, so their cost is part of the elapsed time printed at the end.
start = time.time()

import threading
import requests
from bs4 import BeautifulSoup

# Pages to download; every one lives under the same v0.2 docs root.
DOCS_BASE_URL = 'https://python.langchain.com/v0.2/docs'
urls = [
    f'{DOCS_BASE_URL}/{section}'
    for section in ('introduction', 'concepts', 'tutorials')
]

def fetch_content(url, timeout=30):
    """Download one page and print how many characters of text it contains.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the server before giving up; forwarded to
            ``requests.get``. Without a timeout a stalled server would keep
            the calling thread (and any ``join()`` on it) waiting forever.

    Returns:
        None. The character count is reported on stdout only.

    Raises:
        requests.exceptions.RequestException: if the request fails or the
            timeout expires.
    """
    # NOTE(review): the HTTP status is not checked, so an error page is
    # parsed and counted exactly like a successful one.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    # soup.text is the markup-stripped text, so the count excludes HTML tags.
    print(f'fetched {len(soup.text)} characters from {url} ')


# One thread per URL so the downloads overlap instead of running back to back.
threads = [
    threading.Thread(target=fetch_content, args=(page_url,))
    for page_url in urls
]
for worker in threads:
    worker.start()

# Wait for every download thread to finish before reporting completion.
for worker in threads:
    worker.join()

print("all web pages fetched")

# Elapsed wall-clock time since `start` was recorded at the top of the cell.
elapsed = time.time() - start
print("Time:", elapsed)

fetched 4168 characters from https://python.langchain.com/v0.2/docs/tutorials 
fetched 4168 characters from https://python.langchain.com/v0.2/docs/concepts 
fetched 4168 characters from https://python.langchain.com/v0.2/docs/introduction 
all web pages fetched
Time: 8.291846990585327


In [8]:
import time
# Wall-clock start for the whole cell. It is taken before the remaining
# imports, so their cost is part of the elapsed time printed at the end.
start = time.time()

from concurrent.futures import ThreadPoolExecutor
import requests
from bs4 import BeautifulSoup

# Pages to download; every one lives under the same v0.2 docs root.
DOCS_BASE_URL = 'https://python.langchain.com/v0.2/docs'
urls = [
    f'{DOCS_BASE_URL}/{section}'
    for section in ('introduction', 'concepts', 'tutorials')
]

# A single Session used by every fetch_content call in this cell.
# NOTE(review): the thread pool below calls fetch_content from several
# threads at once, so this Session is shared across threads. Confirm that is
# safe for the installed requests version, or create one Session per thread.
session = requests.Session()

def fetch_content(url, timeout=30):
    """Download one page via the shared session and print its text length.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the server before giving up; forwarded to
            ``session.get``. Without a timeout a stalled server would keep a
            pool worker (and the executor shutdown) waiting forever.

    Returns:
        None. The character count is reported on stdout only.

    Raises:
        requests.exceptions.RequestException: if the request fails or the
            timeout expires.
    """
    # NOTE(review): the HTTP status is not checked, so an error page is
    # parsed and counted exactly like a successful one.
    response = session.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    # soup.text is the markup-stripped text, so the count excludes HTML tags.
    print(f'Fetched {len(soup.text)} characters from {url}')

# Run the downloads on a small thread pool; leaving the `with` block waits
# for all submitted work to finish.
with ThreadPoolExecutor(max_workers=3) as executor:
    # Executor.map only re-raises a worker's exception when its result is
    # pulled from the returned iterator. Draining it with list() makes a
    # failed download surface here instead of being dropped silently while
    # the success message below still prints.
    list(executor.map(fetch_content, urls))

print("All web pages fetched")

# Elapsed wall-clock time since `start` was recorded at the top of the cell.
print("Time:", time.time() - start)

Fetched 4168 characters from https://python.langchain.com/v0.2/docs/concepts
Fetched 4168 characters from https://python.langchain.com/v0.2/docs/introduction
Fetched 4168 characters from https://python.langchain.com/v0.2/docs/tutorials
All web pages fetched
Time: 1.2065751552581787
