# In [None]:
from typing import Any, Dict, List

import requests
from bs4 import BeautifulSoup

def get_html_content(url: str, timeout: float = 10.0) -> BeautifulSoup:
    """Fetch a page over HTTP and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on the server before giving up. Passed
            straight to ``requests.get``; without it a stalled connection
            would block the caller indefinitely.

    Returns:
        The response body parsed with Python's built-in ``html.parser``.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as an article.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')

def extract_title(soup: BeautifulSoup) -> str:
    """Return the text of the page's ``<h1 id="firstHeading">`` element.

    When the parsed page has no such element, the placeholder string
    "Title not found" is returned instead.
    """
    heading = soup.find('h1', id='firstHeading')
    if not heading:
        return "Title not found"
    return heading.text

def extract_text_and_headings(soup: BeautifulSoup) -> Dict[str, List[str]]:
    """Group paragraph texts under the ``<h2>`` heading that precedes them.

    Only ``<h2>`` and ``<p>`` elements inside the element with
    ``id="bodyContent"`` are considered. Paragraphs that appear before the
    first ``<h2>`` have no heading to belong to and are skipped.

    Args:
        soup: Parsed HTML of the page.

    Returns:
        A mapping from stripped heading text to the list of stripped
        paragraph texts that follow it, in document order. Empty when the
        page has no ``bodyContent`` element.
    """
    body = soup.find(id="bodyContent")
    if body is None:
        # Not an article-shaped page: report "nothing found" rather than
        # crashing with AttributeError on None.
        return {}

    content: Dict[str, List[str]] = {}
    current_paragraphs = None  # list for the active heading; None until the first <h2>

    for element in body.find_all(['h2', 'p']):
        text = element.get_text().strip()
        if element.name == 'h2':
            # setdefault keeps paragraphs already collected if the same
            # heading text shows up again later in the page.
            current_paragraphs = content.setdefault(text, [])
        elif current_paragraphs is not None:
            # Compare against None (not truthiness) so a heading with empty
            # text still collects the paragraphs that follow it.
            current_paragraphs.append(text)

    return content

def extract_links(soup: BeautifulSoup) -> List[str]:
    """Collect the distinct internal ``/wiki/`` links found in the page body.

    Anchors are taken from the element with ``id="bodyContent"``. An href is
    kept when it starts with ``/wiki/`` and contains no ``:``; it is then
    prefixed with ``https://en.wikipedia.org``.

    Args:
        soup: Parsed HTML of the page.

    Returns:
        Absolute URLs without duplicates, in order of first appearance.
        Empty when the page has no ``bodyContent`` element.
    """
    body = soup.find(id="bodyContent")
    if body is None:
        # Avoid AttributeError on pages that lack the expected container.
        return []

    links = []
    for anchor in body.find_all('a', href=True):
        href = anchor['href']
        if href.startswith('/wiki/') and ':' not in href:
            links.append(f"https://en.wikipedia.org{href}")

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is deterministic (a set would yield an arbitrary order).
    return list(dict.fromkeys(links))

def scrape_wikipedia(url: str) -> Dict[str, Any]:
    """Download a Wikipedia page and extract its title, sectioned text, and links.

    Args:
        url: Full URL of the page to scrape.

    Returns:
        A dict with three keys:
            "title":   the string returned by ``extract_title``;
            "content": the heading-to-paragraphs mapping returned by
                       ``extract_text_and_headings``;
            "links":   the list of URLs returned by ``extract_links``.

    Raises:
        Any exception raised by ``get_html_content`` while fetching the
        page propagates to the caller.
    """
    # Fetch and parse once; all three extractors read from the same tree.
    soup = get_html_content(url)

    return {
        "title": extract_title(soup),
        "content": extract_text_and_headings(soup),
        "links": extract_links(soup),
    }


# Demo run: scrape the "Web scraping" article and print the resulting dict.
# NOTE: these statements sit at module level, so the network request is made
# whenever this file is executed or imported.
url = "https://en.wikipedia.org/wiki/Web_scraping"
data = scrape_wikipedia(url)
print(data)
