In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

# Step 1: Build a Chrome WebDriver.
# "--headless" runs the browser without a visible window; "--no-sandbox"
# turns off Chrome's sandbox.
chrome_options = Options()
for flag in ("--headless", "--no-sandbox"):
    chrome_options.add_argument(flag)

# ChromeDriverManager().install() returns the path of the chromedriver
# executable that the Service will launch.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

try:
    # Step 2: Load the Wikipedia article in the browser.
    Wiki_url = "https://en.wikipedia.org/wiki/Key_events_of_the_20th_century"
    driver.get(Wiki_url)

    # Steps 3-4: Take the page source from the browser and parse it.
    html = driver.page_source
    soup = BeautifulSoup(html, "html.parser")

    # Step 5: Text of the entire document (may include noise).
    text = soup.get_text()

    # Step 6: Text of the element whose id is "bodyContent"; when no such
    # element is found, a placeholder message is used instead.
    body_content = soup.find(id="bodyContent")
    text2 = (
        body_content.get_text()
        if body_content
        else "No content found in 'bodyContent'."
    )

    # Step 7: Write the narrower extraction to disk as UTF-8.
    with open("20th_century_scrape.txt", "w", encoding="utf-8") as fp:
        fp.write(text2)

    print("Scraping complete! Extracted text saved to '20th_century_scrape.txt'.")

finally:
    # Step 8: Shut the browser down whether or not the steps above succeeded.
    driver.quit()


Scraping complete! Extracted text saved to '20th_century_scrape.txt'.
