# This script contains the following:
## 1. Import libraries
## 2. Fetch webpage and inspect
## 3. Scrape webpage

### 1. Import libraries

In [1]:
# Import libraries
import requests
from bs4 import BeautifulSoup
from pathlib import Path
import re

print("Libraries imported successfully")


Libraries imported successfully


In [2]:
# NOTE(review): none of the cells shown in this notebook reference webdriver,
# Service, ChromeDriverManager or time — the scrape below uses requests and
# BeautifulSoup only. Confirm whether these imports are still needed; while
# they stay, the notebook requires selenium and webdriver_manager to be installed.
# BeautifulSoup is also already imported in the first cell.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import time

### 2. Fetch webpage and inspect

In [3]:
# Page to scrape
url = "https://en.wikipedia.org/wiki/Key_events_of_the_20th_century"

# Request headers: a single User-Agent entry whose value is assembled from
# its three space-separated parts (each part keeps its own trailing space).
headers = {
    "User-Agent": "".join(
        [
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ",
            "AppleWebKit/537.36 (KHTML, like Gecko) ",
            "Chrome/118.0.0.0 Safari/537.36",
        ]
    )
}


In [4]:
# Fetch the page using the URL and headers defined above, with timeout=30.
response = requests.get(
    url=url,
    headers=headers,
    timeout=30,
)
# Stop here with an exception if the server answered with an HTTP error status.
response.raise_for_status()

In [5]:
# Sanity check: show the HTTP status code and the first 300 characters of the
# returned HTML before parsing it.
print("Status code:", response.status_code)
print(response.text[:300])

Status code: 200
<!DOCTYPE html>
<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-clientpref-1 vector-feature-main-menu-pinned-disabled vector-feature-limited-width-clientpref-1 


### 3. Scrape webpage

In [6]:
# Parse the fetched HTML and pick out the element that the cleaning and
# text-extraction cells below operate on.
soup = BeautifulSoup(response.text, "html.parser")
body = soup.select_one("#mw-content-text .mw-parser-output")

# select_one() returns None when nothing matches the selector. Fail here with
# a clear message instead of a later AttributeError on `body.select(...)`.
if body is None:
    raise RuntimeError(
        "Could not find '#mw-content-text .mw-parser-output' in the fetched page"
    )

In [7]:
# Strip elements that should not end up in the saved text. Selectors are
# processed one at a time, in this order; every match is removed from the tree.
for selector in (
    "sup.reference",
    "span.mw-editsection",
    "div.navbox",
    "table",
    "style",
    "script",
):
    for element in body.select(selector):
        element.decompose()


In [8]:
# Flatten the cleaned body to plain text (whitespace-stripped pieces joined by
# newlines), then collapse any run of two or more newlines to exactly two.
# NOTE(review): separator="\n" puts every text node on its own line, so inline
# links are split away from their sentence (visible in the preview output below).
text = re.sub(
    r"\n{2,}",
    "\n\n",
    body.get_text(separator="\n", strip=True),
)

In [9]:
# Write the extracted text to a UTF-8 file (path is relative to the current
# working directory) and report the resolved absolute location.
out_path = Path("key_events_20th_century.txt")
out_path.write_text(text, encoding="utf-8")
print(f"Saved → {out_path.resolve()}")

# Show the first 25 lines as a preview. sep="" stops print() from inserting its
# default space between the heading and the preview text, which otherwise
# indents the first preview line by one character.
print("\nPreview:\n", "\n".join(text.splitlines()[:25]), sep="")

Saved → /Users/ianfleming/20th-century/key_events_20th_century.txt

Preview:
 The
20th century
changed the world in unprecedented ways. The
World Wars
sparked tension between countries and led to the creation of
atomic bombs
, the
Cold War
led to the
Space Race
and the creation of space-based rockets, and the
World Wide Web
was created. These advancements have played a significant role in citizens' lives and shaped the
21st century
into what it is today.
Historic events in the 20th century
World at the beginning of the century
Main article:
Edwardian era
The new beginning of the 20th century marked significant changes. The 1900s saw the decade herald a series of inventions, including the
automobile
,
airplane
and
radio broadcasting


I tried to use Selenium and ChromeDriver in the 1.4 Alice exercise, but this approach caused version and driver compatibility issues on my Mac.

After a lot of research, I switched to a simpler and more efficient solution: using requests.get() to fetch the page directly and BeautifulSoup to parse and clean the HTML.
