# Web Scraping with BeautifulSoup

In [2]:
# 📌 Step 1: Import Libraries
import requests
from bs4 import BeautifulSoup
import csv

In [3]:
# 📌 Step 2: Fetch a Webpage
# We'll scrape Hacker News (https://news.ycombinator.com/) for headlines

url = "https://news.ycombinator.com/"
response = requests.get(url)

# Check if request was successful
if response.status_code == 200:
    print("✅ Successfully fetched webpage!")
else:
    print("❌ Failed to fetch webpage. Status:", response.status_code)

✅ Successfully fetched webpage!


In [4]:
# 📌 Step 3: Parse HTML with BeautifulSoup
soup = BeautifulSoup(response.text, "html.parser")

# Find all headlines (on Hacker News, headlines are inside <a class="storylink"> but site changes sometimes)
headlines = soup.find_all("span", class_="titleline")

print(f"Found {len(headlines)} headlines!")

Found 30 headlines!


In [5]:
# 📌 Step 4: Extract and Store Headlines
news_data = []

for h in headlines:
    title = h.get_text(strip=True)
    link = h.find("a")["href"]
    news_data.append([title, link])

# Show first 5 headlines
news_data[:5]

[['Almost anything you give sustained attention to will begin to loop on itself(henrikkarlsson.xyz)',
  'https://www.henrikkarlsson.xyz/p/attention'],
 ['Le Chat. Custom MCP Connectors. Memories(mistral.ai)',
  'https://mistral.ai/news/le-chat-mcp-connectors-memories'],
 ['30 minutes with a stranger(pudding.cool)',
  'https://pudding.cool/2025/06/hello-stranger/'],
 ['Inverting the Xorshift128 random number generator(littlemaninmyhead.wordpress.com)',
  'https://littlemaninmyhead.wordpress.com/2025/08/31/inverting-the-xorshift128-random-number-generator/'],
 ['Use Bayes rule to mechanically solve probability riddles(disroot.org)',
  'https://cloud.disroot.org/s/Ec4xTMFDteTrFio']]

In [6]:
# 📌 Step 5: Save Data to CSV
filename = "news_headlines.csv"

with open(filename, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Title", "Link"])   # Header row
    writer.writerows(news_data)

print(f"✅ Saved {len(news_data)} headlines to {filename}")

✅ Saved 30 headlines to news_headlines.csv
