# BBC News web scraping

In [None]:
# Required Libraries
import time
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from textblob import TextBlob

In [None]:
# News URL (Example: BBC Technology)
URL = "https://www.bbc.com/news/technology"

# Browser-like headers sent with every request made by this notebook
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36",
    "Accept-Language": "en-US,en;q=0.5",
}

# Send Request. requests applies no timeout by default, so without one a
# stalled connection would block this cell indefinitely.
webpage = requests.get(URL, headers=HEADERS, timeout=10)
print(webpage)  # <Response [200]> means request successful

# Fail here on an HTTP error status (4xx/5xx) rather than silently parsing an
# error page and ending up with an empty result set further down.
webpage.raise_for_status()

# Parse HTML
soup = BeautifulSoup(webpage.content, "html.parser")

# Result columns, kept as parallel lists: index i of each list describes the
# same article.
titles = []
links = []
summaries = []
sentiments = []

# Site root used to resolve relative article links
BASE_URL = "https://www.bbc.com"

# Seconds to wait for a single article page before giving up on it
ARTICLE_TIMEOUT = 10


def _fetch_summary(url):
    """Return the stripped text of the first <p> on the page at *url*.

    Returns "N/A" when the page contains no <p> element, or when the request
    fails (connection error, timeout, HTTP error status), so that one bad
    article does not abort the whole scrape.
    """
    try:
        response = requests.get(url, headers=HEADERS, timeout=ARTICLE_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Could not fetch {url}: {exc}")
        return "N/A"

    first_para = BeautifulSoup(response.content, "html.parser").find("p")
    return first_para.get_text().strip() if first_para else "N/A"


def _label_polarity(polarity):
    """Map a polarity score to "Positive", "Negative" or "Neutral" by sign."""
    if polarity > 0:
        return "Positive"
    if polarity < 0:
        return "Negative"
    return "Neutral"


# Collect all news articles (assumes headline links are <a> tags carrying the
# gs-c-promo-heading class)
for article in soup.find_all("a", {"class": "gs-c-promo-heading"}):
    href = article.get("href")
    if not href:
        # An anchor without a target cannot be followed; skip it
        continue

    title = article.get_text().strip()
    # urljoin resolves relative hrefs against the site root and leaves
    # already-absolute hrefs untouched
    link = urljoin(BASE_URL, href)

    # Fetch individual article page for summary
    summary = _fetch_summary(link)

    # Sentiment Analysis (using TextBlob)
    sentiment = TextBlob(summary).sentiment.polarity

    # Append all four values together, only after everything that can fail
    # has run, so the lists always have equal length.
    titles.append(title)
    links.append(link)
    summaries.append(summary)
    sentiments.append(_label_polarity(sentiment))



In [None]:
# ✅ Assemble the scraped lists into one table, one column per list
columns = {
    "Title": titles,
    "Summary": summaries,
    "Link": links,
    "Sentiment": sentiments,
}
df = pd.DataFrame(data=columns)

# Preview the leading rows of the table
preview = df.head()
print(preview)

In [None]:
# Write the table to a CSV file, leaving out the DataFrame index
OUTPUT_CSV = "bbc_news_sentiment.csv"
df.to_csv(OUTPUT_CSV, index=False)
print("News data saved to bbc_news_sentiment.csv")