# **BBC News Scraper**
This notebook scrapes the latest stories from the BBC News website, collecting each story's headline, description, and link.

In [1]:
# Import Required Libraries
import time
from datetime import datetime
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Reached only if every import above succeeded; gives visible confirmation that the cell ran.
print("Libraries imported successfully!")

Libraries imported successfully!


In [2]:
def scrape_bbc_news(max_articles: int = 20):
    """
    Scrape the BBC News homepage for headlines, descriptions and links.

    Args:
        max_articles: Maximum number of <h2> heading tags to inspect.
            Defaults to 20. Headings without any text are skipped, so the
            result may contain fewer rows than this.

    Returns:
        A pandas DataFrame with the columns 'Headline', 'Description',
        'Link' and 'Scraped_Date' (one row per headline), or None when the
        page cannot be fetched or an unexpected error occurs. Errors are
        printed, not raised.
    """
    try:
        # BBC News URL
        url = 'https://www.bbc.com/news'

        # Set headers to mimic a browser request
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        print(f"Scraping BBC News from: {url}")
        print("Please wait...")

        # Send GET request
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise exception for bad status codes

        # Parse HTML content
        soup = BeautifulSoup(response.content, 'html.parser')

        # One timestamp for the whole run so every row carries the same value
        scraped_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        # One dict per article; turned into a DataFrame in a single step below
        records = []

        # Headlines are taken from <h2> tags only
        for article in soup.find_all('h2', limit=max_articles):
            try:
                headline_text = article.get_text(strip=True)
                if not headline_text:  # Only keep headings that have text
                    continue

                # The closest enclosing <a> carries the story URL. urljoin
                # resolves root-relative, path-relative and protocol-relative
                # hrefs against the page URL and leaves absolute URLs as-is.
                link_tag = article.find_parent('a')
                href = link_tag.get('href') if link_tag is not None else None
                link = urljoin(url, href) if href else 'N/A'

                # Look for a <p> summary in the heading's immediate parent
                # first; if none is there, widen the search to the enclosing
                # <a>, since the immediate parent alone left every
                # description empty.
                # NOTE(review): assumes the summary <p> sits inside the same
                # <a> as the heading - confirm against the live markup.
                description = ''
                for container in (article.find_parent(), link_tag):
                    if container is None:
                        continue
                    p_tag = container.find('p')
                    if p_tag is not None:
                        description = p_tag.get_text(strip=True)
                        break

                records.append({
                    'Headline': headline_text,
                    'Description': description,
                    'Link': link,
                    'Scraped_Date': scraped_at,
                })

            except Exception as e:
                # Best effort: one malformed card must not abort the whole run
                print(f"Error parsing article: {e}")
                continue

        # Explicit columns keep the schema stable even when nothing was found
        df = pd.DataFrame(
            records,
            columns=['Headline', 'Description', 'Link', 'Scraped_Date'],
        )

        return df

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the page: {e}")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

print("Scraping function defined successfully!")

Scraping function defined successfully!


In [3]:
# Run the scraper once; the cells below reuse the resulting `news_df`
news_df = scrape_bbc_news()

# A None result means the scrape failed; an empty frame means nothing was found
if news_df is None or len(news_df) == 0:
    print("No articles found or scraping failed.")
else:
    print(f"\n✓ Successfully scraped {len(news_df)} articles!")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

Scraping BBC News from: https://www.bbc.com/news
Please wait...

✓ Successfully scraped 20 articles!
Date: 2026-02-06 09:01:25



In [4]:
# Print every scraped story as a numbered, human-readable entry
if news_df is None or len(news_df) == 0:
    print("No data available to display.")
else:
    print("=" * 80)
    print("BBC NEWS - TODAY'S HEADLINES")
    print("=" * 80)

    for idx, row in news_df.iterrows():
        print(f"\n{idx + 1}. HEADLINE: {row['Headline']}")

        # Cap long descriptions at 150 characters and mark the cut with "..."
        description = row['Description']
        if len(description) > 150:
            description = description[:150] + "..."
        print(f"   DESCRIPTION: {description}")

        print(f"   LINK: {row['Link']}")
        print("-" * 80)

    print(f"\nTotal Articles Scraped: {len(news_df)}")

BBC NEWS - TODAY'S HEADLINES

1. HEADLINE: Russian general shot several times in Moscow
   DESCRIPTION: 
   LINK: https://www.bbc.com/news/articles/c3686nzexp3o
--------------------------------------------------------------------------------

2. HEADLINE: BBC at scene where Russian general was shot in Moscow
   DESCRIPTION: 
   LINK: https://www.bbc.com/news/videos/cx2y27je512o
--------------------------------------------------------------------------------

3. HEADLINE: Coin portrait of late Queen draws criticism in Australia
   DESCRIPTION: 
   LINK: https://www.bbc.com/news/articles/c70l0wzww50o
--------------------------------------------------------------------------------

4. HEADLINE: Trump endorses Japan's Takaichi ahead of snap election
   DESCRIPTION: 
   LINK: https://www.bbc.com/news/articles/c5yd14r2mgno
--------------------------------------------------------------------------------

5. HEADLINE: We had sex in a Chinese hotel, then found we had been broadcast to thousands

In [5]:
# Plain-text table of the first ten scraped rows, index column included
if news_df is not None and len(news_df) > 0:
    first_rows = news_df.head(10)
    print("\n DATA SUMMARY:")
    print(first_rows.to_string(index=True))


 DATA SUMMARY:
                                                                                            Headline Description                                            Link         Scraped_Date
0                                                       Russian general shot several times in Moscow              https://www.bbc.com/news/articles/c3686nzexp3o  2026-02-06 09:01:25
1                                              BBC at scene where Russian general was shot in Moscow                https://www.bbc.com/news/videos/cx2y27je512o  2026-02-06 09:01:25
2                                           Coin portrait of late Queen draws criticism in Australia              https://www.bbc.com/news/articles/c70l0wzww50o  2026-02-06 09:01:25
3                                             Trump endorses Japan's Takaichi ahead of snap election              https://www.bbc.com/news/articles/c5yd14r2mgno  2026-02-06 09:01:25
4                       We had sex in a Chinese hotel, then found we had b

In [6]:
# Write the scraped rows to a CSV file named after today's date
if news_df is None or len(news_df) == 0:
    print("No data to save.")
else:
    date_stamp = datetime.now().strftime('%Y_%m_%d')
    filename = f"bbc_news_{date_stamp}.csv"
    # index=False keeps the DataFrame's row index out of the file
    news_df.to_csv(filename, index=False, encoding='utf-8')
    print(f"\n✓ Data saved to: {filename}")
    print(f"Total records: {len(news_df)}")


✓ Data saved to: bbc_news_2026_02_06.csv
Total records: 20
