# **NEWS SCRAPING**

In [1]:
import requests
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin  # For joining URLs

def scrape_bbc_news():
    """Scrape the BBC News front page sections and save the articles to CSV.

    Fetches ``https://www.bbc.com/news``, collects every ``<a>`` element with
    the ``nw-o-link`` class, scrapes each linked page with
    ``scrape_section`` and writes the combined rows with ``save_to_csv``.

    If the front page cannot be fetched (network error or a non-200 status),
    a message is printed and nothing is saved.

    Returns:
        None
    """
    base_url = 'https://www.bbc.com'
    url = 'https://www.bbc.com/news'  # URL of the BBC News website
    data = []

    # A timeout is required: without one, requests can block indefinitely.
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        print('Failed to fetch data from BBC News')
        return

    # Anything other than 200 is treated as a failed fetch.
    if response.status_code != 200:
        print('Failed to fetch data from BBC News')
        return

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find sections/categories on the BBC News website
    sections = soup.find_all('a', class_='nw-o-link')

    # Track visited URLs so a link that appears several times on the page
    # is only scraped once (avoids duplicate rows and wasted requests).
    seen_links = set()

    for section in sections:
        # Anchors without an href cannot be followed; skip them instead of
        # raising KeyError.
        href = section.get('href')
        if not href:
            continue

        section_link = urljoin(base_url, href)  # Join base URL with section link
        if section_link in seen_links:
            continue
        seen_links.add(section_link)

        section_name = section.text.strip()
        print(f"Scraping articles from '{section_name}' section...")

        # One unreachable section must not discard the data collected so far.
        try:
            section_data = scrape_section(section_link)
        except requests.RequestException:
            print(f'Failed to fetch data from {section_link}')
            section_data = []

        data.extend(section_data)
        print("\n")

    # Save data to CSV file
    save_to_csv(data)

def scrape_section(section_url):
    """Scrape article titles and summaries from a single section page.

    Args:
        section_url: Absolute URL of the page to fetch.

    Returns:
        A list of dicts with the keys ``'Title'`` and ``'Content'``, one per
        ``div.gs-c-promo`` element found on the page. The list is empty when
        the page cannot be fetched (network error or a non-200 status) or
        contains no such elements.
    """
    section_data = []

    # A timeout is required: without one, requests can block indefinitely.
    # Network-level failures are reported the same way as a bad status code
    # so a single unreachable page does not abort the whole scrape.
    try:
        response = requests.get(section_url, timeout=10)
    except requests.RequestException:
        print(f'Failed to fetch data from {section_url}')
        return section_data

    # Anything other than 200 is treated as a failed fetch.
    if response.status_code != 200:
        print(f'Failed to fetch data from {section_url}')
        return section_data

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all article elements
    articles = soup.find_all('div', class_='gs-c-promo')

    # Extract titles and content of each article
    for article in articles:
        # Extract title, falling back to a placeholder when it is missing
        title_element = article.find('h3', class_='gs-c-promo-heading__title')
        title = title_element.text.strip() if title_element else 'No title'

        # Extract content (if available)
        content_element = article.find('p', class_='gs-c-promo-summary')
        content = content_element.text.strip() if content_element else 'No content available'

        section_data.append({'Title': title, 'Content': content})

    return section_data

def save_to_csv(data, file_name='bbc_news.csv'):
    """Write scraped rows to a CSV file with a ``Title,Content`` header.

    Args:
        data: Iterable of dicts, each with the keys ``'Title'`` and
            ``'Content'``.
        file_name: Path of the CSV file to create or overwrite. Defaults to
            ``'bbc_news.csv'`` in the current working directory.

    Returns:
        None

    Raises:
        ValueError: If a row contains a key other than ``'Title'`` or
            ``'Content'`` (``csv.DictWriter`` default behaviour).
    """
    fieldnames = ['Title', 'Content']

    # newline='' lets the csv module control line endings itself.
    with open(file_name, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        # Write header, then every row in one call
        writer.writeheader()
        writer.writerows(data)

    print(f"Data saved to '{file_name}'")

# Initiate scraping and save data to CSV only when this file is run directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    scrape_bbc_news()

Data saved to 'bbc_news.csv'
