# In [1]:
import csv
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def scrape_website(url, timeout=10):
    """Scrape link texts and their absolute URLs from a web page.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{"title": ..., "link": ...}`` dicts, one per ``<a>``
        element that has non-empty text. An empty list is returned when
        the page cannot be fetched (the error is printed).
    """
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled server; a timeout surfaces as a RequestException below.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")

    # Relative hrefs must be resolved against the URL that was actually
    # served (after any redirects), not the URL the user typed.
    base_url = response.url

    # Find all links and their text (modify this according to your target
    # website).
    data = []
    for anchor in soup.find_all("a"):
        title = anchor.get_text(strip=True)
        # Skip anchors without visible text.
        if not title:
            continue

        href = anchor.get("href", "")
        # urljoin handles root-relative ("/x"), protocol-relative ("//host/x"),
        # parent-relative ("../x") and fragment ("#x") references, and leaves
        # absolute URLs and other schemes (mailto:, tel:) untouched.
        data.append({"title": title, "link": urljoin(base_url, href)})

    return data

def save_to_csv(data, filename="scraped_data.csv"):
    """Write the scraped rows to ``filename`` as CSV with a header line.

    Each item of ``data`` is a dict with the keys ``title`` and ``link``;
    the file is written as UTF-8 and overwritten if it already exists.
    """
    columns = ["title", "link"]
    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", newline="", encoding="utf-8") as csv_file:
        row_writer = csv.DictWriter(csv_file, fieldnames=columns)
        row_writer.writeheader()
        for row in data:
            row_writer.writerow(row)
    print(f"Data saved to {filename}")

def save_to_json(data, filename="scraped_data.json"):
    """Save scraped data to a pretty-printed UTF-8 JSON file.

    Args:
        data: JSON-serializable object (here, a list of title/link dicts).
        filename: Path of the file to write; overwritten if it exists.
    """
    with open(filename, "w", encoding="utf-8") as file:
        # The file is UTF-8, so keep non-ASCII headlines readable instead of
        # escaping them as \uXXXX sequences; the parsed content is unchanged.
        json.dump(data, file, indent=4, ensure_ascii=False)
    print(f"Data saved to {filename}")

if __name__ == "__main__":
    # Script entry point: ask for a URL, scrape it, and write both output
    # files. Surrounding whitespace (common when a URL is pasted) is stripped
    # so it does not end up as part of the request URL.
    user_url = input("Enter the website URL to scrape: ").strip()
    scraped_data = scrape_website(user_url)

    if scraped_data:
        save_to_csv(scraped_data)
        save_to_json(scraped_data)
    else:
        # scrape_website returns an empty list both on fetch errors and when
        # the page has no links with text.
        print("No data found or invalid website.")


# Sample run:
# Enter the website URL to scrape: https://www.bbc.com/news
# Data saved to scraped_data.csv
# Data saved to scraped_data.json
