In [1]:
import requests
from bs4 import BeautifulSoup
import os


In [2]:
# Define the fetch_articles function for testing
def fetch_articles(url, save_dir, timeout=10, max_articles=5):
    """
    Fetches articles from the specified URL and saves them in the specified directory.

    Every <article> element found on the page (up to ``max_articles``) is written
    to its own UTF-8 text file containing the title, a blank line, and the text
    of the article's first <p> element.

    Args:
        url (str): The URL of the website to fetch articles from.
        save_dir (str): The directory where the articles will be saved.
            It is created if it does not exist yet.
        timeout (float): Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook cell forever.
        max_articles (int or None): Maximum number of articles to save.
            ``None`` saves every article found on the page.

    Returns:
        List of dictionaries with article titles and content (keys "title" and
        "content"), one per saved article. An empty list is returned when the
        server does not answer with status 200 or the page has no <article>.

    Raises:
        requests.RequestException: If the request itself fails (for example a
            connection error or a timeout).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch articles. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, "html.parser")

    articles = soup.find_all("article")
    if not articles:
        print("No articles found on the webpage.")
        return []

    os.makedirs(save_dir, exist_ok=True)

    saved_articles = []
    used_stems = set()  # lower-cased file stems already written during this call
    for i, article in enumerate(articles[:max_articles]):
        title_element = article.find("h2")
        title = title_element.get_text(strip=True) if title_element else ""
        if not title:
            # Covers both a missing <h2> and an <h2> with no text, which would
            # otherwise produce a file literally named ".txt".
            title = f"article_{i+1}"

        content_element = article.find("p")
        content = content_element.get_text(strip=True) if content_element else "No content available."

        base_stem = _safe_filename(title) or f"article_{i+1}"
        stem = base_stem
        suffix = 2
        # Two articles with the same title must not overwrite each other.
        # Compare case-insensitively because some file systems ignore case.
        while stem.lower() in used_stems:
            stem = f"{base_stem}_{suffix}"
            suffix += 1
        used_stems.add(stem.lower())

        filename = os.path.join(save_dir, f"{stem}.txt")
        with open(filename, "w", encoding="utf-8") as f:
            f.write(f"{title}\n\n{content}")

        print(f"Saved: {filename}")
        saved_articles.append({"title": title, "content": content})

    return saved_articles


def _safe_filename(title):
    """Turns an article title into text usable as a single file-name component.

    Spaces, path separators, the other characters Windows forbids in file
    names, and control characters are each replaced by an underscore, so a
    scraped title can neither escape the target directory nor make ``open``
    fail on those characters.
    """
    return "".join(
        "_" if ch in ' <>:"/\\|?*' or ord(ch) < 32 else ch
        for ch in title
    )

In [3]:
# Smoke-test fetch_articles against a live DW article page
test_url = "https://www.dw.com/en/volkswagen-vw-banking-on-global-sales-to-stay-ahead-of-the-mobility-curve/a-71064923"
test_save_dir = "data/raw_test/"

# Fetch and save the articles, then echo each one as "title - content"
articles = fetch_articles(test_url, test_save_dir)
print("Fetched Articles:")
for article in articles:
    print(f'{article["title"]} - {article["content"]}')

Saved: data/raw_test/Home-grown_and_global_problems.txt
Fetched Articles:
Home-grown and global problems - VW produces and sells vehicles worldwide. Its Germanness is an important selling point, but the company is equally at home in China, Brazil and the US. Its dependence on foreign markets may soon come to bite.
