In [1]:
import json
import logging

import dateparser
import trafilatura

def extract_article(url, *, min_content_length=200):
    """
    Extract article text and publication date from a URL using Trafilatura.

    The page is downloaded with ``trafilatura.fetch_url``, extracted to JSON
    (with metadata) by ``trafilatura.extract``, and the ``"text"`` and
    ``"date"`` fields of that JSON are read. Extraction is best-effort: any
    failure is logged as a warning and reported to the caller as an empty
    result instead of an exception.

    Args:
        url: Address of the page to download.
        min_content_length: Minimum number of characters the stripped article
            text must have to be accepted. Shorter (or empty) text is treated
            as an incomplete extraction. Defaults to 200.

    Returns:
        A dict with two keys:
            ``"content"``: the stripped article text, or ``""`` on failure.
            ``"publish_date"``: the publication date formatted as
            ``DD-MM-YYYY``, or ``None`` when no date was found, the date
            could not be parsed, or extraction failed.
    """
    try:
        # Download HTML
        downloaded_html = trafilatura.fetch_url(url)
        if not downloaded_html:
            raise ValueError("Failed to download content")

        # Extract article content and metadata in JSON format
        extracted = trafilatura.extract(
            downloaded_html, output_format="json", with_metadata=True
        )
        if not extracted:
            raise ValueError("Trafilatura extraction failed")

        # Convert JSON string to dictionary
        data = json.loads(extracted)

        # `or ""` rather than a .get() default: the default is only used when
        # the key is absent, so a JSON null would otherwise reach .strip().
        content = (data.get("text") or "").strip()

        # Validate extracted content
        if not content or len(content) < min_content_length:
            raise ValueError("Content extraction incomplete")

        publish_date_str = data.get("date")
        publish_date = dateparser.parse(publish_date_str) if publish_date_str else None

    except Exception as exc:
        # Deliberate best-effort boundary around network and third-party
        # parsing calls: callers rely on getting an empty result rather than
        # an exception, but the reason is logged so failures can be diagnosed.
        logging.getLogger(__name__).warning(
            "Article extraction failed for %s: %s", url, exc
        )
        return {"content": "", "publish_date": None}

    # Format publication date
    formatted_date = publish_date.strftime("%d-%m-%Y") if publish_date else None

    return {"content": content, "publish_date": formatted_date}

# Demo: fetch a single article and print it together with its date.
url = "https://www.businesstimes.com.sg/international/central-bankers-move-slowly-through-fog-trumps-trade-wars"
result = extract_article(url)

# Flatten line breaks so the article prints on one line; use a placeholder
# message when extraction returned no text.
if result["content"]:
    result_content_clean = " ".join(result["content"].split("\n"))
else:
    result_content_clean = "No content extracted"

print("Result:", result_content_clean)
print("Publication Date:", result["publish_date"])

Result: Central bankers move slowly through fog of Trump’s trade wars Monetary policymakers are being knocked off course by the twists and turns of White House policy, with markets paring back interest-rate cut expectations across the globe [LONDON, FRANKFURT, WASHINGTON, HONG KONG] US President Donald Trump has already upended global trade and the postwar international security consensus. Now he’s throwing central banking into disarray. Monetary policymakers are being knocked off course by the twists and turns of White House policy, with markets paring back interest-rate cut expectations across the globe. No longer are central bankers “either the frontmen or rhythm-keepers of macro policy”, said Thierry Wizman, a strategist at Macquarie. “They are now followers, who are ceding their dynamism to events in federal legislatures, executive mansions and diplomatic halls.” US Federal Reserve chair Jerome Powell stressed the uncertainty of the outlook as he kept US rates unchanged on Wednesd