# In [None]:
import json
import os
from datetime import datetime
from utils import get_proxy_settings
from proxy import fetch_feed_content_with_proxy
from feed_parser import retry_with_backoff, parse_feed


def _load_seen_urls(output_dir):
    """
    Collect the article URLs recorded in the most recent metadata file.

    Args:
        output_dir (str): Directory that holds the per-run JSON files.

    Returns:
        set: String URLs found under the "articles" key of the newest
            ".json" file in ``output_dir`` (newest according to
            ``os.path.getctime``). Empty when there is no such file, when it
            cannot be read or decoded, or when it does not have the expected
            ``{"articles": [{"url": ...}, ...]}`` layout.
    """
    previous_filepath = max(
        (os.path.join(output_dir, f) for f in os.listdir(output_dir) if f.endswith(".json")),
        default=None,
        key=os.path.getctime,
    )
    if not previous_filepath:
        return set()

    try:
        with open(previous_filepath, "r", encoding="utf-8") as f:
            previous_data = json.load(f)
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses. A damaged previous file should not block the new run;
        # the cost is that already-seen articles may be saved again.
        print(f"Could not load previous metadata from {previous_filepath}: {e}")
        return set()

    if not isinstance(previous_data, dict):
        return set()
    previous_articles = previous_data.get("articles", [])
    if not isinstance(previous_articles, list):
        return set()

    # Entries without a usable "url" are ignored instead of raising KeyError.
    return {
        entry["url"]
        for entry in previous_articles
        if isinstance(entry, dict) and isinstance(entry.get("url"), str)
    }


def process_feeds(feed_urls):
    """
    Process multiple RSS feeds and save metadata to a single JSON file.

    Each feed is fetched through the configured proxy and parsed; articles
    whose URL already appears in the most recent JSON file under
    "rss_metadata" are skipped. The remaining articles are written to
    "rss_metadata/<UTC timestamp>.json" as
    ``{"timestamp": ..., "articles": [...]}``. Nothing is written when no new
    articles were collected. Progress and errors are reported with ``print``.

    Args:
        feed_urls (list): List of RSS feed URLs.
    """
    # Local import: the module only imports `datetime` from the datetime package.
    from datetime import timezone

    # Get proxy settings
    proxy_settings = get_proxy_settings()
    app_proxy = proxy_settings["app_proxy"]
    username = proxy_settings["username"]
    password = proxy_settings["password"]

    # Define output directory and JSON filename based on the current timestamp.
    # datetime.now(timezone.utc) replaces the deprecated datetime.utcnow();
    # the formatted string is the same.
    # NOTE: the timestamp contains ':' characters, which Windows does not allow
    # in filenames; the format is kept unchanged for compatibility with files
    # written by earlier runs.
    output_dir = "rss_metadata"
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    output_filepath = os.path.join(output_dir, f"{timestamp}.json")
    os.makedirs(output_dir, exist_ok=True)

    # URLs from the previous run, built once so each article is an O(1) lookup
    # instead of a rescan of the whole previous article list.
    seen_urls = _load_seen_urls(output_dir)

    # Initialize a container for the current run's metadata
    all_metadata = []

    # Process each feed
    for feed_url in feed_urls:
        print(f"Processing feed: {feed_url}")
        feed_content = fetch_feed_content_with_proxy(feed_url, app_proxy, username, password)
        if not feed_content:
            print(f"Failed to fetch feed: {feed_url}")
            continue

        # The loop stays inside the try so that a failure while iterating the
        # parsed articles is reported per feed and does not stop other feeds.
        try:
            for article in parse_feed(feed_content):
                url = article.get("url")
                if isinstance(url, str) and url in seen_urls:
                    print(f"Skipping already processed article: {url}")
                    continue

                # Add article metadata to the current run
                all_metadata.append(article)
        except Exception as e:
            print(f"Error parsing feed {feed_url}: {e}")

    # Save all metadata from this run to a single JSON file
    if all_metadata:
        with open(output_filepath, "w", encoding="utf-8") as f:
            json.dump({"timestamp": timestamp, "articles": all_metadata}, f, indent=4)
        print(f"Metadata saved to {output_filepath}")
    else:
        print("No new metadata to save.")
