In [None]:
from rss_collector.utils import get_proxy_settings
from rss_collector.proxy import fetch_feed_content_with_proxy
from rss_collector.feed_parser import retry_with_backoff, parse_feed
import json
import os
from datetime import datetime

def process_feeds(feed_urls):
    """
    Process multiple RSS feeds and save metadata for all feeds in a single JSON file.

    Each feed is fetched through the configured proxy and parsed; the title,
    link and published date of every entry are collected. Feeds that cannot be
    fetched or parsed are reported on stdout and skipped, so one bad feed does
    not abort the run. The output file is written even when no articles were
    collected.

    Args:
        feed_urls (list): List of RSS feed URLs.

    Returns:
        None
    """
    # Imported locally so this function does not depend on the module-level
    # import list also providing ``timezone``.
    from datetime import timezone

    # Get proxy settings
    proxy_settings = get_proxy_settings()
    app_proxy = proxy_settings["app_proxy"]
    username = proxy_settings["username"]
    password = proxy_settings["password"]

    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC datetime
    # formats to exactly the same string.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Initialize metadata storage
    all_metadata = {
        "timestamp": timestamp,
        "articles": [],
    }

    for feed_url in feed_urls:
        print(f"Processing feed: {feed_url}")
        feed_content = fetch_feed_content_with_proxy(feed_url, app_proxy, username, password)

        # A falsy result is treated as a failed fetch.
        if not feed_content:
            print(f"Failed to fetch feed: {feed_url}")
            continue

        try:
            # Parse feed content
            feed_data = parse_feed(feed_content)
            for entry in feed_data.entries:
                # Entries may lack any of these attributes, hence the placeholders.
                all_metadata["articles"].append({
                    "title": getattr(entry, "title", "No Title"),
                    "url": getattr(entry, "link", "No URL"),
                    "published_date": getattr(entry, "published", "No Date"),
                })
        except Exception as e:
            # Deliberately broad: a single malformed feed must not stop the others.
            print(f"Error parsing feed {feed_url}: {e}")
        else:
            print(f"Successfully processed feed: {feed_url}")

    # Save metadata to a single JSON file with timestamp as the filename.
    # NOTE(review): the timestamp contains ':' characters, which are not valid
    # in Windows file names; the name is kept unchanged for compatibility with
    # existing output. Two runs within the same second share a file name.
    output_dir = "rss_metadata"
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f"{timestamp}.json")
    # Explicit encoding so the result does not depend on the platform default.
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_metadata, f, indent=4)
    print(f"All metadata saved to {output_file}")


In [None]:
from rss_collector.utils import get_proxy_settings
from rss_collector.proxy import fetch_feed_content_with_proxy
from rss_collector.feed_parser import retry_with_backoff, parse_feed
import os
import json
from datetime import datetime


def process_feeds(feed_urls):
    """
    Process multiple RSS feeds and save metadata for all feeds into a single JSON file.

    The output directory is created up front, then every feed is fetched
    through the configured proxy and parsed. The title, link and published
    date of each entry are gathered into one document that is written to
    ``rss_metadata/<timestamp>.json``. Fetch and parse failures are printed
    and the affected feed is skipped; the file is written regardless.

    Args:
        feed_urls (list): List of RSS feed URLs.

    Returns:
        None
    """
    # Imported locally so this function does not depend on the module-level
    # import list also providing ``timezone``.
    from datetime import timezone

    # Get proxy settings
    proxy_settings = get_proxy_settings()
    app_proxy = proxy_settings["app_proxy"]
    username = proxy_settings["username"]
    password = proxy_settings["password"]

    # Directory for storing metadata
    output_dir = "rss_metadata"
    os.makedirs(output_dir, exist_ok=True)

    # Timestamp for file naming. datetime.utcnow() is deprecated since
    # Python 3.12; the aware equivalent yields the identical string.
    # NOTE(review): the ':' characters in the timestamp are not valid in
    # Windows file names, and the one-second resolution means two runs in the
    # same second write to the same file. Kept as-is for compatibility.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    metadata_file = os.path.join(output_dir, f"{timestamp}.json")

    # Collect metadata
    articles = []
    all_metadata = {
        "timestamp": timestamp,
        "articles": articles,
    }

    for feed_url in feed_urls:
        print(f"Processing feed: {feed_url}")
        feed_content = fetch_feed_content_with_proxy(feed_url, app_proxy, username, password)

        # A falsy result is treated as a failed fetch.
        if not feed_content:
            print(f"Failed to fetch feed: {feed_url}")
            continue

        try:
            # Parse the feed content
            feed_data = parse_feed(feed_content)
            # Missing attributes fall back to placeholder strings.
            articles.extend(
                {
                    "title": getattr(entry, "title", "No Title"),
                    "url": getattr(entry, "link", "No URL"),
                    "published_date": getattr(entry, "published", "No Date"),
                }
                for entry in feed_data.entries
            )
        except Exception as e:
            # Deliberately broad: one malformed feed must not abort the run.
            print(f"Error parsing feed {feed_url}: {e}")
        else:
            print(f"Metadata from {feed_url} added.")

    # Save all metadata to a single JSON file; explicit encoding keeps the
    # output independent of the platform default.
    with open(metadata_file, "w", encoding="utf-8") as f:
        json.dump(all_metadata, f, indent=4)
    print(f"All metadata saved to {metadata_file}")
