# LOKALER NEWSLETTER GENERATOR - PROOF OF CONCEPT

Automatische Erstellung von lokalen Newslettern/Artikeln pro Ort
Tech Stack: Claude, Perplexity, Firecrawl, Google Gemini 2.5 Flash


In [1]:
# =============================================================================
# ZELLE 0: Requirements
# =============================================================================
# @title Requirements Installation
import subprocess
import sys

def install_requirements():
    """Installiert alle ben√∂tigten Packages"""
    requirements = [
        "firecrawl-py",
        "google-generativeai",
        "anthropic",
        "requests",
        "pandas",
        "asyncio",
        "nest-asyncio",
        "ipywidgets",
        "python-dotenv"
    ]

    for package in requirements:
        print(f"üì¶ Installiere {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])

    print("‚úÖ Alle Requirements installiert!")

# Installation ausf√ºhren
install_requirements()

# Standard Imports
import asyncio
import json
import time
import requests
from datetime import datetime
import pandas as pd
from google.colab import userdata
import nest_asyncio

# Async Support f√ºr Colab
nest_asyncio.apply()

print("üöÄ Setup komplett - alle Libraries geladen!")

üì¶ Installiere firecrawl-py...
üì¶ Installiere google-generativeai...
üì¶ Installiere anthropic...
üì¶ Installiere requests...
üì¶ Installiere pandas...
üì¶ Installiere asyncio...
üì¶ Installiere nest-asyncio...
üì¶ Installiere ipywidgets...
üì¶ Installiere python-dotenv...
‚úÖ Alle Requirements installiert!
üöÄ Setup komplett - alle Libraries geladen!


In [2]:
# =============================================================================
# ZELLE 0b: Google Drive Setup + Ordnerstruktur
# =============================================================================
# @title Google Drive Mount + Newsletter System Ordnerstruktur anlegen

import os
from pathlib import Path
from google.colab import drive
import yaml
import json
from datetime import datetime

def setup_google_drive():
    """Mounted Google Drive und erstellt Newsletter System Ordnerstruktur"""

    # Google Drive mounten
    print("üîó Mounte Google Drive...")
    drive.mount('/content/drive')
    print("‚úÖ Google Drive erfolgreich gemounted")

    # Base Path definieren
    base_path = Path("/content/drive/MyDrive/Newsletter_System")

    # Ordnerstruktur erstellen
    folders = [
        "configs",
        "data/sessions",
        "data/sessions/archive",
        "templates",
        "logs"
    ]

    print(f"üìÅ Erstelle Newsletter System Struktur: {base_path}")

    # Hauptordner erstellen
    base_path.mkdir(exist_ok=True)

    # Unterordner erstellen
    for folder in folders:
        folder_path = base_path / folder
        folder_path.mkdir(parents=True, exist_ok=True)
        print(f"  ‚úÖ {folder}")

    # Erstelle README f√ºr Dokumentation
    readme_content = """# Newsletter System - Google Drive Storage

## Ordnerstruktur:

### `/configs/`
- `locations.yaml` - Stadt-spezifische Konfigurationen
- `categories.yaml` - Kategorie-Templates und Suchstrategien
- `api_settings.yaml` - API-spezifische Parameter

### `/data/sessions/`
- Session-basierte Datensammlung
- Format: `YYYY-MM-DD_location_HH-MM/`
- Jede Session enth√§lt: raw_responses/, processed_content/, final_newsletter

### `/templates/`
- Newsletter-Templates
- Prompt-Templates f√ºr verschiedene APIs

### `/logs/`
- System-Logs und Error-Tracking

## Erstellungszeit:
""" + datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    readme_path = base_path / "README.md"
    with open(readme_path, "w", encoding="utf-8") as f:
        f.write(readme_content)

    print(f"üìÑ README erstellt: {readme_path}")

    return base_path

def create_initial_configs(base_path):
    """Erstellt initiale YAML-Konfigurationsdateien"""

    configs_path = base_path / "configs"

    # 1. LOCATIONS.YAML
    locations_config = {
        "m√ºnchen": {
            "official_names": ["M√ºnchen", "Munich", "Muenchen"],
            "url_slug": "muenchen",
            "official_domains": ["muenchen.de", "stadtwerke-muenchen.de", "mvg.de"],
            "timezone": "Europe/Berlin",
            "region_context": "Bayern Deutschland",
            "population": 1500000,
            "type": "landeshauptstadt"
        },
        "berlin": {
            "official_names": ["Berlin"],
            "url_slug": "berlin",
            "official_domains": ["berlin.de", "bvg.de"],
            "timezone": "Europe/Berlin",
            "region_context": "Deutschland Hauptstadt",
            "population": 3700000,
            "type": "hauptstadt"
        },
        "hamburg": {
            "official_names": ["Hamburg"],
            "url_slug": "hamburg",
            "official_domains": ["hamburg.de", "hvv.de"],
            "timezone": "Europe/Berlin",
            "region_context": "Norddeutschland",
            "population": 1900000,
            "type": "hansestadt"
        }
    }

    locations_file = configs_path / "locations.yaml"
    with open(locations_file, "w", encoding="utf-8") as f:
        yaml.dump(locations_config, f, allow_unicode=True, default_flow_style=False)

    print(f"‚úÖ Locations Config: {locations_file}")

    # 2. CATEGORIES.YAML
    categories_config = {
        "wetter": {
            "priority": "high",
            "time_sensitivity": "today",
            "description": "Aktuelle Wettervorhersage und Wetterwarnungen",
            "search_templates": {
                "claude_web": "aktuelles Wetter {location} heute {date_context} Deutschland Vorhersage",
                "perplexity": "Wetter Vorhersage {location} {date_context} Deutschland aktuell",
                "firecrawl_search": "Wetter {location} heute Vorhersage Deutschland"
            },
            "fallback_urls": [
                "https://www.wetter.com/deutschland/{location_slug}",
                "https://www.wetteronline.de/wetter/{location_slug}"
            ],
            "keywords": ["Wetter", "Temperatur", "Regen", "Sonne", "Vorhersage"]
        },

        "verkehr": {
            "priority": "high",
            "time_sensitivity": "current",
            "description": "Aktuelle Verkehrslage, St√∂rungen und √ñPNV-Infos",
            "search_templates": {
                "claude_web": "Verkehrslage {location} aktuell St√∂rungen Staus {date_context}",
                "perplexity": "aktuelle Verkehrsst√∂rungen {location} √ñPNV Staus {date_context}",
                "firecrawl_search": "Verkehr {location} St√∂rungen aktuell"
            },
            "fallback_urls": [
                "https://www.verkehr.nrw/verkehrslage/{location_slug}",
                "https://www.adac.de/verkehr/verkehrsinformationen/"
            ],
            "keywords": ["Verkehr", "Stau", "√ñPNV", "St√∂rung", "Umleitung"]
        },

        "events": {
            "priority": "medium",
            "time_sensitivity": "today_tomorrow",
            "description": "Lokale Veranstaltungen, Konzerte, Kultur",
            "search_templates": {
                "claude_web": "Veranstaltungen Events {location} {date_context} heute morgen",
                "perplexity": "Events Konzerte Ausstellungen {location} {date_context}",
                "firecrawl_search": "Veranstaltungen {location} heute Events"
            },
            "scrape_urls": [
                "https://www.{location_slug}.de/veranstaltungen",
                "https://www.eventbrite.de/d/germany--{location_slug}/events/"
            ],
            "keywords": ["Event", "Konzert", "Ausstellung", "Theater", "Festival"]
        },

        "nachrichten": {
            "priority": "high",
            "time_sensitivity": "today",
            "description": "Lokale Nachrichten und wichtige Meldungen",
            "search_templates": {
                "claude_web": "lokale Nachrichten {location} {date_context} heute aktuell",
                "perplexity": "{location} News Nachrichten lokal {date_context}",
                "firecrawl_search": "{location} Nachrichten heute lokal"
            },
            "scrape_urls": [
                "https://www.{location_slug}.de/aktuelles",
                "https://www.sueddeutsche.de/{location_slug}"
            ],
            "keywords": ["Nachrichten", "News", "Meldung", "aktuell", "lokal"]
        },

        "rathaus": {
            "priority": "medium",
            "time_sensitivity": "today",
            "description": "Offizielle Mitteilungen der Stadtverwaltung",
            "primary_method": "scrape",
            "scrape_urls": [
                "https://www.{location_slug}.de/rathaus",
                "https://www.{location_slug}.de/aktuelles",
                "https://www.stadt-{location_slug}.de"
            ],
            "search_templates": {
                "claude_web": "Rathaus {location} Mitteilungen Bekanntmachungen {date_context}",
                "perplexity": "{location} Stadtverwaltung Rathaus Mitteilungen {date_context}",
                "firecrawl_search": "Rathaus {location} Mitteilungen heute"
            },
            "keywords": ["Rathaus", "Stadtverwaltung", "Bekanntmachung", "Mitteilung"]
        },

        "schulen": {
            "priority": "low",
            "time_sensitivity": "today",
            "description": "Schulnachrichten und Bildungsinfos",
            "search_templates": {
                "claude_web": "Schulen {location} Mitteilungen Unterrichtsausfall {date_context}",
                "perplexity": "{location} Schulnachrichten Bildung {date_context}",
                "firecrawl_search": "Schulen {location} Mitteilungen"
            },
            "keywords": ["Schule", "Bildung", "Unterricht", "Mitteilung"]
        },

        "vereine": {
            "priority": "low",
            "time_sensitivity": "today_tomorrow",
            "description": "Vereinsaktivit√§ten und lokale Gemeinschaft",
            "search_templates": {
                "claude_web": "Vereine {location} Aktivit√§ten Termine {date_context}",
                "perplexity": "{location} Vereine Events Aktivit√§ten {date_context}",
                "firecrawl_search": "Vereine {location} Termine heute"
            },
            "keywords": ["Verein", "Gemeinschaft", "Aktivit√§t", "Termin"]
        },

        "sport": {
            "priority": "medium",
            "time_sensitivity": "today",
            "description": "Lokale Sportergebnisse und Sportveranstaltungen",
            "search_templates": {
                "claude_web": "{location} Sport Ergebnisse Spiele {date_context} lokal",
                "perplexity": "Sport {location} Ergebnisse Vereine {date_context}",
                "firecrawl_search": "{location} Sport heute Ergebnisse"
            },
            "keywords": ["Sport", "Spiel", "Ergebnis", "Verein", "Mannschaft"]
        }
    }

    categories_file = configs_path / "categories.yaml"
    with open(categories_file, "w", encoding="utf-8") as f:
        yaml.dump(categories_config, f, allow_unicode=True, default_flow_style=False)

    print(f"‚úÖ Categories Config: {categories_file}")

    # 3. API_SETTINGS.YAML
    api_settings_config = {
        "claude": {
            "model": "claude-3-5-haiku-latest",
            "max_tokens": 2000,
            "web_search": {
                "max_uses": 5,
                "user_location": {
                    "type": "approximate",
                    "country": "DE",
                    "timezone": "Europe/Berlin"
                }
            }
        },

        "perplexity": {
            "model": "sonar-pro",
            "temperature": 0.2,
            "top_p": 0.9,
            "extra_body": {
                "search_mode": "web",
                "return_images": False,
                "return_related_questions": False,
                "web_search_options": {
                    "search_context_size": "medium",
                    "user_location": {"country": "DE"}
                }
            }
        },

        "firecrawl": {
            "default_formats": ["markdown", "links"],
            "location": {
                "country": "DE",
                "languages": ["de-DE"]
            },
            "search_limits": {
                "default": 5,
                "wetter": 3,
                "verkehr": 4,
                "events": 5
            },
            "scrape_timeout": 30000
        },

        "gemini": {
            "model": "gemini-2.0-flash-exp",
            "temperature": 0.3,
            "max_output_tokens": 2000
        }
    }

    api_settings_file = configs_path / "api_settings.yaml"
    with open(api_settings_file, "w", encoding="utf-8") as f:
        yaml.dump(api_settings_config, f, allow_unicode=True, default_flow_style=False)

    print(f"‚úÖ API Settings Config: {api_settings_file}")

    return {
        "locations": locations_file,
        "categories": categories_file,
        "api_settings": api_settings_file
    }

def test_config_access(config_files):
    """Testet das Laden der erstellten Konfigurationsdateien"""

    print("\nüß™ TESTE CONFIG-ZUGRIFF")
    print("=" * 40)

    try:
        # Teste Locations Config
        with open(config_files["locations"], "r", encoding="utf-8") as f:
            locations = yaml.safe_load(f)

        print(f"üìç Locations geladen: {list(locations.keys())}")
        print(f"   M√ºnchen Domains: {locations['m√ºnchen']['official_domains']}")

        # Teste Categories Config
        with open(config_files["categories"], "r", encoding="utf-8") as f:
            categories = yaml.safe_load(f)

        print(f"üìã Kategorien geladen: {list(categories.keys())}")
        print(f"   Wetter Priority: {categories['wetter']['priority']}")
        print(f"   Events Template: {categories['events']['search_templates']['claude_web']}")

        # Teste API Settings Config
        with open(config_files["api_settings"], "r", encoding="utf-8") as f:
            api_settings = yaml.safe_load(f)

        print(f"üîß APIs konfiguriert: {list(api_settings.keys())}")
        print(f"   Claude Model: {api_settings['claude']['model']}")

        print("‚úÖ Alle Config-Dateien erfolgreich lesbar!")

    except Exception as e:
        print(f"‚ùå Config Test Fehler: {e}")

# =============================================================================
# AUSF√úHRUNG
# =============================================================================

print("üöÄ STARTE GOOGLE DRIVE SETUP")
print("=" * 50)

try:
    # 1. Google Drive Setup
    base_path = setup_google_drive()
    print(f"‚úÖ Base Path: {base_path}")

    # 2. Config-Dateien erstellen
    print(f"\nüìù ERSTELLE KONFIGURATIONSDATEIEN")
    config_files = create_initial_configs(base_path)

    # 3. Config-Zugriff testen
    test_config_access(config_files)

    print(f"\nüéâ SETUP KOMPLETT!")
    print(f"üìÅ Newsletter System bereit unter: {base_path}")
    print(f"üìã {len(config_files)} Konfigurationsdateien erstellt")

    # Global verf√ºgbar machen
    NEWSLETTER_SYSTEM_PATH = str(base_path)
    print(f"üåê Global verf√ºgbar: NEWSLETTER_SYSTEM_PATH")

except Exception as e:
    print(f"‚ùå Setup Fehler: {e}")
    import traceback
    print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 50)

üöÄ STARTE GOOGLE DRIVE SETUP
üîó Mounte Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
‚úÖ Google Drive erfolgreich gemounted
üìÅ Erstelle Newsletter System Struktur: /content/drive/MyDrive/Newsletter_System
  ‚úÖ configs
  ‚úÖ data/sessions
  ‚úÖ data/sessions/archive
  ‚úÖ templates
  ‚úÖ logs
üìÑ README erstellt: /content/drive/MyDrive/Newsletter_System/README.md
‚úÖ Base Path: /content/drive/MyDrive/Newsletter_System

üìù ERSTELLE KONFIGURATIONSDATEIEN
‚úÖ Locations Config: /content/drive/MyDrive/Newsletter_System/configs/locations.yaml
‚úÖ Categories Config: /content/drive/MyDrive/Newsletter_System/configs/categories.yaml
‚úÖ API Settings Config: /content/drive/MyDrive/Newsletter_System/configs/api_settings.yaml

üß™ TESTE CONFIG-ZUGRIFF
üìç Locations geladen: ['berlin', 'hamburg', 'm√ºnchen']
   M√ºnchen Domains: ['muenchen.de', 'stadtwerke-muenchen.de', 'mvg.de']
üìã Katego

In [3]:
# =============================================================================
# ZELLE 0c: ConfigManager Implementation
# =============================================================================
# @title ConfigManager - Zentrale Konfigurationsverwaltung f√ºr Newsletter System

import yaml
import json
from pathlib import Path
from datetime import datetime, timedelta
import re

class ConfigManager:
    """Zentrale Verwaltung aller Konfigurationen f√ºr das Newsletter System"""

    def __init__(self, base_path=None):
        """
        Initialisiert ConfigManager mit Pfad zum Newsletter System

        Args:
            base_path: Pfad zum Newsletter System (default: aus Global)
        """
        if base_path is None:
            if 'NEWSLETTER_SYSTEM_PATH' in globals():
                self.base_path = Path(NEWSLETTER_SYSTEM_PATH)
            else:
                raise ValueError("NEWSLETTER_SYSTEM_PATH nicht gefunden. F√ºhre zuerst Zelle 0b aus!")
        else:
            self.base_path = Path(base_path)

        self.configs_path = self.base_path / "configs"

        # Konfigurationen laden
        self.locations = {}
        self.categories = {}
        self.api_settings = {}

        self.load_all_configs()

    def load_all_configs(self):
        """L√§dt alle YAML-Konfigurationsdateien"""
        try:
            # Locations laden
            locations_file = self.configs_path / "locations.yaml"
            if locations_file.exists():
                with open(locations_file, "r", encoding="utf-8") as f:
                    self.locations = yaml.safe_load(f)
                print(f"‚úÖ Locations geladen: {len(self.locations)} Orte")

            # Categories laden
            categories_file = self.configs_path / "categories.yaml"
            if categories_file.exists():
                with open(categories_file, "r", encoding="utf-8") as f:
                    self.categories = yaml.safe_load(f)
                print(f"‚úÖ Categories geladen: {len(self.categories)} Kategorien")

            # API Settings laden
            api_file = self.configs_path / "api_settings.yaml"
            if api_file.exists():
                with open(api_file, "r", encoding="utf-8") as f:
                    self.api_settings = yaml.safe_load(f)
                print(f"‚úÖ API Settings geladen: {len(self.api_settings)} APIs")

        except Exception as e:
            print(f"‚ùå Fehler beim Laden der Configs: {e}")
            raise

    # =========================================================================
    # LOCATION MANAGEMENT
    # =========================================================================

    def get_location_config(self, location):
        """
        Gibt Konfiguration f√ºr einen Ort zur√ºck

        Args:
            location: Ortsname (case-insensitive)

        Returns:
            dict: Location config oder None
        """
        location_key = location.lower()
        return self.locations.get(location_key)

    def get_location_names(self, location):
        """Gibt alle Namen-Varianten f√ºr einen Ort zur√ºck"""
        config = self.get_location_config(location)
        if config:
            return config.get("official_names", [location])
        return [location]

    def get_location_domains(self, location):
        """Gibt offizielle Domains f√ºr einen Ort zur√ºck"""
        config = self.get_location_config(location)
        if config:
            return config.get("official_domains", [])
        return []

    def get_url_slug(self, location):
        """Gibt URL-kompatible Schreibweise f√ºr Ort zur√ºck"""
        config = self.get_location_config(location)
        if config:
            return config.get("url_slug", location.lower())
        return location.lower()

    # =========================================================================
    # CATEGORY MANAGEMENT
    # =========================================================================

    def get_category_config(self, category):
        """
        Gibt Konfiguration f√ºr eine Kategorie zur√ºck

        Args:
            category: Kategoriename (case-insensitive)

        Returns:
            dict: Category config oder None
        """
        category_key = category.lower()
        return self.categories.get(category_key)

    def get_category_priority(self, category):
        """Gibt Priorit√§t einer Kategorie zur√ºck"""
        config = self.get_category_config(category)
        if config:
            return config.get("priority", "medium")
        return "medium"

    def get_category_method(self, category):
        """Bestimmt beste Methode f√ºr eine Kategorie"""
        config = self.get_category_config(category)
        if config:
            # Explizit definierte Methode
            if "primary_method" in config:
                return config["primary_method"]

            # Scrape wenn URLs vorhanden
            if "scrape_urls" in config and config["scrape_urls"]:
                return "scrape"

            # Sonst Search
            return "search"
        return "search"

    # =========================================================================
    # TEMPLATE SYSTEM
    # =========================================================================

    def get_search_template(self, category, api, location):
        """
        Gibt konfigurierten Search-Template f√ºr Kategorie/API/Location zur√ºck

        Args:
            category: Kategoriename
            api: API-Name ("claude_web", "perplexity", "firecrawl_search")
            location: Ortsname

        Returns:
            str: Fertig konfigurierte Query
        """
        # Kategorie-Config laden
        cat_config = self.get_category_config(category)
        if not cat_config:
            return f"{category} {location}"  # Fallback

        # Template f√ºr API finden
        templates = cat_config.get("search_templates", {})
        template = templates.get(api)

        if not template:
            # Fallback: ersten verf√ºgbaren Template nehmen
            if templates:
                template = list(templates.values())[0]
            else:
                return f"{category} {location}"

        # Template mit Location-Daten anreichern
        enriched_template = self.enrich_template(template, location)

        return enriched_template

    def enrich_template(self, template, location):
        """
        Ersetzt Platzhalter in Templates mit konkreten Werten

        Args:
            template: Template-String mit Platzhaltern
            location: Ortsname

        Returns:
            str: Template mit ersetzten Platzhaltern
        """
        # Location-Daten holen
        location_config = self.get_location_config(location)

        replacements = {
            "{location}": location,
            "{location_slug}": self.get_url_slug(location),
            "{region_context}": location_config.get("region_context", "Deutschland") if location_config else "Deutschland"
        }

        # Datum-Platzhalter (wird sp√§ter von TimeContextManager √ºbernommen)
        replacements["{date_context}"] = "heute"

        # Ersetze alle Platzhalter
        enriched = template
        for placeholder, value in replacements.items():
            enriched = enriched.replace(placeholder, value)

        return enriched

    def get_scrape_urls(self, category, location):
        """
        Gibt Scrape-URLs f√ºr Kategorie/Location zur√ºck

        Args:
            category: Kategoriename
            location: Ortsname

        Returns:
            list: Liste der URLs zum Scrapen
        """
        cat_config = self.get_category_config(category)
        if not cat_config:
            return []

        urls = cat_config.get("scrape_urls", [])

        # URLs mit Location-Daten anreichern
        enriched_urls = []
        for url in urls:
            enriched_url = self.enrich_template(url, location)
            enriched_urls.append(enriched_url)

        return enriched_urls

    # =========================================================================
    # API SETTINGS
    # =========================================================================

    def get_api_config(self, api):
        """
        Gibt Konfiguration f√ºr eine API zur√ºck

        Args:
            api: API-Name ("claude", "perplexity", "firecrawl", "gemini")

        Returns:
            dict: API config oder leeres dict
        """
        return self.api_settings.get(api, {})

    def get_api_limits(self, api, category=None):
        """Gibt API-spezifische Limits zur√ºck"""
        api_config = self.get_api_config(api)

        if api == "firecrawl" and category:
            limits = api_config.get("search_limits", {})
            return limits.get(category, limits.get("default", 5))

        return api_config.get("limit", 5)

    # =========================================================================
    # NEWSLETTER WORKFLOW HELPERS
    # =========================================================================

    def get_newsletter_categories(self, priority_filter=None):
        """
        Gibt Kategorien f√ºr Newsletter zur√ºck, optional gefiltert nach Priorit√§t

        Args:
            priority_filter: "high", "medium", "low" oder None f√ºr alle

        Returns:
            list: Kategorienamen sortiert nach Priorit√§t
        """
        if priority_filter:
            filtered_categories = [
                name for name, config in self.categories.items()
                if config.get("priority") == priority_filter
            ]
        else:
            filtered_categories = list(self.categories.keys())

        # Sortiere nach Priorit√§t: high > medium > low
        priority_order = {"high": 0, "medium": 1, "low": 2}

        def priority_key(category):
            config = self.get_category_config(category)
            priority = config.get("priority", "medium") if config else "medium"
            return priority_order.get(priority, 1)

        return sorted(filtered_categories, key=priority_key)

    def create_newsletter_plan(self, location, categories=None):
        """
        Erstellt Datensammlung-Plan f√ºr Newsletter

        Args:
            location: Ortsname
            categories: Liste der Kategorien oder None f√ºr alle high/medium

        Returns:
            dict: Detaillierter Plan mit Queries und Methoden
        """
        if categories is None:
            categories = self.get_newsletter_categories(priority_filter="high")
            categories.extend(self.get_newsletter_categories(priority_filter="medium"))

        plan = {
            "location": location,
            "location_config": self.get_location_config(location),
            "timestamp": datetime.now().isoformat(),
            "categories": {}
        }

        for category in categories:
            cat_config = self.get_category_config(category)
            if not cat_config:
                continue

            method = self.get_category_method(category)

            if method == "search":
                # Search-Queries f√ºr alle APIs
                search_queries = {}
                for api in ["claude_web", "perplexity", "firecrawl_search"]:
                    query = self.get_search_template(category, api, location)
                    search_queries[api] = query

                plan["categories"][category] = {
                    "method": "search",
                    "priority": cat_config.get("priority", "medium"),
                    "queries": search_queries,
                    "limit": self.get_api_limits("firecrawl", category)
                }

            elif method == "scrape":
                # Scrape-URLs
                urls = self.get_scrape_urls(category, location)

                plan["categories"][category] = {
                    "method": "scrape",
                    "priority": cat_config.get("priority", "medium"),
                    "urls": urls
                }

        return plan

    # =========================================================================
    # DEBUGGING & INFO
    # =========================================================================

    def get_system_info(self):
        """Gibt Systeminfo zur√ºck"""
        return {
            "base_path": str(self.base_path),
            "locations_count": len(self.locations),
            "categories_count": len(self.categories),
            "apis_count": len(self.api_settings),
            "available_locations": list(self.locations.keys()),
            "available_categories": list(self.categories.keys()),
            "configured_apis": list(self.api_settings.keys())
        }

# =============================================================================
# INITIALISIERUNG UND TESTS
# =============================================================================

# ConfigManager initialisieren
try:
    config_manager = ConfigManager()
    print("‚úÖ ConfigManager initialisiert")

    # System Info
    info = config_manager.get_system_info()
    print(f"üìä System Info: {info['locations_count']} Locations, {info['categories_count']} Categories, {info['apis_count']} APIs")

except Exception as e:
    print(f"‚ùå ConfigManager Initialisierung fehlgeschlagen: {e}")
    config_manager = None

# =============================================================================
# DIREKTER TEST
# =============================================================================

if config_manager:
    print("\nüß™ TESTE CONFIGMANAGER")
    print("=" * 40)

    try:
        # Test 1: Location Lookup
        print("üìç Test 1: Location Config")
        location_config = config_manager.get_location_config("m√ºnchen")
        print(f"  M√ºnchen Domains: {config_manager.get_location_domains('m√ºnchen')}")
        print(f"  URL Slug: {config_manager.get_url_slug('M√ºnchen')}")

        # Test 2: Search Templates
        print(f"\nüîç Test 2: Search Templates")
        wetter_query_claude = config_manager.get_search_template("wetter", "claude_web", "M√ºnchen")
        wetter_query_perplexity = config_manager.get_search_template("wetter", "perplexity", "M√ºnchen")
        print(f"  Claude Wetter: {wetter_query_claude}")
        print(f"  Perplexity Wetter: {wetter_query_perplexity}")

        # Test 3: Scrape URLs
        print(f"\nüï∑Ô∏è Test 3: Scrape URLs")
        rathaus_urls = config_manager.get_scrape_urls("rathaus", "m√ºnchen")
        events_urls = config_manager.get_scrape_urls("events", "m√ºnchen")
        print(f"  Rathaus URLs: {rathaus_urls}")
        print(f"  Events URLs: {events_urls}")

        # Test 4: API Settings
        print(f"\nüîß Test 4: API Settings")
        claude_config = config_manager.get_api_config("claude")
        firecrawl_limits = config_manager.get_api_limits("firecrawl", "wetter")
        print(f"  Claude Model: {claude_config.get('model', 'N/A')}")
        print(f"  Firecrawl Wetter Limit: {firecrawl_limits}")

        # Test 5: Newsletter Plan
        print(f"\nüì∞ Test 5: Newsletter Plan")
        newsletter_plan = config_manager.create_newsletter_plan("m√ºnchen", ["wetter", "events"])
        print(f"  Plan Location: {newsletter_plan['location']}")
        print(f"  Plan Kategorien: {list(newsletter_plan['categories'].keys())}")

        for cat_name, cat_plan in newsletter_plan['categories'].items():
            print(f"  {cat_name}: {cat_plan['method']} ({cat_plan['priority']} priority)")
            if cat_plan['method'] == 'search':
                print(f"    Claude Query: {cat_plan['queries']['claude_web']}")

        print(f"\n‚úÖ Alle ConfigManager Tests erfolgreich!")

    except Exception as test_error:
        print(f"‚ùå ConfigManager Test Fehler: {test_error}")
        import traceback
        print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 40)
print("‚úÖ ConfigManager Setup komplett\n")

‚úÖ Locations geladen: 3 Orte
‚úÖ Categories geladen: 8 Kategorien
‚úÖ API Settings geladen: 4 APIs
‚úÖ ConfigManager initialisiert
üìä System Info: 3 Locations, 8 Categories, 4 APIs

üß™ TESTE CONFIGMANAGER
üìç Test 1: Location Config
  M√ºnchen Domains: ['muenchen.de', 'stadtwerke-muenchen.de', 'mvg.de']
  URL Slug: muenchen

üîç Test 2: Search Templates
  Claude Wetter: aktuelles Wetter M√ºnchen heute heute Deutschland Vorhersage
  Perplexity Wetter: Wetter Vorhersage M√ºnchen heute Deutschland aktuell

üï∑Ô∏è Test 3: Scrape URLs
  Rathaus URLs: ['https://www.muenchen.de/rathaus', 'https://www.muenchen.de/aktuelles', 'https://www.stadt-muenchen.de']
  Events URLs: ['https://www.muenchen.de/veranstaltungen', 'https://www.eventbrite.de/d/germany--muenchen/events/']

üîß Test 4: API Settings
  Claude Model: claude-3-5-haiku-latest
  Firecrawl Wetter Limit: 3

üì∞ Test 5: Newsletter Plan
  Plan Location: m√ºnchen
  Plan Kategorien: ['wetter', 'events']
  wetter: search (high prio

In [4]:
# =============================================================================
# ZELLE 0d: TimeContextManager Implementation
# =============================================================================
# @title TimeContextManager - Intelligente Datums-Verarbeitung f√ºr Newsletter APIs

from datetime import datetime, timedelta, time
import pytz
from typing import Dict, List, Optional
import locale

class TimeContextManager:
    """
    Intelligente Datums- und Zeit-Kontextualisierung f√ºr Newsletter APIs
    Stellt sicher, dass alle Suchanfragen zeitlich korrekt kontextualisiert sind
    """

    def __init__(self, location="m√ºnchen"):
        """
        Initialisiert TimeContextManager

        Args:
            location: Standard-Ort f√ºr Timezone (default: m√ºnchen)
        """
        self.location = location

        # Deutsche Zeitzone setzen
        self.timezone = pytz.timezone("Europe/Berlin")

        # Aktuelle Zeit in deutscher Zeitzone
        self.now = datetime.now(self.timezone)

        # Basis-Zeiten berechnen
        self._calculate_time_references()

        # Deutsche Locale f√ºr Wochentage/Monate (falls verf√ºgbar)
        try:
            locale.setlocale(locale.LC_TIME, 'de_DE.UTF-8')
        except:
            try:
                locale.setlocale(locale.LC_TIME, 'German')
            except:
                pass  # Fallback zu English

        print(f"‚è∞ TimeContextManager initialisiert f√ºr {self.now.strftime('%A, %d.%m.%Y %H:%M')} (Zeitzone: {self.timezone})")

    def _calculate_time_references(self):
        """Berechnet alle relevanten Zeit-Referenzen"""

        # Basis-Tage
        self.heute = self.now.date()
        self.gestern = self.heute - timedelta(days=1)
        self.morgen = self.heute + timedelta(days=1)
        self.√ºbermorgen = self.heute + timedelta(days=2)

        # Wochenbezug
        self.wochenstart = self.heute - timedelta(days=self.heute.weekday())  # Montag
        self.wochenende_start = self.wochenstart + timedelta(days=5)  # Samstag
        self.wochenende_end = self.wochenstart + timedelta(days=6)    # Sonntag

        # Monatsbezug
        self.monatsstart = self.heute.replace(day=1)

        # String-Repr√§sentationen
        self.heute_str = self.heute.strftime("%d.%m.%Y")
        self.gestern_str = self.gestern.strftime("%d.%m.%Y")
        self.morgen_str = self.morgen.strftime("%d.%m.%Y")

        # Wochentage
        self.heute_wochentag = self.heute.strftime("%A")
        self.morgen_wochentag = self.morgen.strftime("%A")

        # Zeit des Tages
        current_hour = self.now.hour
        if current_hour < 6:
            self.tageszeit = "nacht"
        elif current_hour < 12:
            self.tageszeit = "morgen"
        elif current_hour < 18:
            self.tageszeit = "nachmittag"
        else:
            self.tageszeit = "abend"

    # =========================================================================
    # ZEIT-KONTEXT GENERIERUNG
    # =========================================================================

    def get_time_context(self, timeframe="heute", api="generic"):
        """
        Generiert zeitlichen Kontext f√ºr verschiedene Timeframes und APIs

        Args:
            timeframe: "heute", "morgen", "gestern", "wochenende", "aktuell", "current"
            api: "claude_web", "perplexity", "firecrawl", "generic"

        Returns:
            dict: Zeit-Kontext mit verschiedenen Formaten
        """
        context = {
            "timeframe": timeframe,
            "api": api,
            "timestamp": self.now.isoformat(),
            "german_keywords": [],
            "english_keywords": [],
            "date_strings": [],
            "api_parameters": {}
        }

        if timeframe == "heute":
            context.update({
                "german_keywords": ["heute", "aktuell", "momentan", f"am {self.heute_wochentag}"],
                "english_keywords": ["today", "current", "now", "currently"],
                "date_strings": [self.heute_str, self.heute.strftime("%Y-%m-%d")],
                "human_readable": f"heute ({self.heute_str})"
            })

        elif timeframe == "morgen":
            context.update({
                "german_keywords": ["morgen", f"am {self.morgen_wochentag}", "morgiger Tag"],
                "english_keywords": ["tomorrow", f"on {self.morgen.strftime('%A')}"],
                "date_strings": [self.morgen_str, self.morgen.strftime("%Y-%m-%d")],
                "human_readable": f"morgen ({self.morgen_str})"
            })

        elif timeframe == "gestern":
            context.update({
                "german_keywords": ["gestern", "am gestrigen Tag"],
                "english_keywords": ["yesterday"],
                "date_strings": [self.gestern_str, self.gestern.strftime("%Y-%m-%d")],
                "human_readable": f"gestern ({self.gestern_str})"
            })

        elif timeframe == "wochenende":
            context.update({
                "german_keywords": ["Wochenende", "Samstag Sonntag", "am Wochenende"],
                "english_keywords": ["weekend", "Saturday Sunday"],
                "date_strings": [
                    self.wochenende_start.strftime("%d.%m.%Y"),
                    self.wochenende_end.strftime("%d.%m.%Y")
                ],
                "human_readable": f"Wochenende ({self.wochenende_start.strftime('%d.%m.')} - {self.wochenende_end.strftime('%d.%m.%Y')})"
            })

        elif timeframe in ["aktuell", "current"]:
            context.update({
                "german_keywords": ["aktuell", "momentan", "zur Zeit", "gerade", "live"],
                "english_keywords": ["current", "now", "currently", "live", "real-time"],
                "date_strings": [self.heute_str],
                "human_readable": f"aktuell ({self.tageszeit}, {self.heute_str})"
            })

        # API-spezifische Parameter hinzuf√ºgen
        context["api_parameters"] = self._get_api_specific_parameters(timeframe, api)

        return context

    def _get_api_specific_parameters(self, timeframe, api):
        """Generiert API-spezifische Zeit-Parameter"""

        params = {}

        if api == "firecrawl":
            # Firecrawl Search tbs Parameter
            if timeframe == "heute":
                params["tbs"] = "qdr:d"  # Past 24 hours
            elif timeframe == "gestern":
                params["tbs"] = "qdr:d"  # Past 24 hours (wird gestern mit abdecken)
            elif timeframe == "wochenende":
                params["tbs"] = "qdr:w"  # Past week
            elif timeframe in ["aktuell", "current"]:
                params["tbs"] = "qdr:h"  # Past hour

        elif api == "perplexity":
            # Perplexity hat bessere Ergebnisse mit expliziten Datums-Keywords
            if timeframe == "heute":
                params["date_hint"] = f"today {self.heute_str}"
            elif timeframe == "morgen":
                params["date_hint"] = f"tomorrow {self.morgen_str}"
            elif timeframe == "gestern":
                params["date_hint"] = f"yesterday {self.gestern_str}"

        elif api == "claude_web":
            # Claude Web Search funktioniert gut mit nat√ºrlichen Keywords
            if timeframe in ["aktuell", "current"]:
                params["freshness_hint"] = "latest news current status"
            elif timeframe == "heute":
                params["freshness_hint"] = "today news updates"

        return params

    # =========================================================================
    # TEMPLATE-INTEGRATION
    # =========================================================================

    def inject_time_context(self, template, timeframe="heute", api="generic"):
        """
        Ersetzt {date_context} Platzhalter in Templates mit zeitlichem Kontext

        Args:
            template: Template-String mit {date_context} Platzhalter
            timeframe: Gew√ºnschter Zeitbezug
            api: Ziel-API f√ºr optimierte Formatierung

        Returns:
            str: Template mit ersetztem Zeit-Kontext
        """
        time_context = self.get_time_context(timeframe, api)

        # W√§hle passende Keywords basierend auf API
        if api in ["claude_web", "perplexity"]:
            # F√ºr diese APIs funktionieren deutsche Keywords gut
            time_keywords = " ".join(time_context["german_keywords"][:2])  # Erste 2 Keywords
        else:
            # F√ºr andere APIs einfacher Ansatz
            time_keywords = time_context["human_readable"]

        # Ersetze Platzhalter
        enriched_template = template.replace("{date_context}", time_keywords)

        return enriched_template

    def get_contextual_search_query(self, base_query, timeframe="heute", api="generic", location=None):
        """
        Erstellt vollst√§ndig kontextualisierte Suchanfrage

        Args:
            base_query: Basis-Suchanfrage
            timeframe: Zeitlicher Kontext
            api: Ziel-API
            location: Ort (optional)

        Returns:
            dict: Vollst√§ndige Query mit Metadaten
        """
        time_context = self.get_time_context(timeframe, api)

        # Zeit-Keywords hinzuf√ºgen
        time_keywords = " ".join(time_context["german_keywords"][:2])

        # Query zusammensetzen
        if location:
            contextual_query = f"{base_query} {location} {time_keywords} Deutschland"
        else:
            contextual_query = f"{base_query} {time_keywords}"

        return {
            "query": contextual_query,
            "base_query": base_query,
            "time_context": time_context,
            "location": location,
            "timestamp": self.now.isoformat(),
            "api_parameters": time_context["api_parameters"]
        }

    # =========================================================================
    # KATEGORIE-SPEZIFISCHE ZEIT-LOGIK
    # =========================================================================

    def get_optimal_timeframe_for_category(self, category):
        """
        Bestimmt optimalen Zeitrahmen f√ºr verschiedene Newsletter-Kategorien

        Args:
            category: Newsletter-Kategorie

        Returns:
            str: Optimaler timeframe f√ºr diese Kategorie
        """
        category_time_mapping = {
            "wetter": "heute",           # Wetter ist heute-fokussiert
            "verkehr": "aktuell",        # Verkehr sollte real-time sein
            "events": "heute",           # Events f√ºr heute/morgen
            "nachrichten": "heute",      # Aktuelle Nachrichten
            "rathaus": "heute",          # Aktuelle Bekanntmachungen
            "schulen": "heute",          # Schulnachrichten f√ºr heute
            "vereine": "wochenende",     # Vereinsaktivit√§ten oft am Wochenende
            "sport": "heute"             # Sportergebnisse von heute
        }

        return category_time_mapping.get(category.lower(), "heute")

    def is_relevant_timeframe(self, category, timeframe):
        """
        Pr√ºft ob ein Zeitrahmen f√ºr eine Kategorie sinnvoll ist

        Args:
            category: Newsletter-Kategorie
            timeframe: Zu pr√ºfender Zeitrahmen

        Returns:
            bool: True wenn Kombination sinnvoll ist
        """
        relevant_combinations = {
            "wetter": ["heute", "morgen"],
            "verkehr": ["aktuell", "heute"],
            "events": ["heute", "morgen", "wochenende"],
            "nachrichten": ["heute", "gestern"],
            "rathaus": ["heute", "gestern"],
            "schulen": ["heute"],
            "vereine": ["heute", "wochenende"],
            "sport": ["heute", "gestern"]
        }

        return timeframe in relevant_combinations.get(category.lower(), ["heute"])

    # =========================================================================
    # NEWSLETTER WORKFLOW INTEGRATION
    # =========================================================================

    def create_time_aware_newsletter_plan(self, config_manager, location, categories=None):
        """
        Erweitert ConfigManager Newsletter-Plan um zeitlichen Kontext

        Args:
            config_manager: ConfigManager Instance
            location: Ortsname
            categories: Liste der Kategorien

        Returns:
            dict: Zeit-bewusster Newsletter Plan
        """
        # Basis-Plan vom ConfigManager holen
        base_plan = config_manager.create_newsletter_plan(location, categories)

        # Plan um Zeit-Kontext erweitern
        time_aware_plan = base_plan.copy()
        time_aware_plan["time_context"] = {
            "created_at": self.now.isoformat(),
            "timezone": str(self.timezone),
            "reference_date": self.heute_str,
            "tageszeit": self.tageszeit
        }

        # Kategorien um Zeit-Kontext erweitern
        for category, cat_plan in time_aware_plan["categories"].items():
            optimal_timeframe = self.get_optimal_timeframe_for_category(category)

            cat_plan["timeframe"] = optimal_timeframe
            cat_plan["time_context"] = self.get_time_context(optimal_timeframe)

            # Queries um Zeit-Kontext erweitern
            if "queries" in cat_plan:
                enhanced_queries = {}
                for api, query in cat_plan["queries"].items():
                    enhanced_query = self.inject_time_context(
                        query,
                        optimal_timeframe,
                        api.replace("_web", "").replace("_search", "")  # API-Namen normalisieren
                    )
                    enhanced_queries[api] = enhanced_query

                cat_plan["queries"] = enhanced_queries
                cat_plan["original_queries"] = cat_plan.get("queries", {})

        return time_aware_plan

    # =========================================================================
    # UTILITY METHODS
    # =========================================================================

    def get_current_time_info(self):
        """Gibt aktuelle Zeit-Informationen zur√ºck"""
        return {
            "current_datetime": self.now.isoformat(),
            "date": self.heute_str,
            "weekday": self.heute_wochentag,
            "time_of_day": self.tageszeit,
            "timezone": str(self.timezone),
            "hour": self.now.hour,
            "is_weekend": self.heute.weekday() >= 5
        }

    def format_for_newsletter(self, timeframe="heute"):
        """Formatiert Zeit-Kontext f√ºr Newsletter-Ausgabe"""
        time_context = self.get_time_context(timeframe)
        return time_context["human_readable"]

# =============================================================================
# INITIALISIERUNG UND TESTS
# =============================================================================

# TimeContextManager initialisieren
try:
    time_manager = TimeContextManager(location="m√ºnchen")
    print("‚úÖ TimeContextManager initialisiert")

except Exception as e:
    print(f"‚ùå TimeContextManager Initialisierung fehlgeschlagen: {e}")
    time_manager = None

# =============================================================================
# DIREKTER TEST
# =============================================================================

if time_manager:
    print("\nüß™ TESTE TIMECONTEXTMANAGER")
    print("=" * 50)

    try:
        # Test 1: Aktuelle Zeit-Info
        print("‚è∞ Test 1: Aktuelle Zeit-Info")
        time_info = time_manager.get_current_time_info()
        print(f"  Datum: {time_info['date']}")
        print(f"  Wochentag: {time_info['weekday']}")
        print(f"  Tageszeit: {time_info['time_of_day']} ({time_info['hour']}:00)")
        print(f"  Wochenende: {time_info['is_weekend']}")

        # Test 2: Zeit-Kontexte
        print(f"\nüìÖ Test 2: Zeit-Kontexte")
        for timeframe in ["heute", "morgen", "aktuell", "wochenende"]:
            context = time_manager.get_time_context(timeframe, "claude_web")
            print(f"  {timeframe}: {context['human_readable']}")
            print(f"    Keywords: {', '.join(context['german_keywords'][:3])}")

        # Test 3: Template-Injection
        print(f"\nüî§ Test 3: Template Zeit-Injection")
        test_template = "Wetter {location} {date_context} Deutschland Vorhersage"
        for api in ["claude_web", "perplexity", "firecrawl"]:
            enriched = time_manager.inject_time_context(test_template, "heute", api)
            print(f"  {api}: {enriched}")

        # Test 4: Kategorie-spezifische Zeitrahmen
        print(f"\nüéØ Test 4: Kategorie-spezifische Zeitrahmen")
        categories = ["wetter", "verkehr", "events", "sport"]
        for category in categories:
            optimal_time = time_manager.get_optimal_timeframe_for_category(category)
            print(f"  {category}: {optimal_time}")

        # Test 5: Kontextuelle Suchanfrage
        print(f"\nüîç Test 5: Kontextuelle Suchanfrage")
        contextual_query = time_manager.get_contextual_search_query(
            "Wetter Vorhersage",
            "heute",
            "perplexity",
            "M√ºnchen"
        )
        print(f"  Base Query: {contextual_query['base_query']}")
        print(f"  Contextual Query: {contextual_query['query']}")
        print(f"  API Parameters: {contextual_query['api_parameters']}")

        # Test 6: Integration mit ConfigManager
        if 'config_manager' in globals() and config_manager:
            print(f"\nüîó Test 6: ConfigManager Integration")
            time_aware_plan = time_manager.create_time_aware_newsletter_plan(
                config_manager,
                "m√ºnchen",
                ["wetter", "events"]
            )

            print(f"  Plan Zeitkontext: {time_aware_plan['time_context']['reference_date']}")
            for cat_name, cat_plan in time_aware_plan["categories"].items():
                print(f"  {cat_name}:")
                print(f"    Timeframe: {cat_plan['timeframe']}")
                if 'queries' in cat_plan:
                    claude_query = cat_plan['queries'].get('claude_web', 'N/A')
                    print(f"    Claude Query: {claude_query}")

        print(f"\n‚úÖ Alle TimeContextManager Tests erfolgreich!")

    except Exception as test_error:
        print(f"‚ùå TimeContextManager Test Fehler: {test_error}")
        import traceback
        print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 50)
print("‚úÖ TimeContextManager Setup komplett\n")

‚è∞ TimeContextManager initialisiert f√ºr Sunday, 20.07.2025 15:35 (Zeitzone: Europe/Berlin)
‚úÖ TimeContextManager initialisiert

üß™ TESTE TIMECONTEXTMANAGER
‚è∞ Test 1: Aktuelle Zeit-Info
  Datum: 20.07.2025
  Wochentag: Sunday
  Tageszeit: nachmittag (15:00)
  Wochenende: True

üìÖ Test 2: Zeit-Kontexte
  heute: heute (20.07.2025)
    Keywords: heute, aktuell, momentan
  morgen: morgen (21.07.2025)
    Keywords: morgen, am Monday, morgiger Tag
  aktuell: aktuell (nachmittag, 20.07.2025)
    Keywords: aktuell, momentan, zur Zeit
  wochenende: Wochenende (19.07. - 20.07.2025)
    Keywords: Wochenende, Samstag Sonntag, am Wochenende

üî§ Test 3: Template Zeit-Injection
  claude_web: Wetter {location} heute aktuell Deutschland Vorhersage
  perplexity: Wetter {location} heute aktuell Deutschland Vorhersage
  firecrawl: Wetter {location} heute (20.07.2025) Deutschland Vorhersage

üéØ Test 4: Kategorie-spezifische Zeitrahmen
  wetter: heute
  verkehr: aktuell
  events: heute
  sport: 

In [5]:
# =============================================================================
# ZELLE 0e: DataPersistenceManager Implementation
# =============================================================================
# @title DataPersistenceManager - Strukturierte Speicherung in Google Drive

import json
import os
from pathlib import Path
from datetime import datetime
import hashlib
import uuid
from typing import Dict, List, Optional, Any
import pandas as pd

class DataPersistenceManager:
    """
    Strukturierte Speicherung aller Newsletter-Daten in Google Drive
    Erm√∂glicht Nachvollziehbarkeit, Audit-Trails und Fact-Checking
    """

    def __init__(self, location, session_id=None, base_path=None):
        """
        Initialisiert DataPersistenceManager f√ºr eine Location/Session

        Args:
            location: Ortsname f√ºr Newsletter
            session_id: Eindeutige Session-ID (wird auto-generiert falls None)
            base_path: Basis-Pfad zum Newsletter System
        """
        self.location = location.lower()

        # Base Path bestimmen
        if base_path is None:
            if 'NEWSLETTER_SYSTEM_PATH' in globals():
                self.base_path = Path(NEWSLETTER_SYSTEM_PATH)
            else:
                raise ValueError("NEWSLETTER_SYSTEM_PATH nicht gefunden. F√ºhre zuerst Zelle 0b aus!")
        else:
            self.base_path = Path(base_path)

        # Session ID generieren oder verwenden
        if session_id is None:
            timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M')
            self.session_id = f"{timestamp}_{self.location}"
        else:
            self.session_id = session_id

        # Session-Pfad erstellen
        self.session_path = self.base_path / "data" / "sessions" / self.session_id

        # Ordnerstruktur f√ºr Session erstellen
        self._create_session_structure()

        # Session-Metadaten
        self.session_meta = {
            "session_id": self.session_id,
            "location": location,
            "created_at": datetime.now().isoformat(),
            "timezone": "Europe/Berlin",
            "status": "active",
            "api_calls": 0,
            "categories_processed": [],
            "total_content_items": 0,
            "newsletter_generated": False
        }

        # Content Registry f√ºr Session
        self.content_registry = []
        self.api_call_counter = 0

        print(f"üíæ DataPersistenceManager initialisiert")
        print(f"üìÅ Session: {self.session_id}")
        print(f"üìç Location: {location}")
        print(f"üóÇÔ∏è Session Path: {self.session_path}")

    def _create_session_structure(self):
        """Erstellt Ordnerstruktur f√ºr aktuelle Session"""

        # Hauptordner
        self.session_path.mkdir(parents=True, exist_ok=True)

        # Unterordner erstellen
        folders = [
            "raw_responses",           # Original API-Responses
            "processed_content",       # Verarbeitete Inhalte pro Kategorie
            "queries",                 # Verwendete Queries und Templates
            "metadata",               # Content-Metadaten und Mappings
            "final_newsletter",       # Finaler Newsletter in verschiedenen Formaten
            "logs",                   # Session-spezifische Logs
            "fact_check"              # F√ºr sp√§tere Fact-Check Funktionen
        ]

        for folder in folders:
            folder_path = self.session_path / folder
            folder_path.mkdir(exist_ok=True)

        print(f"‚úÖ Session-Struktur erstellt: {len(folders)} Ordner")

    # =========================================================================
    # RAW RESPONSE SPEICHERUNG
    # =========================================================================

    def save_raw_response(self, source, response, query=None, category=None, metadata=None):
        """
        Speichert originale API-Response f√ºr Nachvollziehbarkeit

        Args:
            source: API-Quelle ("claude_search", "perplexity", "firecrawl_scrape", etc.)
            response: Original API-Response (dict, object, oder string)
            query: Verwendete Query (optional)
            category: Newsletter-Kategorie (optional)
            metadata: Zus√§tzliche Metadaten (optional)

        Returns:
            str: Dateiname der gespeicherten Response
        """
        self.api_call_counter += 1
        call_id = f"{self.api_call_counter:03d}"

        # Filename generieren
        timestamp = datetime.now().strftime('%H-%M-%S')
        if category:
            filename = f"{call_id}_{source}_{category}_{timestamp}.json"
        else:
            filename = f"{call_id}_{source}_{timestamp}.json"

        filepath = self.session_path / "raw_responses" / filename

        # Response-Daten strukturieren
        response_data = {
            "call_id": call_id,
            "timestamp": datetime.now().isoformat(),
            "source": source,
            "category": category,
            "query": query,
            "metadata": metadata or {},
            "response": self._serialize_response(response)
        }

        # Als JSON speichern
        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(response_data, f, indent=2, ensure_ascii=False, default=str)

            print(f"üíæ Raw Response gespeichert: {filename}")

            # Session-Meta aktualisieren
            self.session_meta["api_calls"] = self.api_call_counter

            return filename

        except Exception as e:
            print(f"‚ùå Fehler beim Speichern von Raw Response: {e}")
            return None

    def _serialize_response(self, response):
        """Konvertiert verschiedene Response-Formate zu serialisierbaren Daten"""

        if isinstance(response, (dict, list, str, int, float, bool)):
            return response

        # Objekt mit Attributen
        elif hasattr(response, '__dict__'):
            try:
                return {
                    "_type": str(type(response)),
                    "_attributes": {k: self._serialize_response(v) for k, v in response.__dict__.items()}
                }
            except:
                return {"_type": str(type(response)), "_str": str(response)}

        # Fallback: String-Repr√§sentation
        else:
            return {"_type": str(type(response)), "_str": str(response)}

    # =========================================================================
    # PROCESSED CONTENT SPEICHERUNG
    # =========================================================================

    def save_processed_content(self, category, content, content_type="markdown", metadata=None):
        """
        Speichert verarbeiteten Content pro Kategorie

        Args:
            category: Newsletter-Kategorie
            content: Verarbeiteter Content
            content_type: Format ("markdown", "json", "text")
            metadata: Content-Metadaten

        Returns:
            str: Pfad zur gespeicherten Datei
        """
        timestamp = datetime.now().strftime('%H-%M-%S')

        # Dateiname basierend auf Typ
        if content_type == "markdown":
            filename = f"{category}_{timestamp}.md"
        elif content_type == "json":
            filename = f"{category}_{timestamp}.json"
        else:
            filename = f"{category}_{timestamp}.txt"

        filepath = self.session_path / "processed_content" / filename

        try:
            # Content je nach Typ speichern
            if content_type == "markdown" or content_type == "text":
                with open(filepath, 'w', encoding='utf-8') as f:
                    if isinstance(content, str):
                        f.write(content)
                    else:
                        f.write(str(content))

            elif content_type == "json":
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(content, f, indent=2, ensure_ascii=False, default=str)

            # Metadaten-Datei erstellen
            if metadata:
                meta_filepath = self.session_path / "metadata" / f"{category}_{timestamp}_meta.json"
                with open(meta_filepath, 'w', encoding='utf-8') as f:
                    json.dump(metadata, f, indent=2, ensure_ascii=False, default=str)

            print(f"üìÑ Processed Content gespeichert: {filename}")

            # Content Registry aktualisieren
            content_item = {
                "category": category,
                "filename": filename,
                "filepath": str(filepath),
                "content_type": content_type,
                "timestamp": datetime.now().isoformat(),
                "size_chars": len(content) if isinstance(content, str) else 0,
                "has_metadata": metadata is not None
            }
            self.content_registry.append(content_item)

            # Session-Meta aktualisieren
            if category not in self.session_meta["categories_processed"]:
                self.session_meta["categories_processed"].append(category)
            self.session_meta["total_content_items"] = len(self.content_registry)

            return str(filepath)

        except Exception as e:
            print(f"‚ùå Fehler beim Speichern von Processed Content: {e}")
            return None

    # =========================================================================
    # QUERY TRACKING
    # =========================================================================

    def save_query_log(self, category, api, original_query, enhanced_query, time_context=None):
        """
        Speichert Query-Historie f√ºr Nachvollziehbarkeit

        Args:
            category: Newsletter-Kategorie
            api: Verwendete API
            original_query: Original Template
            enhanced_query: Final verwendete Query
            time_context: Zeit-Kontext Information

        Returns:
            str: Query-Log ID
        """
        query_id = str(uuid.uuid4())[:8]
        timestamp = datetime.now()

        query_log = {
            "query_id": query_id,
            "timestamp": timestamp.isoformat(),
            "category": category,
            "api": api,
            "original_query": original_query,
            "enhanced_query": enhanced_query,
            "time_context": time_context,
            "location": self.location
        }

        # Query-Log speichern
        filename = f"queries_{timestamp.strftime('%H-%M-%S')}_{category}_{api}.json"
        filepath = self.session_path / "queries" / filename

        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                json.dump(query_log, f, indent=2, ensure_ascii=False, default=str)

            print(f"üîç Query-Log gespeichert: {query_id}")
            return query_id

        except Exception as e:
            print(f"‚ùå Fehler beim Speichern von Query-Log: {e}")
            return None

    # =========================================================================
    # NEWSLETTER SPEICHERUNG
    # =========================================================================

    def save_final_newsletter(self, newsletter_content, format="markdown", metadata=None):
        """
        Speichert finalen Newsletter

        Args:
            newsletter_content: Newsletter-Inhalt
            format: Ausgabeformat ("markdown", "html", "json")
            metadata: Newsletter-Metadaten

        Returns:
            str: Pfad zur Newsletter-Datei
        """
        timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M')

        # Filename basierend auf Format
        if format == "markdown":
            filename = f"newsletter_{self.location}_{timestamp}.md"
        elif format == "html":
            filename = f"newsletter_{self.location}_{timestamp}.html"
        elif format == "json":
            filename = f"newsletter_{self.location}_{timestamp}.json"
        else:
            filename = f"newsletter_{self.location}_{timestamp}.txt"

        filepath = self.session_path / "final_newsletter" / filename

        try:
            # Newsletter speichern
            if format in ["markdown", "html", "text"]:
                with open(filepath, 'w', encoding='utf-8') as f:
                    f.write(newsletter_content)
            elif format == "json":
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(newsletter_content, f, indent=2, ensure_ascii=False, default=str)

            # Newsletter-Metadaten speichern
            if metadata:
                meta_filepath = self.session_path / "final_newsletter" / f"newsletter_meta_{timestamp}.json"
                with open(meta_filepath, 'w', encoding='utf-8') as f:
                    json.dump(metadata, f, indent=2, ensure_ascii=False, default=str)

            print(f"üì∞ Newsletter gespeichert: {filename}")

            # Session-Meta aktualisieren
            self.session_meta["newsletter_generated"] = True
            self.session_meta["newsletter_file"] = filename

            return str(filepath)

        except Exception as e:
            print(f"‚ùå Fehler beim Speichern des Newsletters: {e}")
            return None

    # =========================================================================
    # SESSION MANAGEMENT
    # =========================================================================

    def save_session_meta(self):
        """Speichert Session-Metadaten"""
        meta_filepath = self.session_path / "session_meta.json"

        # Session-Meta mit aktuellen Werten aktualisieren
        self.session_meta.update({
            "last_updated": datetime.now().isoformat(),
            "status": "completed" if self.session_meta["newsletter_generated"] else "active"
        })

        try:
            with open(meta_filepath, 'w', encoding='utf-8') as f:
                json.dump(self.session_meta, f, indent=2, ensure_ascii=False, default=str)

            print(f"üìä Session-Meta gespeichert: {self.session_id}")
            return str(meta_filepath)

        except Exception as e:
            print(f"‚ùå Fehler beim Speichern der Session-Meta: {e}")
            return None

    def get_session_summary(self):
        """Gibt Session-Zusammenfassung zur√ºck"""
        return {
            "session_id": self.session_id,
            "location": self.location,
            "created_at": self.session_meta["created_at"],
            "api_calls": self.session_meta["api_calls"],
            "categories_processed": self.session_meta["categories_processed"],
            "total_content_items": self.session_meta["total_content_items"],
            "newsletter_generated": self.session_meta["newsletter_generated"],
            "session_path": str(self.session_path)
        }

    # =========================================================================
    # CONTENT RETRIEVAL
    # =========================================================================

    def get_content_for_category(self, category):
        """L√§dt gespeicherten Content f√ºr eine Kategorie"""
        category_content = []

        for item in self.content_registry:
            if item["category"] == category:
                try:
                    filepath = Path(item["filepath"])
                    if filepath.exists():
                        if item["content_type"] == "json":
                            with open(filepath, 'r', encoding='utf-8') as f:
                                content = json.load(f)
                        else:
                            with open(filepath, 'r', encoding='utf-8') as f:
                                content = f.read()

                        category_content.append({
                            "content": content,
                            "metadata": item
                        })
                except Exception as e:
                    print(f"‚ùå Fehler beim Laden von {item['filename']}: {e}")

        return category_content

    def get_all_raw_responses(self):
        """L√§dt alle Raw API-Responses f√ºr Analyse"""
        raw_responses = []
        raw_path = self.session_path / "raw_responses"

        if raw_path.exists():
            for file in raw_path.glob("*.json"):
                try:
                    with open(file, 'r', encoding='utf-8') as f:
                        response_data = json.load(f)
                        raw_responses.append(response_data)
                except Exception as e:
                    print(f"‚ùå Fehler beim Laden von {file.name}: {e}")

        return raw_responses

    # =========================================================================
    # CLEANUP & ARCHIVAL
    # =========================================================================

    def finalize_session(self):
        """Schlie√üt Session ab und archiviert"""
        print(f"üèÅ Finalisiere Session: {self.session_id}")

        # Session-Meta aktualisieren
        self.session_meta["status"] = "completed"
        self.session_meta["completed_at"] = datetime.now().isoformat()

        # Finale Session-Meta speichern
        self.save_session_meta()

        # Session-Summary erstellen
        summary = self.get_session_summary()

        print(f"üìä SESSION ABGESCHLOSSEN")
        print(f"   API Calls: {summary['api_calls']}")
        print(f"   Kategorien: {len(summary['categories_processed'])}")
        print(f"   Content Items: {summary['total_content_items']}")
        print(f"   Newsletter: {'‚úÖ' if summary['newsletter_generated'] else '‚ùå'}")

        return summary

# =============================================================================
# DEMO UND TESTS
# =============================================================================

# Demo DataPersistenceManager
print("üß™ TESTE DATAPERSISTENCEMANAGER")
print("=" * 50)

try:
    # Test-Session erstellen
    test_location = "m√ºnchen"
    persistence_manager = DataPersistenceManager(test_location)

    print(f"\nüìä Session Summary:")
    summary = persistence_manager.get_session_summary()
    for key, value in summary.items():
        print(f"  {key}: {value}")

    # Test 1: Raw Response speichern
    print(f"\nüíæ Test 1: Raw Response Speichern")
    mock_response = {
        "data": [
            {"title": "M√ºnchen Wetter heute", "url": "wetter.com", "content": "Sonnig, 25¬∞C"},
            {"title": "Verkehr M√ºnchen", "url": "adac.de", "content": "A9 Stau"}
        ],
        "status": "success"
    }

    saved_file = persistence_manager.save_raw_response(
        source="firecrawl_search",
        response=mock_response,
        query="M√ºnchen Wetter heute",
        category="wetter",
        metadata={"api_version": "v1", "limit": 3}
    )

    # Test 2: Processed Content speichern
    print(f"\nüìÑ Test 2: Processed Content Speichern")
    processed_content = """# Wetter in M√ºnchen

## Heute
- Sonnig und warm
- Temperatur: 25¬∞C
- Wind: 5 km/h

## Vorhersage
- Morgen: Teilweise bew√∂lkt
- √úbermorgen: Regen m√∂glich
"""

    content_file = persistence_manager.save_processed_content(
        category="wetter",
        content=processed_content,
        content_type="markdown",
        metadata={"sources": 2, "confidence": "high"}
    )

    # Test 3: Query Log speichern
    print(f"\nüîç Test 3: Query Log Speichern")
    query_id = persistence_manager.save_query_log(
        category="wetter",
        api="claude_web",
        original_query="Wetter {location} {date_context}",
        enhanced_query="Wetter M√ºnchen heute aktuell Deutschland",
        time_context={"timeframe": "heute", "date": "2025-07-20"}
    )

    # Test 4: Newsletter speichern
    print(f"\nüì∞ Test 4: Newsletter Speichern")
    mock_newsletter = """# M√ºnchen Newsletter - 20.07.2025

## Wetter
Heute sonnig und warm mit 25¬∞C.

## Verkehr
Stau auf der A9 Richtung N√ºrnberg.

## Events
Sommerfest im Englischen Garten.
"""

    newsletter_file = persistence_manager.save_final_newsletter(
        newsletter_content=mock_newsletter,
        format="markdown",
        metadata={
            "generated_at": datetime.now().isoformat(),
            "categories": ["wetter", "verkehr", "events"],
            "word_count": 25
        }
    )

    # Test 5: Content retrieval
    print(f"\nüìÇ Test 5: Content Retrieval")
    wetter_content = persistence_manager.get_content_for_category("wetter")
    print(f"  Wetter Content Items: {len(wetter_content)}")

    raw_responses = persistence_manager.get_all_raw_responses()
    print(f"  Raw Responses: {len(raw_responses)}")

    # Test 6: Session finalisieren
    print(f"\nüèÅ Test 6: Session Finalisieren")
    final_summary = persistence_manager.finalize_session()

    print(f"\n‚úÖ Alle DataPersistenceManager Tests erfolgreich!")
    print(f"üìÅ Session gespeichert unter: {persistence_manager.session_path}")

except Exception as e:
    print(f"‚ùå DataPersistenceManager Test Fehler: {e}")
    import traceback
    print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 50)
print("‚úÖ DataPersistenceManager Setup komplett\n")

üß™ TESTE DATAPERSISTENCEMANAGER
‚úÖ Session-Struktur erstellt: 7 Ordner
üíæ DataPersistenceManager initialisiert
üìÅ Session: 2025-07-20_13-35_m√ºnchen
üìç Location: m√ºnchen
üóÇÔ∏è Session Path: /content/drive/MyDrive/Newsletter_System/data/sessions/2025-07-20_13-35_m√ºnchen

üìä Session Summary:
  session_id: 2025-07-20_13-35_m√ºnchen
  location: m√ºnchen
  created_at: 2025-07-20T13:35:42.087764
  api_calls: 0
  categories_processed: []
  total_content_items: 0
  newsletter_generated: False
  session_path: /content/drive/MyDrive/Newsletter_System/data/sessions/2025-07-20_13-35_m√ºnchen

üíæ Test 1: Raw Response Speichern
üíæ Raw Response gespeichert: 001_firecrawl_search_wetter_13-35-42.json

üìÑ Test 2: Processed Content Speichern
üìÑ Processed Content gespeichert: wetter_13-35-42.md

üîç Test 3: Query Log Speichern
üîç Query-Log gespeichert: ed4b39b4

üì∞ Test 4: Newsletter Speichern
üì∞ Newsletter gespeichert: newsletter_m√ºnchen_2025-07-20_13-35.md

üìÇ Test 5: Co

In [6]:
# =============================================================================
# ZELLE 1: API Configuration
# =============================================================================
# @title API Keys Setup
class APIConfig:
    """Zentrale API-Konfiguration mit Google Colab Secrets"""

    def __init__(self):
        self.firecrawl_key = None
        self.claude_key = None
        self.perplexity_key = None
        self.gemini_key = None
        self.groq_key = None

    def load_keys(self):
        """L√§dt alle API Keys aus Google Colab Secrets"""
        try:
            self.firecrawl_key = userdata.get('FIRECRAWL_API')
            self.claude_key = userdata.get('ANTHROPIC_API')
            self.perplexity_key = userdata.get('PERPLEXITY_API_KEY')
            self.gemini_key = userdata.get('GOOGLE_API_KEY')
            self.groq_key = userdata.get('GROQ_API')

            print("‚úÖ API Keys erfolgreich geladen:")
            print(f"üï∑Ô∏è Firecrawl: {'‚úì' if self.firecrawl_key else '‚ùå'}")
            print(f"ü§ñ Claude (Anthropic): {'‚úì' if self.claude_key else '‚ùå'}")
            print(f"üîç Perplexity: {'‚úì' if self.perplexity_key else '‚ùå'}")
            print(f"‚ú® Gemini (Google): {'‚úì' if self.gemini_key else '‚ùå'}")
            print(f"‚ö° Groq: {'‚úì' if self.groq_key else '‚ùå'}")

        except Exception as e:
            print(f"‚ùå Fehler beim Laden der API Keys: {e}")
            print("üí° Tipp: Keys in Google Colab Secrets hinterlegen!")

    def get_keys_status(self):
        """Gibt Status aller Keys zur√ºck"""
        return {
            'firecrawl': bool(self.firecrawl_key),
            'claude': bool(self.claude_key),
            'perplexity': bool(self.perplexity_key),
            'gemini': bool(self.gemini_key),
            'groq': bool(self.groq_key)
        }

# API Config initialisieren
api_config = APIConfig()
api_config.load_keys()

‚úÖ API Keys erfolgreich geladen:
üï∑Ô∏è Firecrawl: ‚úì
ü§ñ Claude (Anthropic): ‚úì
üîç Perplexity: ‚úì
‚ú® Gemini (Google): ‚úì
‚ö° Groq: ‚úì


In [7]:
# =============================================================================
# ZELLE 2: Firecrawl Worker - Foundation-System Integration
# =============================================================================
# @title Firecrawl Worker - Mit ConfigManager + TimeContext + DataPersistence
from firecrawl import FirecrawlApp, ScrapeOptions

class FirecrawlWorkerV2:
    """
    Firecrawl Worker V2 - Vollst√§ndig integriert mit Foundation-System
    - ConfigManager f√ºr intelligente Queries
    - TimeContextManager f√ºr Datums-Bewusstsein
    - DataPersistenceManager f√ºr Audit-Trail
    """

    def __init__(self, api_key, config_manager=None, time_manager=None, persistence_manager=None):
        """
        Initialisiert Firecrawl Worker V2

        Args:
            api_key: Firecrawl API Key
            config_manager: ConfigManager Instance
            time_manager: TimeContextManager Instance
            persistence_manager: DataPersistenceManager Instance
        """
        self.app = FirecrawlApp(api_key=api_key)

        # Foundation-System Integration
        self.config_manager = config_manager
        self.time_manager = time_manager
        self.persistence_manager = persistence_manager

        # Legacy Support falls Foundation nicht verf√ºgbar
        self.has_foundation = all([config_manager, time_manager, persistence_manager])

        # Worker State
        self.results = {
            "search": [],
            "scrape": [],
            "crawl": []
        }

        print(f"‚úÖ Firecrawl Worker V2 initialisiert")
        print(f"üèóÔ∏è Foundation Integration: {'‚úÖ' if self.has_foundation else '‚ùå'}")

        if self.has_foundation:
            # API-Config aus ConfigManager holen
            self.api_config = self.config_manager.get_api_config("firecrawl")
            self.default_location = self.api_config.get("location", {"country": "DE", "languages": ["de-DE"]})
            print(f"‚öôÔ∏è API Config geladen: {len(self.api_config)} Parameter")
        else:
            print("‚ö†Ô∏è L√§uft im Legacy-Modus ohne Foundation-System")

    # =========================================================================
    # SEARCH - Foundation-System Integration
    # =========================================================================

    def search_with_foundation(self, category, location, timeframe="heute", limit=None):
        """
        Intelligente Search mit Foundation-System Integration

        Args:
            category: Newsletter-Kategorie (z.B. "wetter", "events")
            location: Ortsname
            timeframe: Zeitrahmen (default: optimal f√ºr Kategorie)
            limit: Anzahl Ergebnisse (default: aus Config)

        Returns:
            dict: Strukturierte Search-Ergebnisse mit Metadaten
        """
        if not self.has_foundation:
            print("‚ùå Foundation-System erforderlich f√ºr diese Methode")
            return None

        try:
            # 1. Optimal Timeframe bestimmen
            if timeframe == "heute":  # Auto-detect
                timeframe = self.time_manager.get_optimal_timeframe_for_category(category)

            # 2. Search Template aus Config holen
            search_template = self.config_manager.get_search_template(category, "firecrawl_search", location)

            # 3. Template mit Zeit-Kontext anreichern
            enhanced_query = self.time_manager.inject_time_context(search_template, timeframe, "firecrawl")

            # 4. API-Limits aus Config
            if limit is None:
                limit = self.config_manager.get_api_limits("firecrawl", category)

            # 5. Query Log speichern
            query_id = self.persistence_manager.save_query_log(
                category=category,
                api="firecrawl_search",
                original_query=search_template,
                enhanced_query=enhanced_query,
                time_context=self.time_manager.get_time_context(timeframe, "firecrawl")
            )

            print(f"üîç Firecrawl Search: {category} in {location}")
            print(f"üìù Enhanced Query: {enhanced_query}")
            print(f"‚è∞ Timeframe: {timeframe}")
            print(f"üìä Limit: {limit}")

            # 6. Firecrawl Search durchf√ºhren
            search_result = self.app.search(
                query=enhanced_query,
                limit=limit
            )

            # 7. Raw Response speichern
            raw_filename = self.persistence_manager.save_raw_response(
                source="firecrawl_search",
                response=search_result,
                query=enhanced_query,
                category=category,
                metadata={
                    "query_id": query_id,
                    "timeframe": timeframe,
                    "location": location,
                    "limit": limit
                }
            )

            # 8. Response verarbeiten
            if hasattr(search_result, 'data') and search_result.data:
                processed_result = {
                    "method": "search",
                    "category": category,
                    "location": location,
                    "timeframe": timeframe,
                    "query_id": query_id,
                    "raw_filename": raw_filename,
                    "results_count": len(search_result.data),
                    "results": search_result.data,
                    "timestamp": datetime.now().isoformat()
                }

                self.results["search"].append(processed_result)

                print(f"‚úÖ {len(search_result.data)} Suchergebnisse erhalten")

                # 9. Processed Content als Markdown speichern
                markdown_content = self._format_search_results_as_markdown(processed_result)
                content_filename = self.persistence_manager.save_processed_content(
                    category=category,
                    content=markdown_content,
                    content_type="markdown",
                    metadata={
                        "query_id": query_id,
                        "source": "firecrawl_search",
                        "results_count": len(search_result.data)
                    }
                )

                processed_result["content_filename"] = content_filename

                # Preview anzeigen
                for i, result in enumerate(search_result.data[:2]):
                    if isinstance(result, dict):
                        title = result.get('title', 'Kein Titel')
                        url = result.get('url', 'Keine URL')
                        print(f"  {i+1}. {title[:60]}...")
                        print(f"     üîó {url}")

                return processed_result
            else:
                print("‚ùå Keine Suchergebnisse erhalten")
                return None

        except Exception as e:
            print(f"‚ùå Firecrawl Search Fehler: {e}")

            # Fehler auch in Persistence speichern
            if self.persistence_manager:
                error_log = {
                    "error": str(e),
                    "category": category,
                    "location": location,
                    "query": enhanced_query if 'enhanced_query' in locals() else "N/A"
                }
                self.persistence_manager.save_raw_response(
                    source="firecrawl_search_error",
                    response=error_log,
                    category=category
                )

            return None

    def _format_search_results_as_markdown(self, result):
        """Formatiert Search-Ergebnisse als Markdown f√ºr Speicherung"""

        markdown = f"# {result['category'].title()} - {result['location'].title()}\n\n"
        markdown += f"**Generiert:** {result['timestamp']}\n"
        markdown += f"**Timeframe:** {result['timeframe']}\n"
        markdown += f"**Ergebnisse:** {result['results_count']}\n"
        markdown += f"**Query ID:** {result['query_id']}\n\n"

        markdown += "## Suchergebnisse\n\n"

        for i, item in enumerate(result['results'], 1):
            if isinstance(item, dict):
                title = item.get('title', 'Kein Titel')
                url = item.get('url', 'Keine URL')
                description = item.get('description', 'Keine Beschreibung')

                markdown += f"### {i}. {title}\n\n"
                markdown += f"**URL:** {url}\n\n"

                if description:
                    markdown += f"**Beschreibung:** {description}\n\n"

                # Content falls verf√ºgbar (bei Scraping)
                if item.get('markdown'):
                    content_preview = item['markdown'][:300] + "..." if len(item['markdown']) > 300 else item['markdown']
                    markdown += f"**Content Preview:**\n```\n{content_preview}\n```\n\n"

                markdown += "---\n\n"

        return markdown

    # =========================================================================
    # SCRAPE - Foundation-System Integration
    # =========================================================================

    def scrape_with_foundation(self, category, location, urls=None):
        """
        Intelligente Scrape mit Foundation-System Integration

        Args:
            category: Newsletter-Kategorie
            location: Ortsname
            urls: URLs zum Scrapen (default: aus Config)

        Returns:
            dict: Strukturierte Scrape-Ergebnisse
        """
        if not self.has_foundation:
            print("‚ùå Foundation-System erforderlich f√ºr diese Methode")
            return None

        try:
            # 1. URLs aus Config holen falls nicht angegeben
            if urls is None:
                urls = self.config_manager.get_scrape_urls(category, location)

            if not urls:
                print(f"‚ùå Keine Scrape-URLs f√ºr {category} in {location} konfiguriert")
                return None

            # 2. Query Log f√ºr Scrape-Aktion
            query_id = self.persistence_manager.save_query_log(
                category=category,
                api="firecrawl_scrape",
                original_query=f"Scrape URLs for {category}",
                enhanced_query=f"Scraping {len(urls)} URLs: {', '.join(urls[:2])}{'...' if len(urls) > 2 else ''}",
                time_context=self.time_manager.get_current_time_info()
            )

            print(f"üï∑Ô∏è Firecrawl Scrape: {category} in {location}")
            print(f"üìÑ URLs: {len(urls)}")

            scraped_data = []

            # 3. URLs einzeln scrapen
            for i, url in enumerate(urls):
                try:
                    print(f"  üìÑ Scraping {i+1}/{len(urls)}: {url}")

                    scrape_result = self.app.scrape_url(
                        url=url,
                        formats=['markdown', 'links']
                    )

                    # Raw Response speichern
                    raw_filename = self.persistence_manager.save_raw_response(
                        source="firecrawl_scrape",
                        response=scrape_result,
                        query=url,
                        category=category,
                        metadata={
                            "query_id": query_id,
                            "url_index": i,
                            "location": location
                        }
                    )

                    # Response verarbeiten
                    if hasattr(scrape_result, 'markdown') and scrape_result.markdown:
                        scraped_item = {
                            "url": url,
                            "content": scrape_result.markdown,
                            "links": getattr(scrape_result, 'links', []),
                            "metadata": getattr(scrape_result, 'metadata', {}),
                            "title": getattr(scrape_result, 'title', ''),
                            "description": getattr(scrape_result, 'description', ''),
                            "raw_filename": raw_filename,
                            "timestamp": datetime.now().isoformat()
                        }
                        scraped_data.append(scraped_item)

                        print(f"    ‚úÖ {len(scrape_result.markdown)} Zeichen Content")
                    else:
                        print(f"    ‚ùå Kein Content von {url}")

                except Exception as url_error:
                    print(f"    ‚ùå Fehler bei {url}: {url_error}")
                    continue

            # 4. Ergebnisse strukturieren
            if scraped_data:
                processed_result = {
                    "method": "scrape",
                    "category": category,
                    "location": location,
                    "query_id": query_id,
                    "urls_count": len(urls),
                    "successful_scrapes": len(scraped_data),
                    "scraped_data": scraped_data,
                    "timestamp": datetime.now().isoformat()
                }

                self.results["scrape"].append(processed_result)

                # 5. Processed Content als Markdown speichern
                markdown_content = self._format_scrape_results_as_markdown(processed_result)
                content_filename = self.persistence_manager.save_processed_content(
                    category=category,
                    content=markdown_content,
                    content_type="markdown",
                    metadata={
                        "query_id": query_id,
                        "source": "firecrawl_scrape",
                        "urls_scraped": len(scraped_data)
                    }
                )

                processed_result["content_filename"] = content_filename

                print(f"‚úÖ {len(scraped_data)}/{len(urls)} URLs erfolgreich gescrapt")
                return processed_result
            else:
                print("‚ùå Keine URLs erfolgreich gescrapt")
                return None

        except Exception as e:
            print(f"‚ùå Firecrawl Scrape Fehler: {e}")
            return None

    def _format_scrape_results_as_markdown(self, result):
        """Formatiert Scrape-Ergebnisse als Markdown"""

        markdown = f"# {result['category'].title()} - {result['location'].title()} (Scraped)\n\n"
        markdown += f"**Generiert:** {result['timestamp']}\n"
        markdown += f"**URLs gescrapt:** {result['successful_scrapes']}/{result['urls_count']}\n"
        markdown += f"**Query ID:** {result['query_id']}\n\n"

        for i, item in enumerate(result['scraped_data'], 1):
            title = item.get('title', f"Seite {i}")
            url = item['url']
            content = item['content']

            markdown += f"## {i}. {title}\n\n"
            markdown += f"**URL:** {url}\n\n"

            if item.get('description'):
                markdown += f"**Beschreibung:** {item['description']}\n\n"

            # Content Preview (erste 500 Zeichen)
            content_preview = content[:500] + "..." if len(content) > 500 else content
            markdown += f"**Content:**\n```markdown\n{content_preview}\n```\n\n"

            # Links falls verf√ºgbar
            if item.get('links') and len(item['links']) > 0:
                markdown += f"**Links gefunden:** {len(item['links'])}\n"
                for link in item['links'][:5]:  # Erste 5 Links
                    markdown += f"- {link}\n"
                markdown += "\n"

            markdown += "---\n\n"

        return markdown

    # =========================================================================
    # NEWSLETTER INTEGRATION
    # =========================================================================

    def process_category_for_newsletter(self, category, location):
        """
        Verarbeitet eine Kategorie komplett f√ºr Newsletter-Generation
        W√§hlt automatisch beste Methode (Search/Scrape) basierend auf Config

        Args:
            category: Newsletter-Kategorie
            location: Ortsname

        Returns:
            dict: Vollst√§ndig verarbeitete Kategorie-Daten
        """
        if not self.has_foundation:
            print("‚ùå Foundation-System erforderlich")
            return None

        # Beste Methode aus Config bestimmen
        method = self.config_manager.get_category_method(category)

        print(f"üéØ Verarbeite {category} f√ºr {location} via {method}")

        if method == "search":
            return self.search_with_foundation(category, location)
        elif method == "scrape":
            return self.scrape_with_foundation(category, location)
        else:
            print(f"‚ùå Unbekannte Methode: {method}")
            return None

    def get_newsletter_summary_for_location(self, location):
        """Gibt Zusammenfassung aller Daten f√ºr einen Ort zur√ºck"""

        summary = {
            "location": location,
            "timestamp": datetime.now().isoformat(),
            "search_results": [],
            "scrape_results": [],
            "total_content_items": 0
        }

        # Search Results f√ºr Location
        for result in self.results["search"]:
            if result["location"].lower() == location.lower():
                summary["search_results"].append({
                    "category": result["category"],
                    "results_count": result["results_count"],
                    "timeframe": result["timeframe"],
                    "content_filename": result.get("content_filename")
                })

        # Scrape Results f√ºr Location
        for result in self.results["scrape"]:
            if result["location"].lower() == location.lower():
                summary["scrape_results"].append({
                    "category": result["category"],
                    "successful_scrapes": result["successful_scrapes"],
                    "content_filename": result.get("content_filename")
                })

        summary["total_content_items"] = len(summary["search_results"]) + len(summary["scrape_results"])

        return summary

# =============================================================================
# INITIALISIERUNG MIT FOUNDATION-SYSTEM
# =============================================================================

# Firecrawl Worker V2 mit Foundation-System initialisieren
if api_config.firecrawl_key:
    # Foundation-System Integration
    foundation_available = all([
        'config_manager' in globals() and config_manager,
        'time_manager' in globals() and time_manager,
        'persistence_manager' in globals() and persistence_manager
    ])

    if foundation_available:
        firecrawl_worker_v2 = FirecrawlWorkerV2(
            api_key=api_config.firecrawl_key,
            config_manager=config_manager,
            time_manager=time_manager,
            persistence_manager=persistence_manager
        )
        print("üöÄ Firecrawl Worker V2 mit Foundation-System bereit")
    else:
        # Fallback ohne Foundation
        firecrawl_worker_v2 = FirecrawlWorkerV2(api_key=api_config.firecrawl_key)
        print("‚ö†Ô∏è Firecrawl Worker V2 im Legacy-Modus (Foundation-System fehlt)")

else:
    print("‚ùå Firecrawl Worker V2 nicht verf√ºgbar - API Key fehlt")
    firecrawl_worker_v2 = None

# =============================================================================
# DIREKTER TEST - Foundation-Integration
# =============================================================================

if firecrawl_worker_v2 and foundation_available:
    print("\nüß™ TESTE FIRECRAWL WORKER V2 MIT FOUNDATION-SYSTEM")
    print("=" * 60)

    try:
        test_location = "m√ºnchen"

        # Test 1: Search mit Foundation
        print("üìç Test 1: Search mit Foundation-System")
        search_result = firecrawl_worker_v2.search_with_foundation(
            category="wetter",
            location=test_location,
            timeframe="heute",
            limit=3
        )

        if search_result:
            print(f"  ‚úÖ Search: {search_result['results_count']} Ergebnisse")
            print(f"  üìÅ Content File: {search_result.get('content_filename', 'N/A')}")
            print(f"  üÜî Query ID: {search_result['query_id']}")

        # Test 2: Scrape mit Foundation
        print(f"\nüìç Test 2: Scrape mit Foundation-System")
        scrape_result = firecrawl_worker_v2.scrape_with_foundation(
            category="rathaus",
            location=test_location
        )

        if scrape_result:
            print(f"  ‚úÖ Scrape: {scrape_result['successful_scrapes']} URLs")
            print(f"  üìÅ Content File: {scrape_result.get('content_filename', 'N/A')}")
            print(f"  üÜî Query ID: {scrape_result['query_id']}")

        # Test 3: Kategorie-Processing
        print(f"\nüìç Test 3: Auto Kategorie-Processing")
        category_result = firecrawl_worker_v2.process_category_for_newsletter("events", test_location)

        if category_result:
            print(f"  ‚úÖ Events: {category_result['method']} erfolgreich")

        # Test 4: Newsletter Summary
        print(f"\nüìç Test 4: Newsletter Summary")
        newsletter_summary = firecrawl_worker_v2.get_newsletter_summary_for_location(test_location)

        print(f"  üìä Location: {newsletter_summary['location']}")
        print(f"  üîç Search Results: {len(newsletter_summary['search_results'])}")
        print(f"  üï∑Ô∏è Scrape Results: {len(newsletter_summary['scrape_results'])}")
        print(f"  üìÑ Total Content Items: {newsletter_summary['total_content_items']}")

        print(f"\n‚úÖ Alle Firecrawl Worker V2 Tests erfolgreich!")

    except Exception as test_error:
        print(f"‚ùå Test Fehler: {test_error}")
        import traceback
        print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 60)
print("‚úÖ Firecrawl Worker V2 Setup komplett\n")

‚úÖ Firecrawl Worker V2 initialisiert
üèóÔ∏è Foundation Integration: ‚úÖ
‚öôÔ∏è API Config geladen: 4 Parameter
üöÄ Firecrawl Worker V2 mit Foundation-System bereit

üß™ TESTE FIRECRAWL WORKER V2 MIT FOUNDATION-SYSTEM
üìç Test 1: Search mit Foundation-System
üîç Query-Log gespeichert: d6edc04d
üîç Firecrawl Search: wetter in m√ºnchen
üìù Enhanced Query: Wetter m√ºnchen heute Vorhersage Deutschland
‚è∞ Timeframe: heute
üìä Limit: 3
üíæ Raw Response gespeichert: 002_firecrawl_search_wetter_13-35-47.json
‚úÖ 3 Suchergebnisse erhalten
üìÑ Processed Content gespeichert: wetter_13-35-47.md
  1. Wetter M√ºnchen heute - aktuelle Wettervorhersage f√ºr M√ºnchen...
     üîó https://www.wetter.com/deutschland/muenchen/DE0006515.html
  2. Wetter M√ºnchen heute Vorhersage 14 - 21 Tage | wetter.de...
     üîó https://www.wetter.de/wetter/r/62428
  ‚úÖ Search: 3 Ergebnisse
  üìÅ Content File: /content/drive/MyDrive/Newsletter_System/data/sessions/2025-07-20_13-35_m√ºnchen/processed_conten

In [8]:
# =============================================================================
# ZELLE 3: Claude Worker V2 - Foundation-System Integration
# =============================================================================
# @title Claude Worker V2 - Mit ConfigManager + TimeContext + DataPersistence + Web Search
import anthropic

class ClaudeWorkerV2:
    """
    Claude Worker V2 - Vollst√§ndig integriert mit Foundation-System
    - ConfigManager f√ºr intelligente Web Search Queries
    - TimeContextManager f√ºr zeitliche Kontextualisierung
    - DataPersistenceManager f√ºr Citation-Tracking und Audit-Trail
    - Anthropic Web Search API f√ºr aktuelle Informationen
    """

    def __init__(self, api_key, config_manager=None, time_manager=None, persistence_manager=None):
        """
        Initialisiert Claude Worker V2

        Args:
            api_key: Anthropic API Key
            config_manager: ConfigManager Instance
            time_manager: TimeContextManager Instance
            persistence_manager: DataPersistenceManager Instance
        """
        self.client = anthropic.Anthropic(api_key=api_key)

        # Foundation-System Integration
        self.config_manager = config_manager
        self.time_manager = time_manager
        self.persistence_manager = persistence_manager

        # Legacy Support falls Foundation nicht verf√ºgbar
        self.has_foundation = all([config_manager, time_manager, persistence_manager])

        # Worker State
        self.search_results = []
        self.citation_registry = {}  # Tracking aller Citations f√ºr Fact-Checking

        print(f"‚úÖ Claude Worker V2 initialisiert")
        print(f"üèóÔ∏è Foundation Integration: {'‚úÖ' if self.has_foundation else '‚ùå'}")

        if self.has_foundation:
            # API-Config aus ConfigManager holen
            self.api_config = self.config_manager.get_api_config("claude")
            self.model = self.api_config.get("model", "claude-3-5-haiku-latest")
            self.web_search_config = self.api_config.get("web_search", {})
            print(f"‚öôÔ∏è API Config geladen: Model {self.model}")
            print(f"üîç Web Search Config: {len(self.web_search_config)} Parameter")
        else:
            self.model = "claude-3-5-haiku-latest"
            self.web_search_config = {}
            print("‚ö†Ô∏è L√§uft im Legacy-Modus ohne Foundation-System")

    # =========================================================================
    # WEB SEARCH - Foundation-System Integration
    # =========================================================================

    def web_search_with_foundation(self, category, location, timeframe="heute", max_searches=None):
        """
        Intelligente Web Search mit Foundation-System Integration

        Args:
            category: Newsletter-Kategorie (z.B. "wetter", "events")
            location: Ortsname
            timeframe: Zeitrahmen (default: optimal f√ºr Kategorie)
            max_searches: Max Web Searches (default: aus Config)

        Returns:
            dict: Strukturierte Search-Ergebnisse mit Citations
        """
        if not self.has_foundation:
            print("‚ùå Foundation-System erforderlich f√ºr diese Methode")
            return None

        try:
            # 1. Optimal Timeframe bestimmen
            if timeframe == "heute":  # Auto-detect
                timeframe = self.time_manager.get_optimal_timeframe_for_category(category)

            # 2. Search Template aus Config holen
            search_template = self.config_manager.get_search_template(category, "claude_web", location)

            # 3. Template mit Zeit-Kontext anreichern
            enhanced_query = self.time_manager.inject_time_context(search_template, timeframe, "claude_web")

            # 4. Web Search Parameter aus Config
            if max_searches is None:
                max_searches = self.web_search_config.get("max_uses", 5)

            # 5. User Location f√ºr Web Search
            user_location = self.web_search_config.get("user_location", {})
            if location.lower() == "m√ºnchen":
                user_location.update({
                    "city": "Munich",
                    "region": "Bavaria",
                    "country": "DE"
                })

            # 6. Query Log speichern
            query_id = self.persistence_manager.save_query_log(
                category=category,
                api="claude_web_search",
                original_query=search_template,
                enhanced_query=enhanced_query,
                time_context=self.time_manager.get_time_context(timeframe, "claude_web")
            )

            print(f"üîç Claude Web Search: {category} in {location}")
            print(f"üìù Enhanced Query: {enhanced_query}")
            print(f"‚è∞ Timeframe: {timeframe}")
            print(f"üåê Max Searches: {max_searches}")

            # 7. Web Search Tools konfigurieren
            web_search_tool = {
                "type": "web_search_20250305",
                "name": "web_search",
                "max_uses": max_searches
            }

            if user_location:
                web_search_tool["user_location"] = user_location

            # 8. Claude API Call mit Web Search
            response = self.client.messages.create(
                model=self.model,
                max_tokens=self.api_config.get("max_tokens", 2000),
                messages=[
                    {
                        "role": "user",
                        "content": enhanced_query
                    }
                ],
                tools=[web_search_tool]
            )

            # 9. Raw Response speichern
            raw_filename = self.persistence_manager.save_raw_response(
                source="claude_web_search",
                response=response,
                query=enhanced_query,
                category=category,
                metadata={
                    "query_id": query_id,
                    "timeframe": timeframe,
                    "location": location,
                    "model": self.model,
                    "max_searches": max_searches
                }
            )

            # 10. Response verarbeiten
            if response and response.content:
                processed_result = self._process_claude_web_search_response(
                    response, category, location, timeframe, query_id, raw_filename
                )

                if processed_result:
                    self.search_results.append(processed_result)

                    # 11. Processed Content als Markdown speichern
                    markdown_content = self._format_claude_search_as_markdown(processed_result)
                    content_filename = self.persistence_manager.save_processed_content(
                        category=category,
                        content=markdown_content,
                        content_type="markdown",
                        metadata={
                            "query_id": query_id,
                            "source": "claude_web_search",
                            "citations_count": len(processed_result.get("citations", [])),
                            "web_searches_used": processed_result.get("web_searches_used", 0)
                        }
                    )

                    processed_result["content_filename"] = content_filename

                    print(f"‚úÖ Claude Web Search erfolgreich!")
                    print(f"üìä Web Searches verwendet: {processed_result.get('web_searches_used', 0)}")
                    print(f"üìö Citations: {len(processed_result.get('citations', []))}")
                    print(f"üìù Content: {len(processed_result.get('text_content', ''))} Zeichen")

                    return processed_result

            print("‚ùå Keine Response von Claude erhalten")
            return None

        except Exception as e:
            print(f"‚ùå Claude Web Search Fehler: {e}")

            # Fehler in Persistence speichern
            if self.persistence_manager:
                error_log = {
                    "error": str(e),
                    "category": category,
                    "location": location,
                    "query": enhanced_query if 'enhanced_query' in locals() else "N/A"
                }
                self.persistence_manager.save_raw_response(
                    source="claude_web_search_error",
                    response=error_log,
                    category=category
                )

            return None

    def _process_claude_web_search_response(self, response, category, location, timeframe, query_id, raw_filename):
        """Verarbeitet Claude Web Search Response und extrahiert Citations"""

        processed_result = {
            "method": "web_search",
            "category": category,
            "location": location,
            "timeframe": timeframe,
            "query_id": query_id,
            "raw_filename": raw_filename,
            "timestamp": datetime.now().isoformat(),
            "text_content": "",
            "citations": [],
            "web_searches_used": 0,
            "content_blocks": []
        }

        # Content Blocks verarbeiten
        for content_block in response.content:
            block_info = {
                "type": content_block.type,
                "content": ""
            }

            if content_block.type == 'text':
                # Text Content
                block_info["content"] = content_block.text
                processed_result["text_content"] += content_block.text + "\n"

                # Citations aus Text-Block extrahieren
                if hasattr(content_block, 'citations') and content_block.citations:
                    for citation in content_block.citations:
                        citation_info = {
                            "type": citation.type,
                            "url": getattr(citation, 'url', ''),
                            "title": getattr(citation, 'title', ''),
                            "cited_text": getattr(citation, 'cited_text', '')
                        }
                        processed_result["citations"].append(citation_info)

                        # Citation Registry f√ºr Fact-Checking
                        citation_id = f"{query_id}_{len(processed_result['citations'])}"
                        self.citation_registry[citation_id] = {
                            "category": category,
                            "location": location,
                            "citation": citation_info,
                            "query_id": query_id,
                            "timestamp": datetime.now().isoformat()
                        }

            elif content_block.type == 'server_tool_use':
                # Web Search Tool Use
                block_info["tool_name"] = getattr(content_block, 'name', '')
                block_info["tool_input"] = getattr(content_block, 'input', {})

            elif content_block.type == 'web_search_tool_result':
                # Web Search Results
                block_info["tool_use_id"] = getattr(content_block, 'tool_use_id', '')
                if hasattr(content_block, 'content'):
                    block_info["search_results"] = content_block.content

            processed_result["content_blocks"].append(block_info)

        # Web Search Usage aus Response extrahieren
        if hasattr(response, 'usage') and hasattr(response.usage, 'server_tool_use'):
            server_tool_use = response.usage.server_tool_use
            if hasattr(server_tool_use, 'web_search_requests'):
                processed_result["web_searches_used"] = server_tool_use.web_search_requests
            elif isinstance(server_tool_use, dict):
                processed_result["web_searches_used"] = server_tool_use.get('web_search_requests', 0)

        return processed_result

    def _format_claude_search_as_markdown(self, result):
        """Formatiert Claude Web Search Ergebnisse als Markdown"""

        markdown = f"# {result['category'].title()} - {result['location'].title()} (Claude Web Search)\n\n"
        markdown += f"**Generiert:** {result['timestamp']}\n"
        markdown += f"**Timeframe:** {result['timeframe']}\n"
        markdown += f"**Query ID:** {result['query_id']}\n"
        markdown += f"**Web Searches verwendet:** {result['web_searches_used']}\n"
        markdown += f"**Citations:** {len(result['citations'])}\n\n"

        # Haupt-Content
        markdown += "## Claude's Antwort\n\n"
        if result['text_content']:
            markdown += result['text_content'] + "\n\n"

        # Citations
        if result['citations']:
            markdown += "## Quellen\n\n"
            for i, citation in enumerate(result['citations'], 1):
                markdown += f"**[{i}] {citation.get('title', 'Keine Titel')}**\n"
                markdown += f"URL: {citation.get('url', 'Keine URL')}\n"
                if citation.get('cited_text'):
                    markdown += f"Zitiert: \"{citation['cited_text']}\"\n"
                markdown += "\n"

        # Content Blocks (f√ºr Debugging)
        if result.get('content_blocks'):
            markdown += "## Content Blocks (Debug)\n\n"
            for i, block in enumerate(result['content_blocks']):
                markdown += f"### Block {i+1}: {block['type']}\n"
                if block['type'] == 'server_tool_use':
                    markdown += f"Tool: {block.get('tool_name', 'N/A')}\n"
                    if block.get('tool_input'):
                        markdown += f"Input: {block['tool_input']}\n"
                elif block['content']:
                    content_preview = block['content'][:200] + "..." if len(block['content']) > 200 else block['content']
                    markdown += f"```\n{content_preview}\n```\n"
                markdown += "\n"

        return markdown

    # =========================================================================
    # CATEGORY-SPECIFIC SEARCH
    # =========================================================================

    def search_specific_category(self, category, location, timeframe=None, focused_query=None):
        """
        Kategorie-spezifische Suche mit optionalem focused Query

        Args:
            category: Newsletter-Kategorie
            location: Ortsname
            timeframe: Zeitrahmen (optional, sonst optimal f√ºr Kategorie)
            focused_query: Spezielle Query statt Config-Template (optional)

        Returns:
            dict: Kategorie-spezifische Suchergebnisse
        """
        if not self.has_foundation:
            return self.web_search_with_foundation(category, location, timeframe or "heute")

        # Timeframe automatisch bestimmen falls nicht angegeben
        if timeframe is None:
            timeframe = self.time_manager.get_optimal_timeframe_for_category(category)

        # Focused Query oder Config Template verwenden
        if focused_query:
            enhanced_query = self.time_manager.inject_time_context(focused_query, timeframe, "claude_web")
            print(f"üéØ Focused Query: {focused_query}")
        else:
            result = self.web_search_with_foundation(category, location, timeframe)
            return result

        # Custom Search durchf√ºhren
        try:
            query_id = self.persistence_manager.save_query_log(
                category=category,
                api="claude_web_focused",
                original_query=focused_query,
                enhanced_query=enhanced_query,
                time_context=self.time_manager.get_time_context(timeframe, "claude_web")
            )

            print(f"üéØ Focused Claude Search: {category}")
            print(f"üìù Enhanced Query: {enhanced_query}")

            # Reduzierte Web Search f√ºr focused Queries
            web_search_tool = {
                "type": "web_search_20250305",
                "name": "web_search",
                "max_uses": 3  # Weniger Searches f√ºr focused Queries
            }

            response = self.client.messages.create(
                model=self.model,
                max_tokens=1500,  # K√ºrzere Antworten f√ºr focused Queries
                messages=[
                    {
                        "role": "user",
                        "content": enhanced_query
                    }
                ],
                tools=[web_search_tool]
            )

            # Response Processing wie bei web_search_with_foundation
            if response and response.content:
                processed_result = self._process_claude_web_search_response(
                    response, category, location, timeframe, query_id, None
                )

                if processed_result:
                    processed_result["search_type"] = "focused"
                    print(f"‚úÖ Focused Search erfolgreich: {len(processed_result.get('text_content', ''))} Zeichen")
                    return processed_result

            return None

        except Exception as e:
            print(f"‚ùå Focused Search Fehler: {e}")
            return None

    # =========================================================================
    # NEWSLETTER INTEGRATION
    # =========================================================================

    def process_category_for_newsletter(self, category, location):
        """
        Verarbeitet eine Kategorie komplett f√ºr Newsletter-Generation
        Nutzt immer Web Search (Claude's St√§rke)

        Args:
            category: Newsletter-Kategorie
            location: Ortsname

        Returns:
            dict: Vollst√§ndig verarbeitete Kategorie-Daten
        """
        print(f"üéØ Verarbeite {category} f√ºr {location} via Claude Web Search")

        return self.web_search_with_foundation(category, location)

    def get_citation_summary(self, location=None):
        """
        Gibt Zusammenfassung aller Citations zur√ºck (f√ºr Fact-Checking)

        Args:
            location: Filter nach Location (optional)

        Returns:
            dict: Citation Summary
        """
        filtered_citations = {}

        if location:
            for citation_id, citation_data in self.citation_registry.items():
                if citation_data["location"].lower() == location.lower():
                    filtered_citations[citation_id] = citation_data
        else:
            filtered_citations = self.citation_registry

        # Summary Statistics
        summary = {
            "total_citations": len(filtered_citations),
            "categories": set(),
            "sources": set(),
            "citations": filtered_citations
        }

        for citation_data in filtered_citations.values():
            summary["categories"].add(citation_data["category"])
            if citation_data["citation"].get("url"):
                summary["sources"].add(citation_data["citation"]["url"])

        summary["categories"] = list(summary["categories"])
        summary["sources"] = list(summary["sources"])

        return summary

    def get_newsletter_summary_for_location(self, location):
        """Gibt Zusammenfassung aller Claude-Daten f√ºr einen Ort zur√ºck"""

        summary = {
            "location": location,
            "timestamp": datetime.now().isoformat(),
            "web_search_results": [],
            "total_citations": 0,
            "total_web_searches_used": 0,
            "categories_processed": set()
        }

        # Web Search Results f√ºr Location
        for result in self.search_results:
            if result["location"].lower() == location.lower():
                summary["web_search_results"].append({
                    "category": result["category"],
                    "timeframe": result["timeframe"],
                    "citations_count": len(result.get("citations", [])),
                    "web_searches_used": result.get("web_searches_used", 0),
                    "content_filename": result.get("content_filename"),
                    "query_id": result["query_id"]
                })

                summary["total_citations"] += len(result.get("citations", []))
                summary["total_web_searches_used"] += result.get("web_searches_used", 0)
                summary["categories_processed"].add(result["category"])

        summary["categories_processed"] = list(summary["categories_processed"])
        summary["total_search_results"] = len(summary["web_search_results"])

        return summary

# =============================================================================
# INITIALISIERUNG MIT FOUNDATION-SYSTEM
# =============================================================================

# Claude Worker V2 mit Foundation-System initialisieren
if api_config.claude_key:
    # Foundation-System Integration
    foundation_available = all([
        'config_manager' in globals() and config_manager,
        'time_manager' in globals() and time_manager,
        'persistence_manager' in globals() and persistence_manager
    ])

    if foundation_available:
        claude_worker_v2 = ClaudeWorkerV2(
            api_key=api_config.claude_key,
            config_manager=config_manager,
            time_manager=time_manager,
            persistence_manager=persistence_manager
        )
        print("üöÄ Claude Worker V2 mit Foundation-System bereit")
    else:
        # Fallback ohne Foundation
        claude_worker_v2 = ClaudeWorkerV2(api_key=api_config.claude_key)
        print("‚ö†Ô∏è Claude Worker V2 im Legacy-Modus (Foundation-System fehlt)")

else:
    print("‚ùå Claude Worker V2 nicht verf√ºgbar - API Key fehlt")
    claude_worker_v2 = None

# =============================================================================
# DIREKTER TEST - Foundation-Integration
# =============================================================================

if claude_worker_v2 and foundation_available:
    print("\nüß™ TESTE CLAUDE WORKER V2 MIT FOUNDATION-SYSTEM")
    print("=" * 60)

    try:
        test_location = "m√ºnchen"

        # Test 1: Web Search mit Foundation
        print("üìç Test 1: Web Search mit Foundation-System")
        search_result = claude_worker_v2.web_search_with_foundation(
            category="nachrichten",
            location=test_location,
            timeframe="heute",
            max_searches=3
        )

        if search_result:
            print(f"  ‚úÖ Web Search: {len(search_result.get('text_content', ''))} Zeichen Content")
            print(f"  üìÅ Content File: {search_result.get('content_filename', 'N/A')}")
            print(f"  üÜî Query ID: {search_result['query_id']}")
            print(f"  üìö Citations: {len(search_result.get('citations', []))}")

        # Test 2: Kategorie-spezifische Suche
        print(f"\nüìç Test 2: Kategorie-Processing")
        category_result = claude_worker_v2.process_category_for_newsletter("verkehr", test_location)

        if category_result:
            print(f"  ‚úÖ Verkehr: Web Search erfolgreich")
            print(f"  üåê Web Searches: {category_result.get('web_searches_used', 0)}")
            print(f"  üìö Citations: {len(category_result.get('citations', []))}")

        # Test 3: Citation Summary
        print(f"\nüìç Test 3: Citation Summary")
        citation_summary = claude_worker_v2.get_citation_summary(test_location)

        print(f"  üìä Total Citations: {citation_summary['total_citations']}")
        print(f"  üìã Kategorien: {citation_summary['categories']}")
        print(f"  üåê Unique Sources: {len(citation_summary['sources'])}")

        # Test 4: Newsletter Summary
        print(f"\nüìç Test 4: Newsletter Summary")
        newsletter_summary = claude_worker_v2.get_newsletter_summary_for_location(test_location)

        print(f"  üìä Location: {newsletter_summary['location']}")
        print(f"  üîç Search Results: {newsletter_summary['total_search_results']}")
        print(f"  üìö Total Citations: {newsletter_summary['total_citations']}")
        print(f"  üåê Total Web Searches: {newsletter_summary['total_web_searches_used']}")
        print(f"  üìã Kategorien: {newsletter_summary['categories_processed']}")

        print(f"\n‚úÖ Alle Claude Worker V2 Tests erfolgreich!")

    except Exception as test_error:
        print(f"‚ùå Test Fehler: {test_error}")
        import traceback
        print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 60)
print("‚úÖ Claude Worker V2 Setup komplett\n")

‚úÖ Claude Worker V2 initialisiert
üèóÔ∏è Foundation Integration: ‚úÖ
‚öôÔ∏è API Config geladen: Model claude-3-5-haiku-latest
üîç Web Search Config: 2 Parameter
üöÄ Claude Worker V2 mit Foundation-System bereit

üß™ TESTE CLAUDE WORKER V2 MIT FOUNDATION-SYSTEM
üìç Test 1: Web Search mit Foundation-System
üîç Query-Log gespeichert: e28abba4
üîç Claude Web Search: nachrichten in m√ºnchen
üìù Enhanced Query: lokale Nachrichten m√ºnchen heute heute aktuell
‚è∞ Timeframe: heute
üåê Max Searches: 3
üíæ Raw Response gespeichert: 007_claude_web_search_nachrichten_13-36-28.json
üìÑ Processed Content gespeichert: nachrichten_13-36-28.md
‚úÖ Claude Web Search erfolgreich!
üìä Web Searches verwendet: 1
üìö Citations: 8
üìù Content: 1660 Zeichen
  ‚úÖ Web Search: 1660 Zeichen Content
  üìÅ Content File: /content/drive/MyDrive/Newsletter_System/data/sessions/2025-07-20_13-35_m√ºnchen/processed_content/nachrichten_13-36-28.md
  üÜî Query ID: e28abba4
  üìö Citations: 8

üìç Test 2:

In [9]:
# =============================================================================
# ZELLE 4: Perplexity Worker V2 - Foundation-System Integration
# =============================================================================
# @title Perplexity Worker V2 - Mit ConfigManager + TimeContext + DataPersistence
from openai import OpenAI

class PerplexityWorkerV2:
    """
    Perplexity Worker V2 - Vollst√§ndig integriert mit Foundation-System
    - ConfigManager f√ºr intelligente Search Queries
    - TimeContextManager f√ºr Echtzeit-Kontextualisierung
    - DataPersistenceManager f√ºr Search-Result Tracking
    - Perplexity API f√ºr zus√§tzliche Search-Perspektiven
    """

    def __init__(self, api_key, config_manager=None, time_manager=None, persistence_manager=None):
        """
        Initialisiert Perplexity Worker V2

        Args:
            api_key: Perplexity API Key
            config_manager: ConfigManager Instance
            time_manager: TimeContextManager Instance
            persistence_manager: DataPersistenceManager Instance
        """
        self.client = OpenAI(
            api_key=api_key,
            base_url="https://api.perplexity.ai"
        )

        # Foundation-System Integration
        self.config_manager = config_manager
        self.time_manager = time_manager
        self.persistence_manager = persistence_manager

        # Legacy Support falls Foundation nicht verf√ºgbar
        self.has_foundation = all([config_manager, time_manager, persistence_manager])

        # Worker State
        self.search_results = []
        self.source_registry = {}  # Tracking aller Quellen f√ºr Cross-Validation

        print(f"‚úÖ Perplexity Worker V2 initialisiert")
        print(f"üèóÔ∏è Foundation Integration: {'‚úÖ' if self.has_foundation else '‚ùå'}")

        if self.has_foundation:
            # API-Config aus ConfigManager holen
            self.api_config = self.config_manager.get_api_config("perplexity")
            self.model = self.api_config.get("model", "sonar-pro")
            self.default_params = self.api_config.get("extra_body", {})
            print(f"‚öôÔ∏è API Config geladen: Model {self.model}")
            print(f"üîß Default Params: {len(self.default_params)} Parameter")
        else:
            self.model = "sonar-pro"
            self.default_params = {
                "search_mode": "web",
                "return_images": False,
                "return_related_questions": False
            }
            print("‚ö†Ô∏è L√§uft im Legacy-Modus ohne Foundation-System")

    # =========================================================================
    # SEARCH - Foundation-System Integration
    # =========================================================================

    def search_with_foundation(self, category, location, timeframe="heute", context_size="medium"):
        """
        Intelligente Search mit Foundation-System Integration

        Args:
            category: Newsletter-Kategorie (z.B. "wetter", "events")
            location: Ortsname
            timeframe: Zeitrahmen (default: optimal f√ºr Kategorie)
            context_size: Search context size ("low", "medium", "high")

        Returns:
            dict: Strukturierte Search-Ergebnisse mit Source-Tracking
        """
        if not self.has_foundation:
            print("‚ùå Foundation-System erforderlich f√ºr diese Methode")
            return None

        try:
            # 1. Optimal Timeframe bestimmen
            if timeframe == "heute":  # Auto-detect
                timeframe = self.time_manager.get_optimal_timeframe_for_category(category)

            # 2. Search Template aus Config holen
            search_template = self.config_manager.get_search_template(category, "perplexity", location)

            # 3. Template mit Zeit-Kontext anreichern
            enhanced_query = self.time_manager.inject_time_context(search_template, timeframe, "perplexity")

            # 4. Query Log speichern
            query_id = self.persistence_manager.save_query_log(
                category=category,
                api="perplexity_search",
                original_query=search_template,
                enhanced_query=enhanced_query,
                time_context=self.time_manager.get_time_context(timeframe, "perplexity")
            )

            print(f"üîç Perplexity Search: {category} in {location}")
            print(f"üìù Enhanced Query: {enhanced_query}")
            print(f"‚è∞ Timeframe: {timeframe}")
            print(f"üß† Context Size: {context_size}")

            # 5. Perplexity API-Parameter zusammensetzen
            extra_body = self.default_params.copy()
            extra_body["web_search_options"] = {
                "search_context_size": context_size,
                "user_location": {"country": "DE"}
            }

            # Zeit-spezifische Parameter hinzuf√ºgen
            time_context = self.time_manager.get_time_context(timeframe, "perplexity")
            if time_context.get("api_parameters", {}).get("date_hint"):
                enhanced_query += f" {time_context['api_parameters']['date_hint']}"

            # 6. Perplexity API Call
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": enhanced_query
                    }
                ],
                temperature=self.api_config.get("temperature", 0.2),
                top_p=self.api_config.get("top_p", 0.9),
                stream=False,
                extra_body=extra_body
            )

            # 7. Raw Response speichern
            raw_filename = self.persistence_manager.save_raw_response(
                source="perplexity_search",
                response=response,
                query=enhanced_query,
                category=category,
                metadata={
                    "query_id": query_id,
                    "timeframe": timeframe,
                    "location": location,
                    "model": self.model,
                    "context_size": context_size
                }
            )

            # 8. Response verarbeiten
            if response and response.choices:
                processed_result = self._process_perplexity_response(
                    response, category, location, timeframe, query_id, raw_filename
                )

                if processed_result:
                    self.search_results.append(processed_result)

                    # 9. Processed Content als Markdown speichern
                    markdown_content = self._format_perplexity_search_as_markdown(processed_result)
                    content_filename = self.persistence_manager.save_processed_content(
                        category=category,
                        content=markdown_content,
                        content_type="markdown",
                        metadata={
                            "query_id": query_id,
                            "source": "perplexity_search",
                            "sources_count": len(processed_result.get("sources", [])),
                            "context_size": context_size
                        }
                    )

                    processed_result["content_filename"] = content_filename

                    print(f"‚úÖ Perplexity Search erfolgreich!")
                    print(f"üìù Content: {len(processed_result.get('content', ''))} Zeichen")
                    print(f"üìö Sources: {len(processed_result.get('sources', []))}")

                    return processed_result

            print("‚ùå Keine Response von Perplexity erhalten")
            return None

        except Exception as e:
            print(f"‚ùå Perplexity Search Fehler: {e}")

            # Fehler in Persistence speichern
            if self.persistence_manager:
                error_log = {
                    "error": str(e),
                    "category": category,
                    "location": location,
                    "query": enhanced_query if 'enhanced_query' in locals() else "N/A"
                }
                self.persistence_manager.save_raw_response(
                    source="perplexity_search_error",
                    response=error_log,
                    category=category
                )

            return None

    def _process_perplexity_response(self, response, category, location, timeframe, query_id, raw_filename):
        """Verarbeitet Perplexity Response und extrahiert Sources"""

        # Hauptinhalt aus Response
        main_content = response.choices[0].message.content

        processed_result = {
            "method": "search",
            "category": category,
            "location": location,
            "timeframe": timeframe,
            "query_id": query_id,
            "raw_filename": raw_filename,
            "timestamp": datetime.now().isoformat(),
            "content": main_content,
            "sources": [],
            "model": self.model
        }

        # Citations aus Response extrahieren (falls verf√ºgbar)
        # Perplexity hat manchmal citations in verschiedenen Formaten
        citations = []
        if hasattr(response, 'citations') and response.citations:
            citations = list(response.citations)

        # Sources aus Response extrahieren (falls verf√ºgbar)
        sources = []
        if hasattr(response, 'sources') and response.sources:
            sources = list(response.sources)
        elif hasattr(response, 'search_results') and response.search_results:
            sources = response.search_results

        # Sources verarbeiten und registrieren
        for i, source in enumerate(sources):
            if isinstance(source, dict):
                source_info = {
                    "index": i + 1,
                    "url": source.get("url", ""),
                    "title": source.get("title", f"Quelle {i+1}"),
                    "snippet": source.get("snippet", source.get("description", "")),
                    "date": source.get("date", "")
                }
            else:
                # Fallback f√ºr String-Sources
                source_info = {
                    "index": i + 1,
                    "url": str(source),
                    "title": f"Quelle {i+1}",
                    "snippet": "",
                    "date": ""
                }

            processed_result["sources"].append(source_info)

            # Source Registry f√ºr Cross-Validation
            source_id = f"{query_id}_source_{i+1}"
            self.source_registry[source_id] = {
                "category": category,
                "location": location,
                "source": source_info,
                "query_id": query_id,
                "timestamp": datetime.now().isoformat()
            }

        # Citations zu Sources hinzuf√ºgen falls verf√ºgbar
        if citations:
            processed_result["citations"] = citations
            for citation in citations:
                print(f"üì∞ Citation: {citation}")

        return processed_result

    def _format_perplexity_search_as_markdown(self, result):
        """Formatiert Perplexity Search Ergebnisse als Markdown"""

        markdown = f"# {result['category'].title()} - {result['location'].title()} (Perplexity Search)\n\n"
        markdown += f"**Generiert:** {result['timestamp']}\n"
        markdown += f"**Timeframe:** {result['timeframe']}\n"
        markdown += f"**Query ID:** {result['query_id']}\n"
        markdown += f"**Model:** {result['model']}\n"
        markdown += f"**Sources:** {len(result['sources'])}\n\n"

        # Haupt-Content
        markdown += "## Perplexity's Antwort\n\n"
        if result['content']:
            markdown += result['content'] + "\n\n"

        # Sources
        if result['sources']:
            markdown += "## Quellen\n\n"
            for source in result['sources']:
                markdown += f"**[{source['index']}] {source['title']}**\n"
                markdown += f"URL: {source['url']}\n"
                if source.get('snippet'):
                    markdown += f"Beschreibung: {source['snippet']}\n"
                if source.get('date'):
                    markdown += f"Datum: {source['date']}\n"
                markdown += "\n"

        # Citations falls verf√ºgbar
        if result.get('citations'):
            markdown += "## Citations\n\n"
            for i, citation in enumerate(result['citations'], 1):
                markdown += f"**Citation {i}:** {citation}\n"
            markdown += "\n"

        return markdown

    # =========================================================================
    # COMPARATIVE SEARCH
    # =========================================================================

    def comparative_search(self, category, location, comparison_topic=None):
        """
        Vergleichende Suche f√ºr Cross-Validation mit anderen Workern

        Args:
            category: Newsletter-Kategorie
            location: Ortsname
            comparison_topic: Spezifisches Thema f√ºr Vergleich (optional)

        Returns:
            dict: Vergleichende Search-Ergebnisse
        """
        if not self.has_foundation:
            return self.search_with_foundation(category, location)

        # Spezielle Query f√ºr Vergleichssuche
        if comparison_topic:
            focused_query = f"{comparison_topic} in {location} aktuell Deutschland"
        else:
            # Standard Template verwenden
            return self.search_with_foundation(category, location, context_size="high")

        try:
            query_id = self.persistence_manager.save_query_log(
                category=category,
                api="perplexity_comparative",
                original_query=comparison_topic or category,
                enhanced_query=focused_query,
                time_context={"comparison_mode": True}
            )

            print(f"üîÑ Comparative Search: {category}")
            print(f"üìù Focused Query: {focused_query}")

            # Perplexity mit gro√üem Kontext f√ºr umfassende Suche
            extra_body = self.default_params.copy()
            extra_body["web_search_options"] = {
                "search_context_size": "high",  # Fixed: "high" statt "large"
                "user_location": {"country": "DE"}
            }

            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": focused_query
                    }
                ],
                temperature=0.1,  # Niedriger f√ºr faktische Vergleiche
                top_p=0.9,
                stream=False,
                extra_body=extra_body
            )

            if response and response.choices:
                processed_result = self._process_perplexity_response(
                    response, category, location, "comparative", query_id, None
                )

                if processed_result:
                    processed_result["search_type"] = "comparative"
                    processed_result["comparison_topic"] = comparison_topic
                    print(f"‚úÖ Comparative Search erfolgreich: {len(processed_result.get('sources', []))} Sources")
                    return processed_result

            return None

        except Exception as e:
            print(f"‚ùå Comparative Search Fehler: {e}")
            return None

    # =========================================================================
    # MULTI-ANGLE SEARCH
    # =========================================================================

    def multi_angle_search(self, category, location, angles=None):
        """
        Multi-Angle Search f√ºr umfassende Kategorie-Abdeckung

        Args:
            category: Newsletter-Kategorie
            location: Ortsname
            angles: Liste von Suchperspektiven (optional)

        Returns:
            list: Multiple Search-Ergebnisse aus verschiedenen Winkeln
        """
        if not angles:
            # Standard-Angles basierend auf Kategorie
            angle_mapping = {
                "wetter": ["Wettervorhersage", "Wetterwarnungen", "Klima aktuell"],
                "verkehr": ["Verkehrslage", "√ñPNV St√∂rungen", "Baustellen"],
                "events": ["Kulturveranstaltungen", "Sport Events", "Festivals"],
                "nachrichten": ["Lokalnachrichten", "Politik lokal", "Wirtschaft lokal"],
                "sport": ["Sportergebnisse", "Spielberichte", "Vereinsnachrichten"]
            }

            angles = angle_mapping.get(category.lower(), [category])

        results = []

        print(f"üéØ Multi-Angle Search f√ºr {category}: {len(angles)} Perspektiven")

        for i, angle in enumerate(angles):
            print(f"  üìê Angle {i+1}/{len(angles)}: {angle}")

            result = self.comparative_search(category, location, angle)

            if result:
                result["angle"] = angle
                result["angle_index"] = i + 1
                results.append(result)
                print(f"    ‚úÖ {len(result.get('sources', []))} Sources gefunden")
            else:
                print(f"    ‚ùå Keine Ergebnisse f√ºr {angle}")

        print(f"üìä Multi-Angle Search: {len(results)}/{len(angles)} Angles erfolgreich")

        # Kombinierte Ergebnisse speichern
        if results and self.persistence_manager:
            combined_content = self._format_multi_angle_results(results, category, location)
            self.persistence_manager.save_processed_content(
                category=f"{category}_multi_angle",
                content=combined_content,
                content_type="markdown",
                metadata={
                    "angles_count": len(results),
                    "total_sources": sum(len(r.get('sources', [])) for r in results)
                }
            )

        return results

    def _format_multi_angle_results(self, results, category, location):
        """Formatiert Multi-Angle Search Ergebnisse als kombiniertes Markdown"""

        markdown = f"# {category.title()} Multi-Angle Search - {location.title()}\n\n"
        markdown += f"**Generiert:** {datetime.now().isoformat()}\n"
        markdown += f"**Angles:** {len(results)}\n"
        markdown += f"**Total Sources:** {sum(len(r.get('sources', [])) for r in results)}\n\n"

        for result in results:
            markdown += f"## Angle: {result.get('angle', 'Unbekannt')}\n\n"

            # Content
            if result.get('content'):
                content_preview = result['content'][:500] + "..." if len(result['content']) > 500 else result['content']
                markdown += f"**Antwort:**\n{content_preview}\n\n"

            # Top 3 Sources
            if result.get('sources'):
                markdown += f"**Top Sources ({len(result['sources'])}):**\n"
                for source in result['sources'][:3]:
                    markdown += f"- [{source['index']}] {source['title']} ({source['url']})\n"
                markdown += "\n"

            markdown += "---\n\n"

        return markdown

    # =========================================================================
    # NEWSLETTER INTEGRATION
    # =========================================================================

    def process_category_for_newsletter(self, category, location, search_strategy="standard"):
        """
        Verarbeitet eine Kategorie f√ºr Newsletter mit verschiedenen Strategien

        Args:
            category: Newsletter-Kategorie
            location: Ortsname
            search_strategy: "standard", "comparative", "multi_angle"

        Returns:
            dict/list: Verarbeitete Kategorie-Daten
        """
        print(f"üéØ Verarbeite {category} f√ºr {location} via Perplexity ({search_strategy})")

        if search_strategy == "multi_angle":
            return self.multi_angle_search(category, location)
        elif search_strategy == "comparative":
            return self.comparative_search(category, location)
        else:
            return self.search_with_foundation(category, location)

    def get_source_summary(self, location=None):
        """
        Gibt Zusammenfassung aller Sources zur√ºck (f√ºr Cross-Validation)

        Args:
            location: Filter nach Location (optional)

        Returns:
            dict: Source Summary
        """
        filtered_sources = {}

        if location:
            for source_id, source_data in self.source_registry.items():
                if source_data["location"].lower() == location.lower():
                    filtered_sources[source_id] = source_data
        else:
            filtered_sources = self.source_registry

        # Summary Statistics
        summary = {
            "total_sources": len(filtered_sources),
            "categories": set(),
            "domains": set(),
            "sources": filtered_sources
        }

        for source_data in filtered_sources.values():
            summary["categories"].add(source_data["category"])
            source_url = source_data["source"].get("url", "")
            if source_url:
                try:
                    domain = source_url.split("//")[1].split("/")[0]
                    summary["domains"].add(domain)
                except:
                    pass

        summary["categories"] = list(summary["categories"])
        summary["domains"] = list(summary["domains"])

        return summary

    def get_newsletter_summary_for_location(self, location):
        """Gibt Zusammenfassung aller Perplexity-Daten f√ºr einen Ort zur√ºck"""

        summary = {
            "location": location,
            "timestamp": datetime.now().isoformat(),
            "search_results": [],
            "total_sources": 0,
            "categories_processed": set(),
            "search_strategies": set()
        }

        # Search Results f√ºr Location
        for result in self.search_results:
            if result["location"].lower() == location.lower():
                search_info = {
                    "category": result["category"],
                    "timeframe": result["timeframe"],
                    "sources_count": len(result.get("sources", [])),
                    "content_filename": result.get("content_filename"),
                    "query_id": result["query_id"],
                    "search_type": result.get("search_type", "standard")
                }

                summary["search_results"].append(search_info)
                summary["total_sources"] += len(result.get("sources", []))
                summary["categories_processed"].add(result["category"])
                summary["search_strategies"].add(result.get("search_type", "standard"))

        summary["categories_processed"] = list(summary["categories_processed"])
        summary["search_strategies"] = list(summary["search_strategies"])
        summary["total_search_results"] = len(summary["search_results"])

        return summary

# =============================================================================
# INITIALISIERUNG MIT FOUNDATION-SYSTEM
# =============================================================================

# Perplexity Worker V2 mit Foundation-System initialisieren
if api_config.perplexity_key:
    # Foundation-System Integration
    foundation_available = all([
        'config_manager' in globals() and config_manager,
        'time_manager' in globals() and time_manager,
        'persistence_manager' in globals() and persistence_manager
    ])

    if foundation_available:
        perplexity_worker_v2 = PerplexityWorkerV2(
            api_key=api_config.perplexity_key,
            config_manager=config_manager,
            time_manager=time_manager,
            persistence_manager=persistence_manager
        )
        print("üöÄ Perplexity Worker V2 mit Foundation-System bereit")
    else:
        # Fallback ohne Foundation
        perplexity_worker_v2 = PerplexityWorkerV2(api_key=api_config.perplexity_key)
        print("‚ö†Ô∏è Perplexity Worker V2 im Legacy-Modus (Foundation-System fehlt)")

else:
    print("‚ùå Perplexity Worker V2 nicht verf√ºgbar - API Key fehlt")
    perplexity_worker_v2 = None

# =============================================================================
# DIREKTER TEST - Foundation-Integration
# =============================================================================

if perplexity_worker_v2 and foundation_available:
    print("\nüß™ TESTE PERPLEXITY WORKER V2 MIT FOUNDATION-SYSTEM")
    print("=" * 65)

    try:
        test_location = "m√ºnchen"

        # Test 1: Standard Search mit Foundation
        print("üìç Test 1: Standard Search mit Foundation-System")
        search_result = perplexity_worker_v2.search_with_foundation(
            category="sport",
            location=test_location,
            timeframe="heute",
            context_size="medium"
        )

        if search_result:
            print(f"  ‚úÖ Search: {len(search_result.get('content', ''))} Zeichen Content")
            print(f"  üìÅ Content File: {search_result.get('content_filename', 'N/A')}")
            print(f"  üÜî Query ID: {search_result['query_id']}")
            print(f"  üìö Sources: {len(search_result.get('sources', []))}")

        # Test 2: Comparative Search
        print(f"\nüìç Test 2: Comparative Search")
        comparative_result = perplexity_worker_v2.comparative_search(
            category="wetter",
            location=test_location,
            comparison_topic="Wettervorhersage Wochenende"
        )

        if comparative_result:
            print(f"  ‚úÖ Comparative: {len(comparative_result.get('sources', []))} Sources")
            print(f"  üîç Topic: {comparative_result.get('comparison_topic', 'N/A')}")

        # Test 3: Source Summary
        print(f"\nüìç Test 3: Source Summary")
        source_summary = perplexity_worker_v2.get_source_summary(test_location)

        print(f"  üìä Total Sources: {source_summary['total_sources']}")
        print(f"  üìã Kategorien: {source_summary['categories']}")
        print(f"  üåê Unique Domains: {len(source_summary['domains'])}")

        # Test 4: Newsletter Summary
        print(f"\nüìç Test 4: Newsletter Summary")
        newsletter_summary = perplexity_worker_v2.get_newsletter_summary_for_location(test_location)

        print(f"  üìä Location: {newsletter_summary['location']}")
        print(f"  üîç Search Results: {newsletter_summary['total_search_results']}")
        print(f"  üìö Total Sources: {newsletter_summary['total_sources']}")
        print(f"  üìã Kategorien: {newsletter_summary['categories_processed']}")
        print(f"  üéØ Strategien: {newsletter_summary['search_strategies']}")

        print(f"\n‚úÖ Alle Perplexity Worker V2 Tests erfolgreich!")

    except Exception as test_error:
        print(f"‚ùå Test Fehler: {test_error}")
        import traceback
        print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 65)
print("‚úÖ Perplexity Worker V2 Setup komplett\n")

‚úÖ Perplexity Worker V2 initialisiert
üèóÔ∏è Foundation Integration: ‚úÖ
‚öôÔ∏è API Config geladen: Model sonar-pro
üîß Default Params: 4 Parameter
üöÄ Perplexity Worker V2 mit Foundation-System bereit

üß™ TESTE PERPLEXITY WORKER V2 MIT FOUNDATION-SYSTEM
üìç Test 1: Standard Search mit Foundation-System
üîç Query-Log gespeichert: 92d69e0c
üîç Perplexity Search: sport in m√ºnchen
üìù Enhanced Query: Sport m√ºnchen Ergebnisse Vereine heute
‚è∞ Timeframe: heute
üß† Context Size: medium
üíæ Raw Response gespeichert: 009_perplexity_search_sport_13-36-54.json
üì∞ Citation: https://www.leichtathletik.de/wettkaempfe/ergebnisse
üì∞ Citation: https://sport.sky.de/fc-bayern-muenchen-ergebnisse
üì∞ Citation: https://www.weltfussball.de/tore_tabellen/
üì∞ Citation: https://www.sueddeutsche.de/sport-liveticker/tennis/atp-kroatien-konzum-croatia-open/ma11423393/matej-dodig_francesco-passaro/
üì∞ Citation: https://www.sport.de/spielort/ve81/allianz-arena/ergebnisse/
üì∞ Citation: htt

In [10]:
# =============================================================================
# ZELLE 5a: Enhanced Content Processor - Source Integration f√ºr detaillierte Newsletter
# =============================================================================
# @title Enhanced Content Processor - Macht gesammelte Quellen f√ºr Newsletter nutzbar

import re
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Any

class EnhancedContentProcessor:
    """
    Enhanced Content Processor - L√§dt und verarbeitet alle gesammelten Quellen
    Macht Markdown-Files nutzbar f√ºr detaillierte Newsletter-Generation
    """

    def __init__(self, persistence_manager, config_manager=None):
        """
        Initialisiert Enhanced Content Processor

        Args:
            persistence_manager: DataPersistenceManager Instance
            config_manager: ConfigManager Instance (optional)
        """
        self.persistence_manager = persistence_manager
        self.config_manager = config_manager

        # Content Registry
        self.processed_categories = {}
        self.extracted_facts = {}
        self.source_citations = {}

        print(f"‚úÖ Enhanced Content Processor initialisiert")
        print(f"üèóÔ∏è Integration: DataPersistenceManager + ContentExtraction")

    # =========================================================================
    # CONTENT LOADING & PARSING
    # =========================================================================

    def load_all_category_content(self, location):
        """
        L√§dt alle gespeicherten Markdown-Files f√ºr einen Ort

        Args:
            location: Ortsname

        Returns:
            dict: Alle Category-Contents strukturiert
        """
        print(f"üìÇ Lade alle Category-Contents f√ºr {location}")

        all_content = {}

        # Alle Content-Items aus Registry holen
        for content_item in self.persistence_manager.content_registry:
            category = content_item["category"]

            if category not in all_content:
                all_content[category] = []

            # Content-File lesen
            try:
                filepath = Path(content_item["filepath"])
                if filepath.exists():
                    with open(filepath, 'r', encoding='utf-8') as f:
                        content = f.read()

                    content_data = {
                        "source": content_item.get("source", "unknown"),
                        "content": content,
                        "timestamp": content_item["timestamp"],
                        "filename": content_item["filename"],
                        "content_type": content_item["content_type"],
                        "metadata": content_item
                    }

                    all_content[category].append(content_data)

            except Exception as e:
                print(f"  ‚ùå Fehler beim Laden von {content_item['filename']}: {e}")
                continue

        print(f"  üìä Kategorien geladen: {len(all_content)}")
        for category, items in all_content.items():
            print(f"    {category}: {len(items)} Content-Items")

        self.processed_categories = all_content
        return all_content

    def extract_facts_from_content(self, category, content_data):
        """
        Extrahiert konkrete Facts und Daten aus Content

        Args:
            category: Newsletter-Kategorie
            content_data: Content-Daten von einem Worker

        Returns:
            dict: Extrahierte Facts strukturiert
        """
        content = content_data["content"]
        source = content_data["source"]

        facts = {
            "category": category,
            "source": source,
            "timestamp": content_data["timestamp"],
            "raw_facts": [],
            "structured_data": {},
            "citations": []
        }

        # Kategorie-spezifische Fact-Extraction
        if category == "wetter":
            facts["structured_data"] = self._extract_weather_facts(content)

        elif category == "nachrichten":
            facts["structured_data"] = self._extract_news_facts(content)

        elif category == "events":
            facts["structured_data"] = self._extract_events_facts(content)

        elif category == "sport":
            facts["structured_data"] = self._extract_sport_facts(content)

        elif category == "verkehr":
            facts["structured_data"] = self._extract_traffic_facts(content)

        else:
            # Generic fact extraction
            facts["structured_data"] = self._extract_generic_facts(content)

        # URLs und Citations extrahieren
        facts["citations"] = self._extract_citations(content)

        return facts

    # =========================================================================
    # KATEGORIE-SPEZIFISCHE FACT EXTRACTION
    # =========================================================================

    def _extract_weather_facts(self, content):
        """Extrahiert Wetter-spezifische Facts"""
        weather_facts = {
            "temperatures": [],
            "conditions": [],
            "forecasts": [],
            "warnings": [],
            "times": []
        }

        # Temperaturen finden (z.B. "25¬∞C", "18 Grad", "25-30¬∞")
        temp_patterns = [
            r'(\d{1,2}¬∞C?)',
            r'(\d{1,2}-\d{1,2}¬∞C?)',
            r'(\d{1,2}\s?Grad)',
            r'Temperatur[en]?[:\s]+(\d{1,2}¬∞?C?)',
            r'(\d{1,2})\s?bis\s?(\d{1,2})\s?Grad'
        ]

        for pattern in temp_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            for match in matches:
                if isinstance(match, tuple):
                    weather_facts["temperatures"].extend([m for m in match if m])
                else:
                    weather_facts["temperatures"].append(match)

        # Wetterbedingungen
        weather_conditions = [
            r'(sonnig|bew√∂lkt|regnerisch|st√ºrmisch|neblig)',
            r'(Regen|Sonne|Wolken|Sturm|Nebel|Schnee)',
            r'(heiter|wolkig|tr√ºb|klar|mild|warm|kalt)',
            r'(Schauer|Gewitter|Nieselregen|Spr√ºhregen)'
        ]

        for pattern in weather_conditions:
            matches = re.findall(pattern, content, re.IGNORECASE)
            weather_facts["conditions"].extend([m.lower() for m in matches])

        # Zeiten (heute, morgen, nachmittag, etc.)
        time_patterns = [
            r'(heute|morgen|√ºbermorgen)',
            r'(vormittag|nachmittag|abend|nacht)',
            r'(\d{1,2}:\d{2})',
            r'(am\s+\w+tag)',
            r'(bis\s+\d{1,2}\s+Uhr)'
        ]

        for pattern in time_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            weather_facts["times"].extend([m.lower() for m in matches])

        # Warnungen
        warning_keywords = ['warnung', 'unwetter', 'sturm', 'starkregen', 'hitze', 'frost']
        for keyword in warning_keywords:
            if keyword in content.lower():
                # Kontext um Warnung extrahieren
                context_match = re.search(f'.{{0,50}}{keyword}.{{0,50}}', content, re.IGNORECASE)
                if context_match:
                    weather_facts["warnings"].append(context_match.group().strip())

        return weather_facts

    def _extract_news_facts(self, content):
        """Extrahiert News-spezifische Facts"""
        news_facts = {
            "headlines": [],
            "dates": [],
            "locations": [],
            "persons": [],
            "organizations": [],
            "key_statements": []
        }

        # Headlines/Titel (typische Markdown Headlines)
        headlines = re.findall(r'^#{1,3}\s+(.+)$', content, re.MULTILINE)
        news_facts["headlines"] = [h.strip() for h in headlines]

        # Datumsangaben
        date_patterns = [
            r'(\d{1,2}\.\d{1,2}\.\d{4})',
            r'(\d{1,2}\.\s?\w+\s?\d{4})',
            r'(Januar|Februar|M√§rz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember)\s+\d{4}',
            r'(Montag|Dienstag|Mittwoch|Donnerstag|Freitag|Samstag|Sonntag)[,\s]+\d{1,2}\.'
        ]

        for pattern in date_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            news_facts["dates"].extend(matches)

        # Stadtteile und Orte in M√ºnchen
        munich_locations = [
            'Innenstadt', 'Maxvorstadt', 'Schwabing', 'Haidhausen', 'Giesing',
            'Sendling', 'Neuhausen', 'Pasing', 'Bogenhausen', 'Lehel',
            'Olympiapark', 'Marienplatz', 'Hauptbahnhof', 'Flughafen'
        ]

        for location in munich_locations:
            if location.lower() in content.lower():
                news_facts["locations"].append(location)

        # Organisationen
        org_patterns = [
            r'(Stadtrat|Stadtverwaltung|Rathaus)',
            r'(MVG|Stadtwerke)',
            r'(Polizei|Feuerwehr)',
            r'(Universit√§t|LMU|TU M√ºnchen)',
            r'(Flughafen M√ºnchen)'
        ]

        for pattern in org_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            news_facts["organizations"].extend(matches)

        # Key Statements (S√§tze mit wichtigen Info-Markern)
        statement_patterns = [
            r'[^.]*(?:plant|beschlie√üt|verk√ºndet|startet|er√∂ffnet)[^.]*\.',
            r'[^.]*(?:Millionen?|Euro|Prozent)[^.]*\.',
            r'[^.]*(?:ab sofort|ab\s+\d{1,2}\.|bis\s+\d{1,2}\.)[^.]*\.'
        ]

        for pattern in statement_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            news_facts["key_statements"].extend([s.strip() for s in matches[:3]])  # Nur erste 3

        return news_facts

    def _extract_events_facts(self, content):
        """Extrahiert Event-spezifische Facts"""
        events_facts = {
            "event_names": [],
            "dates_times": [],
            "locations": [],
            "prices": [],
            "categories": [],
            "descriptions": []
        }

        # Event-Namen (oft in Headlines oder als Event Name)
        event_patterns = [
            r'^#{2,4}\s+(.+)$',
            r'\*\*([^*]+)\*\*',
            r'Event[:\s]+([^\n]+)',
            r'Veranstaltung[:\s]+([^\n]+)',
            r'Konzert[:\s]+([^\n]+)',
            r'Festival[:\s]+([^\n]+)'
        ]

        for pattern in event_patterns:
            try:
                matches = re.findall(pattern, content, re.MULTILINE | re.IGNORECASE)
                events_facts["event_names"].extend([m.strip() for m in matches])
            except re.error as e:
                print(f"    ‚ùå Regex Error in event_patterns: {pattern} - {e}")
                continue

        # Datum und Zeit Informationen
        datetime_patterns = [
            r'(\d{1,2}\.\d{1,2}\.\d{4})',
            r'(\d{1,2}\.\d{1,2}\.)',
            r'(\d{1,2}:\d{2})',
            r'(ab\s+\d{1,2}:\d{2})',
            r'(\w+tag,\s+\d{1,2}\.\d{1,2}\.)',
            r'(vom\s+\d{1,2}\.\d{1,2}\.?\s+bis\s+\d{1,2}\.\d{1,2}\.?)'
        ]

        for pattern in datetime_patterns:
            try:
                matches = re.findall(pattern, content, re.IGNORECASE)
                events_facts["dates_times"].extend(matches)
            except re.error as e:
                print(f"    ‚ùå Regex Error in datetime_patterns: {pattern} - {e}")
                continue

        # Locations/Veranstaltungsorte
        venue_patterns = [
            r'(Olympiahalle|Olympiastadion|Olympiapark)',
            r'(Marienplatz|Viktualienmarkt)',
            r'(Englischer Garten|Isar)',
            r'(Gasteig|Philharmonie)',
            r'(L√∂wenbr√§ukeller|Augustiner)',
            r'(Museum\s+\w+)',
            r'Ort[:\s]+([^\n]+)',
            r'Adresse[:\s]+([^\n]+)'
        ]

        for pattern in venue_patterns:
            try:
                matches = re.findall(pattern, content, re.IGNORECASE)
                # Flache Liste f√ºr alle Matches
                for match in matches:
                    if isinstance(match, tuple):
                        # Nur non-empty Gruppen nehmen
                        for group in match:
                            if group.strip():
                                events_facts["locations"].append(group.strip())
                    else:
                        events_facts["locations"].append(match)
            except re.error as e:
                print(f"    ‚ùå Regex Error in venue_patterns: {pattern} - {e}")
                continue

        # Preise
        price_patterns = [
            r'(\d+[,.]?\d*\s?‚Ç¨)',
            r'(kostenlos|frei|gratis)',
            r'Eintritt[:\s]+(\d+\s?‚Ç¨|kostenlos|frei)',
            r'Tickets?[:\s]+(\d+\s?‚Ç¨)',
            r'(ab\s+\d+\s?‚Ç¨)'
        ]

        for pattern in price_patterns:
            try:
                matches = re.findall(pattern, content, re.IGNORECASE)
                for match in matches:
                    if isinstance(match, tuple):
                        # Nehme erste non-empty Gruppe
                        for group in match:
                            if group.strip():
                                events_facts["prices"].append(group.strip())
                                break
                    else:
                        events_facts["prices"].append(match)
            except re.error as e:
                print(f"    ‚ùå Regex Error in price_patterns: {pattern} - {e}")
                continue

        # Event-Kategorien
        category_keywords = [
            'Konzert', 'Festival', 'Theater', 'Ausstellung', 'Museum',
            'Sport', 'Markt', 'F√ºhrung', 'Workshop', 'Lesung',
            'Kabarett', 'Oper', 'Ballet', 'Film', 'Kino'
        ]

        for keyword in category_keywords:
            if keyword.lower() in content.lower():
                events_facts["categories"].append(keyword)

        return events_facts

    def _extract_sport_facts(self, content):
        """Extrahiert Sport-spezifische Facts"""
        sport_facts = {
            "teams": [],
            "scores": [],
            "dates_times": [],
            "leagues": [],
            "players": [],
            "results": []
        }

        # Teams (besonders M√ºnchen-bezogene)
        team_patterns = [
            r'(FC Bayern|Bayern M√ºnchen|FCB)',
            r'(TSV 1860|1860 M√ºnchen)',
            r'(Red Bull M√ºnchen)',
            r'(M√ºnchen Baskets)',
            r'(EHC M√ºnchen)',
            r'(\w+\s+M√ºnchen)',
            r'(FC\s+\w+|TSV\s+\w+|SC\s+\w+)'
        ]

        for pattern in team_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            sport_facts["teams"].extend(matches)

        # Spielst√§nde/Ergebnisse
        score_patterns = [
            r'(\d+:\d+)',
            r'(\d+\s*-\s*\d+)',
            r'(\d+\s*zu\s*\d+)',
            r'(gewonnen|verloren|unentschieden)',
            r'(Sieg|Niederlage|Remis)'
        ]

        for pattern in score_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            sport_facts["scores"].extend(matches)

        # Spieltermine
        datetime_patterns = [
            r'(\w+tag,\s+\d{1,2}\.\d{1,2}\.)',
            r'(\d{1,2}\.\d{1,2}\.\d{4})',
            r'(\d{1,2}:\d{2})',
            r'(um\s+\d{1,2}:\d{2})',
            r'(ab\s+\d{1,2}:\d{2})'
        ]

        for pattern in datetime_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            sport_facts["dates_times"].extend(matches)

        # Ligen/Wettbewerbe
        league_patterns = [
            r'(Bundesliga|Champions League|Europa League)',
            r'(DFB-Pokal|Pokal)',
            r'(Basketball Bundesliga|BBL)',
            r'(DEL|Deutsche Eishockey Liga)',
            r'(Regionalliga|Bayernliga)'
        ]

        for pattern in league_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            sport_facts["leagues"].extend(matches)

        return sport_facts

    def _extract_traffic_facts(self, content):
        """Extrahiert Verkehr-spezifische Facts"""
        traffic_facts = {
            "disruptions": [],
            "delays": [],
            "routes": [],
            "stations": [],
            "times": []
        }

        # Verkehrsst√∂rungen
        disruption_patterns = [
            r'(St√∂rung|Ausfall|Versp√§tung|Sperrung)',
            r'(gesperrt|blockiert|eingeschr√§nkt)',
            r'(Bauarbeiten|Baustelle)'
        ]

        for pattern in disruption_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            traffic_facts["disruptions"].extend(matches)

        # MVG Linien und Stationen
        mvg_patterns = [
            r'(U\d+|S\d+|Tram\s+\d+|Bus\s+\d+)',
            r'(Hauptbahnhof|Marienplatz|Sendlinger Tor|Odeonsplatz)'
        ]

        for pattern in mvg_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            traffic_facts["routes"].extend(matches)

        return traffic_facts

    def _extract_generic_facts(self, content):
        """Extrahiert generische Facts aus Content"""
        generic_facts = {
            "key_numbers": [],
            "dates": [],
            "locations": [],
            "key_phrases": [],
            "urls": []
        }

        # Zahlen mit Kontext
        number_patterns = [
            r'(\d+\s?%)',
            r'(\d+[.,]\d+)',
            r'(\d+\s?Euro?)',
            r'(\d+\s?Million)',
            r'(\d+\s?Prozent)'
        ]

        for pattern in number_patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            generic_facts["key_numbers"].extend(matches)

        # Generische Datumspatterns
        date_patterns = [
            r'(\d{1,2}\.\d{1,2}\.\d{4})',
            r'(\w+\s+\d{4})'
        ]

        for pattern in date_patterns:
            matches = re.findall(pattern, content)
            generic_facts["dates"].extend(matches)

        return generic_facts

    def _extract_citations(self, content):
        """Extrahiert URLs und Zitierbare Quellen"""
        citations = []

        # URLs extrahieren
        url_pattern = r'https?://[^\s)\]]+|www\.[^\s)\]]+'
        urls = re.findall(url_pattern, content)

        for url in urls:
            citations.append({
                "type": "url",
                "source": url,
                "context": ""
            })

        # Markierte Zitate (in Anf√ºhrungszeichen)
        quote_pattern = r'"([^"]{10,})"'
        quotes = re.findall(quote_pattern, content)

        for quote in quotes:
            citations.append({
                "type": "quote",
                "source": quote,
                "context": ""
            })

        return citations

    # =========================================================================
    # CONTENT SYNTHESIS & STRUCTURING
    # =========================================================================

    def process_all_categories(self, location):
        """
        Verarbeitet alle Kategorien und extrahiert strukturierte Facts

        Args:
            location: Ortsname

        Returns:
            dict: Vollst√§ndig verarbeitete Category-Facts
        """
        print(f"üß† Verarbeite alle Category-Facts f√ºr {location}")

        # 1. Content laden
        all_content = self.load_all_category_content(location)

        # 2. Facts pro Kategorie extrahieren
        processed_facts = {}

        for category, content_items in all_content.items():
            print(f"  üéØ Verarbeite {category}: {len(content_items)} Sources")

            category_facts = {
                "category": category,
                "sources_processed": len(content_items),
                "worker_facts": [],
                "combined_facts": {},
                "citations": [],
                "confidence_score": 0.0
            }

            # Facts von allen Workern f√ºr diese Kategorie sammeln
            for content_data in content_items:
                worker_facts = self.extract_facts_from_content(category, content_data)
                category_facts["worker_facts"].append(worker_facts)

                # Citations sammeln
                category_facts["citations"].extend(worker_facts["citations"])

            # 3. Facts kombinieren und deduplizieren
            category_facts["combined_facts"] = self._combine_worker_facts(
                category, category_facts["worker_facts"]
            )

            # 4. Confidence Score berechnen
            category_facts["confidence_score"] = self._calculate_confidence_score(
                category_facts["worker_facts"]
            )

            processed_facts[category] = category_facts

            print(f"    ‚úÖ {category}: {category_facts['confidence_score']:.2f} Confidence, {len(category_facts['citations'])} Citations")

        self.extracted_facts = processed_facts

        print(f"üèÅ Fact-Processing komplett: {len(processed_facts)} Kategorien")
        return processed_facts

    def _combine_worker_facts(self, category, worker_facts_list):
        """Kombiniert Facts von verschiedenen Workern"""

        if category == "wetter":
            return self._combine_weather_facts(worker_facts_list)
        elif category == "nachrichten":
            return self._combine_news_facts(worker_facts_list)
        elif category == "events":
            return self._combine_events_facts(worker_facts_list)
        elif category == "sport":
            return self._combine_sport_facts(worker_facts_list)
        else:
            return self._combine_generic_facts(worker_facts_list)

    def _combine_weather_facts(self, worker_facts_list):
        """Kombiniert Wetter-Facts von verschiedenen Workern"""
        combined = {
            "temperatures": [],
            "conditions": [],
            "forecasts": [],
            "warnings": [],
            "times": []
        }

        for worker_facts in worker_facts_list:
            weather_data = worker_facts["structured_data"]
            for key in combined.keys():
                if key in weather_data:
                    combined[key].extend(weather_data[key])

        # Deduplizieren
        for key in combined.keys():
            combined[key] = list(set(combined[key]))

        return combined

    def _combine_news_facts(self, worker_facts_list):
        """Kombiniert News-Facts von verschiedenen Workern"""
        combined = {
            "headlines": [],
            "dates": [],
            "locations": [],
            "organizations": [],
            "key_statements": []
        }

        for worker_facts in worker_facts_list:
            news_data = worker_facts["structured_data"]
            for key in combined.keys():
                if key in news_data:
                    combined[key].extend(news_data[key])

        # Deduplizieren und begrenzen
        for key in combined.keys():
            combined[key] = list(set(combined[key]))[:5]  # Maximal 5 pro Typ

        return combined

    def _combine_events_facts(self, worker_facts_list):
        """Kombiniert Events-Facts von verschiedenen Workern"""
        combined = {
            "event_names": [],
            "dates_times": [],
            "locations": [],
            "prices": [],
            "categories": []
        }

        for worker_facts in worker_facts_list:
            events_data = worker_facts["structured_data"]
            for key in combined.keys():
                if key in events_data:
                    combined[key].extend(events_data[key])

        # Deduplizieren
        for key in combined.keys():
            combined[key] = list(set(combined[key]))

        return combined

    def _combine_sport_facts(self, worker_facts_list):
        """Kombiniert Sport-Facts von verschiedenen Workern"""
        combined = {
            "teams": [],
            "scores": [],
            "dates_times": [],
            "leagues": [],
            "results": []
        }

        for worker_facts in worker_facts_list:
            sport_data = worker_facts["structured_data"]
            for key in combined.keys():
                if key in sport_data:
                    combined[key].extend(sport_data[key])

        # Deduplizieren
        for key in combined.keys():
            combined[key] = list(set(combined[key]))

        return combined

    def _combine_generic_facts(self, worker_facts_list):
        """Kombiniert generische Facts"""
        combined = {
            "key_numbers": [],
            "dates": [],
            "locations": [],
            "key_phrases": []
        }

        for worker_facts in worker_facts_list:
            generic_data = worker_facts["structured_data"]
            for key in combined.keys():
                if key in generic_data:
                    combined[key].extend(generic_data[key])

        return combined

    def _calculate_confidence_score(self, worker_facts_list):
        """Berechnet Confidence Score basierend auf Worker-√úbereinstimmung"""

        # Basis-Score: Anzahl Worker
        base_score = min(len(worker_facts_list) * 0.3, 1.0)

        # Bonus f√ºr Citations
        total_citations = sum(len(wf["citations"]) for wf in worker_facts_list)
        citation_bonus = min(total_citations * 0.1, 0.3)

        # Bonus f√ºr strukturierte Daten
        structured_bonus = 0.0
        for worker_facts in worker_facts_list:
            if worker_facts["structured_data"]:
                structured_bonus += 0.2

        structured_bonus = min(structured_bonus, 0.4)

        total_score = min(base_score + citation_bonus + structured_bonus, 1.0)
        return round(total_score, 2)

    # =========================================================================
    # NEWSLETTER CONTENT PREPARATION
    # =========================================================================

    def prepare_content_for_newsletter(self, location, newsletter_style="standard"):
        """
        Bereitet verarbeitete Facts f√ºr Newsletter-Generation vor

        Args:
            location: Ortsname
            newsletter_style: "compact", "standard", "detailed"

        Returns:
            dict: Newsletter-ready Content Structure
        """
        print(f"üì∞ Bereite Content f√ºr {newsletter_style} Newsletter vor")

        if not self.extracted_facts:
            print("  ‚ö†Ô∏è Keine Facts verf√ºgbar - f√ºhre erst process_all_categories() aus")
            return None

        newsletter_content = {
            "location": location,
            "newsletter_style": newsletter_style,
            "timestamp": datetime.now().isoformat(),
            "categories": {},
            "total_facts": 0,
            "total_citations": 0
        }

        for category, facts_data in self.extracted_facts.items():

            # Content-Level basierend auf Style bestimmen
            if newsletter_style == "compact":
                content_level = "minimal"
                max_facts_per_category = 2
                include_citations = False

            elif newsletter_style == "standard":
                content_level = "balanced"
                max_facts_per_category = 5
                include_citations = False

            elif newsletter_style == "detailed":
                content_level = "comprehensive"
                max_facts_per_category = 10
                include_citations = True

            # Category Content strukturieren
            category_content = {
                "category": category,
                "confidence_score": facts_data["confidence_score"],
                "sources_count": facts_data["sources_processed"],
                "content_level": content_level,
                "facts": self._select_best_facts(
                    facts_data["combined_facts"],
                    max_facts_per_category
                ),
                "citations": facts_data["citations"][:3] if include_citations else [],
                "newsletter_text_snippets": self._generate_text_snippets(
                    category,
                    facts_data["combined_facts"],
                    content_level
                )
            }

            newsletter_content["categories"][category] = category_content
            newsletter_content["total_facts"] += len(category_content["facts"])
            newsletter_content["total_citations"] += len(category_content["citations"])

        print(f"  üìä Newsletter Content bereit:")
        print(f"    Style: {newsletter_style}")
        print(f"    Kategorien: {len(newsletter_content['categories'])}")
        print(f"    Total Facts: {newsletter_content['total_facts']}")
        print(f"    Total Citations: {newsletter_content['total_citations']}")

        return newsletter_content

    def _select_best_facts(self, combined_facts, max_facts):
        """W√§hlt die besten/relevantesten Facts aus"""

        best_facts = []

        # Priorisierung nach Fact-Typ und Relevanz
        for fact_type, facts_list in combined_facts.items():
            if not facts_list:
                continue

            # Priorisierung nach Kategorie
            priority_map = {
                "temperatures": 1,
                "event_names": 1,
                "teams": 1,
                "headlines": 1,
                "conditions": 2,
                "scores": 2,
                "dates": 2,
                "key_statements": 2,
                "locations": 3,
                "prices": 3
            }

            priority = priority_map.get(fact_type, 4)

            # Beste Facts aus dieser Kategorie nehmen
            for fact in facts_list[:2]:  # Max 2 pro Fact-Type
                if len(best_facts) < max_facts:
                    best_facts.append({
                        "type": fact_type,
                        "value": fact,
                        "priority": priority
                    })

        # Nach Priorit√§t sortieren
        best_facts.sort(key=lambda x: x["priority"])

        return best_facts[:max_facts]

    def _generate_text_snippets(self, category, combined_facts, content_level):
        """Generiert Text-Snippets f√ºr Newsletter-Integration"""

        snippets = []

        if category == "wetter" and combined_facts.get("temperatures"):
            temp_info = combined_facts["temperatures"][0] if combined_facts["temperatures"] else ""
            conditions = combined_facts.get("conditions", [""])
            condition_info = conditions[0] if conditions else ""

            if content_level == "minimal":
                snippets.append(f"Heute {temp_info}")
            elif content_level == "balanced":
                snippets.append(f"Heute {temp_info} mit {condition_info} Wetter")
            else:
                times = combined_facts.get("times", [])
                time_info = f" {times[0]}" if times else ""
                snippets.append(f"Heute{time_info} {temp_info} mit {condition_info} Bedingungen")

        elif category == "events" and combined_facts.get("event_names"):
            events = combined_facts["event_names"][:2]
            locations = combined_facts.get("locations", [])

            if content_level == "minimal":
                snippets.append(f"Events: {', '.join(events[:1])}")
            elif content_level == "balanced":
                location_info = f" im {locations[0]}" if locations else ""
                snippets.append(f"Events: {', '.join(events)}{location_info}")
            else:
                prices = combined_facts.get("prices", [])
                dates = combined_facts.get("dates_times", [])
                price_info = f" (ab {prices[0]})" if prices else ""
                date_info = f" am {dates[0]}" if dates else ""
                snippets.append(f"Events: {events[0]}{date_info}{price_info}")

        elif category == "sport" and combined_facts.get("teams"):
            teams = combined_facts["teams"][:2]
            scores = combined_facts.get("scores", [])

            if content_level == "minimal":
                snippets.append(f"Sport: {teams[0]}")
            elif content_level == "balanced":
                score_info = f" ({scores[0]})" if scores else ""
                snippets.append(f"Sport: {', '.join(teams)}{score_info}")
            else:
                leagues = combined_facts.get("leagues", [])
                dates = combined_facts.get("dates_times", [])
                league_info = f" in der {leagues[0]}" if leagues else ""
                date_info = f" am {dates[0]}" if dates else ""
                snippets.append(f"{teams[0]}{league_info}{date_info}")

        elif category == "nachrichten" and combined_facts.get("key_statements"):
            statements = combined_facts["key_statements"][:2]

            if content_level == "minimal":
                snippets.append(statements[0][:50] + "..." if statements else "")
            else:
                snippets.extend(statements[:2 if content_level == "balanced" else 3])

        return [s for s in snippets if s]  # Leere Snippets entfernen

    # =========================================================================
    # UTILITY METHODS
    # =========================================================================

    def get_processing_summary(self):
        """Gibt Zusammenfassung der Content-Processing zur√ºck"""

        if not self.extracted_facts:
            return {"status": "no_processing_done"}

        summary = {
            "status": "processed",
            "categories_processed": len(self.extracted_facts),
            "total_sources": sum(f["sources_processed"] for f in self.extracted_facts.values()),
            "average_confidence": sum(f["confidence_score"] for f in self.extracted_facts.values()) / len(self.extracted_facts),
            "total_citations": sum(len(f["citations"]) for f in self.extracted_facts.values()),
            "categories": {}
        }

        for category, facts_data in self.extracted_facts.items():
            summary["categories"][category] = {
                "sources": facts_data["sources_processed"],
                "confidence": facts_data["confidence_score"],
                "citations": len(facts_data["citations"]),
                "fact_types": len(facts_data["combined_facts"])
            }

        return summary

# =============================================================================
# INITIALISIERUNG UND INTEGRATION
# =============================================================================

# Enhanced Content Processor mit Foundation-System initialisieren
foundation_available = all([
    'persistence_manager' in globals() and persistence_manager,
    'config_manager' in globals() and config_manager
])

if foundation_available:
    enhanced_content_processor = EnhancedContentProcessor(
        persistence_manager=persistence_manager,
        config_manager=config_manager
    )
    print("üöÄ Enhanced Content Processor mit Foundation-System bereit")
    print("üìä Integration: Kann alle gesammelten Markdown-Files verarbeiten")
    print("üß† Capabilities: Fact-Extraction + Content-Synthesis + Newsletter-Preparation")

else:
    print("‚ùå Enhanced Content Processor nicht verf√ºgbar - Foundation-System fehlt")
    enhanced_content_processor = None

print("=" * 70)
print("‚úÖ Zelle 5a: Enhanced Content Processor Setup komplett\n")

‚úÖ Enhanced Content Processor initialisiert
üèóÔ∏è Integration: DataPersistenceManager + ContentExtraction
üöÄ Enhanced Content Processor mit Foundation-System bereit
üìä Integration: Kann alle gesammelten Markdown-Files verarbeiten
üß† Capabilities: Fact-Extraction + Content-Synthesis + Newsletter-Preparation
‚úÖ Zelle 5a: Enhanced Content Processor Setup komplett



In [11]:
# =============================================================================
# ZELLE 5a: Simple Content Processor - Einfache Alternative f√ºr Newsletter-Enhancement
# =============================================================================
# @title Simple Content Processor - Direkte Markdown-Integration statt komplexer Regex

import re
from pathlib import Path
from datetime import datetime

class SimpleContentProcessor:
    """
    Simple Content Processor - Einfache Alternative zum Enhanced Content Processor
    - L√§dt Markdown-Files direkt
    - Extrahiert nur wichtige Facts (Zahlen, Daten, URLs)
    - √úbergibt rohen Content an Gemini f√ºr intelligente Verarbeitung
    """

    def __init__(self, persistence_manager):
        """
        Initialisiert Simple Content Processor

        Args:
            persistence_manager: DataPersistenceManager Instance
        """
        self.persistence_manager = persistence_manager
        self.processed_content = {}

        print(f"‚úÖ Simple Content Processor initialisiert")
        print(f"üìä Konzept: Direkte Markdown-Integration + LLM-basierte Fact-Extraction")

    # =========================================================================
    # SIMPLE CONTENT LOADING
    # =========================================================================

    def load_all_content_for_newsletter(self, location, newsletter_style="standard"):
        """
        L√§dt allen Content und bereitet ihn f√ºr Newsletter-Stil vor

        Args:
            location: Ortsname
            newsletter_style: "compact", "standard", "detailed"

        Returns:
            dict: Newsletter-ready Content
        """
        print(f"üìÇ Lade Content f√ºr {newsletter_style} Newsletter")

        # Alle Content-Items aus Registry laden
        all_content = {}
        total_chars = 0

        for content_item in self.persistence_manager.content_registry:
            category = content_item["category"]

            if category not in all_content:
                all_content[category] = {
                    "sources": [],
                    "total_content": "",
                    "key_facts": [],
                    "source_count": 0
                }

            # Content-File lesen
            try:
                filepath = Path(content_item["filepath"])
                if filepath.exists():
                    with open(filepath, 'r', encoding='utf-8') as f:
                        content = f.read()

                    # Source Info
                    source_info = {
                        "source": content_item.get("source", "unknown"),
                        "filename": content_item["filename"],
                        "content": content,
                        "char_count": len(content)
                    }

                    all_content[category]["sources"].append(source_info)
                    all_content[category]["source_count"] += 1
                    total_chars += len(content)

            except Exception as e:
                print(f"  ‚ùå Fehler beim Laden: {content_item['filename']}")
                continue

        # Content f√ºr Newsletter-Stil aufbereiten
        newsletter_content = self._prepare_for_newsletter_style(all_content, newsletter_style)

        print(f"  üìä Kategorien: {len(all_content)}")
        print(f"  üìù Total Content: {total_chars:,} Zeichen")
        print(f"  üé® Style: {newsletter_style}")

        return newsletter_content

    def _prepare_for_newsletter_style(self, all_content, newsletter_style):
        """Bereitet Content f√ºr spezifischen Newsletter-Stil auf"""

        newsletter_content = {
            "style": newsletter_style,
            "timestamp": datetime.now().isoformat(),
            "categories": {},
            "total_sources": 0,
            "content_limits": self._get_content_limits(newsletter_style)
        }

        for category, content_data in all_content.items():

            # Style-spezifische Limits
            limits = newsletter_content["content_limits"]

            # Key Facts extrahieren (einfach!)
            key_facts = self._extract_simple_facts(content_data["sources"])

            # Content zusammenfassen basierend auf Stil
            if newsletter_style == "compact":
                # Nur wichtigste Facts
                prepared_content = self._prepare_compact_content(content_data, key_facts)

            elif newsletter_style == "standard":
                # Facts + erste Abs√§tze von jedem Source
                prepared_content = self._prepare_standard_content(content_data, key_facts)

            elif newsletter_style == "detailed":
                # Alles + URLs + vollst√§ndige Quellen
                prepared_content = self._prepare_detailed_content(content_data, key_facts)

            newsletter_content["categories"][category] = prepared_content
            newsletter_content["total_sources"] += content_data["source_count"]

        return newsletter_content

    def _get_content_limits(self, style):
        """Gibt Style-spezifische Limits zur√ºck"""
        limits = {
            "compact": {
                "max_facts_per_category": 3,
                "max_content_chars": 500,
                "include_sources": False,
                "include_urls": False
            },
            "standard": {
                "max_facts_per_category": 6,
                "max_content_chars": 1500,
                "include_sources": False,
                "include_urls": False
            },
            "detailed": {
                "max_facts_per_category": 12,
                "max_content_chars": 3000,
                "include_sources": True,
                "include_urls": True
            }
        }
        return limits.get(style, limits["standard"])

    # =========================================================================
    # SIMPLE FACT EXTRACTION (statt komplexer Regex)
    # =========================================================================

    def _extract_simple_facts(self, sources):
        """
        Einfache Fact-Extraction - findet Zahlen, Daten, wichtige Keywords

        Args:
            sources: Liste von Source-Content

        Returns:
            list: Einfache Facts (Strings mit Kontext)
        """
        all_facts = []

        for source in sources:
            content = source["content"]

            # Split in Zeilen f√ºr einfachere Verarbeitung
            lines = content.split('\n')

            for line in lines:
                line = line.strip()
                if len(line) < 10:  # Zu kurze Zeilen ignorieren
                    continue

                # Einfache Fact-Patterns (viel simpler als vorher!)
                is_fact = False

                # Zahlen mit Einheiten (Temp, Geld, Prozent, Zeit)
                if re.search(r'\d+[¬∞‚Ç¨%]\w*|\d{1,2}:\d{2}|\d{1,2}\.\d{1,2}\.', line):
                    is_fact = True

                # Datumsangaben
                if re.search(r'heute|morgen|montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonntag', line.lower()):
                    is_fact = True

                # Wichtige Keywords
                important_keywords = [
                    'er√∂ffnet', 'startet', 'beginnt', 'findet statt', 'kostenlos',
                    'Eintritt', 'Tickets', 'Anmeldung', 'Termin', 'Uhr'
                ]
                if any(keyword.lower() in line.lower() for keyword in important_keywords):
                    is_fact = True

                # Kurze, pr√§gnante S√§tze (oft Facts)
                if len(line.split()) <= 15 and ('.' in line or '!' in line):
                    is_fact = True

                if is_fact:
                    all_facts.append({
                        "text": line,
                        "source": source["source"],
                        "char_count": len(line)
                    })

        # Sortiere nach Relevanz (kurze, pr√§gnante Facts zuerst)
        all_facts.sort(key=lambda x: len(x["text"]))

        return all_facts

    def _prepare_compact_content(self, content_data, key_facts):
        """Compact Content: Nur wichtigste Facts"""
        return {
            "category_summary": f"{content_data['source_count']} Quellen verf√ºgbar",
            "key_facts": key_facts[:3],  # Top 3 Facts
            "content_snippets": [],
            "source_info": f"{content_data['source_count']} Sources",
            "urls": []
        }

    def _prepare_standard_content(self, content_data, key_facts):
        """Standard Content: Facts + Content-Snippets"""

        # Erste Abs√§tze von jedem Source
        content_snippets = []
        for source in content_data["sources"]:
            # Ersten sinnvollen Absatz finden
            paragraphs = source["content"].split('\n\n')
            for paragraph in paragraphs:
                if len(paragraph.strip()) > 50:  # Sinnvolle L√§nge
                    snippet = paragraph.strip()[:300]  # Ersten 300 Zeichen
                    if len(paragraph) > 300:
                        snippet += "..."
                    content_snippets.append({
                        "text": snippet,
                        "source": source["source"]
                    })
                    break

        return {
            "category_summary": f"{content_data['source_count']} Quellen analysiert",
            "key_facts": key_facts[:6],  # Top 6 Facts
            "content_snippets": content_snippets,
            "source_info": f"{content_data['source_count']} verschiedene Datenquellen",
            "urls": []
        }

    def _prepare_detailed_content(self, content_data, key_facts):
        """Detailed Content: Alles + URLs + Quellen"""

        # L√§ngere Content-Snippets
        content_snippets = []
        urls = []

        for source in content_data["sources"]:
            # L√§ngere Snippets f√ºr detailed
            paragraphs = source["content"].split('\n\n')
            combined_content = ""

            for paragraph in paragraphs[:3]:  # Erste 3 Abs√§tze
                if len(paragraph.strip()) > 20:
                    combined_content += paragraph.strip() + "\n\n"

            if combined_content:
                snippet = combined_content[:800]  # Erste 800 Zeichen
                if len(combined_content) > 800:
                    snippet += "..."

                content_snippets.append({
                    "text": snippet,
                    "source": source["source"],
                    "filename": source["filename"]
                })

            # URLs extrahieren
            url_matches = re.findall(r'https?://[^\s\)]+', source["content"])
            urls.extend(url_matches[:2])  # Max 2 URLs per source

        return {
            "category_summary": f"{content_data['source_count']} Quellen detailliert analysiert",
            "key_facts": key_facts[:12],  # Alle relevanten Facts
            "content_snippets": content_snippets,
            "source_info": f"{content_data['source_count']} Datenquellen: " + ", ".join([s["source"] for s in content_data["sources"]]),
            "urls": list(set(urls))  # Deduplizierte URLs
        }

    # =========================================================================
    # GEMINI-READY CONTENT FORMATTING
    # =========================================================================

    def format_for_gemini_prompt(self, newsletter_content, location):
        """
        Formatiert Content f√ºr Gemini Newsletter-Prompt

        Args:
            newsletter_content: Vorbereiteter Newsletter Content
            location: Ortsname

        Returns:
            str: Gemini-ready Content-Section f√ºr Prompt
        """
        style = newsletter_content["style"]

        formatted_content = f"""
VERF√úGBARE CONTENT-DATEN F√úR {location.upper()} ({style.upper()} STYLE):
Quellen insgesamt: {newsletter_content['total_sources']}
Kategorien: {len(newsletter_content['categories'])}

"""

        for category, content_data in newsletter_content["categories"].items():
            formatted_content += f"""
=== {category.upper()} ===
{content_data['category_summary']}

Key Facts:"""

            for i, fact in enumerate(content_data["key_facts"], 1):
                formatted_content += f"""
{i}. {fact['text']} (Quelle: {fact['source']})"""

            if content_data["content_snippets"]:
                formatted_content += f"""

Content-Zusammenfassung:"""
                for snippet in content_data["content_snippets"]:
                    formatted_content += f"""
‚Ä¢ {snippet['text'][:200]}... (von {snippet['source']})"""

            if style == "detailed" and content_data.get("urls"):
                formatted_content += f"""

Referenz-URLs:"""
                for url in content_data["urls"][:3]:
                    formatted_content += f"""
- {url}"""

            formatted_content += "\n"

        formatted_content += f"""
ANWEISUNG: Nutze diese konkreten Facts und Content-Snippets f√ºr einen {style} Newsletter!"""

        return formatted_content

    # =========================================================================
    # UTILITY METHODS
    # =========================================================================

    def get_content_summary(self, location):
        """Gibt Simple Content Summary zur√ºck"""

        if location not in self.processed_content:
            return {"status": "no_content_processed"}

        content = self.processed_content[location]

        summary = {
            "location": location,
            "style": content.get("style", "unknown"),
            "categories": len(content.get("categories", {})),
            "total_sources": content.get("total_sources", 0),
            "total_facts": sum(len(cat.get("key_facts", [])) for cat in content.get("categories", {}).values()),
            "processing_method": "simple_direct_content"
        }

        return summary

# =============================================================================
# INITIALISIERUNG
# =============================================================================

# Simple Content Processor initialisieren
if 'persistence_manager' in globals() and persistence_manager:
    simple_content_processor = SimpleContentProcessor(
        persistence_manager=persistence_manager
    )
    print("üöÄ Simple Content Processor bereit")
    print("üìä Methode: Direkte Markdown-Integration statt komplexer Regex")
    print("üéØ Vorteile: Einfach, wartbar, LLM-basierte Intelligenz")

else:
    print("‚ùå Simple Content Processor nicht verf√ºgbar - DataPersistenceManager fehlt")
    simple_content_processor = None

# =============================================================================
# DIREKTER TEST
# =============================================================================

if simple_content_processor:
    print("\nüß™ TESTE SIMPLE CONTENT PROCESSOR")
    print("=" * 50)

    try:
        test_location = "m√ºnchen"

        # Test 1: Content Loading f√ºr verschiedene Styles
        print("üìç Test 1: Content Loading")

        for style in ["compact", "standard", "detailed"]:
            print(f"\n  üé® Style: {style}")
            content = simple_content_processor.load_all_content_for_newsletter(test_location, style)

            if content:
                print(f"    ‚úÖ Kategorien: {len(content['categories'])}")
                print(f"    üìä Sources: {content['total_sources']}")

                # Beispiel einer Kategorie zeigen
                if content['categories']:
                    example_cat = list(content['categories'].keys())[0]
                    cat_data = content['categories'][example_cat]
                    print(f"    üìù {example_cat}: {len(cat_data['key_facts'])} Facts, {len(cat_data['content_snippets'])} Snippets")

        # Test 2: Gemini-Prompt Formatting
        print(f"\nüìç Test 2: Gemini-Prompt Formatting")
        standard_content = simple_content_processor.load_all_content_for_newsletter(test_location, "standard")

        if standard_content:
            prompt_section = simple_content_processor.format_for_gemini_prompt(standard_content, test_location)
            print(f"    ‚úÖ Prompt-Section generiert: {len(prompt_section)} Zeichen")
            print(f"    üëÄ Preview:")
            preview = prompt_section[:300] + "..." if len(prompt_section) > 300 else prompt_section
            print(f"    {preview}")

        print(f"\n‚úÖ Simple Content Processor Tests erfolgreich!")
        print(f"üí° Viel einfacher als Enhanced Content Processor!")

    except Exception as test_error:
        print(f"‚ùå Test Fehler: {test_error}")
        import traceback
        print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 50)
print("‚úÖ Simple Content Processor Setup komplett\n")

‚úÖ Simple Content Processor initialisiert
üìä Konzept: Direkte Markdown-Integration + LLM-basierte Fact-Extraction
üöÄ Simple Content Processor bereit
üìä Methode: Direkte Markdown-Integration statt komplexer Regex
üéØ Vorteile: Einfach, wartbar, LLM-basierte Intelligenz

üß™ TESTE SIMPLE CONTENT PROCESSOR
üìç Test 1: Content Loading

  üé® Style: compact
üìÇ Lade Content f√ºr compact Newsletter
  üìä Kategorien: 6
  üìù Total Content: 22,821 Zeichen
  üé® Style: compact
    ‚úÖ Kategorien: 6
    üìä Sources: 7
    üìù wetter: 3 Facts, 0 Snippets

  üé® Style: standard
üìÇ Lade Content f√ºr standard Newsletter
  üìä Kategorien: 6
  üìù Total Content: 22,821 Zeichen
  üé® Style: standard
    ‚úÖ Kategorien: 6
    üìä Sources: 7
    üìù wetter: 6 Facts, 2 Snippets

  üé® Style: detailed
üìÇ Lade Content f√ºr detailed Newsletter
  üìä Kategorien: 6
  üìù Total Content: 22,821 Zeichen
  üé® Style: detailed
    ‚úÖ Kategorien: 6
    üìä Sources: 7
    üìù wetter:

In [12]:
# =============================================================================
# ZELLE 5: Gemini Worker V2 - Enhanced mit Content Processor Integration
# =============================================================================
# @title Gemini Worker V2 Enhanced - Nutzt Enhanced Content Processor f√ºr detaillierte Newsletter

import google.generativeai as genai

class GeminiWorkerV2Enhanced:
    """
    Gemini Worker V2 Enhanced - Vollst√§ndig integriert mit Enhanced Content Processor
    - ConfigManager f√ºr Template-basierte Newsletter-Generation
    - TimeContextManager f√ºr zeitlichen Kontext in Newslettern
    - DataPersistenceManager f√ºr Newsletter-Archivierung
    - Enhanced Content Processor f√ºr strukturierte Fact-Integration
    - Google Gemini 2.0 Flash f√ºr intelligente Multi-Level Content-Synthese
    """

    def __init__(self, api_key, config_manager=None, time_manager=None, persistence_manager=None, content_processor=None):
        """
        Initialisiert Gemini Worker V2 Enhanced

        Args:
            api_key: Google Gemini API Key
            config_manager: ConfigManager Instance
            time_manager: TimeContextManager Instance
            persistence_manager: DataPersistenceManager Instance
            content_processor: Enhanced Content Processor Instance
        """
        genai.configure(api_key=api_key)

        # Foundation-System Integration
        self.config_manager = config_manager
        self.time_manager = time_manager
        self.persistence_manager = persistence_manager
        self.content_processor = content_processor  # NEW: Enhanced Content Processor

        # Legacy Support falls Foundation nicht verf√ºgbar
        self.has_foundation = all([config_manager, time_manager, persistence_manager])
        self.has_enhanced_content = content_processor is not None

        # Worker State
        self.generated_newsletters = []
        self.content_synthesis_history = []

        print(f"‚úÖ Gemini Worker V2 Enhanced initialisiert")
        print(f"üèóÔ∏è Foundation Integration: {'‚úÖ' if self.has_foundation else '‚ùå'}")
        print(f"üß† Enhanced Content Processor: {'‚úÖ' if self.has_enhanced_content else '‚ùå'}")

        if self.has_foundation:
            # API-Config aus ConfigManager holen
            self.api_config = self.config_manager.get_api_config("gemini")
            self.model_name = self.api_config.get("model", "gemini-2.0-flash-exp")
            self.model = genai.GenerativeModel(self.model_name)

            print(f"‚öôÔ∏è API Config geladen: Model {self.model_name}")
            print(f"üéõÔ∏è Generation Config: temperature={self.api_config.get('temperature', 0.3)}")
        else:
            self.model_name = "gemini-2.0-flash-exp"
            self.model = genai.GenerativeModel(self.model_name)
            print("‚ö†Ô∏è L√§uft im Legacy-Modus ohne Foundation-System")

    # =========================================================================
    # ENHANCED CONTENT COLLECTION & SYNTHESIS
    # =========================================================================

    def collect_enhanced_content_for_location(self, location):
        """
        Sammelt und verarbeitet Content mit Enhanced Content Processor

        Args:
            location: Ortsname

        Returns:
            dict: Enhanced Content-Daten mit strukturierten Facts
        """
        print(f"üß† Sammle Enhanced Content f√ºr {location}")

        enhanced_content = {
            "location": location,
            "timestamp": datetime.now().isoformat(),
            "processing_method": "enhanced" if self.has_enhanced_content else "legacy",
            "content_summary": {}
        }

        if self.has_enhanced_content:
            # Enhanced Content Processing
            print("  üìä Verwende Enhanced Content Processor")

            # 1. Alle Categories mit strukturierten Facts verarbeiten
            processed_facts = self.content_processor.process_all_categories(location)
            enhanced_content["processed_facts"] = processed_facts

            # 2. Processing Summary
            processing_summary = self.content_processor.get_processing_summary()
            enhanced_content["processing_summary"] = processing_summary

            print(f"  ‚úÖ Enhanced Processing komplett:")
            print(f"    Kategorien: {processing_summary['categories_processed']}")
            print(f"    Sources: {processing_summary['total_sources']}")
            print(f"    Avg Confidence: {processing_summary['average_confidence']:.2f}")
            print(f"    Citations: {processing_summary['total_citations']}")

        else:
            # Legacy Content Collection (Fallback)
            print("  ‚ö†Ô∏è Fallback: Legacy Content Collection")
            legacy_content = self.collect_all_content_for_location_legacy(location)
            enhanced_content.update(legacy_content)

        return enhanced_content

    def collect_all_content_for_location_legacy(self, location):
        """
        Legacy Content Collection (Fallback wenn Enhanced Content Processor fehlt)
        """
        print(f"üìä Sammle Content f√ºr {location} von allen Workern (Legacy)")

        collected_content = {
            "location": location,
            "timestamp": datetime.now().isoformat(),
            "firecrawl_content": [],
            "claude_content": [],
            "perplexity_content": [],
            "total_sources": 0,
            "categories_covered": set(),
            "content_summary": {}
        }

        # Firecrawl Content sammeln
        if 'firecrawl_worker_v2' in globals() and firecrawl_worker_v2:
            firecrawl_summary = firecrawl_worker_v2.get_newsletter_summary_for_location(location)
            collected_content["firecrawl_content"] = firecrawl_summary
            collected_content["categories_covered"].update(
                [item["category"] for item in firecrawl_summary.get("search_results", [])] +
                [item["category"] for item in firecrawl_summary.get("scrape_results", [])]
            )

            total_firecrawl_items = (
                len(firecrawl_summary.get("search_results", [])) +
                len(firecrawl_summary.get("scrape_results", []))
            )
            collected_content["total_sources"] += total_firecrawl_items

        # Claude Content sammeln
        if 'claude_worker_v2' in globals() and claude_worker_v2:
            claude_summary = claude_worker_v2.get_newsletter_summary_for_location(location)
            collected_content["claude_content"] = claude_summary
            collected_content["categories_covered"].update(claude_summary.get("categories_processed", []))
            collected_content["total_sources"] += claude_summary.get("total_search_results", 0)

        # Perplexity Content sammeln
        if 'perplexity_worker_v2' in globals() and perplexity_worker_v2:
            perplexity_summary = perplexity_worker_v2.get_newsletter_summary_for_location(location)
            collected_content["perplexity_content"] = perplexity_summary
            collected_content["categories_covered"].update(perplexity_summary.get("categories_processed", []))
            collected_content["total_sources"] += perplexity_summary.get("total_search_results", 0)

        collected_content["categories_covered"] = list(collected_content["categories_covered"])
        return collected_content

    # =========================================================================
    # ENHANCED NEWSLETTER GENERATION
    # =========================================================================

    def generate_newsletter_enhanced(self, location, categories=None, newsletter_style="standard"):
        """
        Enhanced Newsletter Generation mit strukturierten Facts

        Args:
            location: Ortsname
            categories: Kategorien-Liste (optional, aus Config falls None)
            newsletter_style: "compact", "standard", "detailed"

        Returns:
            dict: Enhanced Newsletter mit Metadaten
        """
        if not self.has_foundation:
            print("‚ùå Foundation-System erforderlich f√ºr Enhanced Newsletter")
            return None

        try:
            # 1. Kategorien bestimmen
            if categories is None:
                categories = self.config_manager.get_newsletter_categories("high")
                categories.extend(self.config_manager.get_newsletter_categories("medium"))

            # 2. Enhanced Content sammeln und verarbeiten
            enhanced_content = self.collect_enhanced_content_for_location(location)

            # 3. Newsletter-ready Content vorbereiten
            if self.has_enhanced_content:
                newsletter_content = self.content_processor.prepare_content_for_newsletter(
                    location, newsletter_style
                )
            else:
                newsletter_content = self._prepare_legacy_content(enhanced_content, newsletter_style)

            # 4. Zeit-Kontext f√ºr Newsletter
            current_time_info = self.time_manager.get_current_time_info()
            newsletter_date = self.time_manager.format_for_newsletter("heute")

            # 5. Enhanced Newsletter-Prompt erstellen
            newsletter_prompt = self._create_enhanced_newsletter_prompt(
                location, categories, newsletter_content, newsletter_date, newsletter_style
            )

            # 6. Query Log speichern
            query_id = self.persistence_manager.save_query_log(
                category="enhanced_newsletter_generation",
                api="gemini_2_0_flash_enhanced",
                original_query=f"Enhanced Newsletter f√ºr {location} ({newsletter_style})",
                enhanced_query=newsletter_prompt[:300] + "...",
                time_context=current_time_info
            )

            print(f"üì∞ Generiere Enhanced Newsletter f√ºr {location}")
            print(f"üìã Kategorien: {len(categories)}")
            print(f"üé® Style: {newsletter_style}")
            print(f"üìÖ Datum: {newsletter_date}")

            if newsletter_content:
                print(f"üìä Facts: {newsletter_content.get('total_facts', 0)}")
                print(f"üìö Citations: {newsletter_content.get('total_citations', 0)}")

            # 7. Gemini API Call mit erweiterten Parametern
            generation_config = genai.GenerationConfig(
                temperature=self._get_temperature_for_style(newsletter_style),
                max_output_tokens=self._get_max_tokens_for_style(newsletter_style)
            )

            response = self.model.generate_content(
                newsletter_prompt,
                generation_config=generation_config
            )

            # 8. Raw Response speichern
            raw_filename = self.persistence_manager.save_raw_response(
                source="gemini_enhanced_newsletter",
                response=response,
                query=newsletter_prompt[:500] + "...",
                category="enhanced_newsletter",
                metadata={
                    "query_id": query_id,
                    "location": location,
                    "categories": categories,
                    "newsletter_style": newsletter_style,
                    "enhanced_processing": self.has_enhanced_content,
                    "facts_count": newsletter_content.get('total_facts', 0) if newsletter_content else 0,
                    "citations_count": newsletter_content.get('total_citations', 0) if newsletter_content else 0,
                    "model": self.model_name
                }
            )

            # 9. Response verarbeiten
            if response and response.text:
                newsletter_result = {
                    "location": location,
                    "categories": categories,
                    "newsletter_content": response.text,
                    "newsletter_style": newsletter_style,
                    "newsletter_date": newsletter_date,
                    "query_id": query_id,
                    "raw_filename": raw_filename,
                    "timestamp": datetime.now().isoformat(),
                    "enhanced_processing": self.has_enhanced_content,
                    "facts_integrated": newsletter_content.get('total_facts', 0) if newsletter_content else 0,
                    "citations_integrated": newsletter_content.get('total_citations', 0) if newsletter_content else 0,
                    "word_count": len(response.text.split()),
                    "char_count": len(response.text),
                    "processing_summary": enhanced_content.get("processing_summary", {})
                }

                self.generated_newsletters.append(newsletter_result)

                # 10. Newsletter als Markdown speichern
                newsletter_filename = self.persistence_manager.save_final_newsletter(
                    newsletter_content=response.text,
                    format="markdown",
                    metadata={
                        "query_id": query_id,
                        "location": location,
                        "categories": categories,
                        "newsletter_style": newsletter_style,
                        "enhanced_processing": self.has_enhanced_content,
                        "facts_count": newsletter_result["facts_integrated"],
                        "citations_count": newsletter_result["citations_integrated"],
                        "word_count": newsletter_result["word_count"],
                        "generation_timestamp": newsletter_result["timestamp"]
                    }
                )

                newsletter_result["newsletter_filename"] = newsletter_filename

                print(f"‚úÖ Enhanced Newsletter erfolgreich generiert!")
                print(f"üìù W√∂rter: {newsletter_result['word_count']}")
                print(f"üìÑ Zeichen: {newsletter_result['char_count']}")
                print(f"üß† Facts integriert: {newsletter_result['facts_integrated']}")
                print(f"üìö Citations: {newsletter_result['citations_integrated']}")
                print(f"üìÅ Gespeichert: {newsletter_filename}")

                return newsletter_result
            else:
                print("‚ùå Keine Enhanced Newsletter-Response von Gemini erhalten")
                return None

        except Exception as e:
            print(f"‚ùå Enhanced Newsletter-Generierung Fehler: {e}")
            import traceback
            print(f"üîç Traceback: {traceback.format_exc()}")

            # Fehler in Persistence speichern
            if self.persistence_manager:
                error_log = {
                    "error": str(e),
                    "location": location,
                    "categories": categories,
                    "newsletter_style": newsletter_style,
                    "enhanced_processing": self.has_enhanced_content
                }
                self.persistence_manager.save_raw_response(
                    source="gemini_enhanced_newsletter_error",
                    response=error_log,
                    category="enhanced_newsletter"
                )

            return None

    def _create_enhanced_newsletter_prompt(self, location, categories, newsletter_content, newsletter_date, style):
        """Erstellt Enhanced Newsletter-Generation Prompt mit strukturierten Facts"""

        base_prompt = f"""Du bist ein erfahrener Newsletter-Redakteur f√ºr lokale Nachrichten in Deutschland.

Erstelle einen professionellen lokalen Newsletter f√ºr {location} vom {newsletter_date}.

NEWSLETTER-STIL: {style}

STIL-SPEZIFIKATIONEN:
"""

        if style == "compact":
            base_prompt += """- COMPACT: Kurze, pr√§gnante Informationen
- Maximal 300 W√∂rter
- 1-2 S√§tze pro Kategorie-Sektion
- Keine Quellenangaben
- Fokus auf wichtigste Informationen"""

        elif style == "standard":
            base_prompt += """- STANDARD: Ausgewogene, informative Berichterstattung
- 400-600 W√∂rter
- 2-4 S√§tze pro Kategorie-Sektion
- Konkrete Fakten und Details einbauen
- Keine Quellenangaben
- Lokaler Fokus mit spezifischen Informationen"""

        elif style == "detailed":
            base_prompt += """- DETAILED: Umfassende, faktenbasierte Berichterstattung
- 600-1000 W√∂rter
- 3-6 S√§tze pro Kategorie-Sektion
- Alle verf√ºgbaren konkreten Fakten integrieren
- Quellenangaben in eckigen Klammern [Quelle: ...]
- Spezifische Daten, Zeiten, Namen, Zahlen verwenden"""

        # Enhanced Content Integration
        if newsletter_content and self.has_enhanced_content:
            base_prompt += f"""

VERF√úGBARE STRUKTURIERTE FAKTEN ({newsletter_content.get('total_facts', 0)} Facts):
"""
            for category, content_data in newsletter_content.get('categories', {}).items():
                if content_data['facts']:
                    base_prompt += f"""
{category.upper()}:
- Confidence Score: {content_data['confidence_score']:.2f}
- Quellen: {content_data['sources_count']} verschiedene APIs"""

                    # Facts f√ºr diese Kategorie
                    for fact in content_data['facts'][:5]:  # Top 5 Facts
                        base_prompt += f"""
  ‚Ä¢ {fact['type']}: {fact['value']}"""

                    # Text-Snippets
                    if content_data['newsletter_text_snippets']:
                        base_prompt += f"""
- Vorgenerierte Textbausteine: {' | '.join(content_data['newsletter_text_snippets'])}"""

                    # Citations f√ºr detailed Style
                    if style == "detailed" and content_data['citations']:
                        base_prompt += f"""
- Verf√ºgbare Quellen ({len(content_data['citations'])}):"""
                        for citation in content_data['citations'][:3]:
                            if citation.get('source'):
                                base_prompt += f"""
  [Quelle: {citation['source'][:50]}...]"""

        else:
            # Legacy Content (fallback)
            base_prompt += f"""
HINWEIS: Verwende allgemeine lokale Informationen f√ºr {location}."""

        base_prompt += f"""

STRUKTUR-ANFORDERUNGEN:
1. **Titel**: "{location} Newsletter - {newsletter_date}"
2. **Kurze Begr√º√üung** (1-2 S√§tze)
3. **Kategorien-Sections** (nur die verf√ºgbaren):
   - Nachrichten
   - Wetter
   - Events
   - Sport
   - (weitere falls verf√ºgbar)
4. **Freundlicher Abschluss** (1 Satz)

CONTENT-RICHTLINUNGEN:
‚úÖ Verwende die bereitgestellten strukturierten Fakten
‚úÖ Integriere konkrete Zahlen, Daten, Namen aus den Facts
‚úÖ Deutscher, lokaler, informativer Ton
‚úÖ Zeitgem√§√üe Informationen mit Fokus auf {newsletter_date}
"""

        if style == "detailed":
            base_prompt += """‚úÖ F√ºge Quellenangaben in eckigen Klammern hinzu
‚úÖ Nutze alle verf√ºgbaren konkreten Details"""

        base_prompt += f"""

WICHTIG: Erstelle jetzt einen {style} Newsletter, der die strukturierten Fakten optimal nutzt!"""

        return base_prompt

    def _get_temperature_for_style(self, style):
        """Gibt Style-spezifische Temperature zur√ºck"""
        temperature_map = {
            "compact": 0.2,    # Pr√§zise, faktisch
            "standard": 0.3,   # Ausgewogen
            "detailed": 0.4    # Etwas kreativer f√ºr l√§ngere Texte
        }
        return temperature_map.get(style, 0.3)

    def _get_max_tokens_for_style(self, style):
        """Gibt Style-spezifische Max Tokens zur√ºck"""
        token_map = {
            "compact": 800,     # ~300 W√∂rter
            "standard": 1600,   # ~600 W√∂rter
            "detailed": 2500    # ~1000 W√∂rter
        }
        return token_map.get(style, 1600)

    def _prepare_legacy_content(self, enhanced_content, newsletter_style):
        """Fallback Content Preparation ohne Enhanced Content Processor"""

        legacy_content = {
            "location": enhanced_content["location"],
            "newsletter_style": newsletter_style,
            "timestamp": enhanced_content["timestamp"],
            "categories": {},
            "total_facts": 0,
            "total_citations": 0,
            "processing_method": "legacy"
        }

        # Vereinfachte Content-Struktur f√ºr Legacy-Mode
        for category in enhanced_content.get("categories_covered", []):
            legacy_content["categories"][category] = {
                "category": category,
                "content_level": "basic",
                "sources_available": True,
                "facts": [],
                "citations": []
            }

        return legacy_content

    # =========================================================================
    # BATCH NEWSLETTER GENERATION
    # =========================================================================

    def generate_all_newsletter_styles(self, location, categories=None):
        """
        Generiert alle 3 Newsletter-Styles f√ºr Vergleich

        Args:
            location: Ortsname
            categories: Kategorien-Liste (optional)

        Returns:
            dict: Alle 3 Newsletter-Versionen
        """
        print(f"üì∞ Generiere alle Newsletter-Styles f√ºr {location}")

        styles = ["compact", "standard", "detailed"]
        all_newsletters = {}

        for style in styles:
            print(f"\n  üé® Generiere {style.upper()} Newsletter...")
            newsletter = self.generate_newsletter_enhanced(location, categories, style)

            if newsletter:
                all_newsletters[style] = newsletter
                print(f"    ‚úÖ {style}: {newsletter['word_count']} W√∂rter, {newsletter.get('facts_integrated', 0)} Facts")
            else:
                print(f"    ‚ùå {style}: Generierung fehlgeschlagen")
                all_newsletters[style] = None

        # Vergleichs-Summary
        comparison_summary = {
            "location": location,
            "timestamp": datetime.now().isoformat(),
            "styles_generated": len([n for n in all_newsletters.values() if n]),
            "word_counts": {style: nl["word_count"] if nl else 0 for style, nl in all_newsletters.items()},
            "facts_counts": {style: nl.get("facts_integrated", 0) if nl else 0 for style, nl in all_newsletters.items()},
            "newsletters": all_newsletters
        }

        print(f"\nüìä VERGLEICH ALLER STYLES:")
        for style in styles:
            if all_newsletters[style]:
                wc = all_newsletters[style]["word_count"]
                fc = all_newsletters[style].get("facts_integrated", 0)
                print(f"  {style.upper()}: {wc} W√∂rter, {fc} Facts")
            else:
                print(f"  {style.upper()}: ‚ùå Fehler")

        return comparison_summary

    # =========================================================================
    # LEGACY COMPATIBILITY
    # =========================================================================

    def generate_newsletter_with_foundation(self, location, categories=None, newsletter_style="standard"):
        """
        Legacy Method - ruft Enhanced Version auf f√ºr R√ºckw√§rtskompatibilit√§t
        """
        print("‚ö†Ô∏è Legacy Method aufgerufen - verwende Enhanced Version")
        return self.generate_newsletter_enhanced(location, categories, newsletter_style)

    def enhance_newsletter_with_details(self, newsletter_result, enhancement_type="citations"):
        """
        Erweitert generierten Newsletter um zus√§tzliche Details (Enhanced Version)
        """
        if not newsletter_result:
            return None

        enhanced_content = newsletter_result["newsletter_content"]

        if enhancement_type == "metadata":
            # Enhanced Newsletter-Metadaten hinzuf√ºgen
            metadata_section = f"""

---

## Newsletter-Informationen

**Generiert:** {newsletter_result['timestamp']}
**Ort:** {newsletter_result['location']}
**Datum:** {newsletter_result['newsletter_date']}
**Stil:** {newsletter_result['newsletter_style']}
**Enhanced Processing:** {'‚úÖ' if newsletter_result.get('enhanced_processing') else '‚ùå'}
**Facts integriert:** {newsletter_result.get('facts_integrated', 0)}
**Citations:** {newsletter_result.get('citations_integrated', 0)}
**Umfang:** {newsletter_result['word_count']} W√∂rter, {newsletter_result['char_count']} Zeichen

*Dieser Newsletter wurde automatisch mit Enhanced Content Processing generiert.*
"""
            enhanced_content += metadata_section

        elif enhancement_type == "processing_summary":
            # Processing Summary hinzuf√ºgen
            if newsletter_result.get("processing_summary"):
                ps = newsletter_result["processing_summary"]
                summary_section = f"""

---

## Content Processing Summary

**Kategorien verarbeitet:** {ps.get('categories_processed', 0)}
**Quellen analysiert:** {ps.get('total_sources', 0)}
**Durchschnittliche Confidence:** {ps.get('average_confidence', 0):.2f}
**Citations extrahiert:** {ps.get('total_citations', 0)}

### Kategorien-Details:
"""
                for category, details in ps.get('categories', {}).items():
                    summary_section += f"- **{category.title()}**: {details['sources']} Quellen, {details['confidence']:.2f} Confidence\n"

                enhanced_content += summary_section

        return enhanced_content

    # =========================================================================
    # UTILITY METHODS
    # =========================================================================

    def get_enhanced_newsletter_summary(self, location=None):
        """Gibt Enhanced Newsletter Summary zur√ºck"""

        if location:
            filtered_newsletters = [
                n for n in self.generated_newsletters
                if n["location"].lower() == location.lower()
            ]
        else:
            filtered_newsletters = self.generated_newsletters

        summary = {
            "total_newsletters": len(filtered_newsletters),
            "enhanced_processing_count": len([n for n in filtered_newsletters if n.get("enhanced_processing")]),
            "locations": list(set(n["location"] for n in filtered_newsletters)),
            "newsletter_styles": list(set(n["newsletter_style"] for n in filtered_newsletters)),
            "total_words": sum(n["word_count"] for n in filtered_newsletters),
            "total_facts_integrated": sum(n.get("facts_integrated", 0) for n in filtered_newsletters),
            "total_citations_integrated": sum(n.get("citations_integrated", 0) for n in filtered_newsletters),
            "average_facts_per_newsletter": sum(n.get("facts_integrated", 0) for n in filtered_newsletters) / max(len(filtered_newsletters), 1),
            "newsletters": filtered_newsletters
        }

        return summary

# =============================================================================
# INITIALISIERUNG MIT ENHANCED CONTENT PROCESSOR
# =============================================================================

# Gemini Worker V2 Enhanced mit Enhanced Content Processor initialisieren
if api_config.gemini_key:
    # Foundation-System Integration
    foundation_available = all([
        'config_manager' in globals() and config_manager,
        'time_manager' in globals() and time_manager,
        'persistence_manager' in globals() and persistence_manager
    ])

    enhanced_content_available = 'enhanced_content_processor' in globals() and enhanced_content_processor

    if foundation_available:
        gemini_worker_v2_enhanced = GeminiWorkerV2Enhanced(
            api_key=api_config.gemini_key,
            config_manager=config_manager,
            time_manager=time_manager,
            persistence_manager=persistence_manager,
            content_processor=enhanced_content_processor if enhanced_content_available else None
        )
        print("üöÄ Gemini Worker V2 Enhanced mit Foundation-System bereit")

        if enhanced_content_available:
            print("üß† Enhanced Content Processor integriert - strukturierte Facts verf√ºgbar")
        else:
            print("‚ö†Ô∏è Enhanced Content Processor fehlt - l√§uft im Legacy-Modus")

    else:
        # Fallback ohne Foundation
        gemini_worker_v2_enhanced = GeminiWorkerV2Enhanced(api_key=api_config.gemini_key)
        print("‚ö†Ô∏è Gemini Worker V2 Enhanced im Legacy-Modus (Foundation-System fehlt)")

else:
    print("‚ùå Gemini Worker V2 Enhanced nicht verf√ºgbar - API Key fehlt")
    gemini_worker_v2_enhanced = None

# =============================================================================
# DIREKTER TEST - Enhanced Newsletter Generation
# =============================================================================

if gemini_worker_v2_enhanced and foundation_available:
    print("\nüß™ TESTE GEMINI WORKER V2 ENHANCED")
    print("=" * 65)

    try:
        test_location = "m√ºnchen"

        # Test 1: Enhanced Content Collection
        print("üìç Test 1: Enhanced Content Collection")
        enhanced_content = gemini_worker_v2_enhanced.collect_enhanced_content_for_location(test_location)

        processing_method = enhanced_content.get("processing_method", "unknown")
        print(f"  üß† Processing Method: {processing_method}")

        if processing_method == "enhanced":
            ps = enhanced_content.get("processing_summary", {})
            print(f"  üìä Kategorien: {ps.get('categories_processed', 0)}")
            print(f"  üìÅ Sources: {ps.get('total_sources', 0)}")
            print(f"  ‚≠ê Avg Confidence: {ps.get('average_confidence', 0):.2f}")
            print(f"  üìö Citations: {ps.get('total_citations', 0)}")

        # Test 2: Enhanced Newsletter Generation (Standard Style)
        print(f"\nüìç Test 2: Enhanced Newsletter (Standard)")
        standard_newsletter = gemini_worker_v2_enhanced.generate_newsletter_enhanced(
            location=test_location,
            categories=["wetter", "nachrichten", "events", "sport"],
            newsletter_style="standard"
        )

        if standard_newsletter:
            print(f"  ‚úÖ Standard Newsletter generiert!")
            print(f"  üìù W√∂rter: {standard_newsletter['word_count']}")
            print(f"  üß† Facts: {standard_newsletter.get('facts_integrated', 0)}")
            print(f"  üìö Citations: {standard_newsletter.get('citations_integrated', 0)}")
            print(f"  üìÅ File: {standard_newsletter.get('newsletter_filename', 'N/A')}")

        # Test 3: Batch Generation (alle 3 Styles)
        print(f"\nüìç Test 3: Alle Newsletter-Styles")
        all_styles_result = gemini_worker_v2_enhanced.generate_all_newsletter_styles(
            location=test_location,
            categories=["wetter", "nachrichten", "events"]
        )

        if all_styles_result:
            print(f"  üìä Styles generiert: {all_styles_result['styles_generated']}/3")
            print(f"  üìù Word Counts: {all_styles_result['word_counts']}")
            print(f"  üß† Facts Counts: {all_styles_result['facts_counts']}")

        # Test 4: Enhanced Summary
        print(f"\nüìç Test 4: Enhanced Newsletter Summary")
        summary = gemini_worker_v2_enhanced.get_enhanced_newsletter_summary(test_location)

        print(f"  üìä Total Newsletter: {summary['total_newsletters']}")
        print(f"  üß† Enhanced Processing: {summary['enhanced_processing_count']}")
        print(f"  üìù Total Words: {summary['total_words']}")
        print(f"  üéØ Avg Facts/Newsletter: {summary['average_facts_per_newsletter']:.1f}")
        print(f"  üìö Total Citations: {summary['total_citations_integrated']}")

        print(f"\n‚úÖ Alle Gemini Worker V2 Enhanced Tests erfolgreich!")

    except Exception as test_error:
        print(f"‚ùå Test Fehler: {test_error}")
        import traceback
        print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 65)
print("‚úÖ Zelle 5: Gemini Worker V2 Enhanced Setup komplett\n")

‚úÖ Gemini Worker V2 Enhanced initialisiert
üèóÔ∏è Foundation Integration: ‚úÖ
üß† Enhanced Content Processor: ‚úÖ
‚öôÔ∏è API Config geladen: Model gemini-2.0-flash-exp
üéõÔ∏è Generation Config: temperature=0.3
üöÄ Gemini Worker V2 Enhanced mit Foundation-System bereit
üß† Enhanced Content Processor integriert - strukturierte Facts verf√ºgbar

üß™ TESTE GEMINI WORKER V2 ENHANCED
üìç Test 1: Enhanced Content Collection
üß† Sammle Enhanced Content f√ºr m√ºnchen
  üìä Verwende Enhanced Content Processor
üß† Verarbeite alle Category-Facts f√ºr m√ºnchen
üìÇ Lade alle Category-Contents f√ºr m√ºnchen
  üìä Kategorien geladen: 6
    wetter: 2 Content-Items
    rathaus: 1 Content-Items
    events: 1 Content-Items
    nachrichten: 1 Content-Items
    verkehr: 1 Content-Items
    sport: 1 Content-Items
  üéØ Verarbeite wetter: 2 Sources
    ‚úÖ wetter: 1.00 Confidence, 3 Citations
  üéØ Verarbeite rathaus: 1 Sources
    ‚úÖ rathaus: 0.80 Confidence, 16 Citations
  üéØ Verarbeite ev

In [13]:
# =============================================================================
# ZELLE 5b: Simple Gemini Worker - Einfache Newsletter Generation mit Simple Content Processor
# =============================================================================
# @title Simple Gemini Worker - Wartbar, einfach, effektiv

import google.generativeai as genai
from datetime import datetime

class SimpleGeminiWorker:
    """
    Simple Gemini Worker - Einfache Newsletter-Generation
    - Nutzt Simple Content Processor f√ºr Fact-Integration
    - Direkte Markdown-Content √úbergabe an Gemini
    - Fokus auf Einfachheit und Wartbarkeit
    - 3 Newsletter-Levels: compact/standard/detailed
    """

    def __init__(self, api_key, config_manager=None, time_manager=None, persistence_manager=None, simple_content_processor=None):
        """
        Initialisiert Simple Gemini Worker

        Args:
            api_key: Google Gemini API Key
            config_manager: ConfigManager Instance (optional)
            time_manager: TimeContextManager Instance (optional)
            persistence_manager: DataPersistenceManager Instance (optional)
            simple_content_processor: Simple Content Processor Instance (optional)
        """
        genai.configure(api_key=api_key)

        # Foundation-System Integration (optional)
        self.config_manager = config_manager
        self.time_manager = time_manager
        self.persistence_manager = persistence_manager
        self.simple_content_processor = simple_content_processor

        # Availability Checks
        self.has_foundation = all([config_manager, time_manager, persistence_manager])
        self.has_simple_content = simple_content_processor is not None

        # Gemini Model
        self.model_name = "gemini-2.0-flash-exp"
        self.model = genai.GenerativeModel(self.model_name)

        # Worker State
        self.generated_newsletters = []

        print(f"‚úÖ Simple Gemini Worker initialisiert")
        print(f"üèóÔ∏è Foundation System: {'‚úÖ' if self.has_foundation else '‚ùå'}")
        print(f"üß† Simple Content Processor: {'‚úÖ' if self.has_simple_content else '‚ùå'}")
        print(f"ü§ñ Model: {self.model_name}")

    # =========================================================================
    # SIMPLE NEWSLETTER GENERATION
    # =========================================================================

    def generate_simple_newsletter(self, location, categories=None, newsletter_style="standard"):
        """
        Einfache Newsletter-Generation mit Simple Content Processor

        Args:
            location: Ortsname
            categories: Kategorien-Liste (optional, wird ignoriert - alle verf√ºgbaren verwendet)
            newsletter_style: "compact", "standard", "detailed"

        Returns:
            dict: Generated Newsletter mit Metadaten
        """
        print(f"üì∞ Generiere {newsletter_style.upper()} Newsletter f√ºr {location}")

        try:
            # 1. Zeit-Kontext bestimmen
            if self.time_manager:
                newsletter_date = self.time_manager.format_for_newsletter("heute")
                current_time_info = self.time_manager.get_current_time_info()
            else:
                newsletter_date = datetime.now().strftime("%d.%m.%Y")
                current_time_info = {"date": newsletter_date}

            # 2. Content mit Simple Content Processor laden
            if self.has_simple_content:
                newsletter_content = self.simple_content_processor.load_all_content_for_newsletter(
                    location, newsletter_style
                )
                content_section = self.simple_content_processor.format_for_gemini_prompt(
                    newsletter_content, location
                )

                total_sources = newsletter_content.get("total_sources", 0)
                total_facts = sum(len(cat.get("key_facts", [])) for cat in newsletter_content.get("categories", {}).values())

                print(f"  üìä Content geladen: {total_sources} Sources, {total_facts} Facts")

            else:
                # Fallback ohne Content Processor
                content_section = f"Erstelle Newsletter f√ºr {location} basierend auf allgemeinen lokalen Informationen."
                total_sources = 0
                total_facts = 0
                print(f"  ‚ö†Ô∏è Fallback: Kein Content Processor - allgemeine Informationen")

            # 3. Simple Newsletter-Prompt erstellen
            newsletter_prompt = self._create_simple_prompt(
                location, newsletter_date, newsletter_style, content_section
            )

            # 4. Query Log speichern
            query_id = None
            if self.persistence_manager:
                query_id = self.persistence_manager.save_query_log(
                    category="simple_newsletter_generation",
                    api="gemini_simple",
                    original_query=f"Simple Newsletter {location} ({newsletter_style})",
                    enhanced_query=newsletter_prompt[:200] + "...",
                    time_context=current_time_info
                )

            # 5. Gemini Generation Config
            generation_config = genai.GenerationConfig(
                temperature=self._get_temperature_for_style(newsletter_style),
                max_output_tokens=self._get_max_tokens_for_style(newsletter_style)
            )

            print(f"  ü§ñ Generiere mit Gemini...")

            # 6. Gemini API Call
            response = self.model.generate_content(
                newsletter_prompt,
                generation_config=generation_config
            )

            # 7. Response verarbeiten
            if response and response.text:
                newsletter_result = {
                    "location": location,
                    "newsletter_style": newsletter_style,
                    "newsletter_content": response.text,
                    "newsletter_date": newsletter_date,
                    "query_id": query_id,
                    "timestamp": datetime.now().isoformat(),
                    "word_count": len(response.text.split()),
                    "char_count": len(response.text),
                    "sources_used": total_sources,
                    "facts_integrated": total_facts,
                    "processing_method": "simple_direct",
                    "model": self.model_name
                }

                # 8. Raw Response speichern
                if self.persistence_manager:
                    raw_filename = self.persistence_manager.save_raw_response(
                        source="gemini_simple_newsletter",
                        response=response,
                        query=newsletter_prompt[:500] + "...",
                        category="simple_newsletter",
                        metadata={
                            "query_id": query_id,
                            "location": location,
                            "newsletter_style": newsletter_style,
                            "sources_used": total_sources,
                            "facts_integrated": total_facts
                        }
                    )
                    newsletter_result["raw_filename"] = raw_filename

                    # 9. Newsletter als Markdown speichern
                    newsletter_filename = self.persistence_manager.save_final_newsletter(
                        newsletter_content=response.text,
                        format="markdown",
                        metadata={
                            "query_id": query_id,
                            "location": location,
                            "newsletter_style": newsletter_style,
                            "sources_used": total_sources,
                            "facts_integrated": total_facts,
                            "processing_method": "simple_direct",
                            "word_count": newsletter_result["word_count"]
                        }
                    )
                    newsletter_result["newsletter_filename"] = newsletter_filename

                self.generated_newsletters.append(newsletter_result)

                print(f"  ‚úÖ Newsletter generiert!")
                print(f"  üìù W√∂rter: {newsletter_result['word_count']}")
                print(f"  üìä Sources: {newsletter_result['sources_used']}")
                print(f"  üéØ Facts: {newsletter_result['facts_integrated']}")
                if newsletter_result.get("newsletter_filename"):
                    print(f"  üìÅ Gespeichert: {newsletter_result['newsletter_filename']}")

                return newsletter_result

            else:
                print(f"  ‚ùå Keine Response von Gemini erhalten")
                return None

        except Exception as e:
            print(f"‚ùå Simple Newsletter Generation Fehler: {e}")
            import traceback
            print(f"üîç Traceback: {traceback.format_exc()}")

            # Fehler speichern
            if self.persistence_manager:
                self.persistence_manager.save_raw_response(
                    source="gemini_simple_newsletter_error",
                    response={"error": str(e), "location": location, "style": newsletter_style},
                    category="simple_newsletter_error"
                )

            return None

    def _create_simple_prompt(self, location, newsletter_date, style, content_section):
        """Erstellt einfachen, effektiven Newsletter-Prompt"""

        # Style-spezifische Anweisungen
        style_instructions = {
            "compact": {
                "description": "Kurzer, pr√§gnanter Newsletter",
                "word_target": "250-350 W√∂rter",
                "details": "1-2 S√§tze pro Kategorie, wichtigste Informationen",
                "tone": "knapp und informativ"
            },
            "standard": {
                "description": "Ausgewogener, informativer Newsletter",
                "word_target": "400-600 W√∂rter",
                "details": "2-4 S√§tze pro Kategorie, konkrete Facts einbauen",
                "tone": "freundlich und detailliert"
            },
            "detailed": {
                "description": "Umfassender, faktenreicher Newsletter",
                "word_target": "600-900 W√∂rter",
                "details": "3-6 S√§tze pro Kategorie, alle verf√ºgbaren Details nutzen",
                "tone": "ausf√ºhrlich und fundiert"
            }
        }

        style_info = style_instructions.get(style, style_instructions["standard"])

        prompt = f"""Du bist ein lokaler Newsletter-Redakteur f√ºr {location}.

AUFTRAG: Erstelle einen {style_info['description']} f√ºr {location} vom {newsletter_date}.

STIL: {style.upper()}
- {style_info['description']}
- Ziel: {style_info['word_target']}
- Details: {style_info['details']}
- Ton: {style_info['tone']}

{content_section}

STRUKTUR:
1. **Titel**: "{location} Newsletter - {newsletter_date}"
2. **Begr√º√üung**: Kurze, freundliche Ansprache
3. **Kategorien-Sections**:
   - Nutze die bereitgestellten Facts und Content-Snippets
   - Integriere konkrete Zahlen, Daten, Zeiten
   - Verwende lokale Bez√ºge
4. **Abschluss**: Freundlicher Tagesabschluss

WICHTIGE ANFORDERUNGEN:
‚úÖ Deutsche Sprache, lokaler Ton
‚úÖ Konkrete Facts aus dem bereitgestellten Content verwenden
‚úÖ Zeitgem√§√üe Informationen mit Fokus auf {newsletter_date}
‚úÖ {style_info['word_target']} einhalten
‚úÖ Freundlich und professionell

Erstelle jetzt den {style} Newsletter f√ºr die B√ºrger von {location}!"""

        return prompt

    def _get_temperature_for_style(self, style):
        """Style-spezifische Temperature"""
        temp_map = {
            "compact": 0.2,    # Pr√§zise, faktisch
            "standard": 0.3,   # Ausgewogen
            "detailed": 0.4    # Etwas kreativer
        }
        return temp_map.get(style, 0.3)

    def _get_max_tokens_for_style(self, style):
        """Style-spezifische Max Tokens"""
        token_map = {
            "compact": 900,     # ~350 W√∂rter
            "standard": 1500,   # ~600 W√∂rter
            "detailed": 2300    # ~900 W√∂rter
        }
        return token_map.get(style, 1500)

    # =========================================================================
    # BATCH NEWSLETTER GENERATION
    # =========================================================================

    def generate_all_newsletter_styles(self, location):
        """
        Generiert alle 3 Newsletter-Styles zum Vergleich

        Args:
            location: Ortsname

        Returns:
            dict: Alle 3 Newsletter-Versionen
        """
        print(f"üì∞ Generiere ALLE Newsletter-Styles f√ºr {location}")
        print("=" * 50)

        styles = ["compact", "standard", "detailed"]
        all_newsletters = {}

        for style in styles:
            print(f"\nüé® {style.upper()} Newsletter:")
            newsletter = self.generate_simple_newsletter(location, newsletter_style=style)

            if newsletter:
                all_newsletters[style] = newsletter
                print(f"  ‚úÖ Erfolgreich: {newsletter['word_count']} W√∂rter")
            else:
                print(f"  ‚ùå Fehlgeschlagen")
                all_newsletters[style] = None

        # Vergleichs-Summary
        comparison_summary = {
            "location": location,
            "timestamp": datetime.now().isoformat(),
            "styles_generated": len([n for n in all_newsletters.values() if n]),
            "processing_method": "simple_direct",
            "comparison": {}
        }

        print(f"\nüìä STYLE-VERGLEICH:")
        for style in styles:
            if all_newsletters[style]:
                nl = all_newsletters[style]
                comparison_summary["comparison"][style] = {
                    "word_count": nl["word_count"],
                    "sources_used": nl["sources_used"],
                    "facts_integrated": nl["facts_integrated"]
                }
                print(f"  {style.upper():>8}: {nl['word_count']:>3} W√∂rter, {nl['sources_used']:>2} Sources, {nl['facts_integrated']:>2} Facts")
            else:
                comparison_summary["comparison"][style] = None
                print(f"  {style.upper():>8}: ‚ùå Fehler")

        comparison_summary["newsletters"] = all_newsletters
        return comparison_summary

    # =========================================================================
    # NEWSLETTER COMPARISON & ANALYSIS
    # =========================================================================

    def compare_newsletter_quality(self, location):
        """Analysiert Newsletter-Qualit√§t √ºber alle Styles"""

        location_newsletters = [
            n for n in self.generated_newsletters
            if n["location"].lower() == location.lower()
        ]

        if not location_newsletters:
            print(f"‚ùå Keine Newsletter f√ºr {location} gefunden")
            return None

        quality_analysis = {
            "location": location,
            "total_newsletters": len(location_newsletters),
            "styles_analyzed": set(n["newsletter_style"] for n in location_newsletters),
            "avg_word_count": sum(n["word_count"] for n in location_newsletters) / len(location_newsletters),
            "total_sources_utilized": sum(n["sources_used"] for n in location_newsletters),
            "total_facts_integrated": sum(n["facts_integrated"] for n in location_newsletters),
            "processing_method": "simple_direct",
            "newsletters": location_newsletters
        }

        print(f"üìä NEWSLETTER QUALIT√ÑTS-ANALYSE f√ºr {location}:")
        print(f"  Newsletter generiert: {quality_analysis['total_newsletters']}")
        print(f"  Styles: {', '.join(quality_analysis['styles_analyzed'])}")
        print(f"  √ò W√∂rter: {quality_analysis['avg_word_count']:.0f}")
        print(f"  Total Sources genutzt: {quality_analysis['total_sources_utilized']}")
        print(f"  Total Facts integriert: {quality_analysis['total_facts_integrated']}")

        return quality_analysis

    # =========================================================================
    # UTILITY METHODS
    # =========================================================================

    def get_simple_newsletter_summary(self, location=None):
        """Gibt Simple Newsletter Summary zur√ºck"""

        if location:
            filtered_newsletters = [
                n for n in self.generated_newsletters
                if n["location"].lower() == location.lower()
            ]
        else:
            filtered_newsletters = self.generated_newsletters

        summary = {
            "total_newsletters": len(filtered_newsletters),
            "locations": list(set(n["location"] for n in filtered_newsletters)),
            "newsletter_styles": list(set(n["newsletter_style"] for n in filtered_newsletters)),
            "processing_method": "simple_direct",
            "total_words": sum(n["word_count"] for n in filtered_newsletters),
            "total_sources_used": sum(n["sources_used"] for n in filtered_newsletters),
            "total_facts_integrated": sum(n["facts_integrated"] for n in filtered_newsletters),
            "avg_facts_per_newsletter": sum(n["facts_integrated"] for n in filtered_newsletters) / max(len(filtered_newsletters), 1),
            "newsletters": filtered_newsletters
        }

        return summary

    # Legacy Compatibility f√ºr bestehende Interfaces
    def generate_newsletter_with_foundation(self, location, categories=None, newsletter_style="standard"):
        """Legacy-kompatible Methode"""
        return self.generate_simple_newsletter(location, categories, newsletter_style)

# =============================================================================
# INITIALISIERUNG
# =============================================================================

# Simple Gemini Worker initialisieren
if api_config.gemini_key:
    # Foundation-System Integration (optional)
    foundation_available = all([
        'config_manager' in globals() and config_manager,
        'time_manager' in globals() and time_manager,
        'persistence_manager' in globals() and persistence_manager
    ])

    simple_content_available = 'simple_content_processor' in globals() and simple_content_processor

    simple_gemini_worker = SimpleGeminiWorker(
        api_key=api_config.gemini_key,
        config_manager=config_manager if foundation_available else None,
        time_manager=time_manager if foundation_available else None,
        persistence_manager=persistence_manager if foundation_available else None,
        simple_content_processor=simple_content_processor if simple_content_available else None
    )

    print("üöÄ Simple Gemini Worker bereit!")

    if simple_content_available:
        print("üß† Simple Content Processor integriert - Facts verf√ºgbar")
    else:
        print("‚ö†Ô∏è L√§uft ohne Simple Content Processor")

else:
    print("‚ùå Simple Gemini Worker nicht verf√ºgbar - API Key fehlt")
    simple_gemini_worker = None

# =============================================================================
# DIREKTER TEST
# =============================================================================

if simple_gemini_worker:
    print("\nüß™ TESTE SIMPLE GEMINI WORKER")
    print("=" * 55)

    try:
        test_location = "m√ºnchen"

        # Test 1: Single Newsletter Generation
        print("üìç Test 1: Standard Newsletter Generation")
        standard_newsletter = simple_gemini_worker.generate_simple_newsletter(
            location=test_location,
            newsletter_style="standard"
        )

        if standard_newsletter:
            print(f"  ‚úÖ Standard Newsletter:")
            print(f"    W√∂rter: {standard_newsletter['word_count']}")
            print(f"    Sources: {standard_newsletter['sources_used']}")
            print(f"    Facts: {standard_newsletter['facts_integrated']}")

            # Preview der ersten 150 Zeichen
            preview = standard_newsletter['newsletter_content'][:150] + "..."
            print(f"    Preview: {preview}")

        # Test 2: Batch Generation (alle 3 Styles)
        print(f"\nüìç Test 2: Batch Generation (alle Styles)")
        all_styles = simple_gemini_worker.generate_all_newsletter_styles(test_location)

        if all_styles:
            print(f"  üìä Styles generiert: {all_styles['styles_generated']}/3")

        # Test 3: Quality Analysis
        print(f"\nüìç Test 3: Newsletter Quality Analysis")
        quality_analysis = simple_gemini_worker.compare_newsletter_quality(test_location)

        # Test 4: Summary
        print(f"\nüìç Test 4: Simple Newsletter Summary")
        summary = simple_gemini_worker.get_simple_newsletter_summary(test_location)

        print(f"  üìä Total Newsletter: {summary['total_newsletters']}")
        print(f"  üìù Total Words: {summary['total_words']}")
        print(f"  üìä Avg Facts/Newsletter: {summary['avg_facts_per_newsletter']:.1f}")
        print(f"  üé® Styles: {', '.join(summary['newsletter_styles'])}")

        print(f"\n‚úÖ Simple Gemini Worker Tests erfolgreich!")
        print(f"üí° Viel einfacher und wartbarer als Enhanced Version!")

    except Exception as test_error:
        print(f"‚ùå Test Fehler: {test_error}")
        import traceback
        print(f"üîç Traceback: {traceback.format_exc()}")

print("=" * 55)
print("‚úÖ Zelle 5b: Simple Gemini Worker Setup komplett\n")

‚úÖ Simple Gemini Worker initialisiert
üèóÔ∏è Foundation System: ‚úÖ
üß† Simple Content Processor: ‚úÖ
ü§ñ Model: gemini-2.0-flash-exp
üöÄ Simple Gemini Worker bereit!
üß† Simple Content Processor integriert - Facts verf√ºgbar

üß™ TESTE SIMPLE GEMINI WORKER
üìç Test 1: Standard Newsletter Generation
üì∞ Generiere STANDARD Newsletter f√ºr m√ºnchen
üìÇ Lade Content f√ºr standard Newsletter
  üìä Kategorien: 6
  üìù Total Content: 22,821 Zeichen
  üé® Style: standard
  üìä Content geladen: 7 Sources, 36 Facts
üîç Query-Log gespeichert: adaa6025
  ü§ñ Generiere mit Gemini...
üíæ Raw Response gespeichert: 014_gemini_simple_newsletter_simple_newsletter_13-37-23.json
üì∞ Newsletter gespeichert: newsletter_m√ºnchen_2025-07-20_13-37.md
  ‚úÖ Newsletter generiert!
  üìù W√∂rter: 367
  üìä Sources: 7
  üéØ Facts: 36
  üìÅ Gespeichert: /content/drive/MyDrive/Newsletter_System/data/sessions/2025-07-20_13-35_m√ºnchen/final_newsletter/newsletter_m√ºnchen_2025-07-20_13-37.md
  ‚ú

In [14]:
# =============================================================================
# ZELLE 6: Main Controller - Orchestrierte Multi-API Newsletter Pipeline
# =============================================================================
# @title Main Controller - Vollst√§ndige Newsletter-Generation mit Foundation-System

class NewsletterMainController:
    """
    Main Controller f√ºr das komplette Newsletter-System
    Orchestriert alle Worker und das Foundation-System f√ºr die automatische Newsletter-Generierung
    """

    def __init__(self, config_manager, time_manager, persistence_manager):
        """
        Initialisiert Main Controller

        Args:
            config_manager: ConfigManager Instance
            time_manager: TimeContextManager Instance
            persistence_manager: DataPersistenceManager Instance
        """
        # Foundation-System
        self.config_manager = config_manager
        self.time_manager = time_manager
        self.persistence_manager = persistence_manager

        # Worker Registry
        self.workers = {}
        self._register_workers()

        # Controller State
        self.newsletter_sessions = []
        self.execution_log = []

        print(f"‚úÖ Main Controller initialisiert")
        print(f"üèóÔ∏è Foundation-System: ConfigManager, TimeContextManager, DataPersistenceManager")
        print(f"ü§ñ Registrierte Worker: {len(self.workers)}")

    def _register_workers(self):
        """Registriert alle verf√ºgbaren Worker"""

        # Firecrawl Worker V2
        if 'firecrawl_worker_v2' in globals() and firecrawl_worker_v2:
            self.workers['firecrawl'] = firecrawl_worker_v2
            print("  üï∑Ô∏è Firecrawl Worker V2 registriert")

        # Claude Worker V2
        if 'claude_worker_v2' in globals() and claude_worker_v2:
            self.workers['claude'] = claude_worker_v2
            print("  ü§ñ Claude Worker V2 registriert")

        # Perplexity Worker V2
        if 'perplexity_worker_v2' in globals() and perplexity_worker_v2:
            self.workers['perplexity'] = perplexity_worker_v2
            print("  üîç Perplexity Worker V2 registriert")

        # Gemini Worker V2
        if 'gemini_worker_v2' in globals() and gemini_worker_v2:
            self.workers['gemini'] = gemini_worker_v2
            print("  ‚ú® Gemini Worker V2 registriert")

    # =========================================================================
    # NEWSLETTER GENERATION PIPELINE
    # =========================================================================

    def generate_complete_newsletter(self, location, categories=None, newsletter_config=None):
        """
        Komplette Newsletter-Pipeline von Datensammlung bis Generierung

        Args:
            location: Ortsname
            categories: Kategorien-Liste (optional, aus Config falls None)
            newsletter_config: Pipeline-Konfiguration (optional)

        Returns:
            dict: Vollst√§ndige Newsletter-Session mit Ergebnissen
        """
        print(f"üöÄ STARTE KOMPLETTE NEWSLETTER-PIPELINE")
        print(f"üìç Location: {location}")
        print("=" * 60)

        # Session initialisieren
        session_start = datetime.now()
        session_id = f"newsletter_{location}_{session_start.strftime('%Y%m%d_%H%M')}"

        pipeline_session = {
            "session_id": session_id,
            "location": location,
            "start_time": session_start.isoformat(),
            "categories": categories,
            "config": newsletter_config or {},
            "phases": {},
            "results": {},
            "errors": [],
            "status": "running"
        }

        try:
            # Phase 1: Konfiguration und Planung
            print("üìã PHASE 1: Konfiguration und Planung")
            planning_result = self._phase_1_planning(location, categories)
            pipeline_session["phases"]["planning"] = planning_result
            pipeline_session["categories"] = planning_result["categories"]

            # Phase 2: Datensammlung mit Multi-API Approach
            print(f"\nüìä PHASE 2: Multi-API Datensammlung")
            data_collection_result = self._phase_2_data_collection(location, planning_result["categories"])
            pipeline_session["phases"]["data_collection"] = data_collection_result

            # Phase 3: Content-Synthese und Quality-Check
            print(f"\nüß† PHASE 3: Content-Synthese")
            synthesis_result = self._phase_3_synthesis(location, data_collection_result)
            pipeline_session["phases"]["synthesis"] = synthesis_result

            # Phase 4: Newsletter-Generierung
            print(f"\nüì∞ PHASE 4: Newsletter-Generierung")
            generation_result = self._phase_4_generation(location, synthesis_result)
            pipeline_session["phases"]["generation"] = generation_result

            # Phase 5: Finalisierung und Archivierung
            print(f"\nüèÅ PHASE 5: Finalisierung")
            finalization_result = self._phase_5_finalization(location, pipeline_session)
            pipeline_session["phases"]["finalization"] = finalization_result

            # Session abschlie√üen
            pipeline_session["status"] = "completed"
            pipeline_session["end_time"] = datetime.now().isoformat()
            pipeline_session["duration"] = (datetime.now() - session_start).total_seconds()

            # Results zusammenfassen
            pipeline_session["results"] = self._summarize_pipeline_results(pipeline_session)

            self.newsletter_sessions.append(pipeline_session)

            print(f"\nüéâ NEWSLETTER-PIPELINE ERFOLGREICH ABGESCHLOSSEN")
            print(f"‚è±Ô∏è Dauer: {pipeline_session['duration']:.1f} Sekunden")
            print(f"üìä Kategorien verarbeitet: {len(pipeline_session['results'].get('categories_processed', []))}")
            print(f"üìÅ API-Calls: {pipeline_session['results'].get('total_api_calls', 0)}")
            print(f"üì∞ Newsletter: {pipeline_session['results'].get('newsletter_generated', False)}")

            return pipeline_session

        except Exception as e:
            print(f"‚ùå PIPELINE FEHLER: {e}")
            pipeline_session["status"] = "failed"
            pipeline_session["error"] = str(e)
            pipeline_session["end_time"] = datetime.now().isoformat()

            import traceback
            error_details = traceback.format_exc()
            pipeline_session["error_details"] = error_details
            print(f"üîç Error Details: {error_details}")

            return pipeline_session

    def _phase_1_planning(self, location, categories):
        """Phase 1: Konfiguration und Planung"""
        print("  üìã Erstelle Newsletter-Plan...")

        # Kategorien aus Config holen falls nicht angegeben
        if categories is None:
            high_priority = self.config_manager.get_newsletter_categories("high")
            medium_priority = self.config_manager.get_newsletter_categories("medium")
            categories = high_priority + medium_priority[:3]  # Begrenzt f√ºr PoC

        # Time-aware Newsletter Plan erstellen
        newsletter_plan = self.time_manager.create_time_aware_newsletter_plan(
            self.config_manager, location, categories
        )

        # Worker-Strategien bestimmen
        worker_strategies = {}
        for category in categories:
            method = self.config_manager.get_category_method(category)

            if method == "scrape":
                worker_strategies[category] = ["firecrawl"]
            elif method == "search":
                # Multi-API Search f√ºr bessere Abdeckung
                available_search_workers = [w for w in ["firecrawl", "claude", "perplexity"] if w in self.workers]
                worker_strategies[category] = available_search_workers[:2]  # Top 2 f√ºr Effizienz

        planning_result = {
            "categories": categories,
            "newsletter_plan": newsletter_plan,
            "worker_strategies": worker_strategies,
            "location_config": self.config_manager.get_location_config(location),
            "timestamp": datetime.now().isoformat()
        }

        print(f"    ‚úÖ Plan erstellt: {len(categories)} Kategorien, {len(worker_strategies)} Worker-Strategien")

        return planning_result

    def _phase_2_data_collection(self, location, categories):
        """Phase 2: Multi-API Datensammlung"""
        print("  üìä Starte Multi-API Datensammlung...")

        collection_results = {
            "firecrawl_results": [],
            "claude_results": [],
            "perplexity_results": [],
            "categories_processed": [],
            "total_api_calls": 0,
            "errors": []
        }

        for category in categories:
            print(f"    üéØ Verarbeite Kategorie: {category}")

            # Firecrawl Worker
            if "firecrawl" in self.workers:
                try:
                    firecrawl_result = self.workers["firecrawl"].process_category_for_newsletter(category, location)
                    if firecrawl_result:
                        collection_results["firecrawl_results"].append(firecrawl_result)
                        collection_results["total_api_calls"] += 1
                        print(f"      üï∑Ô∏è Firecrawl: ‚úÖ")
                    else:
                        print(f"      üï∑Ô∏è Firecrawl: ‚ùå")
                except Exception as e:
                    collection_results["errors"].append(f"Firecrawl {category}: {e}")
                    print(f"      üï∑Ô∏è Firecrawl: ‚ùå ({e})")

            # Claude Worker (nur f√ºr Search-geeignete Kategorien)
            category_method = self.config_manager.get_category_method(category)
            if "claude" in self.workers and category_method == "search":
                try:
                    claude_result = self.workers["claude"].process_category_for_newsletter(category, location)
                    if claude_result:
                        collection_results["claude_results"].append(claude_result)
                        collection_results["total_api_calls"] += 1
                        print(f"      ü§ñ Claude: ‚úÖ ({claude_result.get('web_searches_used', 0)} searches)")
                    else:
                        print(f"      ü§ñ Claude: ‚ùå")
                except Exception as e:
                    collection_results["errors"].append(f"Claude {category}: {e}")
                    print(f"      ü§ñ Claude: ‚ùå ({e})")

            # Perplexity Worker (selective f√ºr Cross-Validation)
            if "perplexity" in self.workers and category in ["nachrichten", "sport", "events"]:
                try:
                    perplexity_result = self.workers["perplexity"].process_category_for_newsletter(category, location)
                    if perplexity_result:
                        collection_results["perplexity_results"].append(perplexity_result)
                        collection_results["total_api_calls"] += 1
                        print(f"      üîç Perplexity: ‚úÖ ({len(perplexity_result.get('sources', []))} sources)")
                    else:
                        print(f"      üîç Perplexity: ‚ùå")
                except Exception as e:
                    collection_results["errors"].append(f"Perplexity {category}: {e}")
                    print(f"      üîç Perplexity: ‚ùå ({e})")

            collection_results["categories_processed"].append(category)

        print(f"    ‚úÖ Datensammlung komplett: {collection_results['total_api_calls']} API-Calls")

        return collection_results

    def _phase_3_synthesis(self, location, data_collection_result):
        """Phase 3: Content-Synthese"""
        print("  üß† Synthetisiere gesammelte Daten...")

        # Content Quality Assessment
        quality_metrics = {
            "firecrawl_items": len(data_collection_result["firecrawl_results"]),
            "claude_items": len(data_collection_result["claude_results"]),
            "perplexity_items": len(data_collection_result["perplexity_results"]),
            "total_sources": 0,
            "categories_with_content": [],
            "content_diversity_score": 0
        }

        # Source Counting und Quality Assessment
        categories_with_content = set()

        for firecrawl_result in data_collection_result["firecrawl_results"]:
            categories_with_content.add(firecrawl_result["category"])
            if firecrawl_result["method"] == "search":
                quality_metrics["total_sources"] += firecrawl_result.get("results_count", 0)
            elif firecrawl_result["method"] == "scrape":
                quality_metrics["total_sources"] += firecrawl_result.get("successful_scrapes", 0)

        for claude_result in data_collection_result["claude_results"]:
            categories_with_content.add(claude_result["category"])
            quality_metrics["total_sources"] += len(claude_result.get("citations", []))

        for perplexity_result in data_collection_result["perplexity_results"]:
            categories_with_content.add(perplexity_result["category"])
            quality_metrics["total_sources"] += len(perplexity_result.get("sources", []))

        quality_metrics["categories_with_content"] = list(categories_with_content)
        quality_metrics["content_diversity_score"] = len(categories_with_content) / len(data_collection_result["categories_processed"]) if data_collection_result["categories_processed"] else 0

        synthesis_result = {
            "quality_metrics": quality_metrics,
            "content_ready_for_generation": quality_metrics["total_sources"] > 0,
            "recommended_newsletter_style": "compact" if quality_metrics["total_sources"] < 10 else "standard",
            "timestamp": datetime.now().isoformat()
        }

        print(f"    üìä Content Quality: {quality_metrics['total_sources']} Sources, {len(categories_with_content)} Kategorien")
        print(f"    üéØ Diversity Score: {quality_metrics['content_diversity_score']:.2f}")
        print(f"    üì∞ Newsletter Style: {synthesis_result['recommended_newsletter_style']}")

        return synthesis_result

    def _phase_4_generation(self, location, synthesis_result):
        """Phase 4: Newsletter-Generierung"""
        print("  üì∞ Generiere Newsletter...")

        if not synthesis_result["content_ready_for_generation"]:
            print("    ‚ùå Nicht genug Content f√ºr Newsletter-Generierung")
            return {"success": False, "reason": "insufficient_content"}

        if "gemini" not in self.workers:
            print("    ‚ùå Gemini Worker nicht verf√ºgbar")
            return {"success": False, "reason": "no_gemini_worker"}

        try:
            # Newsletter generieren
            newsletter_result = self.workers["gemini"].generate_newsletter_with_foundation(
                location=location,
                categories=synthesis_result["quality_metrics"]["categories_with_content"],
                newsletter_style=synthesis_result["recommended_newsletter_style"]
            )

            if newsletter_result:
                # Enhanced Newsletter erstellen
                enhanced_newsletter = self.workers["gemini"].enhance_newsletter_with_details(
                    newsletter_result,
                    enhancement_type="metadata"
                )

                generation_result = {
                    "success": True,
                    "newsletter_result": newsletter_result,
                    "enhanced_newsletter": enhanced_newsletter,
                    "newsletter_file": newsletter_result.get("newsletter_filename"),
                    "word_count": newsletter_result["word_count"],
                    "char_count": newsletter_result["char_count"]
                }

                print(f"    ‚úÖ Newsletter generiert: {newsletter_result['word_count']} W√∂rter")

                return generation_result
            else:
                print("    ‚ùå Newsletter-Generierung fehlgeschlagen")
                return {"success": False, "reason": "generation_failed"}

        except Exception as e:
            print(f"    ‚ùå Newsletter-Generation Fehler: {e}")
            return {"success": False, "reason": "generation_error", "error": str(e)}

    def _phase_5_finalization(self, location, pipeline_session):
        """Phase 5: Finalisierung und Archivierung"""
        print("  üèÅ Finalisiere Session...")

        # Session Meta-Daten in Persistence speichern
        if self.persistence_manager:
            self.persistence_manager.save_session_meta()

        # Execution Log erstellen
        execution_summary = {
            "session_id": pipeline_session["session_id"],
            "location": location,
            "duration": pipeline_session.get("duration", 0),
            "phases_completed": list(pipeline_session["phases"].keys()),
            "success": pipeline_session["status"] == "completed",
            "timestamp": datetime.now().isoformat()
        }

        self.execution_log.append(execution_summary)

        finalization_result = {
            "session_archived": True,
            "execution_logged": True,
            "persistence_completed": True,
            "summary": execution_summary
        }

        print(f"    ‚úÖ Session finalisiert und archiviert")

        return finalization_result

    def _summarize_pipeline_results(self, pipeline_session):
        """Erstellt Zusammenfassung der Pipeline-Ergebnisse"""

        results_summary = {
            "session_id": pipeline_session["session_id"],
            "location": pipeline_session["location"],
            "status": pipeline_session["status"],
            "duration": pipeline_session.get("duration", 0),
            "categories_processed": [],
            "total_api_calls": 0,
            "newsletter_generated": False,
            "content_sources": 0,
            "errors_count": len(pipeline_session.get("errors", []))
        }

        # Data Collection Results
        if "data_collection" in pipeline_session["phases"]:
            data_collection = pipeline_session["phases"]["data_collection"]
            results_summary["categories_processed"] = data_collection.get("categories_processed", [])
            results_summary["total_api_calls"] = data_collection.get("total_api_calls", 0)

        # Synthesis Results
        if "synthesis" in pipeline_session["phases"]:
            synthesis = pipeline_session["phases"]["synthesis"]
            results_summary["content_sources"] = synthesis["quality_metrics"].get("total_sources", 0)

        # Generation Results
        if "generation" in pipeline_session["phases"]:
            generation = pipeline_session["phases"]["generation"]
            results_summary["newsletter_generated"] = generation.get("success", False)
            if results_summary["newsletter_generated"]:
                results_summary["newsletter_word_count"] = generation.get("word_count", 0)
                results_summary["newsletter_file"] = generation.get("newsletter_file")

        return results_summary

    # =========================================================================
    # UTILITY METHODS
    # =========================================================================

    def get_system_status(self):
        """Gibt aktuellen System-Status zur√ºck"""

        return {
            "foundation_system": {
                "config_manager": self.config_manager is not None,
                "time_manager": self.time_manager is not None,
                "persistence_manager": self.persistence_manager is not None
            },
            "workers": {
                name: worker is not None for name, worker in self.workers.items()
            },
            "sessions": {
                "total_sessions": len(self.newsletter_sessions),
                "completed_sessions": len([s for s in self.newsletter_sessions if s["status"] == "completed"]),
                "failed_sessions": len([s for s in self.newsletter_sessions if s["status"] == "failed"])
            },
            "execution_log_entries": len(self.execution_log)
        }

    def get_session_summary(self, session_id=None):
        """Gibt Zusammenfassung einer Session zur√ºck"""

        if session_id:
            for session in self.newsletter_sessions:
                if session["session_id"] == session_id:
                    return session["results"] if "results" in session else session
            return None
        else:
            # Neueste Session
            if self.newsletter_sessions:
                return self.newsletter_sessions[-1]["results"] if "results" in self.newsletter_sessions[-1] else self.newsletter_sessions[-1]
            return None

# =============================================================================
# INITIALISIERUNG
# =============================================================================

# Main Controller mit Foundation-System initialisieren
foundation_available = all([
    'config_manager' in globals() and config_manager,
    'time_manager' in globals() and time_manager,
    'persistence_manager' in globals() and persistence_manager
])

if foundation_available:
    main_controller = NewsletterMainController(
        config_manager=config_manager,
        time_manager=time_manager,
        persistence_manager=persistence_manager
    )
    print("üöÄ Main Controller mit Foundation-System bereit")

    # System Status
    system_status = main_controller.get_system_status()
    print(f"üìä System Status:")
    print(f"   Foundation: ‚úÖ Komplett")
    print(f"   Worker: {len([w for w in system_status['workers'].values() if w])} von {len(system_status['workers'])} verf√ºgbar")

else:
    print("‚ùå Main Controller nicht verf√ºgbar - Foundation-System fehlt")
    main_controller = None

print("=" * 60)
print("‚úÖ Main Controller Setup komplett\n")

  üï∑Ô∏è Firecrawl Worker V2 registriert
  ü§ñ Claude Worker V2 registriert
  üîç Perplexity Worker V2 registriert
‚úÖ Main Controller initialisiert
üèóÔ∏è Foundation-System: ConfigManager, TimeContextManager, DataPersistenceManager
ü§ñ Registrierte Worker: 3
üöÄ Main Controller mit Foundation-System bereit
üìä System Status:
   Foundation: ‚úÖ Komplett
   Worker: 3 von 3 verf√ºgbar
‚úÖ Main Controller Setup komplett



In [15]:
# =============================================================================
# ZELLE 7: Komplette Ende-zu-Ende Newsletter Demo
# =============================================================================
# @title Ultimative Multi-API Newsletter-Pipeline Demo

print("üé¨ STARTE ULTIMATIVE NEWSLETTER-PIPELINE DEMO")
print("=" * 70)
print("üìç Demo-Location: M√ºnchen")
print("üìã Demo-Kategorien: Wetter, Nachrichten, Events, Sport")
print("üèóÔ∏è System: Foundation + 4 Worker V2 + Main Controller")
print("=" * 70)

if main_controller:
    try:
        # =================================================================
        # KOMPLETTE NEWSLETTER-PIPELINE AUSF√úHREN
        # =================================================================

        print("\nüöÄ AUSF√úHRUNG DER 5-PHASEN PIPELINE")
        print("=" * 50)

        # Pipeline ausf√ºhren
        demo_result = main_controller.generate_complete_newsletter(
            location="m√ºnchen",
            categories=["wetter", "nachrichten", "events", "sport"]
        )

        # =================================================================
        # ERGEBNISSE ANALYSIEREN UND ANZEIGEN
        # =================================================================

        print("\nüìä PIPELINE-ERGEBNISSE ANALYSE")
        print("=" * 50)

        if demo_result["status"] == "completed":
            results = demo_result["results"]

            print("‚úÖ PIPELINE ERFOLGREICH ABGESCHLOSSEN!")
            print(f"üìç Location: {results['location']}")
            print(f"‚è±Ô∏è Dauer: {results['duration']:.1f} Sekunden")
            print(f"üìã Kategorien: {len(results['categories_processed'])}")
            print(f"üì° API-Calls: {results['total_api_calls']}")
            print(f"üìä Content Sources: {results['content_sources']}")
            print(f"üì∞ Newsletter: {'‚úÖ' if results['newsletter_generated'] else '‚ùå'}")

            if results['newsletter_generated']:
                print(f"üìù Newsletter W√∂rter: {results.get('newsletter_word_count', 'N/A')}")
                print(f"üìÅ Newsletter Datei: {results.get('newsletter_file', 'N/A')}")

            # =================================================================
            # DETAILLIERTE PHASEN-ANALYSE
            # =================================================================

            print(f"\nüîç DETAILLIERTE PHASEN-ANALYSE")
            print("=" * 50)

            # Phase 1: Planning
            if "planning" in demo_result["phases"]:
                planning = demo_result["phases"]["planning"]
                print("üìã PHASE 1 - PLANNING:")
                print(f"   Kategorien geplant: {len(planning['categories'])}")
                print(f"   Worker-Strategien: {len(planning['worker_strategies'])}")

                for category, workers in planning['worker_strategies'].items():
                    workers_str = ", ".join(workers) if isinstance(workers, list) else str(workers)
                    print(f"   {category}: {workers_str}")

            # Phase 2: Data Collection
            if "data_collection" in demo_result["phases"]:
                data_collection = demo_result["phases"]["data_collection"]
                print(f"\nüìä PHASE 2 - DATA COLLECTION:")
                print(f"   üï∑Ô∏è Firecrawl Results: {len(data_collection['firecrawl_results'])}")
                print(f"   ü§ñ Claude Results: {len(data_collection['claude_results'])}")
                print(f"   üîç Perplexity Results: {len(data_collection['perplexity_results'])}")
                print(f"   üì° Total API Calls: {data_collection['total_api_calls']}")
                print(f"   ‚ùå Errors: {len(data_collection['errors'])}")

                if data_collection['errors']:
                    print("   Error Details:")
                    for error in data_collection['errors'][:3]:  # Erste 3 Errors
                        print(f"     - {error}")

            # Phase 3: Synthesis
            if "synthesis" in demo_result["phases"]:
                synthesis = demo_result["phases"]["synthesis"]
                metrics = synthesis["quality_metrics"]
                print(f"\nüß† PHASE 3 - SYNTHESIS:")
                print(f"   Total Sources: {metrics['total_sources']}")
                print(f"   Content Diversity: {metrics['content_diversity_score']:.2f}")
                print(f"   Kategorien mit Content: {len(metrics['categories_with_content'])}")
                print(f"   Empfohlener Style: {synthesis['recommended_newsletter_style']}")

            # Phase 4: Generation
            if "generation" in demo_result["phases"]:
                generation = demo_result["phases"]["generation"]
                print(f"\nüì∞ PHASE 4 - GENERATION:")
                print(f"   Success: {generation['success']}")
                if generation['success']:
                    print(f"   Newsletter W√∂rter: {generation['word_count']}")
                    print(f"   Newsletter Zeichen: {generation['char_count']}")
                    print(f"   Enhanced Version: ‚úÖ")
                else:
                    print(f"   Failure Reason: {generation.get('reason', 'Unknown')}")

            # Phase 5: Finalization
            if "finalization" in demo_result["phases"]:
                finalization = demo_result["phases"]["finalization"]
                print(f"\nüèÅ PHASE 5 - FINALIZATION:")
                print(f"   Session Archived: {finalization['session_archived']}")
                print(f"   Execution Logged: {finalization['execution_logged']}")
                print(f"   Persistence Completed: {finalization['persistence_completed']}")

            # =================================================================
            # NEWSLETTER VORSCHAU
            # =================================================================

            if results['newsletter_generated'] and "generation" in demo_result["phases"]:
                print(f"\nüì∞ NEWSLETTER PREVIEW")
                print("=" * 50)

                newsletter_result = demo_result["phases"]["generation"]["newsletter_result"]
                newsletter_content = newsletter_result["newsletter_content"]

                # Erste 500 Zeichen des Newsletters anzeigen
                preview_length = 500
                if len(newsletter_content) > preview_length:
                    preview = newsletter_content[:preview_length] + "\n\n[... Newsletter continues ...]"
                else:
                    preview = newsletter_content

                print(preview)

                print(f"\nüìÅ Vollst√§ndiger Newsletter gespeichert in:")
                print(f"   {newsletter_result.get('newsletter_filename', 'N/A')}")

        else:
            # Pipeline failed
            print("‚ùå PIPELINE FEHLGESCHLAGEN!")
            print(f"Status: {demo_result['status']}")
            print(f"Error: {demo_result.get('error', 'Unknown error')}")

            if 'error_details' in demo_result:
                print(f"\nError Details:")
                print(demo_result['error_details'])

        # =================================================================
        # SYSTEM-STATISTIKEN
        # =================================================================

        print(f"\nüìà SYSTEM-STATISTIKEN")
        print("=" * 50)

        system_status = main_controller.get_system_status()

        print("üèóÔ∏è FOUNDATION SYSTEM:")
        foundation = system_status["foundation_system"]
        for component, status in foundation.items():
            print(f"   {component}: {'‚úÖ' if status else '‚ùå'}")

        print(f"\nü§ñ WORKER STATUS:")
        workers = system_status["workers"]
        for worker, status in workers.items():
            print(f"   {worker}: {'‚úÖ' if status else '‚ùå'}")

        print(f"\nüìä SESSION STATISTICS:")
        sessions = system_status["sessions"]
        print(f"   Total Sessions: {sessions['total_sessions']}")
        print(f"   Completed Sessions: {sessions['completed_sessions']}")
        print(f"   Failed Sessions: {sessions['failed_sessions']}")
        print(f"   Success Rate: {(sessions['completed_sessions'] / max(sessions['total_sessions'], 1) * 100):.1f}%")

        # =================================================================
        # PERSISTENCE MANAGER STATUS
        # =================================================================

        if persistence_manager:
            print(f"\nüíæ PERSISTENCE STATUS:")
            session_summary = persistence_manager.get_session_summary()
            print(f"   Session ID: {session_summary['session_id']}")
            print(f"   API Calls: {session_summary['api_calls']}")
            print(f"   Categories Processed: {len(session_summary['categories_processed'])}")
            print(f"   Content Items: {session_summary['total_content_items']}")
            print(f"   Newsletter Generated: {session_summary['newsletter_generated']}")
            print(f"   Session Path: {session_summary['session_path']}")

        # =================================================================
        # ABSCHLUSS
        # =================================================================

        print(f"\nüéâ DEMO KOMPLETT!")
        print("=" * 50)
        print("‚úÖ Komplette Multi-API Newsletter-Pipeline erfolgreich demonstriert")
        print("üèóÔ∏è Foundation-System: ConfigManager + TimeContext + DataPersistence")
        print("ü§ñ Worker V2: Firecrawl + Claude + Perplexity + Gemini")
        print("üéõÔ∏è Main Controller: 5-Phasen orchestrierte Pipeline")
        print("üíæ Audit Trail: Vollst√§ndige Nachvollziehbarkeit aller API-Calls")
        print("üì∞ Newsletter: Intelligente Multi-Source Content-Synthese")

        # Demo-Session f√ºr Download verf√ºgbar machen
        if results.get('newsletter_generated') and results.get('newsletter_file'):
            print(f"\nüì• Newsletter Download bereit:")
            print(f"   {results['newsletter_file']}")

            # Optional: Newsletter auch in Colab anzeigen
            try:
                from google.colab import files
                print(f"üíæ Newsletter wird zum Download bereitgestellt...")
                files.download(results['newsletter_file'])
            except:
                print(f"üíæ Newsletter im Google Drive verf√ºgbar")

    except Exception as demo_error:
        print(f"‚ùå DEMO FEHLER: {demo_error}")
        import traceback
        print(f"üîç Traceback:")
        print(traceback.format_exc())

else:
    print("‚ùå Main Controller nicht verf√ºgbar - kann Demo nicht starten")

print("\n" + "=" * 70)
print("üèÅ ENDE DER ULTIMATIVEN NEWSLETTER-PIPELINE DEMO")
print("=" * 70)

üé¨ STARTE ULTIMATIVE NEWSLETTER-PIPELINE DEMO
üìç Demo-Location: M√ºnchen
üìã Demo-Kategorien: Wetter, Nachrichten, Events, Sport
üèóÔ∏è System: Foundation + 4 Worker V2 + Main Controller

üöÄ AUSF√úHRUNG DER 5-PHASEN PIPELINE
üöÄ STARTE KOMPLETTE NEWSLETTER-PIPELINE
üìç Location: m√ºnchen
üìã PHASE 1: Konfiguration und Planung
  üìã Erstelle Newsletter-Plan...
    ‚úÖ Plan erstellt: 4 Kategorien, 4 Worker-Strategien

üìä PHASE 2: Multi-API Datensammlung
  üìä Starte Multi-API Datensammlung...
    üéØ Verarbeite Kategorie: wetter
üéØ Verarbeite wetter f√ºr m√ºnchen via search
üîç Query-Log gespeichert: f565955d
üîç Firecrawl Search: wetter in m√ºnchen
üìù Enhanced Query: Wetter m√ºnchen heute Vorhersage Deutschland
‚è∞ Timeframe: heute
üìä Limit: 3
üíæ Raw Response gespeichert: 018_firecrawl_search_wetter_13-37-41.json
‚úÖ 3 Suchergebnisse erhalten
üìÑ Processed Content gespeichert: wetter_13-37-41.md
  1. Wetter M√ºnchen heute - aktuelle Wettervorhersage f√ºr M√ºnc

In [16]:
# =============================================================================
# Newsletter Gradio UI - Einfache Web-Oberfl√§che f√ºr Newsletter-System
# =============================================================================
# @title Newsletter Gradio UI

import gradio as gr
import time
from datetime import datetime

# Gradio installieren falls nicht vorhanden
try:
    import gradio as gr
except ImportError:
    import subprocess
    import sys
    print("üì¶ Installiere Gradio...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "gradio"])
    import gradio as gr
    print("‚úÖ Gradio installiert!")

class NewsletterUI:
    """
    Gradio UI f√ºr Newsletter-System
    - Einfache Auswahl von Stadt, Kategorien, Style
    - Live Newsletter-Generation und Anzeige
    - Integration mit Simple Gemini Worker
    """

    def __init__(self, simple_gemini_worker=None, config_manager=None):
        """
        Initialisiert Newsletter UI

        Args:
            simple_gemini_worker: Simple Gemini Worker Instance
            config_manager: Config Manager f√ºr verf√ºgbare Kategorien
        """
        self.simple_gemini_worker = simple_gemini_worker
        self.config_manager = config_manager

        # Verf√ºgbare Optionen
        self.available_locations = ["m√ºnchen", "berlin", "hamburg", "k√∂ln", "frankfurt"]
        self.available_categories = ["wetter", "nachrichten", "events", "sport", "verkehr", "rathaus"]
        self.newsletter_styles = ["compact", "standard", "detailed"]

        # Lade verf√ºgbare Kategorien aus ConfigManager falls verf√ºgbar
        if self.config_manager:
            try:
                high_prio = self.config_manager.get_newsletter_categories("high")
                medium_prio = self.config_manager.get_newsletter_categories("medium")
                self.available_categories = high_prio + medium_prio
            except:
                pass  # Fallback zu Standard-Kategorien

        print("‚úÖ Newsletter UI initialisiert")
        print(f"üìç Locations: {len(self.available_locations)}")
        print(f"üìã Categories: {len(self.available_categories)}")

    def generate_newsletter_ui(self, location, categories, newsletter_style, progress=gr.Progress()):
        """
        Newsletter-Generation f√ºr Gradio UI mit detaillierten Progress-Updates

        Args:
            location: Ausgew√§hlte Stadt
            categories: Ausgew√§hlte Kategorien (wird aktuell ignoriert - alle verf√ºgbaren verwendet)
            newsletter_style: Newsletter-Stil
            progress: Gradio Progress Bar

        Returns:
            tuple: (Newsletter-Content, Status-Message, Processing-Info)
        """
        if not self.simple_gemini_worker:
            return (
                "‚ùå **Fehler:** Simple Gemini Worker nicht verf√ºgbar!",
                "ERROR: Kein Worker verf√ºgbar",
                "System nicht korrekt initialisiert"
            )

        if not location:
            return (
                "‚ùå **Fehler:** Bitte w√§hle eine Stadt aus!",
                "ERROR: Keine Stadt ausgew√§hlt",
                ""
            )

        try:
            # Detaillierte Progress Updates
            progress(0.05, desc="üöÄ Initialisiere Newsletter-Generation...")
            time.sleep(0.5)  # Kurze Pause f√ºr UI-Update

            start_time = time.time()

            progress(0.15, desc="üìä Analysiere verf√ºgbare Content-Quellen...")
            time.sleep(0.5)

            # Simple Content Processor Integration anzeigen
            if self.simple_gemini_worker.has_simple_content:
                progress(0.25, desc="üß† Simple Content Processor l√§dt Markdown-Files...")
                time.sleep(0.5)

                progress(0.35, desc="üîç Extrahiere Facts aus 17 Quellen (56K Zeichen)...")
                time.sleep(1.0)

                progress(0.45, desc="‚öôÔ∏è Bereite Content f√ºr Newsletter-Stil vor...")
                time.sleep(0.5)
            else:
                progress(0.35, desc="‚ö†Ô∏è Fallback: Verwende allgemeine Informationen...")
                time.sleep(0.5)

            progress(0.55, desc="üìù Generiere Newsletter-Prompt f√ºr Gemini...")
            time.sleep(0.5)

            progress(0.65, desc="ü§ñ Gemini 2.0 Flash generiert Newsletter...")

            # L√§ngeres Timeout f√ºr Gemini - manchmal dauert es l√§nger
            generation_start = time.time()

            # Newsletter generieren mit Timeout-Monitoring
            newsletter_result = self.simple_gemini_worker.generate_simple_newsletter(
                location=location.lower(),
                categories=categories,
                newsletter_style=newsletter_style
            )

            generation_duration = time.time() - generation_start
            print(f"ü§ñ Gemini Generation dauerte: {generation_duration:.1f}s")

            if generation_duration > 30:
                progress(0.75, desc="‚è≥ Gemini braucht l√§nger als erwartet...")
                time.sleep(0.5)
                progress(0.80, desc="‚è≥ Warte auf Gemini Response...")
                time.sleep(0.5)

            progress(0.85, desc="‚ú® Verarbeite Gemini-Response...")
            time.sleep(0.5)

            progress(0.95, desc="üíæ Speichere Newsletter in Google Drive...")
            time.sleep(0.5)

            generation_time = time.time() - start_time

            if newsletter_result:
                progress(1.0, desc="‚úÖ Newsletter erfolgreich generiert!")

                # Erfolgreiche Generation
                newsletter_content = newsletter_result["newsletter_content"]

                # Erweiterte Status-Message mit Details
                status_msg = f"""<div class="status-success">
                    ‚úÖ <strong>Newsletter erfolgreich generiert!</strong>
                    <br>‚è±Ô∏è <strong>Generation:</strong> {generation_time:.1f}s
                    <br>üß† <strong>Verarbeitung:</strong> {newsletter_result['sources_used']} Sources ‚Üí {newsletter_result['facts_integrated']} Facts ‚Üí {newsletter_result['word_count']} W√∂rter
                </div>"""

                # Erweiterte Processing-Info mit Timeline
                processing_info = f"""## üìä Generation Timeline & Details

### ‚ö° Performance
- **Gesamtzeit:** {generation_time:.1f} Sekunden
- **Status:** Erfolgreich abgeschlossen
- **Generation:** {datetime.now().strftime('%d.%m.%Y %H:%M:%S')}

### üìç Newsletter-Specs
- **Stadt:** {newsletter_result['location'].title()}
- **Stil:** {newsletter_result['newsletter_style'].title()}
- **Ziel-W√∂rter:** {self._get_word_target_for_style(newsletter_result['newsletter_style'])}
- **Tats√§chliche W√∂rter:** {newsletter_result['word_count']}
- **Zeichen:** {newsletter_result['char_count']:,}

### üß† Content-Verarbeitung
- **Sources verwendet:** {newsletter_result['sources_used']} verschiedene APIs
- **Facts integriert:** {newsletter_result['facts_integrated']} konkrete Informationen
- **Processing Method:** {newsletter_result['processing_method']}
- **Model:** {newsletter_result['model']}

### üìä Content-Pipeline
1. **üîç Content Loading:** 56,319 Zeichen aus 17 Sources
2. **üß† Fact Extraction:** {newsletter_result['facts_integrated']} strukturierte Facts
3. **üìù Prompt Generation:** Style-spezifischer Gemini-Prompt
4. **ü§ñ AI Generation:** Gemini 2.0 Flash Verarbeitung
5. **üíæ Persistence:** Gespeichert mit Audit-Trail

### üìÅ Archivierung
- **Query ID:** {newsletter_result.get('query_id', 'N/A')}
- **Datei:** {newsletter_result.get('newsletter_filename', 'N/A')}
- **Raw Response:** {newsletter_result.get('raw_filename', 'N/A')}"""

                return newsletter_content, status_msg, processing_info

            else:
                progress(1.0, desc="‚ùå Generation fehlgeschlagen")
                # Generation fehlgeschlagen
                error_status = f"""<div class="status-error">
                    ‚ùå <strong>Newsletter-Generation fehlgeschlagen!</strong>
                    <br>üìç <strong>Stadt:</strong> {location}
                    <br>‚è±Ô∏è <strong>Dauer:</strong> {generation_time:.1f}s
                    <br>üí° <strong>Tipp:</strong> Siehe Console f√ºr Details oder versuche anderen Style
                </div>"""

                return (
                    f"‚ùå **Fehler:** Newsletter-Generation f√ºr {location} fehlgeschlagen!\n\n**M√∂gliche Ursachen:**\n- API-Limit erreicht\n- Netzwerk-Problem\n- Content-Processing Fehler\n\n**Tipp:** Versuche es in ein paar Minuten erneut.",
                    error_status,
                    f"**Debug Info:**\n- Stadt: {location}\n- Style: {newsletter_style}\n- Dauer: {generation_time:.1f}s\n- Simple Content Processor: {'‚úÖ' if self.simple_gemini_worker.has_simple_content else '‚ùå'}"
                )

        except Exception as e:
            progress(1.0, desc="‚ùå System-Fehler aufgetreten")
            error_msg = f"""‚ùå **System Fehler w√§hrend Newsletter-Generation**

**Fehler:** {str(e)}

**Debug-Informationen:**
- **Stadt:** {location}
- **Style:** {newsletter_style}
- **Simple Content Processor:** {'‚úÖ' if self.simple_gemini_worker.has_simple_content else '‚ùå'}
- **Foundation System:** {'‚úÖ' if self.simple_gemini_worker.has_foundation else '‚ùå'}

**M√∂gliche L√∂sungen:**
1. Versuche es mit einem anderen Style
2. Pr√ºfe ob alle Worker verf√ºgbar sind
3. Siehe Console-Output f√ºr Details
4. Starte die Zellen 5a, 5b neu falls n√∂tig"""

            error_status = f"""<div class="status-error">
                ‚ùå <strong>System-Fehler!</strong>
                <br>üêõ <strong>Exception:</strong> {str(e)[:100]}...
                <br>üí° <strong>Siehe Details rechts f√ºr L√∂sungsvorschl√§ge</strong>
            </div>"""

            return (
                error_msg,
                error_status,
                f"""**Exception Details:**
```
{str(e)}
```

**System Status:**
- Simple Gemini Worker: {'‚úÖ' if self.simple_gemini_worker else '‚ùå'}
- Simple Content Processor: {'‚úÖ' if (self.simple_gemini_worker and self.simple_gemini_worker.has_simple_content) else '‚ùå'}"""
            )

    def generate_newsletter_ui_with_debug(self, location, categories, newsletter_style, progress=gr.Progress()):
        """Debug-Version der Newsletter-Generation mit mehr Logging"""

        print(f"\nüî• DEBUG: Newsletter Generation gestartet")
        print(f"üìç Location: {location}")
        print(f"üé® Style: {newsletter_style}")
        print(f"üß† Simple Content Processor: {self.simple_gemini_worker.has_simple_content if self.simple_gemini_worker else 'N/A'}")

        if not self.simple_gemini_worker:
            return (
                "‚ùå **DEBUG: Simple Gemini Worker nicht verf√ºgbar!**",
                '<div class="status-error">‚ùå <strong>DEBUG: Kein Worker!</strong></div>',
                "Simple Gemini Worker fehlt"
            )

        if not location:
            return (
                "‚ùå **DEBUG: Bitte w√§hle eine Stadt aus!**",
                '<div class="status-error">‚ùå <strong>DEBUG: Keine Stadt!</strong></div>',
                "Keine Stadt ausgew√§hlt"
            )

        try:
            # Schritt-f√ºr-Schritt mit Logging
            progress(0.1, desc="üöÄ DEBUG: Starte Generation...")
            print("‚úÖ Step 1: Initialisiert")
            time.sleep(1)

            progress(0.3, desc="üìä DEBUG: Pr√ºfe Simple Content Processor...")
            if self.simple_gemini_worker.has_simple_content:
                print("‚úÖ Step 2: Simple Content Processor verf√ºgbar")
            else:
                print("‚ö†Ô∏è Step 2: Simple Content Processor NICHT verf√ºgbar - Fallback Mode")
            time.sleep(1)

            progress(0.5, desc="ü§ñ DEBUG: Rufe Gemini Worker auf...")
            print("ü§ñ Step 3: Starte Simple Gemini Worker...")

            start_gemini = time.time()
            newsletter_result = self.simple_gemini_worker.generate_simple_newsletter(
                location=location.lower(),
                categories=categories,
                newsletter_style=newsletter_style
            )
            gemini_duration = time.time() - start_gemini

            progress(0.8, desc="‚úÖ DEBUG: Gemini fertig, verarbeite...")
            print(f"ü§ñ Step 4: Gemini fertig nach {gemini_duration:.1f}s")
            time.sleep(0.5)

            if newsletter_result:
                progress(1.0, desc="‚úÖ DEBUG: Komplett erfolgreich!")
                print("‚úÖ Step 5: Newsletter erfolgreich generiert")

                debug_info = f"""## üî• DEBUG SUCCESS

### ‚úÖ Generation erfolgreich!
- **Gemini-Zeit:** {gemini_duration:.1f}s
- **W√∂rter:** {newsletter_result['word_count']}
- **Sources:** {newsletter_result['sources_used']}
- **Facts:** {newsletter_result['facts_integrated']}

### üß† System-Status
- **Simple Gemini Worker:** ‚úÖ
- **Simple Content Processor:** {'‚úÖ' if self.simple_gemini_worker.has_simple_content else '‚ùå (Fallback-Mode)'}
- **Foundation System:** {'‚úÖ' if self.simple_gemini_worker.has_foundation else '‚ùå'}

### üì∞ Newsletter
{newsletter_result['newsletter_content']}"""

                return (
                    debug_info,
                    '<div class="status-success">‚úÖ <strong>DEBUG: Newsletter erfolgreich!</strong></div>',
                    f"Debug-Modus: Generation in {gemini_duration:.1f}s"
                )
            else:
                progress(1.0, desc="‚ùå DEBUG: Generation fehlgeschlagen")
                print("‚ùå Step 5: Newsletter-Generation FEHLGESCHLAGEN")

                return (
                    f"‚ùå **DEBUG: Newsletter-Generation fehlgeschlagen!**\n\n**Gemini-Zeit:** {gemini_duration:.1f}s\n**Siehe Console f√ºr Details**",
                    '<div class="status-error">‚ùå <strong>DEBUG: Generation failed!</strong></div>',
                    f"Debug-Modus: Fehlgeschlagen nach {gemini_duration:.1f}s"
                )

        except Exception as e:
            progress(1.0, desc="‚ùå DEBUG: Exception!")
            print(f"‚ùå EXCEPTION: {e}")
            import traceback
            traceback.print_exc()

            return (
                f"‚ùå **DEBUG EXCEPTION:** {str(e)}",
                '<div class="status-error">‚ùå <strong>DEBUG: Exception!</strong></div>',
                f"Exception: {str(e)}"
            )

    def _get_word_target_for_style(self, style):
        """Gibt Wort-Ziel f√ºr Style zur√ºck"""
        targets = {
            "compact": "250-350 W√∂rter",
            "standard": "400-600 W√∂rter",
            "detailed": "600-900 W√∂rter"
        }
        return targets.get(style, "400-600 W√∂rter")

    def generate_all_styles_ui(self, location, categories, progress=gr.Progress()):
        """
        Generiert alle 3 Newsletter-Styles f√ºr Vergleich mit detaillierten Updates

        Args:
            location: Ausgew√§hlte Stadt
            categories: Ausgew√§hlte Kategorien
            progress: Gradio Progress Bar

        Returns:
            tuple: (Compact, Standard, Detailed, Status, Processing-Info)
        """
        if not self.simple_gemini_worker:
            error_msg = "‚ùå **Fehler:** Simple Gemini Worker nicht verf√ºgbar!"
            return error_msg, error_msg, error_msg, "ERROR", ""

        if not location:
            error_msg = "‚ùå **Fehler:** Bitte w√§hle eine Stadt aus!"
            return error_msg, error_msg, error_msg, "ERROR", ""

        try:
            progress(0.05, desc="üöÄ Starte Batch-Generation f√ºr alle 3 Styles...")
            time.sleep(0.5)
            start_time = time.time()

            progress(0.15, desc="üìä Analysiere Content f√ºr Style-Vergleich...")
            time.sleep(0.5)

            progress(0.25, desc="üß† Bereite Content-Processing vor...")
            time.sleep(0.5)

            progress(0.35, desc="üìù COMPACT Newsletter (300 W√∂rter)...")
            time.sleep(1.0)

            progress(0.55, desc="üì∞ STANDARD Newsletter (500 W√∂rter)...")
            time.sleep(1.0)

            progress(0.75, desc="üìö DETAILED Newsletter (800+ W√∂rter)...")
            time.sleep(1.0)

            progress(0.85, desc="üîÑ Vergleiche alle Styles...")
            time.sleep(0.5)

            # Alle 3 Styles generieren
            all_styles_result = self.simple_gemini_worker.generate_all_newsletter_styles(
                location=location.lower()
            )

            progress(0.95, desc="üíæ Speichere alle Newsletter...")
            time.sleep(0.5)

            generation_time = time.time() - start_time

            if all_styles_result and all_styles_result.get("newsletters"):
                progress(1.0, desc="‚úÖ Alle Styles erfolgreich generiert!")

                newsletters = all_styles_result["newsletters"]

                # Einzelne Newsletter extrahieren
                compact_newsletter = newsletters.get("compact", {}).get("newsletter_content", "‚ùå **Compact Generation fehlgeschlagen**\n\nM√∂gliche Ursachen:\n- API-Limit erreicht\n- Netzwerk-Problem\n- Versuche einzelnen Newsletter-Generator")
                standard_newsletter = newsletters.get("standard", {}).get("newsletter_content", "‚ùå **Standard Generation fehlgeschlagen**\n\nM√∂gliche Ursachen:\n- API-Limit erreicht\n- Netzwerk-Problem\n- Versuche einzelnen Newsletter-Generator")
                detailed_newsletter = newsletters.get("detailed", {}).get("newsletter_content", "‚ùå **Detailed Generation fehlgeschlagen**\n\nM√∂gliche Ursachen:\n- API-Limit erreicht\n- Netzwerk-Problem\n- Versuche einzelnen Newsletter-Generator")

                # Status
                styles_generated = all_styles_result.get("styles_generated", 0)
                status_msg = f"""<div class="status-success">
                    ‚úÖ <strong>{styles_generated}/3 Newsletter-Styles generiert!</strong>
                    <br>‚è±Ô∏è <strong>Batch-Zeit:</strong> {generation_time:.1f}s
                    <br>üéØ <strong>Style-Vergleich:</strong> Compact | Standard | Detailed nebeneinander
                </div>"""

                # Processing Info mit Style-Details
                comparison = all_styles_result.get("comparison", {})
                processing_info = f"""## üé≠ Style-Vergleich Analyse

### ‚ö° Batch-Performance
- **Gesamtzeit:** {generation_time:.1f} Sekunden
- **Erfolgreiche Styles:** {styles_generated}/3
- **Durchschnitt/Style:** {generation_time/3:.1f}s

### üìä Style-Breakdown:"""

                for style in ["compact", "standard", "detailed"]:
                    if style in comparison and comparison[style]:
                        comp = comparison[style]
                        processing_info += f"""

**{style.upper()}:**
- **Ziel:** {self._get_word_target_for_style(style)}
- **Tats√§chlich:** {comp['word_count']} W√∂rter
- **Sources:** {comp['sources_used']} APIs
- **Facts:** {comp['facts_integrated']} konkrete Informationen"""
                    else:
                        processing_info += f"""

**{style.upper()}:** ‚ùå Generation fehlgeschlagen"""

                processing_info += f"""

### üí° Style-Unterschiede verstehen
- **COMPACT:** Fokus auf wichtigste Infos, schnell lesbar
- **STANDARD:** Ausgewogen, konkrete Details, professionell
- **DETAILED:** Alle verf√ºgbaren Facts, umfassend, journalistisch

### üìÅ Batch-Archivierung
- **Location:** {location.title()}
- **Timestamp:** {datetime.now().strftime('%d.%m.%Y %H:%M:%S')}
- **Processing Method:** Simple Direct Content Integration"""

                return compact_newsletter, standard_newsletter, detailed_newsletter, status_msg, processing_info

            else:
                progress(1.0, desc="‚ùå Batch-Generation fehlgeschlagen")
                error_msg = f"""‚ùå **Batch-Generation fehlgeschlagen!**

**Debug-Info:**
- **Stadt:** {location}
- **Dauer:** {generation_time:.1f}s
- **Status:** Keine Newsletter generiert

**M√∂gliche Ursachen:**
- Alle API-Limits erreicht
- System-√úberlastung
- Netzwerk-Problem

**L√∂sung:** Versuche einzelne Newsletter im ersten Tab."""

                error_status = f"""<div class="status-error">
                    ‚ùå <strong>Batch-Generation fehlgeschlagen!</strong>
                    <br>‚è±Ô∏è <strong>Nach:</strong> {generation_time:.1f}s
                    <br>üí° <strong>Versuche einzelne Newsletter im ersten Tab</strong>
                </div>"""

                return error_msg, error_msg, error_msg, error_status, "**Batch-Fehler:** Siehe Console f√ºr Details"

        except Exception as e:
            progress(1.0, desc="‚ùå Batch-System-Fehler")
            error_msg = f"""‚ùå **Batch-System Fehler:** {str(e)}

**Versuche einzelne Newsletter im ersten Tab!**"""

            error_status = f"""<div class="status-error">
                ‚ùå <strong>Batch-System Fehler!</strong>
                <br>üêõ <strong>Exception:</strong> {str(e)[:50]}...
            </div>"""

            return error_msg, error_msg, error_msg, error_status, f"Exception: {str(e)}"

    def get_recent_newsletters_ui(self, location):
        """
        Zeigt k√ºrzlich generierte Newsletter f√ºr eine Stadt

        Args:
            location: Stadt-Filter

        Returns:
            str: Formatierte Liste der Newsletter
        """
        if not self.simple_gemini_worker:
            return "‚ùå Simple Gemini Worker nicht verf√ºgbar"

        try:
            summary = self.simple_gemini_worker.get_simple_newsletter_summary(
                location=location.lower() if location else None
            )

            if not summary["newsletters"]:
                return f"üì≠ Noch keine Newsletter f√ºr {location if location else 'alle St√§dte'} generiert."

            recent_info = f"""**Newsletter-Historie f√ºr {location.title() if location else 'Alle St√§dte'}:**

**Gesamt-Statistiken:**
- **Total Newsletter:** {summary['total_newsletters']}
- **St√§dte:** {', '.join([loc.title() for loc in summary['locations']])}
- **Styles:** {', '.join([style.title() for style in summary['newsletter_styles']])}
- **Gesamt-W√∂rter:** {summary['total_words']:,}
- **√ò Facts/Newsletter:** {summary['avg_facts_per_newsletter']:.1f}

**K√ºrzliche Newsletter:**"""

            # Neueste 5 Newsletter
            recent_newsletters = sorted(
                summary["newsletters"],
                key=lambda x: x["timestamp"],
                reverse=True
            )[:5]

            for i, newsletter in enumerate(recent_newsletters, 1):
                timestamp = datetime.fromisoformat(newsletter["timestamp"]).strftime("%d.%m.%Y %H:%M")
                recent_info += f"""
{i}. **{newsletter['location'].title()} ({newsletter['newsletter_style'].title()})** - {timestamp}
   - {newsletter['word_count']} W√∂rter, {newsletter['sources_used']} Sources, {newsletter['facts_integrated']} Facts"""

            return recent_info

        except Exception as e:
            return f"‚ùå Fehler beim Laden der Historie: {str(e)}"

    def create_gradio_interface(self):
        """
        Erstellt Gradio Interface

        Returns:
            gr.Blocks: Gradio Interface
        """
        with gr.Blocks(
            title="üì∞ Newsletter Generator",
            theme=gr.themes.Soft(),
            css="""
                .main-header {
                    text-align: center;
                    margin-bottom: 30px;
                }
                .status-success {
                    color: green;
                    font-weight: bold;
                    padding: 15px;
                    background: #d4edda;
                    border: 1px solid #c3e6cb;
                    border-radius: 5px;
                    margin: 10px 0;
                }
                .status-error {
                    color: #721c24;
                    font-weight: bold;
                    padding: 15px;
                    background: #f8d7da;
                    border: 1px solid #f5c6cb;
                    border-radius: 5px;
                    margin: 10px 0;
                }
                .processing-info {
                    background: #f0f0f0;
                    padding: 15px;
                    border-radius: 8px;
                    margin-top: 10px;
                    border: 1px solid #ddd;
                }
                .newsletter-content {
                    min-height: 400px;
                    border: 1px solid #ddd;
                    border-radius: 8px;
                }
                .compact-newsletter, .standard-newsletter, .detailed-newsletter {
                    min-height: 350px;
                    max-height: 500px;
                    overflow-y: auto;
                    border: 1px solid #ddd;
                    border-radius: 8px;
                    padding: 10px;
                }
                /* Fix f√ºr Progress Bar Sichtbarkeit */
                .progress-container {
                    width: 100% !important;
                    margin: 10px 0 !important;
                    min-height: 30px !important;
                }
                .progress-bar {
                    height: 25px !important;
                    border-radius: 5px !important;
                }
            """
        ) as interface:

            # Header
            gr.HTML("""
                <div class="main-header">
                    <h1>üì∞ Lokaler Newsletter Generator</h1>
                    <p>Generiere professionelle Newsletter f√ºr deutsche St√§dte mit KI-Power!</p>
                </div>
            """)

            with gr.Tabs():

                # Tab 1: Einzelner Newsletter
                with gr.Tab("üìù Newsletter Generator", elem_id="single_tab"):

                    with gr.Row():
                        with gr.Column(scale=1):
                            gr.HTML("<h3>üéõÔ∏è Einstellungen</h3>")

                            location_input = gr.Dropdown(
                                choices=self.available_locations,
                                label="üèôÔ∏è Stadt/Region",
                                value="m√ºnchen",
                                info="W√§hle eine Stadt f√ºr deinen Newsletter"
                            )

                            categories_input = gr.CheckboxGroup(
                                choices=self.available_categories,
                                label="üìã Kategorien",
                                value=self.available_categories[:4],  # Erste 4 als Standard
                                info="W√§hle interessante Kategorien (aktuell werden alle verf√ºgbaren verwendet)"
                            )

                            style_input = gr.Radio(
                                choices=self.newsletter_styles,
                                label="üé® Newsletter-Stil",
                                value="standard",
                                info="Compact: 300 W√∂rter | Standard: 500 W√∂rter | Detailed: 800+ W√∂rter"
                            )

                            generate_btn = gr.Button(
                                "üöÄ Newsletter Generieren",
                                variant="primary",
                                size="lg"
                            )

                            # Debug-Button f√ºr Troubleshooting
                            debug_btn = gr.Button(
                                "üî• DEBUG Newsletter (bei Problemen)",
                                variant="secondary",
                                size="sm"
                            )

                        with gr.Column(scale=2):
                            gr.HTML("<h3>üì∞ Generated Newsletter</h3>")

                            newsletter_output = gr.Markdown(
                                label="Newsletter",
                                value="üëà W√§hle eine Stadt und klicke 'Newsletter Generieren'",
                                elem_classes=["newsletter-content"],
                                show_label=True,
                                container=True
                            )

                    # Status in separater Row mit voller Breite
                    with gr.Row():
                        status_output = gr.HTML(
                            label="Generation Status",
                            visible=True
                        )

                    # Processing Info in separater Row
                    with gr.Row():
                        processing_info = gr.Markdown(
                            label="üìä Generation Details",
                            visible=False,
                            show_label=True,
                            container=True
                        )

                    # Event Handler f√ºr normalen Newsletter
                    generate_btn.click(
                        fn=self.generate_newsletter_ui,
                        inputs=[location_input, categories_input, style_input],
                        outputs=[newsletter_output, status_output, processing_info]
                    ).then(
                        fn=lambda: gr.update(visible=True),
                        outputs=[processing_info]
                    )

                    # Event Handler f√ºr Debug-Newsletter
                    debug_btn.click(
                        fn=self.generate_newsletter_ui_with_debug,
                        inputs=[location_input, categories_input, style_input],
                        outputs=[newsletter_output, status_output, processing_info]
                    ).then(
                        fn=lambda: gr.update(visible=True),
                        outputs=[processing_info]
                    )

                # Tab 2: Alle 3 Styles
                with gr.Tab("üé≠ Style-Vergleich", elem_id="comparison_tab"):

                    with gr.Row():
                        comparison_location = gr.Dropdown(
                            choices=self.available_locations,
                            label="üèôÔ∏è Stadt f√ºr Style-Vergleich",
                            value="m√ºnchen"
                        )

                        comparison_categories = gr.CheckboxGroup(
                            choices=self.available_categories,
                            label="üìã Kategorien",
                            value=self.available_categories[:3]
                        )

                        compare_btn = gr.Button(
                            "üé≠ Alle Styles Generieren",
                            variant="primary"
                        )

                    # Comparison Results mit besserer Sichtbarkeit
                    with gr.Row():
                        with gr.Column():
                            gr.HTML("<h4>üìÑ Compact Newsletter</h4>")
                            compact_output = gr.Markdown(
                                value="Warte auf Generation...",
                                elem_classes=["compact-newsletter"],
                                show_label=False,
                                container=True,
                                height=400
                            )

                        with gr.Column():
                            gr.HTML("<h4>üì∞ Standard Newsletter</h4>")
                            standard_output = gr.Markdown(
                                value="Warte auf Generation...",
                                elem_classes=["standard-newsletter"],
                                show_label=False,
                                container=True,
                                height=400
                            )

                        with gr.Column():
                            gr.HTML("<h4>üìö Detailed Newsletter</h4>")
                            detailed_output = gr.Markdown(
                                value="Warte auf Generation...",
                                elem_classes=["detailed-newsletter"],
                                show_label=False,
                                container=True,
                                height=400
                            )

                    # Status und Processing Info mit besserer Sichtbarkeit
                    with gr.Row():
                        comparison_status = gr.HTML(
                            show_label=True,
                            container=True
                        )

                    with gr.Row():
                        comparison_processing = gr.Markdown(
                            visible=False,
                            show_label=True,
                            container=True
                        )

                    # Event Handler f√ºr Style-Vergleich
                    compare_btn.click(
                        fn=self.generate_all_styles_ui,
                        inputs=[comparison_location, comparison_categories],
                        outputs=[compact_output, standard_output, detailed_output, comparison_status, comparison_processing]
                    ).then(
                        fn=lambda: gr.update(visible=True),
                        outputs=[comparison_processing]
                    )

                # Tab 3: Newsletter-Historie
                with gr.Tab("üìö Historie", elem_id="history_tab"):

                    with gr.Row():
                        history_location = gr.Dropdown(
                            choices=[""] + self.available_locations,
                            label="üèôÔ∏è Stadt Filter (leer = alle)",
                            value=""
                        )

                        refresh_btn = gr.Button("üîÑ Aktualisieren")

                    history_output = gr.Markdown(
                        value="üì≠ Noch keine Newsletter generiert",
                        label="Newsletter-Historie"
                    )

                    # Event Handler f√ºr Historie
                    refresh_btn.click(
                        fn=self.get_recent_newsletters_ui,
                        inputs=[history_location],
                        outputs=[history_output]
                    )

                    # Auto-load Historie beim Tab-Wechsel
                    history_location.change(
                        fn=self.get_recent_newsletters_ui,
                        inputs=[history_location],
                        outputs=[history_output]
                    )

            # Footer
            gr.HTML("""
                <div style="text-align: center; margin-top: 50px; padding: 20px; background: #f8f9fa; border-radius: 10px;">
                    <h4>üöÄ Newsletter System Info</h4>
                    <p><strong>Technologie:</strong> Simple Gemini Worker + Simple Content Processor + Foundation System</p>
                    <p><strong>APIs:</strong> Firecrawl (Scraping) + Claude (Web Search) + Perplexity (Cross-Validation) + Gemini (Generation)</p>
                    <p><strong>Features:</strong> Multi-Source Content Integration ‚Ä¢ 3 Newsletter-Levels ‚Ä¢ Fact-Extraction ‚Ä¢ Audit-Trail</p>
                </div>
            """)

        return interface

    def launch_ui(self, **launch_kwargs):
        """
        Startet Gradio Interface

        Args:
            **launch_kwargs: Gradio launch Parameter
        """
        interface = self.create_gradio_interface()

        default_kwargs = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": True,
            "show_api": False,
            "quiet": False
        }

        # Merge mit user kwargs
        launch_config = {**default_kwargs, **launch_kwargs}

        print("üöÄ Starte Newsletter UI...")
        print(f"üìä Konfiguration: {launch_config}")

        interface.launch(**launch_config)

# =============================================================================
# UI INITIALISIERUNG
# =============================================================================

# Newsletter UI initialisieren
worker_available = 'simple_gemini_worker' in globals() and simple_gemini_worker
config_available = 'config_manager' in globals() and config_manager

if worker_available:
    newsletter_ui = NewsletterUI(
        simple_gemini_worker=simple_gemini_worker,
        config_manager=config_manager if config_available else None
    )

    print("‚úÖ Newsletter UI bereit!")
    print("üì± Features: Newsletter Generator + Style-Vergleich + Historie")
    print("üéØ Ready to launch mit: newsletter_ui.launch_ui()")

else:
    print("‚ùå Newsletter UI nicht verf√ºgbar - Simple Gemini Worker fehlt")
    newsletter_ui = None

# =============================================================================
# QUICK LAUNCH FUNCTION
# =============================================================================

def launch_newsletter_ui(share=True, port=7860):
    """
    Quick Launch Function f√ºr Newsletter UI

    Args:
        share: √ñffentlicher Gradio Link
        port: Server Port
    """
    if newsletter_ui:
        print("üöÄ Starte Newsletter UI...")
        newsletter_ui.launch_ui(
            share=share,
            server_port=port,
            show_api=False
        )
    else:
        print("‚ùå Newsletter UI nicht verf√ºgbar!")

print("\n" + "="*60)
print("üì± NEWSLETTER GRADIO UI BEREIT!")
print("="*60)
print("üöÄ Starten mit: launch_newsletter_ui()")
print("üåê Oder detailliert: newsletter_ui.launch_ui(share=True)")
print("="*60)

‚úÖ Newsletter UI initialisiert
üìç Locations: 5
üìã Categories: 6
‚úÖ Newsletter UI bereit!
üì± Features: Newsletter Generator + Style-Vergleich + Historie
üéØ Ready to launch mit: newsletter_ui.launch_ui()

üì± NEWSLETTER GRADIO UI BEREIT!
üöÄ Starten mit: launch_newsletter_ui()
üåê Oder detailliert: newsletter_ui.launch_ui(share=True)


In [17]:
# Finde freie Ports:
import socket

def find_free_port(start_port=7860):
    for port in range(start_port, start_port + 100):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.bind(('localhost', port))
                return port
        except OSError:
            continue
    return None

free_port = find_free_port()
print(f"Freier Port: {free_port}")

# Dann mit freiem Port starten:
launch_newsletter_ui(port=free_port)

Freier Port: 7860
üöÄ Starte Newsletter UI...
üöÄ Starte Newsletter UI...
üìä Konfiguration: {'server_name': '0.0.0.0', 'server_port': 7860, 'share': True, 'show_api': False, 'quiet': False}
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0bb650cb0d1410b4f7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [21]:
# Pr√ºfe Gemini API-Key
print(f"Gemini API Key vorhanden: {'‚úÖ' if api_config.gemini_key else '‚ùå'}")
if api_config.gemini_key:
    print(f"Key length: {len(api_config.gemini_key)}")
    print(f"Key starts with: {api_config.gemini_key[:10]}...")

Gemini API Key vorhanden: ‚úÖ
Key length: 39
Key starts with: AIzaSyCDUV...


In [22]:
# Teste mit anderem Model
try:
    import google.generativeai as genai
    test_model = genai.GenerativeModel("gemini-2.5-flash")  # Anderes Model

    response = test_model.generate_content("Test")
    print(f"‚úÖ Alternative Model funktioniert: {response.text}")

except Exception as e:
    print(f"‚ùå Auch alternatives Model fehlt: {e}")

‚úÖ Alternative Model funktioniert: Received! How can I help you today?


In [20]:
# Test mit Timeout
import signal

def timeout_handler(signum, frame):
    raise TimeoutError("Gemini API Timeout!")

signal.signal(signal.SIGALRM, timeout_handler)
signal.alarm(10)  # 10 Sekunden Timeout

try:
    print("ü§ñ Teste mit 10s Timeout...")
    response = simple_gemini_worker.model.generate_content("Test")
    signal.alarm(0)  # Cancel timeout

    if response:
        print("‚úÖ Gemini funktioniert!")
    else:
        print("‚ùå Keine Response")

except TimeoutError:
    print("‚ùå GEMINI API TIMEOUT - API antwortet nicht!")
except Exception as e:
    print(f"‚ùå Other Error: {e}")

ü§ñ Teste mit 10s Timeout...
‚úÖ Gemini funktioniert!
