## Imports

In [1]:
from ape_optimizer import APEOptimizer
import sys
from pathlib import Path
import json
import os
from dotenv import load_dotenv
from datetime import datetime


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Make modules in the current working directory importable.
# NOTE(review): APEOptimizer is already imported in the first cell, so this
# path entry only affects imports executed after this point - confirm that
# this ordering is intended.
sys.path.append('.')

# ===== CONFIGURATION =====
# Adjust the paths if necessary.
USER_DATA_DIR = Path("output_directory/users")
RESULTS_DIR = Path("../results/ape_optimization")

# User the test run is executed for.
TEST_USER_ID = "534023.0"

# Test parameters, kept small so the test finishes faster.
NUM_CANDIDATES = 3
NUM_EVALUATION_TWEETS = 5

# Echo the effective configuration, one "label: value" line per setting.
print("🧪 APE Optimizer Test Setup")
print(
    "\n".join(
        f"{label}: {value}"
        for label, value in (
            ("User Data Dir", USER_DATA_DIR),
            ("Results Dir", RESULTS_DIR),
            ("Test User", TEST_USER_ID),
            ("Candidates", NUM_CANDIDATES),
            ("Evaluation Tweets", NUM_EVALUATION_TWEETS),
        )
    )
)

# ===== CHECK WHETHER THE DATA IS AVAILABLE =====
user_file = USER_DATA_DIR / f"{TEST_USER_ID}.jsonl"
print("\nChecking data availability...")
print(f"User file exists: {user_file.exists()}")

if not user_file.exists():
    # Nothing to inspect: report the miss and list the users that do exist.
    print(f"❌ User file not found: {user_file}")
    print("Available users:")
    if not USER_DATA_DIR.exists():
        print("  User data directory doesn't exist!")
    else:
        for jsonl_path in USER_DATA_DIR.glob("*.jsonl"):
            print(f"  - {jsonl_path.stem}")
else:
    # Inspect the file content. Each non-blank line is parsed as one JSON
    # object; objects whose "set" is "history" or "holdout" are reported
    # together with the length of their "tweets" list.
    try:
        with open(user_file, 'r', encoding='utf-8') as handle:
            lines = handle.readlines()
        print(f"User file has {len(lines)} lines")

        has_history = False
        has_holdout = False
        for raw_line in lines:
            if not raw_line.strip():
                continue  # blank line, nothing to parse
            record = json.loads(raw_line)
            set_name = record.get("set")
            if set_name == "history":
                has_history = True
                history_count = len(record.get("tweets", []))
                print(f"History tweets: {history_count}")
            elif set_name == "holdout":
                has_holdout = True
                holdout_count = len(record.get("tweets", []))
                print(f"Holdout tweets: {holdout_count}")

        print(f"Has history: {has_history}")
        print(f"Has holdout: {has_holdout}")

    except Exception as e:
        # Best-effort diagnostic: any read/parse problem is reported, not raised.
        print(f"Error reading user file: {e}")

# ===== START APE OPTIMIZATION =====
# Runs one APE optimization for TEST_USER_ID and prints a report.
# On success `test_results` holds the best persona, its score and the detailed
# results; on failure or on any exception it stays None.
print(f"\n{'='*60}")
print("🚀 STARTING APE OPTIMIZATION TEST")
print(f"{'='*60}")

# Initialise once so the name is defined on every path through the block.
test_results = None

try:
    # Initialise the APE optimizer.
    optimizer = APEOptimizer(
        user_data_dir=USER_DATA_DIR,
        results_dir=RESULTS_DIR,
        api_key=None  # Uses GOOGLE_API_KEY from .env
    )

    print("✅ APE Optimizer initialized successfully")

    # Run the APE optimization.
    print(f"\n🔄 Running APE optimization for user {TEST_USER_ID}...")

    best_persona, best_score, detailed_results = optimizer.run_ape_optimization(
        user_id=TEST_USER_ID,
        num_candidates=NUM_CANDIDATES,
        num_evaluation_tweets=NUM_EVALUATION_TWEETS
    )

    # ===== SHOW RESULTS =====
    if best_persona:
        print("\n🎉 APE OPTIMIZATION SUCCESSFUL!")
        print(f"{'='*60}")

        print("\n📊 RESULTS SUMMARY:")
        print(f"Best Score: {best_score:.4f}")
        print(f"Generated Candidates: {detailed_results['num_candidates_generated']}")
        print(f"Evaluation Tweets Used: {detailed_results['num_evaluation_tweets']}")

        print("\n🏆 BEST PERSONA:")
        print(f"{best_persona}")

        print("\n📈 ALL CANDIDATES PERFORMANCE:")
        for candidate in detailed_results['all_candidates']:
            rank = candidate['final_rank']
            score = candidate['score']
            persona_text = candidate['persona']
            # Only mark the preview as truncated when text was actually cut off.
            if len(persona_text) > 100:
                persona_preview = persona_text[:100] + "..."
            else:
                persona_preview = persona_text
            print(f"  Rank {rank}: Score {score:.4f}")
            print(f"    {persona_preview}")
            print()

        print("📊 STATISTICS:")
        stats = detailed_results['statistics']
        print(f"  Mean Score: {stats['mean_score']:.4f}")
        print(f"  Std Dev: {stats['std_score']:.4f}")
        print(f"  Min Score: {stats['min_score']:.4f}")
        print(f"  Max Score: {stats['max_score']:.4f}")

        # Keep the results in a variable for further use.
        test_results = {
            'best_persona': best_persona,
            'best_score': best_score,
            'detailed_results': detailed_results
        }

        # Build the paths with pathlib so the separators match the platform
        # (an f-string with "/" yields mixed separators on Windows).
        # NOTE(review): the file names are assumed to match what APEOptimizer
        # writes into results_dir - confirm against ape_optimizer.py.
        individual_path = RESULTS_DIR / f"ape_results_{TEST_USER_ID}.json"
        master_path = RESULTS_DIR / "ape_optimization_results.json"
        print("\n💾 Results saved to:")
        print(f"  - Individual: {individual_path}")
        print(f"  - Master: {master_path}")

    else:
        print("\n❌ APE OPTIMIZATION FAILED!")
        print(f"No valid persona could be generated or evaluated for user {TEST_USER_ID}")

except Exception as e:
    # Top-level boundary of the test cell: report the error with its full
    # traceback instead of aborting the notebook run.
    print("\n💥 ERROR DURING APE OPTIMIZATION:")
    print(f"Error: {e}")
    import traceback
    traceback.print_exc()
    test_results = None

# ===== OPTIONAL: FURTHER ANALYSIS =====
# A single lookup covers both "name never assigned" and "assigned but empty/None".
if locals().get('test_results'):
    print("\n🔍 OPTIONAL: Detailed Analysis Available")
    print("Run the following commands for more details:")
    print("  - test_results['best_persona']  # Best persona text")
    print("  - test_results['best_score']    # Best score")
    print("  - test_results['detailed_results']['all_candidates']  # All candidates")

    # Example of a detailed candidate analysis: score plus the first 80
    # characters of each persona.
    print("\n📋 CANDIDATE COMPARISON:")
    all_candidates = test_results['detailed_results']['all_candidates']
    for candidate in all_candidates:
        score_text = format(candidate['score'], ".4f")
        persona_head = candidate['persona'][:80]
        print(f"Score {score_text}: {persona_head}...")

print("\n✅ Test completed!")

🧪 APE Optimizer Test Setup
User Data Dir: output_directory\users
Results Dir: ..\results\ape_optimization
Test User: 534023.0
Candidates: 3
Evaluation Tweets: 5

Checking data availability...
User file exists: True
User file has 2 lines
History tweets: 100
Holdout tweets: 100
Has history: True
Has holdout: True

🚀 STARTING APE OPTIMIZATION TEST
✓ punkt already available
Downloading wordnet...
✓ wordnet downloaded successfully
Downloading omw-1.4...
✓ omw-1.4 downloaded successfully
✓ stopwords already available
NLTK data check completed.
APE Optimizer initialized successfully
✅ APE Optimizer initialized successfully

🔄 Running APE optimization for user 534023.0...

STARTING APE OPTIMIZATION FOR USER: 534023.0
Loaded 100 history tweets and 100 holdout tweets

Step 1: Generating 3 persona candidates...
Starte Generierung von Persona-Prompt-Kandidaten für Nutzer: 534023.0

--- SENDE PROMPT AN GOOGLE GEMINI API (JSON Modus) ---
--- WARTE AUF ANTWORT VON GEMINI API ... ---
--- ANTWORT VON G