In [None]:
# All Scraper Comparison with Domain-Based Processing
# Standardized version with consistent evaluation across all scrapers
# Domain-based evaluation with weighted scoring: 1 point for single indicator, 2 points for both

import pymongo
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from collections import defaultdict
from urllib.parse import urlparse
from IPython.display import HTML, display
import os
from matplotlib.figure import Figure
from matplotlib.backends.backend_agg import FigureCanvasAgg
from PIL import Image
import io
import base64
import re

# Font configuration for all matplotlib figures (no backend is selected here).
# matplotlib tries the 'font.sans-serif' entries in order and uses the first one installed.
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = ['Arial', 'DejaVu Sans', 'Liberation Sans', 'Helvetica']

# --- CONFIGURATION ---
# MongoDB connection settings and the collections (one per scraper type) to compare.
# NOTE(review): credentials are hard-coded in source; presumably development defaults —
# confirm before running against a shared or production instance.
MONGO_HOST = 'mongo'
MONGO_PORT = 27017
MONGO_USER = 'admin'
MONGO_PASS = 'changeme'
MONGO_DB_NAME = 'tasks'
# The collection names double as the scraper-type switch in the detect_* functions.
COLLECTIONS_TO_ANALYZE = ['network_scraper', 'api_scraper', 'term_scraper']

# --- TASK SELECTION ---
# Values are matched against 'result.task_name' in each document.
TARGET_TASK_NAMES = []  # Empty means analyze all tasks

def get_main_domain_from_url(url):
    """Collapse a URL to ``scheme://<last two host labels>`` (same rule as the scrapers).

    The prefixes ``sso.``, ``idp.``, ``login.`` and ``www.`` are each tested once,
    in that order, against the progressively stripped host. Whatever remains is
    cut down to its final two dot-separated labels. A port, if present, is part
    of the host string and is kept.

    Args:
        url: URL string to reduce.

    Returns:
        The reduced ``scheme://host`` string, or ``url`` unchanged if anything
        raises while parsing.
    """
    try:
        pieces = urlparse(url)
        host = pieces.netloc

        for prefix in ("sso.", "idp.", "login.", "www."):
            if host.startswith(prefix):
                host = host[len(prefix):]

        # Keeping the last two labels is a no-op for hosts with one or two labels.
        host = ".".join(host.split(".")[-2:])

        return f"{pieces.scheme}://{host}"
    except Exception:
        return url

def detect_secure_specific_indicators(url_data, collection_name):
    """
    Detect SECURE SPECIFIC indicators (API calls, network requests with clear passkey patterns)
    These are the most reliable passkey indicators specific to each scraper type.

    Args:
        url_data: Mapping-like record for one URL (dict or pandas Series); only
            ``.get(key, default)`` is used.
        collection_name: Scraper type; one of 'api_scraper', 'term_scraper',
            'network_scraper'. Any other value yields False.

    Returns:
        bool: True if the record has secure specific indicators, False otherwise.
    """
    def _is_set(value):
        # Rows taken from a DataFrame carry NaN for keys a document did not have.
        # NaN is truthy in Python, so it must be rejected explicitly or every
        # such row would count as a positive detection.
        if value is None or (isinstance(value, float) and value != value):
            return False
        return bool(value)

    def _is_non_empty_list(value):
        return isinstance(value, list) and len(value) > 0

    if collection_name == 'api_scraper':
        # Secure specific: API calls with publicKey
        return _is_non_empty_list(url_data.get('api_calls_public_key'))

    if collection_name == 'term_scraper':
        # Secure specific: Terms with publicKey parameter
        term_keys = (
            'navigator_credentials_get({publickey_',
            'navigator_credentials_create({publickey_',
        )
        return any(_is_set(url_data.get(key, False)) for key in term_keys)

    if collection_name == 'network_scraper':
        # Secure specific: Secure network requests/responses
        return (_is_non_empty_list(url_data.get('secure_passkey_requests', [])) or
                _is_non_empty_list(url_data.get('secure_passkey_responses', [])))

    return False

def detect_secure_html_indicators(url_data, collection_name):
    """
    Detect SECURE HTML indicators (HTML elements found on page)
    These are consistent across all scrapers: webauthn inputs and passkey buttons.

    Args:
        url_data: Mapping-like record for one URL (dict or pandas Series); only
            ``.get(key, default)`` is used.
        collection_name: Accepted for a signature parallel to the other
            detectors; not used, because the HTML keys are the same for every scraper.

    Returns:
        bool: True if the record has secure HTML indicators, False otherwise.
    """
    def _is_set(value):
        # Rows taken from a DataFrame carry NaN for keys a document did not have.
        # NaN is truthy in Python, so it must be rejected explicitly or every
        # such row would count as a positive detection.
        if value is None or (isinstance(value, float) and value != value):
            return False
        return bool(value)

    return (_is_set(url_data.get('webauthn_input_found', False)) or
            _is_set(url_data.get('passkey_button_found', False)))

def detect_possible_indicators(url_data, collection_name):
    """
    Detect POSSIBLE indicators (less reliable indicators that might indicate passkey support)
    These vary by scraper type but represent uncertain/partial evidence.

    Args:
        url_data: Mapping-like record for one URL (dict or pandas Series); only
            ``.get(key, default)`` is used.
        collection_name: Scraper type; one of 'api_scraper', 'term_scraper',
            'network_scraper'. Any other value yields False.

    Returns:
        bool: True if the record has possible indicators, False otherwise.
    """
    def _is_set(value):
        # Rows taken from a DataFrame carry NaN for keys a document did not have.
        # NaN is truthy in Python, so it must be rejected explicitly or every
        # such row would count as a positive detection.
        if value is None or (isinstance(value, float) and value != value):
            return False
        return bool(value)

    def _is_non_empty_list(value):
        return isinstance(value, list) and len(value) > 0

    if collection_name == 'api_scraper':
        # Possible: General credential API calls or identity calls
        list_keys = ('api_calls_credentials_get', 'api_calls_identity')
        return any(_is_non_empty_list(url_data.get(key, [])) for key in list_keys)

    if collection_name == 'term_scraper':
        # Possible: General navigator.credentials calls without publicKey
        flag_keys = (
            'navigator_credentials_get(',
            'navigator_credentials_create(',
            'autocomplete_webauthn',
            'startauthentication',
            'isuserverifyingplatformauthenticatoravailable',
        )
        return any(_is_set(url_data.get(key, False)) for key in flag_keys)

    if collection_name == 'network_scraper':
        # Possible: Non-secure network patterns
        list_keys = (
            'possible_passkey_requests',
            'possible_passkey_responses',
            'passkey_requests',
            'passkey_responses',
        )
        if any(_is_non_empty_list(url_data.get(key, [])) for key in list_keys):
            return True
        return _is_set(url_data.get('passkey_patterns_detected', False))

    return False

def detect_fedcm_indicators(url_data, collection_name):
    """
    Detect FedCM (Federated Credential Management) indicators
    These are specific to identity/federated authentication.

    Args:
        url_data: Mapping-like record for one URL (dict or pandas Series); only
            ``.get(key, default)`` is used.
        collection_name: Scraper type; one of 'api_scraper', 'term_scraper',
            'network_scraper'. Any other value yields False.

    Returns:
        bool: True if the record has FedCM indicators, False otherwise.
    """
    def _is_set(value):
        # Rows taken from a DataFrame carry NaN for keys a document did not have.
        # NaN is truthy in Python, so it must be rejected explicitly or every
        # such row would count as a positive detection.
        if value is None or (isinstance(value, float) and value != value):
            return False
        return bool(value)

    def _is_non_empty_list(value):
        return isinstance(value, list) and len(value) > 0

    if collection_name == 'api_scraper':
        # FedCM: Identity API calls
        return _is_non_empty_list(url_data.get('api_calls_identity', []))

    if collection_name == 'term_scraper':
        # FedCM: Terms with identity or federated parameters
        flag_keys = (
            'navigator_credentials_get({identity_',
            'navigator_credentials_create({identity_',
            'navigator_credentials_get({federated_',
            'navigator_credentials_create({federated_',
        )
        return any(_is_set(url_data.get(key, False)) for key in flag_keys)

    if collection_name == 'network_scraper':
        # FedCM: Network FedCM detections
        return _is_non_empty_list(url_data.get('fedcm_detections', []))

    return False

def evaluate_domain_classification(domain_data, collection_name):
    """
    Classify one domain into exactly ONE secure category (shared by all scrapers).

    A domain counts as having an indicator when at least one of its URL records
    has it. The secure category is then chosen as:
    - 'secure_both' when HTML and scraper-specific indicators are present (2 points)
    - 'secure_html_only' when only HTML indicators are present (1 point)
    - 'secure_specific_only' when only specific indicators are present (1 point)
    - 'no_secure_found' when neither is present (0 points)

    Args:
        domain_data: List of URL data for the domain
        collection_name: Name of the collection (scraper type)

    Returns:
        dict: the four mutually exclusive category flags above, plus
        'has_possible', 'has_fedcm' and 'weighted_score'.
    """
    records = list(domain_data)

    html = any(detect_secure_html_indicators(rec, collection_name) for rec in records)
    specific = any(detect_secure_specific_indicators(rec, collection_name) for rec in records)
    possible = any(detect_possible_indicators(rec, collection_name) for rec in records)
    fedcm = any(detect_fedcm_indicators(rec, collection_name) for rec in records)

    return {
        'secure_html_only': html and not specific,
        'secure_specific_only': specific and not html,
        'secure_both': html and specific,
        'no_secure_found': not (html or specific),
        'has_possible': possible,
        'has_fedcm': fedcm,
        # One point per kind of secure evidence: 0, 1 or 2.
        'weighted_score': int(html) + int(specific),
    }

def load_and_process_collection_data(db, collection_name, target_tasks):
    """Load data from a collection and process it for domain-based analysis.

    Args:
        db: Database handle; ``db[collection_name]`` must return an object with
            a ``find(query)`` method.
        collection_name: Name of the collection (scraper type) to read.
        target_tasks: Task names to keep (matched on 'result.task_name'); an
            empty value loads every document.

    Returns:
        tuple: ``(df, domain_groups)``. ``df`` has one row per document whose
        'result' is a dict, flattened with ``pd.json_normalize``.
        ``domain_groups`` maps each 'url_id' to the list of its rows (pandas
        Series). Rows without a usable 'url_id' are grouped under 'unknown'.
        When nothing is loaded the result is an empty DataFrame and ``{}``.
    """
    collection = db[collection_name]

    # Build base query
    base_query = {}
    if target_tasks:
        base_query['result.task_name'] = {'$in': target_tasks}

    data = list(collection.find(base_query))
    if not data:
        print(f"No documents found in '{collection_name}'.")
        return pd.DataFrame(), {}

    normalized_data = []
    for doc in data:
        result = doc.get('result')
        if not isinstance(result, dict):
            continue
        flat_doc = pd.json_normalize(result).to_dict(orient='records')[0]

        # Handle NaN values in boolean columns
        bool_columns = (
            ['webauthn_input_found', 'passkey_button_found', 'error'] +
            [col for col in flat_doc.keys() if isinstance(col, str) and col.endswith('_found')]
        )
        for bool_col in bool_columns:
            if bool_col in flat_doc and pd.isna(flat_doc[bool_col]):
                flat_doc[bool_col] = False

        normalized_data.append(flat_doc)

    df = pd.DataFrame(normalized_data)
    if df.empty:
        return df, {}

    print(f"{len(df)} documents loaded from '{collection_name}'.")

    # Group by domain (url_id)
    domain_groups = defaultdict(list)
    for _, row in df.iterrows():
        domain = row.get('url_id', 'unknown')
        # A document that lacks 'url_id' shows up as NaN here (the DataFrame
        # fills the gap), so the .get() default alone never applies to it.
        if domain is None or (isinstance(domain, float) and domain != domain):
            domain = 'unknown'
        domain_groups[domain].append(row)

    return df, domain_groups

def count_indicators_for_collection(domain_groups, collection_name):
    """
    Tally domains per classification category for one scraper collection.

    Every domain is classified with evaluate_domain_classification and added to
    at most one of the three secure categories. 'possible' and 'fedcm' are
    counted independently of the secure category. 'weighted_total' is the sum of
    the per-domain weighted scores.

    Args:
        domain_groups: Mapping of domain -> list of URL records.
        collection_name: Name of the collection (scraper type).

    Returns:
        dict: counts keyed by 'secure_html_only', 'secure_specific_only',
        'secure_both', 'total_secure', 'no_secure_found', 'possible', 'fedcm'
        and 'weighted_total'. All values are 0 for an empty ``domain_groups``.
    """
    tally = {
        'secure_html_only': 0,
        'secure_specific_only': 0,
        'secure_both': 0,
        'total_secure': 0,
        'no_secure_found': 0,
        'possible': 0,
        'fedcm': 0,
        'weighted_total': 0
    }
    if not domain_groups:
        return tally

    secure_categories = ('secure_html_only', 'secure_specific_only', 'secure_both')

    for records in domain_groups.values():
        verdict = evaluate_domain_classification(records, collection_name)

        # The secure categories are mutually exclusive; stop at the first hit.
        for category in secure_categories:
            if verdict[category]:
                tally[category] += 1
                break

        if verdict['has_possible']:
            tally['possible'] += 1
        if verdict['has_fedcm']:
            tally['fedcm'] += 1

        tally['weighted_total'] += verdict['weighted_score']

    tally['total_secure'] = sum(tally[category] for category in secure_categories)
    tally['no_secure_found'] = len(domain_groups) - tally['total_secure']

    return tally

def get_timeout_compliance_data(db, collections, target_tasks):
    """Collects and categorizes timeout compliance data for all tasks.

    For every task name found in each collection, the timeout is read from the
    first matching document. Each document's 'duration_seconds' is then bucketed
    by its ratio to that timeout. Tasks whose timeout is missing, non-numeric,
    zero or negative are skipped, since a ratio against them is meaningless.

    Args:
        db: Database handle; ``db[name]`` must provide ``distinct``, ``find_one``
            and ``find``.
        collections: Collection names to inspect.
        target_tasks: Task names to restrict to; an empty value means all tasks.

    Returns:
        list[dict]: one entry per (collection, task) with the keys 'task_label',
        'total' (documents seen, including those without a duration),
        'total_duration', 'duration_formatted' (HH:MM:SS) and one count per
        bucket: '0-50%', '50-100%', '100-150%', '>150%'.
    """
    timeout_data = []
    base_query = {'result.task_name': {'$in': target_tasks}} if target_tasks else {}

    for col_name in collections:
        collection = db[col_name]

        for task_name in collection.distinct('result.task_name', base_query):
            task_query = {'result.task_name': task_name}

            # Find timeout value for this task (taken from the first document only)
            first_doc = collection.find_one(task_query, {'result.timeout': 1})
            first_result = first_doc.get('result') if first_doc else None
            raw_timeout = first_result.get('timeout') if isinstance(first_result, dict) else None
            try:
                timeout_val = float(raw_timeout)
            except (TypeError, ValueError):
                continue
            # Rejects zero, negatives, NaN and infinity in one comparison.
            if not 0 < timeout_val < float('inf'):
                continue

            counts = {"0-50%": 0, "50-100%": 0, "100-150%": 0, ">150%": 0}
            total_docs = 0
            total_duration = 0

            # Calculate compliance
            for doc in collection.find(task_query, {'result.duration_seconds': 1}):
                total_docs += 1
                result = doc.get('result')
                duration = result.get('duration_seconds') if isinstance(result, dict) else None
                if duration is None:
                    continue

                total_duration += duration
                compliance = duration / timeout_val
                if compliance <= 0.5:
                    counts["0-50%"] += 1
                elif compliance <= 1.0:
                    counts["50-100%"] += 1
                elif compliance <= 1.5:
                    counts["100-150%"] += 1
                else:
                    counts[">150%"] += 1

            if total_docs == 0:
                continue

            # Format duration as hours:minutes:seconds
            hours, remainder = divmod(int(total_duration), 3600)
            minutes, seconds = divmod(remainder, 60)
            duration_formatted = f"{hours:02d}:{minutes:02d}:{seconds:02d}"

            timeout_data.append({
                'task_label': f"{task_name}\n({col_name} / {int(timeout_val)}s)",
                'total': total_docs,
                'total_duration': total_duration,
                'duration_formatted': duration_formatted,
                **counts
            })

    return timeout_data

def analyze_task_based_detection_rates(db, collections, target_tasks):
    """Compute, per collection and task, the share of domains with secure indicators.

    Data is loaded with load_and_process_collection_data and counted with
    count_indicators_for_collection, so the numbers agree with the main
    comparison. Progress and per-task figures are printed as they are computed.

    Args:
        db: Database handle passed through to the loader.
        collections: Collection names (scraper types) to analyze.
        target_tasks: Task names to restrict to; an empty value means all tasks.

    Returns:
        list[dict]: one entry per (collection, task) with the keys 'collection',
        'task_name', 'total_domains', 'domains_with_indicators' and
        'success_rate' (a percentage).
    """
    results = []

    for collection_name in collections:
        print(f"🔍 Analyzing task detection rates for {collection_name}...")

        df, domain_groups = load_and_process_collection_data(db, collection_name, target_tasks)

        # Nothing to analyze without grouped rows or a task_name column.
        if not domain_groups or df.empty or 'task_name' not in df.columns:
            continue

        for task_name in df['task_name'].unique():
            # Keep only the rows of each domain that belong to this task.
            domains_for_task = {}
            for domain, rows in domain_groups.items():
                matching_rows = [row for row in rows if row.get('task_name') == task_name]
                if matching_rows:
                    domains_for_task[domain] = matching_rows

            total_domains = len(domains_for_task)
            if total_domains == 0:
                continue

            tallies = count_indicators_for_collection(domains_for_task, collection_name)
            secure_domains = tallies['total_secure']
            success_rate = (secure_domains / total_domains) * 100

            print(f"   • Task: {task_name}")
            print(f"     - Total domains: {total_domains}")
            print(f"     - Domains with secure indicators: {secure_domains}")
            print(f"     - Success rate: {success_rate:.1f}%")

            results.append({
                'collection': collection_name,
                'task_name': task_name,
                'total_domains': total_domains,
                'domains_with_indicators': secure_domains,
                'success_rate': success_rate
            })

    return results

def save_figure_as_png(fig, filename, dpi=300):
    """Write *fig* to *filename* with a tight bounding box and print the saved path.

    Args:
        fig: Object exposing ``savefig`` (a matplotlib figure).
        filename: Destination path handed to ``savefig``.
        dpi: Output resolution; defaults to 300.
    """
    render_options = {'bbox_inches': 'tight', 'dpi': dpi}
    fig.savefig(filename, **render_options)
    print(f"Saved: {filename}")

def create_comprehensive_visualization(comparison_data, timeout_data, task_detection_rates):
    """Create the complete visualization with 4 charts including weighted scores.

    Draws a 2x2 grid on one 24x18 figure:
      1. Per-scraper domain counts: a stacked bar of the three secure categories
         next to separate bars for possible and FedCM indicators.
      2. Unique domains vs total URLs per scraper.
      3. Horizontal bars of per-task success rate (15 highest only).
      4. Stacked bars of timeout-compliance buckets per task.

    Args:
        comparison_data: Mapping of collection name -> dict. The keys read here
            are 'secure_html_only', 'secure_specific_only', 'secure_both',
            'possible_passkey', 'fedcm', 'weighted_total', 'total_domains' and
            'total_urls'.
        timeout_data: List of dicts with 'task_label', 'duration_formatted',
            'total_duration' and the bucket counts '0-50%', '50-100%',
            '100-150%', '>150%'. A falsy value shows a placeholder text instead.
        task_detection_rates: List of dicts with 'task_name', 'collection',
            'success_rate', 'domains_with_indicators' and 'total_domains'.
            NOTE: this list is sorted IN PLACE (descending success rate).

    Returns:
        The matplotlib figure, after ``plt.tight_layout()`` and ``plt.show()``
        have been called on it.
    """
    # Create the main figure with all charts
    fig = plt.figure(figsize=(24, 18))
    
    # Chart 1: Domain-based Comparison with Enhanced Stacked Bar for Secure Indicators + Weighted Scores
    ax1 = plt.subplot(2, 2, 1)
    collections = list(comparison_data.keys())
    
    # Extract data for the visualization (one value per collection, in dict order)
    secure_html_counts = [comparison_data[col]['secure_html_only'] for col in collections]
    secure_specific_counts = [comparison_data[col]['secure_specific_only'] for col in collections]
    secure_both_counts = [comparison_data[col]['secure_both'] for col in collections]
    possible_counts = [comparison_data[col]['possible_passkey'] for col in collections]
    fedcm_counts = [comparison_data[col]['fedcm'] for col in collections]
    weighted_totals = [comparison_data[col]['weighted_total'] for col in collections]
    
    # Three bar slots per collection: x - width (secure stack), x (possible), x + width (FedCM)
    x = np.arange(len(collections))
    width = 0.2
    
    # Stacked bar for secure indicators with three segments
    bars1_html = ax1.bar(x - width, secure_html_counts, width, 
                         label='HTML Elements Only (1pt)', 
                         color='#3498db', alpha=0.7, edgecolor='white', linewidth=1)
    
    bars1_specific = ax1.bar(x - width, secure_specific_counts, width, 
                            bottom=secure_html_counts,
                            label='Specific Indicators Only (1pt)',
                            color='#2ecc71', alpha=0.7, edgecolor='white', linewidth=1)
    
    # Calculate bottom position for the third segment (both)
    bottom_both = [h + s for h, s in zip(secure_html_counts, secure_specific_counts)]
    
    bars1_both = ax1.bar(x - width, secure_both_counts, width,
                        bottom=bottom_both,
                        label='Both (HTML + Specific) (2pts)',
                        color='#9b59b6', alpha=0.7, edgecolor='white', linewidth=1)
    
    bars2 = ax1.bar(x, possible_counts, width, 
                   label='Possible Indicators',
                   color='orange', alpha=0.7, edgecolor='white', linewidth=1)
    
    bars3 = ax1.bar(x + width, fedcm_counts, width, 
                   label='FedCM Indicators',
                   color='blue', alpha=0.7, edgecolor='white', linewidth=1)
    
    ax1.set_xlabel('Scraper Type')
    ax1.set_ylabel('Number of Domains')
    ax1.set_title('Standardized Domain-Based Comparison: Secure vs Possible Indicators')
    ax1.set_xticks(x)
    ax1.set_xticklabels([col.replace('_', ' ').title() for col in collections])
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Add value labels and weighted scores
    # Segment labels sit at the vertical midpoint of their segment; the label
    # switches to black for counts of 2 or less.
    for i, collection in enumerate(collections):
        # HTML only segment
        if secure_html_counts[i] > 0:
            height = secure_html_counts[i] / 2
            ax1.text(i - width, height, str(secure_html_counts[i]), 
                    ha='center', va='center', fontweight='bold', 
                    color='white' if secure_html_counts[i] > 2 else 'black')
        
        # Specific indicators segment
        if secure_specific_counts[i] > 0:
            height = secure_html_counts[i] + secure_specific_counts[i] / 2
            ax1.text(i - width, height, str(secure_specific_counts[i]), 
                    ha='center', va='center', fontweight='bold', 
                    color='white' if secure_specific_counts[i] > 2 else 'black')
        
        # Both indicators segment
        if secure_both_counts[i] > 0:
            height = secure_html_counts[i] + secure_specific_counts[i] + secure_both_counts[i] / 2
            ax1.text(i - width, height, str(secure_both_counts[i]), 
                    ha='center', va='center', fontweight='bold', 
                    color='white' if secure_both_counts[i] > 2 else 'black')
        
        # Total count with weighted score (placed just above the full stack)
        total_secure = secure_html_counts[i] + secure_specific_counts[i] + secure_both_counts[i]
        if total_secure > 0:
            ax1.text(i - width, total_secure + 0.1, f'{total_secure}\n({weighted_totals[i]}pts)', 
                    ha='center', va='bottom', fontweight='bold', fontsize=9)
        
        # Other indicators
        if possible_counts[i] > 0:
            ax1.text(i, possible_counts[i] + 0.1, str(possible_counts[i]), 
                    ha='center', va='bottom', fontweight='bold')
        
        if fedcm_counts[i] > 0:
            ax1.text(i + width, fedcm_counts[i] + 0.1, str(fedcm_counts[i]), 
                    ha='center', va='bottom', fontweight='bold')
    
    # Chart 2: Total Domains vs URLs
    ax2 = plt.subplot(2, 2, 2)
    total_domains = [comparison_data[col]['total_domains'] for col in collections]
    total_urls = [comparison_data[col]['total_urls'] for col in collections]
    
    x2 = np.arange(len(collections))
    width2 = 0.35
    
    ax2.bar(x2 - width2/2, total_domains, width2, label='Unique Domains', color='skyblue', alpha=0.7)
    ax2.bar(x2 + width2/2, total_urls, width2, label='Total URLs', color='lightcoral', alpha=0.7)
    
    ax2.set_xlabel('Scraper Type')
    ax2.set_ylabel('Count')
    ax2.set_title('Domains vs URLs per Scraper')
    ax2.set_xticks(x2)
    ax2.set_xticklabels([col.replace('_', ' ').title() for col in collections])
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    # Value labels one unit above each bar
    for i, (domains, urls) in enumerate(zip(total_domains, total_urls)):
        ax2.text(i - width2/2, domains + 1, str(domains), ha='center', va='bottom', fontweight='bold')
        ax2.text(i + width2/2, urls + 1, str(urls), ha='center', va='bottom', fontweight='bold')
    
    # Chart 3: Task Detection Success Rate
    ax3 = plt.subplot(2, 2, 3)
    
    if not task_detection_rates or len(task_detection_rates) == 0:
        ax3.text(0.5, 0.5, 'No task data available', ha='center', va='center', transform=ax3.transAxes)
        ax3.set_title('Task-Based Detection Success Rate')
    else:
        # Sort by success rate for better visualization
        # NOTE: list.sort() reorders the caller's list, not a copy.
        task_detection_rates.sort(key=lambda x: x['success_rate'], reverse=True)
        
        # Limit to top 15 tasks for readability
        show_tasks = task_detection_rates[:15]
        
        tasks = [f"{item['task_name']} ({item['collection']})" for item in show_tasks]
        success_rates = [item['success_rate'] for item in show_tasks]
        colors = []
        
        # Assign colors based on collection
        for item in show_tasks:
            if item['collection'] == 'network_scraper':
                colors.append('#3498db')
            elif item['collection'] == 'api_scraper':
                colors.append('#2ecc71')
            else:  # term_scraper (and any other collection name)
                colors.append('#9b59b6')
        
        # Create horizontal bar chart
        y_pos = np.arange(len(tasks))
        bars = ax3.barh(y_pos, success_rates, align='center', color=colors, alpha=0.7)
        
        ax3.set_yticks(y_pos)
        ax3.set_yticklabels(tasks)
        ax3.set_xlabel('Success Rate (%)')
        ax3.set_title('Task-Based Detection Success Rate (% of Domains with Secure Indicators)')
        ax3.set_xlim(0, 100)
        ax3.grid(axis='x', alpha=0.3)
        
        # Add value labels
        # 'width' is rebound here to the bar length (the success rate); the
        # chart-1 bar width of the same name is no longer needed at this point.
        for i, bar in enumerate(bars):
            width = bar.get_width()
            indicator_count = show_tasks[i]['domains_with_indicators']
            total_count = show_tasks[i]['total_domains']
            ax3.text(width + 1, bar.get_y() + bar.get_height()/2, 
                   f'{width:.1f}% ({indicator_count}/{total_count})', 
                   ha='left', va='center', fontweight='bold')
    
    # Chart 4: Timeout Compliance
    ax4 = plt.subplot(2, 2, 4)
    
    if timeout_data:
        timeout_df = pd.DataFrame(timeout_data)
        
        # Extend each task label with its total duration (HH:MM:SS and whole seconds)
        enhanced_labels = [f"{label}\nTotal: {row['duration_formatted']} ({int(row['total_duration'])}s)" 
                          for label, row in zip(timeout_df['task_label'], timeout_df.to_dict('records'))]
        
        tasks = enhanced_labels
        categories = ['0-50%', '50-100%', '100-150%', '>150%']
        colors_timeout = ['lightgreen', 'green', 'yellow', 'darkred']
        
        # Main chart
        # 'bottom' accumulates the stacked height so each category sits on the previous ones
        bottom = np.zeros(len(tasks))
        for i, category in enumerate(categories):
            values = timeout_df[category].tolist()
            ax4.bar(range(len(tasks)), values, bottom=bottom, label=category, 
                   color=colors_timeout[i], alpha=0.7)
            bottom += values
        
        ax4.set_xlabel('Task (Scraper / Timeout / Total Duration)')
        ax4.set_ylabel('Number of URLs')
        ax4.set_title('Timeout Compliance Distribution with Total Task Duration')
        ax4.set_xticks(range(len(tasks)))
        ax4.set_xticklabels(tasks, rotation=45, ha='right')
        ax4.legend(title='Timeout Usage')
    else:
        ax4.text(0.5, 0.5, 'No timeout data available', ha='center', va='center', transform=ax4.transAxes)
        ax4.set_title('Timeout Analysis')
    
    # Display the combined figure
    plt.tight_layout()
    plt.show()
    
    return fig

def calculate_average_duration(df):
    """Calculate average duration per URL from the dataframe.

    Args:
        df: DataFrame of flattened scraper results; the 'duration_seconds'
            column is read if present.

    Returns:
        float: Mean of the non-missing 'duration_seconds' values, or 0.0 when
        the frame is empty, has no such column, or every value is missing.
    """
    # A frame can have rows without this column when no document reported a
    # duration; treat that as "no data" rather than raising KeyError.
    if df.empty or 'duration_seconds' not in df.columns:
        return 0.0

    durations = df['duration_seconds'].dropna()
    if durations.empty:
        return 0.0

    return float(durations.mean())

def create_summary_table(comparison_data):
    """Render the per-scraper comparison results as an HTML summary table.

    Args:
        comparison_data: Mapping of scraper/collection name to a dict that
            provides the keys ``total_domains``, ``total_urls``,
            ``secure_passkey``, ``weighted_total``, ``possible_passkey`` and
            ``fedcm``.

    Returns:
        str: HTML fragment made of a heading, a table with one row per
        scraper, and a legend explaining the weighted scoring.
    """
    header_html = '''<h3>📊 Summary of Standardized Domain-based Results</h3>
    <table style="border-collapse: collapse; width: 100%; margin: 20px 0;">
        <tr style="background-color: #f2f2f2;">
            <th style="border: 1px solid #ddd; padding: 12px; text-align: left;">Scraper</th>
            <th style="border: 1px solid #ddd; padding: 12px; text-align: center;">Domains</th>
            <th style="border: 1px solid #ddd; padding: 12px; text-align: center;">URLs</th>
            <th style="border: 1px solid #ddd; padding: 12px; text-align: center;">Secure Indicators</th>
            <th style="border: 1px solid #ddd; padding: 12px; text-align: center;">Weighted Score</th>
            <th style="border: 1px solid #ddd; padding: 12px; text-align: center;">Possible</th>
            <th style="border: 1px solid #ddd; padding: 12px; text-align: center;">FedCM</th>
            <th style="border: 1px solid #ddd; padding: 12px; text-align: center;">Success Rate</th>
        </tr>'''

    # Collect the fragments and join once instead of growing a string with +=.
    fragments = [header_html]

    for scraper, data in comparison_data.items():
        display_name = scraper.replace('_', ' ').title()
        total_domains = data['total_domains']
        # 'secure_passkey' holds the number of domains with any secure indicator;
        # guard against division by zero for scrapers that returned no domains.
        success_rate = (data['secure_passkey'] / total_domains * 100) if total_domains > 0 else 0
        fragments.append(f'''
        <tr>
            <td style="border: 1px solid #ddd; padding: 12px; font-weight: bold;">{display_name}</td>
            <td style="border: 1px solid #ddd; padding: 12px; text-align: center;">{total_domains}</td>
            <td style="border: 1px solid #ddd; padding: 12px; text-align: center;">{data['total_urls']}</td>
            <td style="border: 1px solid #ddd; padding: 12px; text-align: center; color: green; font-weight: bold;">{data['secure_passkey']}</td>
            <td style="border: 1px solid #ddd; padding: 12px; text-align: center; color: purple; font-weight: bold;">{data['weighted_total']}</td>
            <td style="border: 1px solid #ddd; padding: 12px; text-align: center; color: orange; font-weight: bold;">{data['possible_passkey']}</td>
            <td style="border: 1px solid #ddd; padding: 12px; text-align: center; color: blue; font-weight: bold;">{data['fedcm']}</td>
            <td style="border: 1px solid #ddd; padding: 12px; text-align: center; font-weight: bold;">{success_rate:.1f}%</td>
        </tr>''')

    fragments.append('</table>')
    fragments.append('''
    <p><strong>Weighted Scoring:</strong></p>
    <ul>
        <li>HTML Elements Only OR Specific Indicators Only: 1 point</li>
        <li>Both HTML Elements AND Specific Indicators: 2 points</li>
        <li>No Secure Indicators: 0 points</li>
    </ul>
    <p><strong>Note:</strong> Success Rate and Task-Based Detection Rate should now be identical when only one task per scraper type exists.</p>
    ''')
    return ''.join(fragments)

# ====================== MAIN EXECUTION ======================

try:
    # Connect to MongoDB. Credentials are passed as keyword arguments instead of
    # being interpolated into a URI, so a user name or password that contains
    # reserved characters (':', '/', '@', '%') needs no manual percent-encoding.
    client = pymongo.MongoClient(
        host=MONGO_HOST,
        port=MONGO_PORT,
        username=MONGO_USER,
        password=MONGO_PASS,
    )
    db = client[MONGO_DB_NAME]
    
    print("🔍 Loading and processing data from all scrapers...")
    print("🎯 Using STANDARDIZED evaluation logic across all scrapers:")
    print("   • Secure HTML: webauthn_input_found OR passkey_button_found")
    print("   • Secure Specific: API calls with publicKey / Terms with publicKey / Secure network requests")
    print("   • Domain Classification: HTML only (1pt) | Specific only (1pt) | Both (2pts) | None (0pts)")
    
    # Per-collection summary numbers, keyed by collection name
    comparison_data = {}
    # (df, domain_groups) per collection, reused by the task-level analysis below
    all_domain_groups = {}
    
    for collection_name in COLLECTIONS_TO_ANALYZE:
        print(f"\n📊 Processing {collection_name} with standardized logic...")
        
        # Load data with domain-based processing
        df, domain_groups = load_and_process_collection_data(
            db, collection_name, TARGET_TASK_NAMES
        )
        
        # Keep the loaded data so the task analysis works on exactly the same rows
        all_domain_groups[collection_name] = (df, domain_groups)
        
        if domain_groups:
            # Count indicators per domain using the standardized logic
            indicator_counts = count_indicators_for_collection(domain_groups, collection_name)
            
            # Calculate average duration per URL
            avg_duration = calculate_average_duration(df)
            
            comparison_data[collection_name] = {
                'total_domains': len(domain_groups),
                'total_urls': len(df),
                'avg_duration_per_url': avg_duration,
                'secure_html_only': indicator_counts['secure_html_only'],
                'secure_specific_only': indicator_counts['secure_specific_only'],
                'secure_both': indicator_counts['secure_both'],
                # 'secure_passkey' carries the total_secure count
                'secure_passkey': indicator_counts['total_secure'],
                'possible_passkey': indicator_counts['possible'],
                'fedcm': indicator_counts['fedcm'],
                'weighted_total': indicator_counts['weighted_total']
            }
            
            print(f"   • Domains: {len(domain_groups)}, URLs: {len(df)}")
            print(f"   • HTML only: {indicator_counts['secure_html_only']} (1pt each)")
            print(f"   • Specific only: {indicator_counts['secure_specific_only']} (1pt each)")
            print(f"   • Both: {indicator_counts['secure_both']} (2pts each)")
            print(f"   • Total secure: {indicator_counts['total_secure']}, Weighted: {indicator_counts['weighted_total']} pts")
            print(f"   • Possible: {indicator_counts['possible']}, FedCM: {indicator_counts['fedcm']}")
        else:
            # Zero-filled entry so later consumers find every key they expect
            comparison_data[collection_name] = {
                'total_domains': 0, 'total_urls': 0, 'avg_duration_per_url': 0.0,
                'secure_html_only': 0, 'secure_specific_only': 0, 'secure_both': 0,
                'secure_passkey': 0, 'possible_passkey': 0, 'fedcm': 0, 'weighted_total': 0
            }
            print("   • No data found")
    
    # Get timeout compliance data
    print("\n⏱️ Analyzing timeout compliance...")
    timeout_data = get_timeout_compliance_data(db, COLLECTIONS_TO_ANALYZE, TARGET_TASK_NAMES)
    
    # Analyze task-based detection rates using the data loaded above
    print("\n📊 Analyzing task-based detection success rates...")
    task_detection_rates = []
    
    for collection_name, (df, domain_groups) in all_domain_groups.items():
        if domain_groups and not df.empty and 'task_name' in df.columns:
            print(f"🔍 Analyzing task detection rates for {collection_name}...")
            
            task_names = df['task_name'].unique()
            
            for task_name in task_names:
                # Restrict each domain's entries to the ones recorded for this task
                task_domain_groups = defaultdict(list)
                
                for domain, domain_data in domain_groups.items():
                    task_urls = [url_data for url_data in domain_data if url_data.get('task_name') == task_name]
                    if task_urls:
                        task_domain_groups[domain] = task_urls
                
                total_domains = len(task_domain_groups)
                if total_domains == 0:
                    continue
                
                # Use the same standardized counting logic as the main analysis
                indicator_counts = count_indicators_for_collection(task_domain_groups, collection_name)
                domains_with_indicators_count = indicator_counts['total_secure']
                
                # Calculate success rate (total_domains is non-zero here)
                success_rate = (domains_with_indicators_count / total_domains) * 100
                
                print(f"   • Task: {task_name}")
                print(f"     - Total domains: {total_domains}")
                print(f"     - Domains with secure indicators: {domains_with_indicators_count}")
                print(f"     - Success rate: {success_rate:.1f}%")
                
                # With a single task per scraper the task-level rate must equal the
                # collection-level rate; warn when they drift apart.
                if len(task_names) == 1:
                    main_total = comparison_data[collection_name]['total_domains']
                    main_secure = comparison_data[collection_name]['secure_passkey']
                    main_rate = (main_secure / main_total * 100) if main_total > 0 else 0
                    
                    if abs(success_rate - main_rate) > 0.1:
                        print("   ⚠️  WARNING: Inconsistency detected!")
                        print(f"     - Main analysis: {main_secure}/{main_total} = {main_rate:.1f}%")
                        print(f"     - Task analysis: {domains_with_indicators_count}/{total_domains} = {success_rate:.1f}%")
                
                task_detection_rates.append({
                    'collection': collection_name,
                    'task_name': task_name,
                    'total_domains': total_domains,
                    'domains_with_indicators': domains_with_indicators_count,
                    'success_rate': success_rate
                })
    
    # Create visualizations for display
    print("\n📈 Creating standardized visualizations...")
    main_figure = create_comprehensive_visualization(comparison_data, timeout_data, task_detection_rates)
    
    # Display summary table
    print("\n📋 Creating summary table...")
    summary_html = create_summary_table(comparison_data)
    display(HTML(summary_html))
    
    print("\n✅ Standardized analysis complete! All scrapers now use consistent evaluation:")
    print("   🎯 Domain-based aggregation with standardized classification")
    print("   📊 Weighted scoring system: 1pt single indicator, 2pts both indicators")
    print("   📈 Consistent visualization across all notebooks")
    print("   🔽 Run the next cell to save all visualizations to PNG files.")

except Exception as e:
    # Report a hint for the most likely cause, then re-raise so the full
    # traceback stays visible.
    print(f"❌ Error connecting to database or processing data: {e}")
    print("Please check if MongoDB is running and accessible.")
    raise

In [None]:
# Save combined visualization: export the figure, the HTML summary and the data files
import os
from datetime import datetime
import matplotlib
matplotlib.rcParams['font.family'] = 'DejaVu Sans'  # Fix font warning

# Create the output folder
output_dir = "scraper_comparison_results"
os.makedirs(output_dir, exist_ok=True)

# Timestamp that makes the file names of this run unique
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

try:
    # 1. Save the main visualization produced by the first cell
    if not locals().get('main_figure'):
        print("⚠️ Keine Hauptvisualisierung zum Speichern gefunden. Führe zuerst die erste Zelle aus.")
    else:
        print("💾 Saving combined visualization...")

        # Options shared by both output formats
        save_options = {'bbox_inches': 'tight', 'facecolor': 'white', 'edgecolor': 'none'}

        # PNG at 300 dpi for a high-quality raster image
        png_path = os.path.join(output_dir, f"scraper_comparison_complete_{timestamp}.png")
        main_figure.savefig(png_path, dpi=300, **save_options)

        # PDF for a vector version
        pdf_path = os.path.join(output_dir, f"scraper_comparison_complete_{timestamp}.pdf")
        main_figure.savefig(pdf_path, **save_options)

        print(f"   ✅ Hauptvisualisierung gespeichert: {os.path.basename(png_path)}")

    # 2. Save the HTML summary wrapped in a minimal standalone document
    if locals().get('summary_html'):
        html_path = os.path.join(output_dir, f"scraper_comparison_summary_{timestamp}.html")
        html_document = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>Scraper Comparison Results - {timestamp}</title>
</head>
<body>
{summary_html}
</body>
</html>"""
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(html_document)
        print(f"📋 HTML-Zusammenfassung gespeichert: {os.path.basename(html_path)}")

    # 3. Additionally: JSON export of the comparison data
    if locals().get('comparison_data'):
        import json

        json_path = os.path.join(output_dir, f"scraper_comparison_data_{timestamp}.json")
        with open(json_path, 'w', encoding='utf-8') as f:
            json.dump(comparison_data, f, indent=2, ensure_ascii=False)

        print(f"📊 Vergleichsdaten gespeichert: {os.path.basename(json_path)}")

    # 4. Task detection rates as CSV
    if locals().get('task_detection_rates'):
        import pandas as pd

        csv_path = os.path.join(output_dir, f"task_detection_rates_{timestamp}.csv")
        pd.DataFrame(task_detection_rates).to_csv(csv_path, index=False, encoding='utf-8')
        print(f"📈 Task Detection Rates gespeichert: {os.path.basename(csv_path)}")

    # 5. Timeout data as CSV
    if locals().get('timeout_data'):
        import pandas as pd

        timeout_csv_path = os.path.join(output_dir, f"timeout_compliance_{timestamp}.csv")
        pd.DataFrame(timeout_data).to_csv(timeout_csv_path, index=False, encoding='utf-8')
        print(f"⏱️ Timeout Compliance gespeichert: {os.path.basename(timeout_csv_path)}")

    # 6. Plain-text statistics summary
    if locals().get('comparison_data'):
        stats_path = os.path.join(output_dir, f"scraper_comparison_statistics_{timestamp}.txt")

        with open(stats_path, 'w', encoding='utf-8') as f:
            f.writelines([
                f"Scraper Comparison Results Summary - {timestamp}\n",
                "=" * 60 + "\n\n",
            ])

            for scraper, data in comparison_data.items():
                # Share of domains with a secure indicator; 0 when no domains exist
                if data['total_domains'] > 0:
                    success_rate = data['secure_passkey'] / data['total_domains'] * 100
                else:
                    success_rate = 0

                f.writelines([
                    f"{scraper.replace('_', ' ').title()}:\n",
                    f"  Total Domains: {data['total_domains']}\n",
                    f"  Total URLs: {data['total_urls']}\n",
                    f"  Average Duration/URL: {data['avg_duration_per_url']:.1f}s\n",
                    f"  Secure Indicators: {data['secure_passkey']}\n",
                    f"  Possible Indicators: {data['possible_passkey']}\n",
                    f"  FedCM Indicators: {data['fedcm']}\n",
                    f"  Success Rate: {success_rate:.1f}%\n\n",
                ])

        print(f"📊 Statistik-Zusammenfassung gespeichert: {os.path.basename(stats_path)}")

    # List every file in the output folder that carries this run's timestamp
    print(f"\n✅ Alle Dateien gespeichert in: {output_dir}")
    print("📁 Gespeicherte Dateien:")
    for file in sorted(os.listdir(output_dir)):
        if timestamp not in file:
            continue
        file_path = os.path.join(output_dir, file)
        file_size = os.path.getsize(file_path) / 1024  # KB
        print(f"   📄 {file} ({file_size:.1f} KB)")

except Exception as e:
    # Best-effort export: report the failure and leave the notebook running
    print(f"❌ Fehler beim Speichern: {e}")
    print("💡 Stelle sicher, dass die erste Zelle erfolgreich ausgeführt wurde.")