# In [1]:
# Phase 4: Umfassende Erweiterte Analysen - Komplette MTR Anycast Studie
# ===============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Erweiterte Bibliotheken für komplexe Analysen
from scipy import stats
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from collections import defaultdict, Counter
import networkx as nx
import re

# Notebook-wide plotting defaults: stock matplotlib style, seaborn "husl"
# palette and a large default figure size.
plt.style.use('default')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (15, 10)})

# Banner announcing the start of the phase-4 analyses.
print(
    "=== PHASE 4: UMFASSENDE ERWEITERTE ANALYSEN ===",
    "Netzwerk-Topologie, Anomalie-Deep-Dive, Predictive Analytics & Qualitätsanalysen",
    "=" * 85,
    sep="\n",
)

# ================================================================
# 1. NETZWERK-TOPOLOGIE & INFRASTRUKTUR-ANALYSE
# ================================================================

def analyze_network_topology(df, protocol_name):
    """Extract hop-by-hop paths from MTR rows and report ASN / hop-count statistics.

    The function annotates ``df`` in place with the columns ``service_info``,
    ``service_name``, ``service_type`` and ``provider`` (looked up from the
    ``dst`` column), walks every row's ``hubs`` list, and prints a textual
    report (ASN diversity per service, transit tier summary, hop counts).

    Args:
        df: DataFrame providing the columns ``dst``, ``region`` and ``hubs``.
            Each ``hubs`` value is read as a sequence of dict-like hop records
            using the keys ``host``, ``ASN``, ``Avg`` and ``Loss%``.
        protocol_name: Label that is only used in the printed heading.

    Returns:
        A tuple ``(network_paths, asn_analysis)`` where ``network_paths`` is a
        list with one dict per successfully parsed row and ``asn_analysis``
        maps service name -> region -> set of ASNs seen on the path.
    """
    print(f"\n1. NETZWERK-TOPOLOGIE & INFRASTRUKTUR - {protocol_name}")
    print("-" * 60)

    # Service classification keyed by destination address.
    SERVICE_MAPPING = {
        # IPv4
        '1.1.1.1': {'name': 'Cloudflare DNS', 'type': 'anycast', 'provider': 'Cloudflare'},
        '8.8.8.8': {'name': 'Google DNS', 'type': 'anycast', 'provider': 'Google'},
        '9.9.9.9': {'name': 'Quad9 DNS', 'type': 'anycast', 'provider': 'Quad9'},
        '104.16.123.96': {'name': 'Cloudflare CDN', 'type': 'anycast', 'provider': 'Cloudflare'},
        '2.16.241.219': {'name': 'Akamai CDN', 'type': 'pseudo-anycast', 'provider': 'Akamai'},
        '193.99.144.85': {'name': 'Heise', 'type': 'unicast', 'provider': 'Heise'},
        '169.229.128.134': {'name': 'Berkeley NTP', 'type': 'unicast', 'provider': 'UC Berkeley'},

        # IPv6
        '2606:4700:4700::1111': {'name': 'Cloudflare DNS', 'type': 'anycast', 'provider': 'Cloudflare'},
        '2001:4860:4860::8888': {'name': 'Google DNS', 'type': 'anycast', 'provider': 'Google'},
        '2620:fe::fe:9': {'name': 'Quad9 DNS', 'type': 'anycast', 'provider': 'Quad9'},
        '2606:4700::6810:7b60': {'name': 'Cloudflare CDN', 'type': 'anycast', 'provider': 'Cloudflare'},
        '2a02:26f0:3500:1b::1724:a393': {'name': 'Akamai CDN', 'type': 'pseudo-anycast', 'provider': 'Akamai'},
        '2a02:2e0:3fe:1001:7777:772e:2:85': {'name': 'Heise', 'type': 'unicast', 'provider': 'Heise'},
        '2607:f140:ffff:8000:0:8006:0:a': {'name': 'Berkeley NTP', 'type': 'unicast', 'provider': 'UC Berkeley'}
    }

    # Well-known ASNs used for the coarse tier classification below.
    tier1_asns = frozenset({'AS174', 'AS3356', 'AS1299', 'AS3257', 'AS6453', 'AS5511'})
    hyperscaler_asns = frozenset({'AS16509', 'AS13335', 'AS15169'})  # AWS, Cloudflare, Google

    # Series.map leaves NaN for destinations that are not in the mapping, and
    # NaN is truthy, so a plain truthiness test would try to subscript a
    # float. Check for an actual dict instead.
    df['service_info'] = df['dst'].map(SERVICE_MAPPING)
    df['service_name'] = df['service_info'].apply(
        lambda info: info['name'] if isinstance(info, dict) else 'Unknown')
    df['service_type'] = df['service_info'].apply(
        lambda info: info['type'] if isinstance(info, dict) else 'Unknown')
    df['provider'] = df['service_info'].apply(
        lambda info: info['provider'] if isinstance(info, dict) else 'Unknown')

    # Network path extraction
    network_paths = []
    asn_analysis = defaultdict(lambda: defaultdict(set))
    tier_classification = defaultdict(set)
    skipped_rows = 0

    print("\n🌐 HOP-BY-HOP NETZWERK-ANALYSE:")

    for _, row in df.iterrows():
        try:
            hubs = row['hubs']
            if hubs is None or len(hubs) == 0:
                continue

            service_name = row['service_name']
            region = row['region']
            path_info = {
                'service': service_name,
                'service_type': row['service_type'],
                'provider': row['provider'],
                'region': region,
                'hops': [],
                'asns': [],
                'latencies': [],
                'geographic_indicators': []
            }

            for hop_number, hop in enumerate(hubs, start=1):
                if not hop:
                    continue

                asn = hop.get('ASN')
                hostname = hop.get('host', '')
                latency = hop.get('Avg', 0)

                path_info['hops'].append({
                    'hop_number': hop_number,
                    'hostname': hop.get('host', '???'),
                    'asn': hop.get('ASN', 'AS???'),
                    'latency': latency,
                    'loss': hop.get('Loss%', 0)
                })

                # ASN bookkeeping ('AS???' marks an unresolved ASN)
                if asn and asn != 'AS???':
                    path_info['asns'].append(asn)
                    asn_analysis[service_name][region].add(asn)

                    if asn in tier1_asns:
                        tier_classification['Tier-1'].add(asn)
                    elif asn in hyperscaler_asns:
                        tier_classification['Hyperscaler'].add(asn)

                # Geographic hints derived from the hop's hostname
                if hostname and hostname != '???':
                    path_info['geographic_indicators'].extend(
                        extract_geographic_indicators(hostname))

                if latency > 0:
                    path_info['latencies'].append(latency)

            network_paths.append(path_info)
        except (AttributeError, KeyError, TypeError, ValueError):
            # Best effort: a malformed row (e.g. NaN instead of a hop list or
            # a non-numeric latency) is dropped rather than aborting the run.
            skipped_rows += 1
            continue

    print(f"Netzwerk-Pfade analysiert: {len(network_paths):,}")
    if skipped_rows:
        print(f"Übersprungene fehlerhafte Zeilen: {skipped_rows:,}")

    # ASN diversity per service
    print("\n📊 ASN-DIVERSITÄT PRO SERVICE:")

    for service, regions in asn_analysis.items():
        region_sets = list(regions.values())
        total_asns = set().union(*region_sets)

        avg_asns_per_region = np.mean([len(asns) for asns in region_sets])
        print(f"  {service}:")
        print(f"    Gesamte ASNs: {len(total_asns)}")
        print(f"    Durchschn. ASNs/Region: {avg_asns_per_region:.1f}")

        # ASNs shared by every region of this service
        if len(region_sets) > 1:
            intersection = set.intersection(*region_sets)
            print(f"    Gemeinsame ASNs: {len(intersection)} ({len(intersection)/len(total_asns)*100:.1f}%)")

    # Tier summary
    print("\n🏢 TRANSIT-PROVIDER-TIER-ANALYSE:")
    for tier, asns in tier_classification.items():
        print(f"  {tier}: {len(asns)} ASNs ({', '.join(sorted(asns)[:5])}...)")

    # Hop-count statistics per service type
    print("\n🔢 HOP-COUNT-ANALYSE:")

    hop_analysis = defaultdict(list)
    for path in network_paths:
        hop_analysis[path['service_type']].append(len(path['hops']))

    for service_type, hop_counts in hop_analysis.items():
        if hop_counts:
            print(f"  {service_type.upper()}:")
            print(f"    Durchschn. Hops: {np.mean(hop_counts):.1f}")
            print(f"    Min-Max Hops: {min(hop_counts)}-{max(hop_counts)}")
            print(f"    Std.Dev.: {np.std(hop_counts):.1f}")

    return network_paths, asn_analysis

def extract_geographic_indicators(hostname):
    """Return ``"category:token"`` hints for location tokens found in a hostname.

    Matching is a case-insensitive substring search, so short tokens such as
    ``de`` or ``us`` also match inside unrelated words. Results are ordered
    by category (cities, countries, regions) and then by token order.
    """
    token_groups = (
        ('cities', ('nyc', 'lax', 'dfw', 'ord', 'iad', 'lhr', 'fra', 'nrt', 'sin', 'syd')),
        ('countries', ('us', 'de', 'uk', 'jp', 'au', 'ca', 'fr', 'nl', 'se', 'br')),
        ('regions', ('east', 'west', 'north', 'south', 'central', 'europe', 'asia', 'america')),
    )

    haystack = hostname.lower()

    return [
        f"{group}:{token}"
        for group, tokens in token_groups
        for token in tokens
        if token in haystack
    ]

# ================================================================
# 2. ANOMALIE-DEEP-DIVE UND KLASSIFIKATION
# ================================================================

def _collect_anomalies(rows, service, anomaly_type, value_column, threshold, severity):
    """Build one anomaly record per row of ``rows``.

    ``severity`` is either a fixed label or a callable that maps the row's
    value in ``value_column`` to a label.
    """
    records = []
    for _, row in rows.iterrows():
        value = row[value_column]
        records.append({
            'service': service,
            'service_type': row['service_type'],
            'provider': row['provider'],
            'region': row['region'],
            'timestamp': row['timestamp'],
            'type': anomaly_type,
            'value': value,
            'threshold': threshold,
            'severity': severity(value) if callable(severity) else severity
        })
    return records


def comprehensive_anomaly_analysis(df, protocol_name):
    """Derive end-to-end performance rows from MTR data and classify anomalies.

    The function annotates ``df`` in place (``service_info``, ``service_name``,
    ``service_type``, ``provider``; ``utctime`` is converted to datetime),
    reduces every row's ``hubs`` list to one performance record taken from the
    last hop that reports a positive ``Avg``, and then flags four anomaly
    types per service (only for services with at least 100 records):

    * ``latency_spike``  - latency above Q3 + 3 * IQR
    * ``jitter_spike``   - jitter above the service's 95th percentile
    * ``packet_loss``    - packet loss above 50 %
    * ``routing_change`` - hop count more than 5 away from the service median

    A textual report is printed along the way.

    Args:
        df: DataFrame providing ``dst``, ``region``, ``utctime`` and ``hubs``.
            Each ``hubs`` value is read as a sequence of dict-like hop records
            using the keys ``Avg``, ``Loss%`` and ``Javg``.
        protocol_name: Label that is only used in the printed heading.

    Returns:
        ``(anomalies_df, perf_df)`` when anomalies were found,
        ``(None, perf_df)`` when none were found, and a bare ``None`` when no
        performance record could be extracted at all.

        NOTE(review): the bare ``None`` is kept for backward compatibility;
        callers that unpack the result must handle that case.
    """
    print(f"\n2. ANOMALIE-DEEP-DIVE UND KLASSIFIKATION - {protocol_name}")
    print("-" * 60)

    # Service classification keyed by destination address.
    SERVICE_MAPPING = {
        '1.1.1.1': {'name': 'Cloudflare DNS', 'type': 'anycast', 'provider': 'Cloudflare'},
        '8.8.8.8': {'name': 'Google DNS', 'type': 'anycast', 'provider': 'Google'},
        '9.9.9.9': {'name': 'Quad9 DNS', 'type': 'anycast', 'provider': 'Quad9'},
        '104.16.123.96': {'name': 'Cloudflare CDN', 'type': 'anycast', 'provider': 'Cloudflare'},
        '2.16.241.219': {'name': 'Akamai CDN', 'type': 'pseudo-anycast', 'provider': 'Akamai'},
        '193.99.144.85': {'name': 'Heise', 'type': 'unicast', 'provider': 'Heise'},
        '169.229.128.134': {'name': 'Berkeley NTP', 'type': 'unicast', 'provider': 'UC Berkeley'},
        '2606:4700:4700::1111': {'name': 'Cloudflare DNS', 'type': 'anycast', 'provider': 'Cloudflare'},
        '2001:4860:4860::8888': {'name': 'Google DNS', 'type': 'anycast', 'provider': 'Google'},
        '2620:fe::fe:9': {'name': 'Quad9 DNS', 'type': 'anycast', 'provider': 'Quad9'},
        '2606:4700::6810:7b60': {'name': 'Cloudflare CDN', 'type': 'anycast', 'provider': 'Cloudflare'},
        '2a02:26f0:3500:1b::1724:a393': {'name': 'Akamai CDN', 'type': 'pseudo-anycast', 'provider': 'Akamai'},
        '2a02:2e0:3fe:1001:7777:772e:2:85': {'name': 'Heise', 'type': 'unicast', 'provider': 'Heise'},
        '2607:f140:ffff:8000:0:8006:0:a': {'name': 'Berkeley NTP', 'type': 'unicast', 'provider': 'UC Berkeley'}
    }

    # Series.map leaves NaN for unmapped destinations and NaN is truthy, so
    # test for an actual dict before subscripting.
    df['service_info'] = df['dst'].map(SERVICE_MAPPING)
    df['service_name'] = df['service_info'].apply(
        lambda info: info['name'] if isinstance(info, dict) else 'Unknown')
    df['service_type'] = df['service_info'].apply(
        lambda info: info['type'] if isinstance(info, dict) else 'Unknown')
    df['provider'] = df['service_info'].apply(
        lambda info: info['provider'] if isinstance(info, dict) else 'Unknown')
    df['utctime'] = pd.to_datetime(df['utctime'])

    # Reduce every measurement to one end-to-end performance record.
    performance_data = []
    skipped_rows = 0

    for _, row in df.iterrows():
        try:
            hubs = row['hubs']
            if hubs is None or len(hubs) == 0:
                continue

            final_latency = None
            final_loss = None
            final_jitter = None
            intermediate_failures = 0

            for hop in hubs:
                if not hop:
                    continue

                if hop.get('Loss%', 0) == 100:
                    intermediate_failures += 1

                # The last hop with a positive average latency wins.
                avg = hop.get('Avg')
                if avg and avg > 0:
                    final_latency = avg
                    final_loss = hop.get('Loss%', 0)
                    final_jitter = hop.get('Javg', 0)

            if final_latency is None:
                continue

            performance_data.append({
                'timestamp': row['utctime'],
                'service_name': row['service_name'],
                'service_type': row['service_type'],
                'provider': row['provider'],
                'region': row['region'],
                'latency': final_latency,
                'packet_loss': final_loss,
                'jitter': final_jitter if final_jitter else 0,
                'intermediate_failures': intermediate_failures,
                'total_hops': len(hubs)
            })
        except (AttributeError, KeyError, TypeError, ValueError):
            # Best effort: drop malformed rows instead of aborting the run.
            skipped_rows += 1
            continue

    perf_df = pd.DataFrame(performance_data)

    if len(perf_df) == 0:
        print("Keine Performance-Daten für Anomalie-Analyse")
        return None

    print(f"Performance-Daten für Anomalie-Analyse: {len(perf_df):,}")
    if skipped_rows:
        print(f"Übersprungene fehlerhafte Zeilen: {skipped_rows:,}")

    # Anomaly classification per service
    anomalies = []

    print("\n🚨 ERWEITERTE ANOMALIE-KLASSIFIKATION:")

    for service in perf_df['service_name'].unique():
        service_data = perf_df[perf_df['service_name'] == service]

        # Too few samples make the quantile-based thresholds meaningless.
        if len(service_data) < 100:
            continue

        # 1. Latency anomalies (IQR method with a 3 * IQR fence)
        q1 = service_data['latency'].quantile(0.25)
        q3 = service_data['latency'].quantile(0.75)
        upper_bound = q3 + 3 * (q3 - q1)
        anomalies.extend(_collect_anomalies(
            service_data[service_data['latency'] > upper_bound],
            service, 'latency_spike', 'latency', upper_bound,
            lambda value, limit=upper_bound: 'high' if value > limit * 2 else 'medium'))

        # 2. Jitter anomalies (above the 95th percentile)
        jitter_threshold = service_data['jitter'].quantile(0.95)
        anomalies.extend(_collect_anomalies(
            service_data[service_data['jitter'] > jitter_threshold],
            service, 'jitter_spike', 'jitter', jitter_threshold, 'medium'))

        # 3. Packet-loss anomalies
        anomalies.extend(_collect_anomalies(
            service_data[service_data['packet_loss'] > 50],
            service, 'packet_loss', 'packet_loss', 50,
            lambda value: 'high' if value > 80 else 'medium'))

        # 4. Routing anomalies (unusual hop counts)
        hop_median = service_data['total_hops'].median()
        anomalies.extend(_collect_anomalies(
            service_data[(service_data['total_hops'] - hop_median).abs() > 5],
            service, 'routing_change', 'total_hops', hop_median, 'low'))

    if not anomalies:
        print("Keine signifikanten Anomalien entdeckt")
        return None, perf_df

    anomalies_df = pd.DataFrame(anomalies)

    print(f"Gesamte Anomalien: {len(anomalies):,}")

    # Distribution of anomaly types
    print("\nAnomalie-Typen:")
    type_counts = anomalies_df['type'].value_counts()
    for anomaly_type, count in type_counts.items():
        print(f"  {anomaly_type}: {count:,} ({count/len(anomalies)*100:.1f}%)")

    # Regions with the most anomalies, relative to their measurement count
    print("\n🌍 GEOGRAFISCHE ANOMALIE-HOTSPOTS:")
    region_anomalies = anomalies_df.groupby('region').size().sort_values(ascending=False)
    for region, count in region_anomalies.head(10).items():
        total_measurements = len(perf_df[perf_df['region'] == region])
        rate = (count / total_measurements) * 100 if total_measurements > 0 else 0
        print(f"  {region}: {count:,} Anomalien ({rate:.2f}% Rate)")

    # Anomaly types per service type
    print("\n📊 ANOMALIE-VERHALTEN PRO SERVICE-TYP:")

    type_analysis = anomalies_df.groupby(['service_type', 'type']).size().unstack(fill_value=0)
    print(type_analysis)

    # Temporal clustering of anomalies
    print("\n⏰ TEMPORALE ANOMALIE-CLUSTER:")
    anomalies_df['hour'] = anomalies_df['timestamp'].dt.hour
    anomalies_df['day_of_week'] = anomalies_df['timestamp'].dt.day_name()

    hourly_anomalies = anomalies_df.groupby('hour').size()
    peak_hours = hourly_anomalies.nlargest(3)
    print(f"Peak Anomalie-Stunden: {dict(peak_hours)}")

    # Provider-specific anomaly rates
    print("\n🏢 PROVIDER-ANOMALIE-PROFILE:")

    for provider in perf_df['provider'].unique():
        provider_data = perf_df[perf_df['provider'] == provider]
        provider_anomalies = anomalies_df[anomalies_df['provider'] == provider]

        if len(provider_data) > 0:
            anomaly_rate = len(provider_anomalies) / len(provider_data) * 100
            print(f"  {provider}: {anomaly_rate:.2f}% Anomalie-Rate")

            if len(provider_anomalies) > 0:
                severity_dist = provider_anomalies['severity'].value_counts()
                print(f"    Schweregrade: {dict(severity_dist)}")

    return anomalies_df, perf_df

# ================================================================
# 3. PROVIDER-INFRASTRUKTUR-MAPPING
# ================================================================

def map_provider_infrastructure(network_paths, anomalies_df, protocol_name):
    """Summarise edge hosts and geographic hints per anycast provider.

    Only paths whose ``service_type`` is ``'anycast'`` are considered. The
    hostname of a path's last hop counts as an edge server unless it is the
    placeholder ``'???'``. A report is printed and a comparison table is
    returned.

    Args:
        network_paths: List of path dicts providing ``service_type``,
            ``provider``, ``region``, ``hops`` (dicts with ``hostname``) and
            ``geographic_indicators``.
        anomalies_df: Optional DataFrame with a ``provider`` column; used to
            derive an anomaly rate per provider. May be ``None``.
        protocol_name: Label that is only used in the printed heading.

    Returns:
        A DataFrame with one row per provider, or ``None`` when
        ``network_paths`` is empty.
    """
    print(f"\n3. PROVIDER-INFRASTRUKTUR-MAPPING - {protocol_name}")
    print("-" * 55)

    if not network_paths:
        print("Keine Netzwerk-Pfad-Daten verfügbar")
        return

    print(f"\n🏗️ PROVIDER-EDGE-INFRASTRUKTUR-ANALYSE:")

    # provider -> region -> set of edge hostnames / geographic hints
    edge_hosts = defaultdict(lambda: defaultdict(set))
    geo_hints = defaultdict(lambda: defaultdict(set))

    for entry in network_paths:
        if entry['service_type'] != 'anycast':
            continue

        owner = entry['provider']
        area = entry['region']

        # The last hop of the path is treated as the edge server.
        hop_list = entry['hops']
        if hop_list:
            last_host = hop_list[-1]['hostname']
            if last_host != '???':
                edge_hosts[owner][area].add(last_host)

        for hint in entry['geographic_indicators']:
            geo_hints[owner][area].add(hint)

    has_anomalies = anomalies_df is not None and len(anomalies_df) > 0

    # Edge density report; per-provider figures are kept for the table below.
    print(f"\n📍 EDGE-SERVER-DENSITY:")

    provider_stats = {}
    for owner in sorted(edge_hosts.keys()):
        per_region = {area: len(hosts) for area, hosts in edge_hosts[owner].items()}
        edge_total = sum(per_region.values())
        region_total = len(per_region)
        hint_total = len(set().union(*geo_hints[owner].values()))
        provider_stats[owner] = (edge_total, region_total, hint_total)

        print(f"\n  {owner}:")
        print(f"    Gesamte Edge-Server: {edge_total}")
        print(f"    Regionen abgedeckt: {region_total}")
        print(f"    Durchschn. Edges/Region: {edge_total/region_total:.1f}")

        # Three regions with the most edge hosts (ties keep insertion order)
        ranked = sorted(per_region.items(), key=lambda pair: -pair[1])
        print(f"    Top-Regionen: {dict(ranked[:3])}")

        print(f"    Geografische Indikatoren: {hint_total}")

    # Provider comparison table
    print(f"\n📊 PROVIDER-INFRASTRUKTUR-VERGLEICH:")

    comparison_matrix = []
    for owner, (edge_total, region_total, hint_total) in provider_stats.items():
        anomaly_rate = 0
        if has_anomalies:
            anomaly_count = len(anomalies_df[anomalies_df['provider'] == owner])
            # Number of paths of this provider approximates its measurements.
            measurement_count = sum(1 for p in network_paths if p['provider'] == owner)
            if measurement_count > 0:
                anomaly_rate = anomaly_count / measurement_count * 100
            else:
                anomaly_rate = 0

        comparison_matrix.append({
            'Provider': owner,
            'Total_Edges': edge_total,
            'Regions': region_total,
            'Avg_Edges_Per_Region': edge_total / region_total if region_total > 0 else 0,
            'Geo_Coverage': hint_total,
            'Anomaly_Rate_%': anomaly_rate
        })

    comparison_df = pd.DataFrame(comparison_matrix)
    print(comparison_df.round(2))

    # Efficiency score: three sub-scores, each capped at 10 points
    print(f"\n⭐ INFRASTRUKTUR-EFFIZIENZ-RANKING:")

    for record in comparison_matrix:
        edge_score = min(record['Total_Edges'] / 10, 10)
        coverage_score = min(record['Geo_Coverage'] / 2, 10)
        stability_score = max(0, 10 - record['Anomaly_Rate_%'] / 2)

        total_score = (edge_score + coverage_score + stability_score) / 3

        print(f"  {record['Provider']}: {total_score:.1f}/10")
        print(f"    Edge-Density: {edge_score:.1f}/10")
        print(f"    Geo-Coverage: {coverage_score:.1f}/10")
        print(f"    Stabilität: {stability_score:.1f}/10")

    return comparison_df

# ================================================================
# 4. STATISTISCHE & PRÄDIKTIVE ANALYSEN
# ================================================================

def statistical_predictive_analysis(perf_df, protocol_name):
    """Print correlation, provider-clustering and latency-prediction reports.

    Three sections are produced from the per-measurement performance table:

    1. Pearson correlation matrix of the numeric metrics and a list of pairs
       with ``|r| > 0.3``.
    2. K-Means clustering of anycast providers (providers with more than 100
       rows) on aggregated latency / loss / jitter / time-of-day features.
    3. A random-forest latency model (only with more than 1000 usable rows)
       plus a distribution of latency categories.

    Sections 2 and 3 need scikit-learn and are skipped with a warning when it
    cannot be imported.

    Args:
        perf_df: DataFrame with the columns ``timestamp`` (datetime),
            ``service_type``, ``provider``, ``latency``, ``packet_loss``,
            ``jitter`` and ``total_hops``; ``intermediate_failures`` is used
            when present. ``None`` or an empty frame ends the function early.
        protocol_name: Label that is only used in the printed heading.

    Returns:
        None. All results are printed.
    """
    print(f"\n4. STATISTISCHE & PRÄDIKTIVE ANALYSEN - {protocol_name}")
    print("-" * 55)

    if perf_df is None or len(perf_df) == 0:
        print("Keine Performance-Daten verfügbar")
        return

    # Numeric correlation analysis
    print("\n📈 KORRELATIONSANALYSE:")

    numeric_cols = ['latency', 'packet_loss', 'jitter', 'intermediate_failures', 'total_hops']
    available_cols = [col for col in numeric_cols if col in perf_df.columns]

    if len(available_cols) >= 2:
        correlation_matrix = perf_df[available_cols].corr()

        print("Korrelations-Matrix:")
        print(correlation_matrix.round(3))

        # Report each unordered pair once (upper triangle of the matrix).
        print("\n🔍 SIGNIFIKANTE KORRELATIONEN (|r| > 0.3):")

        columns = list(correlation_matrix.columns)
        for position, var1 in enumerate(columns):
            for var2 in columns[position + 1:]:
                corr = correlation_matrix.loc[var1, var2]
                if abs(corr) > 0.3:
                    print(f"  {var1} ↔ {var2}: {corr:.3f}")

    # Time-series clustering
    print("\n🕰️ TIME-SERIES-CLUSTERING:")

    anycast_data = perf_df[perf_df['service_type'] == 'anycast'].copy()

    if len(anycast_data) > 0:
        anycast_data['hour'] = anycast_data['timestamp'].dt.hour
        anycast_data['day_of_week'] = anycast_data['timestamp'].dt.dayofweek

        # One aggregated performance profile per provider
        provider_profiles = []

        for provider in anycast_data['provider'].unique():
            provider_data = anycast_data[anycast_data['provider'] == provider]

            if len(provider_data) <= 100:
                continue

            hourly_avg = provider_data.groupby('hour')['latency'].mean()
            daily_avg = provider_data.groupby('day_of_week')['latency'].mean()

            # reindex tolerates weekdays that never occur in the data, whereas
            # list indexing raises KeyError for a missing label.
            weekend_mean = daily_avg.reindex([5, 6]).mean()
            weekday_mean = daily_avg.reindex([0, 1, 2, 3, 4]).mean()
            weekend_weekday_diff = weekend_mean - weekday_mean
            if pd.isna(weekend_weekday_diff):
                # No weekend or no weekday samples: use a neutral value so the
                # clustering below does not receive NaN.
                weekend_weekday_diff = 0.0

            provider_profiles.append({
                'provider': provider,
                'avg_latency': provider_data['latency'].mean(),
                'latency_std': provider_data['latency'].std(),
                'avg_loss': provider_data['packet_loss'].mean(),
                'avg_jitter': provider_data['jitter'].mean(),
                'peak_hour_variance': hourly_avg.max() - hourly_avg.min(),
                'weekend_weekday_diff': weekend_weekday_diff
            })

        if provider_profiles:
            try:
                from sklearn.cluster import KMeans
                from sklearn.preprocessing import StandardScaler
            except ImportError:
                print("  ⚠️ Scikit-learn nicht verfügbar - überspringe Clustering")
            else:
                profiles_df = pd.DataFrame(provider_profiles)

                # K-Means on the standardised profile features
                feature_cols = [col for col in profiles_df.columns if col != 'provider']
                scaled_features = StandardScaler().fit_transform(profiles_df[feature_cols])

                # At most three clusters, never more than there are providers
                n_clusters = min(3, len(profiles_df))
                kmeans = KMeans(n_clusters=n_clusters, random_state=42)
                profiles_df['cluster'] = kmeans.fit_predict(scaled_features)

                print("Provider-Performance-Cluster:")
                for cluster_id in range(n_clusters):
                    cluster_data = profiles_df[profiles_df['cluster'] == cluster_id]
                    print(f"  Cluster {cluster_id}: {cluster_data['provider'].tolist()}")
                    print(f"    Durchschn. Latenz: {cluster_data['avg_latency'].mean():.2f}ms")
                    print(f"    Durchschn. Stabilität: {cluster_data['latency_std'].mean():.2f}ms")

    # Predictive modelling
    print("\n🔮 PREDICTIVE MODELING:")

    try:
        modeling_data = perf_df.copy()

        # Derive the time features unless the caller already supplied them.
        if 'hour' not in modeling_data.columns:
            modeling_data['timestamp'] = pd.to_datetime(modeling_data['timestamp'])
            modeling_data['hour'] = modeling_data['timestamp'].dt.hour
            modeling_data['day_of_week_num'] = modeling_data['timestamp'].dt.dayofweek
        elif 'day_of_week_num' not in modeling_data.columns:
            modeling_data['timestamp'] = pd.to_datetime(modeling_data['timestamp'])
            modeling_data['day_of_week_num'] = modeling_data['timestamp'].dt.dayofweek

        feature_columns = ['hour', 'day_of_week_num', 'total_hops']

        # Encode the service type numerically; unknown types become NaN and
        # are removed by the dropna below.
        service_type_mapping = {'anycast': 0, 'pseudo-anycast': 1, 'unicast': 2}
        modeling_data['service_type_numeric'] = modeling_data['service_type'].map(service_type_mapping)
        feature_columns.append('service_type_numeric')

        modeling_data = modeling_data.dropna(subset=feature_columns + ['latency'])

        if len(modeling_data) > 1000:
            try:
                from sklearn.ensemble import RandomForestRegressor
                from sklearn.model_selection import train_test_split
                from sklearn.metrics import mean_squared_error, r2_score

                X = modeling_data[feature_columns]
                y = modeling_data['latency']

                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

                rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
                rf_model.fit(X_train, y_train)

                y_pred = rf_model.predict(X_test)

                mse = mean_squared_error(y_test, y_pred)
                r2 = r2_score(y_test, y_pred)

                print("  Latenz-Vorhersage-Modell:")
                print(f"    R² Score: {r2:.3f}")
                print(f"    RMSE: {np.sqrt(mse):.3f}ms")

                print("    Feature-Wichtigkeit:")
                for feat, imp in zip(feature_columns, rf_model.feature_importances_):
                    print(f"      {feat}: {imp:.3f}")

                print("\n  Performance-Kategorie-Vorhersage:")

                # Bucket latency into four quality classes
                modeling_data['latency_category'] = pd.cut(
                    modeling_data['latency'],
                    bins=[0, 5, 20, 100, float('inf')],
                    labels=['Excellent', 'Good', 'Fair', 'Poor'])

                category_dist = modeling_data['latency_category'].value_counts()
                print("    Latenz-Kategorien:")
                for category, count in category_dist.items():
                    print(f"      {category}: {count:,} ({count/len(modeling_data)*100:.1f}%)")

            except ImportError:
                print("  ⚠️ Scikit-learn nicht verfügbar - überspringe Machine Learning")
            except Exception as e:
                print(f"  ⚠️ Predictive Modeling fehlgeschlagen: {e}")
        else:
            print(f"  ⚠️ Nicht genügend Daten für Predictive Modeling ({len(modeling_data)} < 1000)")

    except Exception as e:
        # Section-level guard: report the failure instead of aborting the
        # remaining notebook analyses.
        print(f"  ⚠️ Predictive Modeling komplett fehlgeschlagen: {e}")
        print(f"  Verfügbare Spalten: {list(perf_df.columns) if perf_df is not None else 'Keine'}")

# ================================================================
# 5. QUALITÄTS- UND SLA-ANALYSEN
# ================================================================

def _share_below(values, limit):
    """Return the percentage (0-100) of entries in *values* strictly below *limit*."""
    return (values < limit).mean() * 100


def quality_sla_analysis(perf_df, protocol_name):
    """Print a service-quality / SLA compliance report and return its summary table.

    The report has three parts, all written to stdout:

    1. Per ``service_type``: share of measurements below each latency
       threshold, share with low packet loss, and availability.
    2. Per anycast ``provider``: the same rates plus p95/p99/max latency and a
       weighted score (40% excellent latency, 30% availability, 30% reliability).
    3. For anycast rows only: mean latency during hours 9-17 versus the
       remaining hours, and weekdays versus Saturday/Sunday.

    Args:
        perf_df: DataFrame with the columns ``service_type``, ``provider``,
            ``latency`` and ``packet_loss``. Part 3 additionally needs either
            ``timestamp`` or pre-computed ``hour`` / ``day_of_week`` columns.
            The frame is not modified.
        protocol_name: Label used in the report heading (e.g. "IPv4").

    Returns:
        A DataFrame with one row per service type and the columns
        ``Service_Type``, ``Excellent_Latency_%``, ``Good_Latency_%``,
        ``Acceptable_Latency_%``, ``Low_PacketLoss_%`` and ``Availability_%``;
        ``None`` when ``perf_df`` is ``None`` or empty.
    """
    print(f"\n5. QUALITÄTS- UND SLA-ANALYSEN - {protocol_name}")
    print("-" * 45)

    if perf_df is None or len(perf_df) == 0:
        print("Keine Performance-Daten verfügbar")
        return None

    # Default SLA thresholds. 'availability_min' is only listed in the report
    # header; no check below compares against it.
    SLA_THRESHOLDS = {
        'latency_excellent': 5,      # < 5ms
        'latency_good': 20,          # < 20ms
        'latency_acceptable': 100,   # < 100ms
        'packet_loss_max': 1,        # < 1%
        'availability_min': 99.9     # > 99.9%
    }
    # A measurement counts as "available" unless every packet was lost.
    TOTAL_LOSS = 100

    print(f"\n📊 SLA-COMPLIANCE-ANALYSE:")
    print(f"SLA-Schwellwerte:")
    for threshold, value in SLA_THRESHOLDS.items():
        print(f"  {threshold}: {value}")

    # --- Part 1: compliance per service type -------------------------------
    sla_results = []
    for service_type in perf_df['service_type'].unique():
        type_data = perf_df[perf_df['service_type'] == service_type]
        if len(type_data) == 0:
            # e.g. a NaN service type, which never compares equal to itself
            continue

        latency = type_data['latency']
        packet_loss = type_data['packet_loss']
        sla_results.append({
            'Service_Type': service_type,
            'Excellent_Latency_%': _share_below(latency, SLA_THRESHOLDS['latency_excellent']),
            'Good_Latency_%': _share_below(latency, SLA_THRESHOLDS['latency_good']),
            'Acceptable_Latency_%': _share_below(latency, SLA_THRESHOLDS['latency_acceptable']),
            'Low_PacketLoss_%': _share_below(packet_loss, SLA_THRESHOLDS['packet_loss_max']),
            'Availability_%': _share_below(packet_loss, TOTAL_LOSS),
        })

    sla_df = pd.DataFrame(sla_results)
    print(f"\nSLA-Compliance-Übersicht:")
    print(sla_df.round(1))

    # --- Part 2: scorecard per anycast provider ----------------------------
    print(f"\n🏢 PROVIDER-SLA-SCORECARD:")

    anycast_data = perf_df[perf_df['service_type'] == 'anycast']

    for provider in anycast_data['provider'].unique():
        provider_data = anycast_data[anycast_data['provider'] == provider]
        if len(provider_data) == 0:
            continue

        # Same thresholds as part 1 (previously repeated as literals).
        excellent_rate = _share_below(provider_data['latency'], SLA_THRESHOLDS['latency_excellent'])
        availability = _share_below(provider_data['packet_loss'], TOTAL_LOSS)
        reliability = _share_below(provider_data['packet_loss'], SLA_THRESHOLDS['packet_loss_max'])

        # Worst-case figures
        p95_latency = provider_data['latency'].quantile(0.95)
        p99_latency = provider_data['latency'].quantile(0.99)
        max_latency = provider_data['latency'].max()

        print(f"\n  {provider}:")
        print(f"    Excellent Performance: {excellent_rate:.1f}%")
        print(f"    Availability: {availability:.1f}%")
        print(f"    Reliability: {reliability:.1f}%")
        print(f"    95th Percentile Latenz: {p95_latency:.1f}ms")
        print(f"    99th Percentile Latenz: {p99_latency:.1f}ms")
        print(f"    Worst-Case Latenz: {max_latency:.1f}ms")

        sla_score = (excellent_rate * 0.4 + availability * 0.3 + reliability * 0.3)
        print(f"    📋 Overall SLA-Score: {sla_score:.1f}/100")

    # --- Part 3: degradation by time of day / day of week ------------------
    print(f"\n📉 PERFORMANCE-DEGRADATION-ANALYSE:")

    # Derive the time columns on a private copy of the anycast slice. Doing it
    # here (rather than on perf_df after the slice was taken) guarantees the
    # slice really carries both columns and leaves the caller's frame untouched.
    try:
        anycast_with_time = anycast_data.copy()
        missing = [col for col in ('hour', 'day_of_week')
                   if col not in anycast_with_time.columns]
        if missing:
            stamps = pd.to_datetime(anycast_with_time['timestamp'])
            if 'hour' in missing:
                anycast_with_time['hour'] = stamps.dt.hour
            if 'day_of_week' in missing:
                anycast_with_time['day_of_week'] = stamps.dt.day_name()
    except Exception as e:
        # Best effort: the SLA table is still useful without the time analysis.
        print(f"    ⚠️ Zeitstempel-Verarbeitung fehlgeschlagen: {e}")
        print(f"    Überspringe Performance-Degradation-Analyse")
        return sla_df

    if len(anycast_with_time) == 0:
        print(f"    Keine Anycast-Daten verfügbar")
        return sla_df

    try:
        # Series.between is inclusive on both ends: hours 9, 10, ..., 17.
        # NOTE(review): the original comment calls these UTC hours - confirm
        # the timezone of the timestamps against the data source.
        in_business_hours = anycast_with_time['hour'].between(9, 17)
        business_hours = anycast_with_time[in_business_hours]
        off_hours = anycast_with_time[~in_business_hours]

        if len(business_hours) > 0 and len(off_hours) > 0:
            business_latency = business_hours['latency'].mean()
            off_hours_latency = off_hours['latency'].mean()
            degradation = ((business_latency - off_hours_latency) / off_hours_latency) * 100

            print(f"  Business Hours vs. Off-Hours:")
            print(f"    Business Hours Latenz: {business_latency:.1f}ms")
            print(f"    Off-Hours Latenz: {off_hours_latency:.1f}ms")
            print(f"    Performance-Degradation: {degradation:.1f}%")

        is_weekend = anycast_with_time['day_of_week'].isin(['Saturday', 'Sunday'])
        weekend_data = anycast_with_time[is_weekend]
        weekday_data = anycast_with_time[~is_weekend]

        if len(weekend_data) > 0 and len(weekday_data) > 0:
            weekend_latency = weekend_data['latency'].mean()
            weekday_latency = weekday_data['latency'].mean()
            weekend_effect = ((weekday_latency - weekend_latency) / weekend_latency) * 100

            print(f"  Wochenende vs. Werktage:")
            print(f"    Wochenende Latenz: {weekend_latency:.1f}ms")
            print(f"    Werktage Latenz: {weekday_latency:.1f}ms")
            print(f"    Wochenend-Effekt: {weekend_effect:.1f}%")
    except Exception as e:
        print(f"    ⚠️ Performance-Degradation-Analyse fehlgeschlagen: {e}")
        print(f"    Verfügbare Spalten: {list(anycast_data.columns)}")

    return sla_df

# ================================================================
# 6. AKAMAI-PROBLEM DEEP-DIVE
# ================================================================

def _summarize_path_diversity(paths, provider_name):
    """Print and return routing-diversity figures for one provider's paths.

    Args:
        paths: List of path dicts. Each is read via the keys ``region``,
            ``asns`` (iterable) and ``hops`` (list of dicts with ``hostname``).
        provider_name: Label printed above the figures.

    Returns:
        ``None`` when *paths* is empty; otherwise a dict with the keys
        ``total_asns``, ``avg_asns_per_region``, ``total_destinations`` and
        ``unique_final_ips``.
    """
    if not paths:
        return None

    all_asns = set()
    region_asns = defaultdict(set)
    final_destinations = defaultdict(set)
    unique_final_ips = set()

    for path in paths:
        region = path['region']
        # A region only gets an entry once it contributes at least one ASN,
        # so regions without ASNs do not pull the per-region average down.
        for asn in path['asns']:
            all_asns.add(asn)
            region_asns[region].add(asn)

        if path['hops']:
            hostname = path['hops'][-1]['hostname']
            final_destinations[region].add(hostname)
            # Final hops written as "name (ip)" expose the address in parentheses.
            if '(' in hostname and ')' in hostname:
                unique_final_ips.add(hostname.split('(')[-1].split(')')[0])

    # Guard both averages: paths without ASNs or without hops leave the
    # corresponding mapping empty, which used to divide by zero.
    if region_asns:
        avg_asns_per_region = np.mean([len(asns) for asns in region_asns.values()])
    else:
        avg_asns_per_region = 0.0

    total_destinations = sum(len(dests) for dests in final_destinations.values())
    if final_destinations:
        avg_destinations_per_region = total_destinations / len(final_destinations)
    else:
        avg_destinations_per_region = 0.0

    print(f"\n  {provider_name}:")
    print(f"    Gesamte ASNs: {len(all_asns)}")
    print(f"    Durchschn. ASNs/Region: {avg_asns_per_region:.1f}")
    print(f"    Finale Destinations: {total_destinations}")
    print(f"    Durchschn. Destinations/Region: {avg_destinations_per_region:.1f}")
    print(f"    Eindeutige finale IPs: {len(unique_final_ips)}")

    return {
        'total_asns': len(all_asns),
        'avg_asns_per_region': avg_asns_per_region,
        'total_destinations': total_destinations,
        'unique_final_ips': len(unique_final_ips)
    }


def akamai_deep_dive_analysis(network_paths, perf_df, protocol_name):
    """Print a comparison of Akamai against Cloudflare and Google.

    The report covers path counts and ASN/destination diversity per provider,
    latency and packet-loss statistics per provider, the ASN-diversity gap
    between Akamai and Cloudflare, the five Akamai regions with the highest
    mean latency, and the ratio of Akamai's mean latency to the mean latency
    of all ``unicast`` measurements.

    Args:
        network_paths: List of path dicts with the keys ``provider``,
            ``region``, ``asns`` and ``hops``.
        perf_df: DataFrame with the columns ``provider``, ``service_type``,
            ``region``, ``latency`` and ``packet_loss``.
        protocol_name: Label used in the report heading (e.g. "IPv4").

    Returns:
        None. All results are written to stdout.
    """
    print(f"\n6. AKAMAI-PROBLEM DEEP-DIVE - {protocol_name}")
    print("-" * 45)

    if not network_paths or perf_df is None:
        print("Keine Daten für Akamai-Analyse verfügbar")
        return

    print(f"\n🔍 AKAMAI vs. ECHTE ANYCAST ARCHITEKTUR-VERGLEICH:")

    providers = ('Akamai', 'Cloudflare', 'Google')
    paths_by_provider = {
        name: [p for p in network_paths if p['provider'] == name]
        for name in providers
    }

    print(f"\nRouting-Diversität-Vergleich:")
    for name in providers:
        print(f"  {name} Pfade: {len(paths_by_provider[name])}")

    # Diversity figures per provider (each call prints its own section).
    diversity = {}
    for name in providers:
        diversity[name] = _summarize_path_diversity(paths_by_provider[name], name)
    akamai_stats = diversity['Akamai']
    cloudflare_stats = diversity['Cloudflare']

    # Performance comparison
    print(f"\n📊 PERFORMANCE-ARCHITEKTUR-KORRELATION:")

    performance_comparison = []
    for name in providers:
        data = perf_df[perf_df['provider'] == name]
        if len(data) > 0:
            performance_comparison.append({
                'Provider': name,
                'Avg_Latency': data['latency'].mean(),
                'Latency_Std': data['latency'].std(),
                'P95_Latency': data['latency'].quantile(0.95),
                'Avg_PacketLoss': data['packet_loss'].mean(),
                'Availability': (data['packet_loss'] < 100).mean() * 100
            })

    perf_comparison_df = pd.DataFrame(performance_comparison)
    print(perf_comparison_df.round(2))

    # Akamai diagnosis
    print(f"\n🚨 AKAMAI-PROBLEM-DIAGNOSE:")

    if akamai_stats and cloudflare_stats:
        print(f"\n  Routing-Diversität-Defizit:")
        cloudflare_asns = cloudflare_stats['total_asns']
        if cloudflare_asns > 0:
            asn_deficit = (cloudflare_asns - akamai_stats['total_asns']) / cloudflare_asns * 100
            print(f"    ASN-Diversität-Defizit: {asn_deficit:.1f}% weniger als Cloudflare")
        else:
            # Without any Cloudflare ASN there is no baseline to compare to.
            print(f"    ASN-Diversität-Defizit: nicht berechenbar (Cloudflare ohne ASNs)")

        if akamai_stats['unique_final_ips'] == 1:
            print(f"    🔴 PROBLEM: Akamai routet zu nur 1 finaler IP (echtes Unicast)")
        else:
            print(f"    🟡 WARNUNG: Akamai hat {akamai_stats['unique_final_ips']} finale IPs")

    # Regional Akamai inefficiency
    print(f"\n🌍 REGIONALE AKAMAI-INEFFIZIENZ:")

    akamai_perf = perf_df[perf_df['provider'] == 'Akamai']

    if len(akamai_perf) > 0:
        regional_akamai = akamai_perf.groupby('region')['latency'].agg(['mean', 'std']).round(1)
        regional_akamai = regional_akamai.sort_values('mean', ascending=False)

        print(f"Schlechteste Akamai-Regionen:")
        # 'row' instead of 'stats' so the scipy.stats import is not shadowed.
        for region, row in regional_akamai.head(5).iterrows():
            print(f"  {region}: {row['mean']:.1f}ms (±{row['std']:.1f}ms)")

    # Akamai vs. the unicast baseline
    print(f"\n📍 AKAMAI vs. GEOGRAFISCHE REFERENZ:")

    unicast_perf = perf_df[perf_df['service_type'] == 'unicast']

    if len(akamai_perf) > 0 and len(unicast_perf) > 0:
        akamai_avg = akamai_perf['latency'].mean()
        unicast_avg = unicast_perf['latency'].mean()

        performance_ratio = akamai_avg / unicast_avg

        print(f"  Akamai Durchschn. Latenz: {akamai_avg:.1f}ms")
        print(f"  Unicast Durchschn. Latenz: {unicast_avg:.1f}ms")
        print(f"  Performance-Ratio: {performance_ratio:.2f}")

        # A ratio above 0.8 is treated as "behaves like unicast".
        if performance_ratio > 0.8:
            print(f"  🔴 BESTÄTIGT: Akamai verhält sich wie Unicast ({performance_ratio:.2f}x)")
        else:
            print(f"  🟡 TEILWEISE: Akamai besser als Unicast, aber schlechter als echtes Anycast")

# ================================================================
# 7. HAUPTANALYSE-FUNKTION - ALLE ERWEITERTEN ANALYSEN
# ================================================================

def _load_measurements(path, label):
    """Read one parquet measurement file and report the outcome on stdout.

    Args:
        path: Location handed to ``pd.read_parquet``.
        label: Protocol label used in the messages ("IPv4" or "IPv6").

    Returns:
        The loaded DataFrame, or ``None`` when loading failed for any reason.
    """
    try:
        frame = pd.read_parquet(path)
    except FileNotFoundError:
        print(f"❌ {label}-Datei nicht gefunden: {path}")
        print(f"💡 LÖSUNG: Passen Sie {label}_FILE in der Funktion run_comprehensive_analysis() an")
        return None
    except Exception as e:
        print(f"❌ Fehler beim Laden der {label}-Daten: {e}")
        return None

    print(f"✅ {label}: {frame.shape[0]:,} Messungen erfolgreich geladen")
    return frame


def run_comprehensive_analysis(ipv4_file=None, ipv6_file=None):
    """Load both measurement files and run every Phase 4 analysis on each.

    For IPv4 and then IPv6 the six analysis steps are executed in order
    (topology, anomalies, infrastructure mapping, statistical/predictive,
    quality/SLA, Akamai deep dive). An exception inside one protocol's run is
    reported and the next protocol is still processed.

    Args:
        ipv4_file: Path of the IPv4 parquet file. ``None`` selects the
            built-in default ``IPv4_FILE`` below.
        ipv6_file: Path of the IPv6 parquet file. ``None`` selects the
            built-in default ``IPv6_FILE`` below.

    Returns:
        None. The function returns early, after printing a hint, when either
        file cannot be loaded.
    """
    import traceback

    # IMPORTANT: adjust these defaults to your parquet files, or pass the
    # paths as arguments. Typical alternatives: "ipv4_measurements.parquet",
    # "data/ipv4_data.parquet", "/full/path/to/ipv4_data.parquet".
    IPv4_FILE = "../data/IPv4.parquet" if ipv4_file is None else ipv4_file
    IPv6_FILE = "../data/IPv6.parquet" if ipv6_file is None else ipv6_file

    print("🔄 LADE DATEN FÜR UMFASSENDE ANALYSE...")
    print(f"Versuche IPv4-Datei zu laden: {IPv4_FILE}")
    print(f"Versuche IPv6-Datei zu laden: {IPv6_FILE}")

    df_ipv4 = _load_measurements(IPv4_FILE, "IPv4")
    if df_ipv4 is None:
        return

    df_ipv6 = _load_measurements(IPv6_FILE, "IPv6")
    if df_ipv6 is None:
        return

    print(f"🚀 BEIDE DATEIEN ERFOLGREICH GELADEN - STARTE UMFASSENDE ANALYSE...")

    try:
        # Protocols whose run aborted, so the closing banner stays honest.
        failed_protocols = []

        for protocol, df in [("IPv4", df_ipv4), ("IPv6", df_ipv6)]:
            print(f"\n{'='*85}")
            print(f"UMFASSENDE ANALYSE FÜR {protocol}")
            print(f"{'='*85}")

            try:
                # 1. Network topology
                print(f"🌐 Starte Netzwerk-Topologie-Analyse...")
                network_paths, _ = analyze_network_topology(df, protocol)

                # 2. Anomaly deep dive
                print(f"🚨 Starte Anomalie-Deep-Dive...")
                anomalies_df, perf_df = comprehensive_anomaly_analysis(df, protocol)

                # 3. Provider infrastructure mapping
                print(f"🏗️ Starte Provider-Infrastruktur-Mapping...")
                map_provider_infrastructure(network_paths, anomalies_df, protocol)

                # 4. Statistical & predictive analyses
                print(f"📊 Starte Statistische & Prädiktive Analysen...")
                statistical_predictive_analysis(perf_df, protocol)

                # 5. Quality and SLA analyses
                print(f"📋 Starte Qualitäts- und SLA-Analysen...")
                quality_sla_analysis(perf_df, protocol)

                # 6. Akamai deep dive
                print(f"🔍 Starte Akamai-Problem Deep-Dive...")
                akamai_deep_dive_analysis(network_paths, perf_df, protocol)

            except Exception as e:
                # Best effort per protocol: report, keep the traceback for
                # debugging, and carry on with the next protocol.
                print(f"⚠️ Fehler in {protocol}-Analyse: {e}")
                traceback.print_exc()
                print(f"Setze mit nächstem Protokoll fort...")
                failed_protocols.append(protocol)
                continue

        print(f"\n{'='*85}")
        if failed_protocols:
            print(f"⚠️ ANALYSE UNVOLLSTÄNDIG - fehlgeschlagen für: {', '.join(failed_protocols)}")
        else:
            print("🎯 ALLE ERWEITERTEN ANALYSEN ABGESCHLOSSEN!")
            print("🏆 VOLLSTÄNDIGE ANYCAST-FORSCHUNGSSTUDIE ERSTELLT!")
        print("="*85)

        print(f"\n📋 KOMPLETTE ANALYSE-ÜBERSICHT:")
        analysis_components = [
            "✅ Phase 1: Datenverständnis & Überblick",
            "✅ Phase 2: Geografische Routing-Analyse", 
            "✅ Phase 3: Performance-Trends & Zeitanalyse",
            "✅ Phase 4a: Netzwerk-Topologie & Infrastruktur",
            "✅ Phase 4b: Anomalie-Deep-Dive & Klassifikation",
            "✅ Phase 4c: Provider-Infrastruktur-Mapping",
            "✅ Phase 4d: Statistische & Prädiktive Analysen",
            "✅ Phase 4e: Qualitäts- & SLA-Analysen",
            "✅ Phase 4f: Akamai-Problem Deep-Dive"
        ]

        for component in analysis_components:
            print(component)

        print(f"\n🚀 BEREIT FÜR:")
        print("  • Wissenschaftliche Publikation")
        print("  • Konferenz-Präsentation")
        print("  • Industry-Report")
        print("  • PhD-Dissertation-Kapitel")

    except Exception as e:
        print(f"❌ Unerwarteter Fehler in der Hauptanalyse: {e}")
        traceback.print_exc()

# Usage instructions, printed whenever this module is executed or imported.
_BANNER_RULE = "=" * 85
print(
    _BANNER_RULE,
    "📋 ANWEISUNGEN FÜR PHASE 4:",
    _BANNER_RULE,
    "1. Passen Sie die Dateipfade IPv4_FILE und IPv6_FILE an (Zeile ~970-971)",
    "2. Führen Sie run_comprehensive_analysis() aus",
    "3. Die Analyse dauert mehrere Minuten - seien Sie geduldig!",
    "4. Alle Ergebnisse werden in der Konsole ausgegeben",
    _BANNER_RULE,
    sep="\n",
)

# Run the full analysis only when started as a script.
if __name__ == "__main__":
    run_comprehensive_analysis()

=== PHASE 4: UMFASSENDE ERWEITERTE ANALYSEN ===
Netzwerk-Topologie, Anomalie-Deep-Dive, Predictive Analytics & Qualitätsanalysen
📋 ANWEISUNGEN FÜR PHASE 4:
1. Passen Sie die Dateipfade IPv4_FILE und IPv6_FILE an (Zeile ~970-971)
2. Führen Sie run_comprehensive_analysis() aus
3. Die Analyse dauert mehrere Minuten - seien Sie geduldig!
4. Alle Ergebnisse werden in der Konsole ausgegeben
🔄 LADE DATEN FÜR UMFASSENDE ANALYSE...
Versuche IPv4-Datei zu laden: ../data/IPv4.parquet
Versuche IPv6-Datei zu laden: ../data/IPv6.parquet
✅ IPv4: 160,923 Messungen erfolgreich geladen
✅ IPv6: 160,923 Messungen erfolgreich geladen
🚀 BEIDE DATEIEN ERFOLGREICH GELADEN - STARTE UMFASSENDE ANALYSE...

UMFASSENDE ANALYSE FÜR IPv4
🌐 Starte Netzwerk-Topologie-Analyse...

1. NETZWERK-TOPOLOGIE & INFRASTRUKTUR - IPv4
------------------------------------------------------------

🌐 HOP-BY-HOP NETZWERK-ANALYSE:
Netzwerk-Pfade analysiert: 160,923

📊 ASN-DIVERSITÄT PRO SERVICE:
  Heise:
    Gesamte ASNs: 6
    Durchs