# MITM Proxy Log Visualization Dashboard

This notebook provides comprehensive analysis and visualization of MITM proxy logs. It includes traffic pattern analysis, security monitoring, and interactive dashboards for understanding network behavior.

## Features:
- 📊 HTTP Traffic Pattern Analysis
- 🔒 Security-focused Visualizations
- ⏱️ Timing and Performance Analysis
- 🌐 Host and Domain Intelligence
- 📈 Interactive Dashboards
- 🔍 Anomaly Detection
- 📋 Export and Reporting

---

## 1. Import Required Libraries

In [None]:
# Core data manipulation and analysis
import pandas as pd
import numpy as np
import json
import re
from datetime import datetime, timedelta
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Visualization libraries
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

# Interactive widgets
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# URL and domain analysis
from urllib.parse import urlparse
import tldextract

# Statistical analysis
from scipy import stats
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Plot and display defaults for the rest of the notebook: reset matplotlib to
# its stock style, switch seaborn to the "husl" palette, and let pandas render
# every column and up to 100 rows of a DataFrame.
plt.style.use('default')
sns.set_palette("husl")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Confirm the import cell ran to completion and record the (naive, local-clock)
# time at which this analysis run started.
print("‚úÖ All libraries imported successfully!")
print(f"üìÖ Analysis started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

## 2. Load and Parse MITM Log Files

In [None]:
def load_mitm_logs(log_path="../data_extraction/logs/mitm_logs/"):
    """
    Load MITM proxy log entries from every ``*.jsonl`` file in a directory.

    Each non-blank line is parsed as one JSON document. A line that is not
    valid JSON, or that decodes to something other than a JSON object, is
    reported and skipped; the remaining lines of that file are still loaded.

    Args:
        log_path: Directory searched (non-recursively) for ``*.jsonl`` files.

    Returns:
        A DataFrame with one row per parsed entry and an extra
        ``source_file`` column holding the name of the file the entry came
        from. An empty DataFrame when no JSONL file is found or no entry
        could be parsed.
    """
    log_dir = Path(log_path)
    all_logs = []

    print(f"üîç Searching for log files in: {log_dir}")

    # Sorted so that files are always loaded in the same order; the order
    # produced by glob itself depends on the filesystem.
    jsonl_files = sorted(log_dir.glob("*.jsonl"))

    if not jsonl_files:
        print("‚ùå No JSONL files found!")
        return pd.DataFrame()

    print(f"üìÅ Found {len(jsonl_files)} log files:")

    for file_path in jsonl_files:
        print(f"  üìÑ Loading: {file_path.name}")

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                for line_num, line in enumerate(f, 1):
                    text = line.strip()
                    if not text:
                        continue

                    try:
                        log_entry = json.loads(text)
                    except json.JSONDecodeError as e:
                        print(f"    ‚ö†Ô∏è  Line {line_num}: JSON decode error - {e}")
                        continue

                    # A valid JSON array/number/string cannot carry the
                    # 'source_file' key. Skip just this line instead of letting
                    # the failure abort the rest of the file.
                    if not isinstance(log_entry, dict):
                        print(f"    ‚ö†Ô∏è  Line {line_num}: expected a JSON object, "
                              f"got {type(log_entry).__name__}")
                        continue

                    log_entry['source_file'] = file_path.name
                    all_logs.append(log_entry)

        except (OSError, UnicodeError) as e:
            # Unreadable or mis-encoded file: report it and move on to the next.
            print(f"    ‚ùå Error reading {file_path.name}: {e}")

    print(f"\n‚úÖ Loaded {len(all_logs)} log entries total")

    if not all_logs:
        return pd.DataFrame()

    return pd.DataFrame(all_logs)

# Read every JSONL log found under the default directory into a raw DataFrame.
df_raw = load_mitm_logs()

# Either hint at a wrong path, or summarise what was loaded: row count,
# column names and the deep (object-inclusive) memory footprint in MB.
if df_raw.empty:
    print("‚ö†Ô∏è  No data loaded. Please check the log file path.")
else:
    print(f"\nüìä Dataset Overview:")
    print(f"   ‚Ä¢ Total entries: {len(df_raw):,}")
    print(f"   ‚Ä¢ Columns: {list(df_raw.columns)}")
    print(f"   ‚Ä¢ Memory usage: {df_raw.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

## 3. Data Preprocessing and Cleaning

In [None]:
def _record_field(record, key, default):
    """Return ``record[key]`` if ``record`` is a dict holding a non-None value, else ``default``."""
    if not isinstance(record, dict):
        return default
    value = record.get(key)
    return default if value is None else value


def _header_value(record, name, default=''):
    """Read one header from a request/response dict, matching the header name case-insensitively."""
    headers = _record_field(record, 'headers', None)
    if not isinstance(headers, dict):
        return default
    if name in headers:
        value = headers[name]
    else:
        # HTTP header names are case-insensitive, so 'Content-Length' must
        # match a lookup for 'content-length'.
        wanted = name.lower()
        value = next(
            (v for k, v in headers.items() if isinstance(k, str) and k.lower() == wanted),
            None,
        )
    return default if value is None else value


def _to_int(value):
    """Convert a header value to int, treating missing or malformed values as 0."""
    try:
        return int(value or 0)
    except (TypeError, ValueError):
        return 0


def _duration_ms(timings):
    """Return response_end - request_start in milliseconds, or 0 when either mark is absent."""
    start = _record_field(timings, 'request_start', None)
    end = _record_field(timings, 'response_end', None)
    if not start or not end:
        return 0
    try:
        return (end - start) * 1000
    except TypeError:
        return 0


def _method_category(method):
    """Bucket an HTTP method into READ / WRITE / DELETE / OTHER."""
    if method in ('GET', 'HEAD'):
        return 'READ'
    if method in ('POST', 'PUT', 'PATCH'):
        return 'WRITE'
    if method == 'DELETE':
        return 'DELETE'
    return 'OTHER'


def _status_category(code):
    """Bucket a numeric HTTP status code by its hundreds class."""
    if 200 <= code < 300:
        return 'Success'
    if 300 <= code < 400:
        return 'Redirect'
    if 400 <= code < 500:
        return 'Client Error'
    if 500 <= code < 600:
        return 'Server Error'
    return 'Unknown'


def preprocess_mitm_data(df_raw):
    """
    Clean and structure MITM log data for analysis.

    Flattens the nested ``request``, ``response`` and ``timings`` dicts into
    plain columns, derives domain parts, categories, boolean flags and
    time-of-day features, and drops rows whose timestamp cannot be parsed.
    A missing source column (or a row whose value is not a dict) yields the
    same defaults a missing key would: ``''`` for text and ``0`` for numbers.
    Every derived column is therefore always present in the result.

    Args:
        df_raw: DataFrame of raw log entries; it is not modified.

    Returns:
        A new DataFrame containing the original columns plus the derived
        ones, restricted to rows with a valid ``datetime``. An empty
        DataFrame when ``df_raw`` is empty.
    """
    if df_raw.empty:
        print("‚ùå No data to process")
        return pd.DataFrame()

    print("üîß Preprocessing MITM log data...")

    # Work on a copy so the caller's frame is left untouched.
    df = df_raw.copy()

    # Stand-in for a nested column that is absent from the logs altogether.
    no_record = pd.Series([None] * len(df), index=df.index, dtype=object)

    # Parse timestamp; unparseable values become NaT and are dropped below.
    # NOTE(review): numeric timestamps are interpreted by pandas as nanoseconds
    # since the epoch - confirm the log format if timestamps are epoch seconds.
    if 'timestamp' in df.columns:
        df['datetime'] = pd.to_datetime(df['timestamp'], errors='coerce')
    else:
        df['datetime'] = pd.Series(pd.NaT, index=df.index, dtype='datetime64[ns]')

    # Extract request information
    requests = df['request'] if 'request' in df.columns else no_record
    df['method'] = requests.apply(lambda r: _record_field(r, 'method', ''))
    df['host'] = requests.apply(lambda r: _record_field(r, 'host', ''))
    df['path'] = requests.apply(lambda r: _record_field(r, 'path', ''))
    df['scheme'] = requests.apply(lambda r: _record_field(r, 'scheme', ''))
    df['port'] = requests.apply(lambda r: _record_field(r, 'port', 0))
    df['user_agent'] = requests.apply(lambda r: _header_value(r, 'user-agent', ''))
    df['req_content_length'] = requests.apply(
        lambda r: _to_int(_header_value(r, 'content-length', 0)))

    # Extract response information
    responses = df['response'] if 'response' in df.columns else no_record
    raw_status = responses.apply(lambda r: _record_field(r, 'status_code', 0))
    # Coerce to integers so the range comparisons below cannot hit None/str.
    df['status_code'] = pd.to_numeric(raw_status, errors='coerce').fillna(0).astype(int)
    df['response_reason'] = responses.apply(lambda r: _record_field(r, 'reason', ''))
    df['resp_content_length'] = responses.apply(
        lambda r: _to_int(_header_value(r, 'content-length', 0)))
    df['content_type'] = responses.apply(lambda r: _header_value(r, 'content-type', ''))

    # Extract timing information (milliseconds; 0 when unknown)
    timings = df['timings'] if 'timings' in df.columns else no_record
    df['request_duration'] = timings.apply(_duration_ms)

    # Extract domain and TLD information. Each distinct host is parsed once
    # instead of three times per row.
    host_parts = {host: tldextract.extract(host) for host in df['host'].unique() if host}
    df['domain'] = df['host'].map(lambda h: host_parts[h].domain if h else '')
    df['tld'] = df['host'].map(lambda h: host_parts[h].suffix if h else '')
    df['subdomain'] = df['host'].map(lambda h: host_parts[h].subdomain if h else '')

    # Create full URL (empty when the host is unknown)
    df['full_url'] = [
        f"{scheme}://{host}{path}" if host else ''
        for scheme, host, path in zip(df['scheme'], df['host'], df['path'])
    ]

    # Categorize HTTP methods and status codes
    df['method_category'] = df['method'].apply(_method_category)
    df['status_category'] = df['status_code'].apply(_status_category)

    # Identify potentially suspicious patterns
    df['is_api_call'] = df['path'].str.contains(r'/api/|/v\d+/|\.json|\.xml', case=False, na=False)
    df['has_query_params'] = df['path'].str.contains(r'\?', na=False)
    df['is_secure'] = df['scheme'] == 'https'
    df['is_large_request'] = df['req_content_length'] > 1000000  # >1MB
    df['is_large_response'] = df['resp_content_length'] > 1000000  # >1MB

    # Drop rows with invalid timestamps BEFORE deriving time features, so that
    # 'hour' stays an integer column instead of being upcast to float by NaN.
    df = df.dropna(subset=['datetime']).copy()

    # Time-based features
    df['hour'] = df['datetime'].dt.hour
    df['day_of_week'] = df['datetime'].dt.day_name()
    df['date'] = df['datetime'].dt.date

    print(f"‚úÖ Preprocessing complete!")
    print(f"   ‚Ä¢ Final dataset size: {len(df):,} entries")
    print(f"   ‚Ä¢ Time range: {df['datetime'].min()} to {df['datetime'].max()}")
    print(f"   ‚Ä¢ Unique hosts: {df['host'].nunique()}")
    print(f"   ‚Ä¢ HTTP methods: {df['method'].value_counts().to_dict()}")

    return df

# Flatten and enrich the raw log entries; `df` is the frame every later
# section of the notebook works from.
df = preprocess_mitm_data(df_raw)

# Show the first rows of a few key columns as a quick sanity check. Skipped
# when preprocessing returned an empty frame, which would not have these columns.
if not df.empty:
    print(f"\nüìã Sample processed data:")
    display(df[['datetime', 'method', 'host', 'status_code', 'req_content_length', 'resp_content_length']].head())

## 4. Create Basic HTTP Traffic Visualizations

In [None]:
def create_basic_visualizations(df):
    """
    Render a 3x2 Plotly overview of the processed HTTP traffic.

    Panels, left to right and top to bottom: method share (pie), status
    category share (pie), ten most requested hosts (horizontal bar), request
    size against response size for a random sample of at most 1000 rows
    (scatter coloured by status code), requests per hour of day (bar) and
    HTTPS versus HTTP request counts (bar). A text summary is printed after
    the figure is shown.

    Args:
        df: Processed log DataFrame.

    Returns:
        None. Prints a notice and returns immediately when ``df`` is empty.
    """
    if df.empty:
        print("‚ùå No data available for visualization")
        return

    print("üìä Creating basic HTTP traffic visualizations...")

    # Grid layout: titles and the subplot type each cell must support.
    panel_titles = [
        'HTTP Methods Distribution', 'Status Code Distribution',
        'Top 10 Hosts by Request Count', 'Request vs Response Size',
        'Traffic by Hour of Day', 'Secure vs Non-Secure Traffic'
    ]
    panel_types = [
        [{"type": "pie"}, {"type": "pie"}],
        [{"type": "bar"}, {"type": "scatter"}],
        [{"type": "bar"}, {"type": "bar"}],
    ]
    dashboard = make_subplots(rows=3, cols=2, subplot_titles=panel_titles, specs=panel_types)

    # Aggregations feeding the six panels.
    by_method = df['method'].value_counts()
    by_status_group = df['status_category'].value_counts()
    busiest_hosts = df['host'].value_counts().head(10)
    scatter_rows = df.sample(min(1000, len(df)))  # capped sample keeps the scatter responsive
    per_hour = df.groupby('hour').size().reset_index(name='count')
    by_scheme = df['is_secure'].value_counts()
    scheme_names = ['HTTPS' if secure else 'HTTP' for secure in by_scheme.index]

    size_markers = dict(
        color=scatter_rows['status_code'],
        colorscale='Viridis',
        showscale=True,
        size=6,
        opacity=0.6
    )

    # (trace, row, col) in the order the panels are added to the figure.
    panels = [
        (go.Pie(labels=by_method.index, values=by_method.values, name="Methods"), 1, 1),
        (go.Pie(labels=by_status_group.index, values=by_status_group.values, name="Status"), 1, 2),
        (go.Bar(x=busiest_hosts.values, y=busiest_hosts.index, orientation='h', name="Hosts"), 2, 1),
        (
            go.Scatter(
                x=scatter_rows['req_content_length'],
                y=scatter_rows['resp_content_length'],
                mode='markers',
                marker=size_markers,
                name="Size Correlation"
            ),
            2, 2,
        ),
        (go.Bar(x=per_hour['hour'], y=per_hour['count'], name="Hourly Traffic"), 3, 1),
        (go.Bar(x=scheme_names, y=by_scheme.values, name="Security"), 3, 2),
    ]
    for trace, row, col in panels:
        dashboard.add_trace(trace, row=row, col=col)

    dashboard.update_layout(
        height=1200,
        showlegend=False,
        title_text="HTTP Traffic Analysis Dashboard",
        title_x=0.5
    )

    dashboard.show()

    # Headline numbers for the whole dataset.
    print(f"\nüìà Traffic Summary Statistics:")
    print(f"   ‚Ä¢ Total Requests: {len(df):,}")
    print(f"   ‚Ä¢ Unique Hosts: {df['host'].nunique()}")
    print(f"   ‚Ä¢ Date Range: {df['datetime'].min().date()} to {df['datetime'].max().date()}")
    print(f"   ‚Ä¢ Most Common Method: {df['method'].mode().iloc[0]} ({df['method'].value_counts().iloc[0]} requests)")
    print(f"   ‚Ä¢ Most Common Status: {df['status_code'].mode().iloc[0]} ({df['status_code'].value_counts().iloc[0]} responses)")
    print(f"   ‚Ä¢ Average Request Size: {df['req_content_length'].mean():.2f} bytes")
    print(f"   ‚Ä¢ Average Response Size: {df['resp_content_length'].mean():.2f} bytes")
    print(f"   ‚Ä¢ HTTPS Traffic: {(df['is_secure'].sum() / len(df) * 100):.1f}%")

# Draw the overview dashboard. Guarded because an empty frame (no logs
# loaded, or all rows dropped in preprocessing) has nothing to plot.
if not df.empty:
    create_basic_visualizations(df)

## 5. Time Series Analysis of Network Traffic

In [None]:
def create_time_series_analysis(df):
    """
    Plot hourly request volume, data transfer, latency and error counts.

    The data is bucketed into one-hour bins on the ``datetime`` column and
    drawn as four stacked line charts. A short text summary of peak hour,
    busiest weekday, total bytes transferred and mean request duration is
    printed afterwards.

    Args:
        df: Processed log DataFrame. When it has no ``request_duration``
            column (logs without timing data) durations are treated as 0,
            the same value preprocessing uses for an unknown duration.

    Returns:
        The hourly aggregate DataFrame with columns ``request_count``,
        ``req_content_length``, ``resp_content_length``, ``request_duration``
        and ``error_count``; ``None`` when ``df`` is empty.
    """
    if df.empty:
        print("‚ùå No data available for time series analysis")
        return

    print("‚è±Ô∏è Creating time series analysis...")

    # Logs without a 'timings' field never get this column during
    # preprocessing; add it on a new frame (the caller's is not modified).
    if 'request_duration' not in df.columns:
        df = df.assign(request_duration=0.0)

    # Create time-based aggregations
    df_time = df.set_index('datetime').sort_index()

    # One-hour bins. An offset object is used instead of the 'H' string
    # alias, which newer pandas releases deprecate.
    hourly_data = df_time.resample(pd.offsets.Hour()).agg({
        'method': 'count',
        'req_content_length': 'sum',
        'resp_content_length': 'sum',
        'request_duration': 'mean',
        'status_code': lambda x: (x >= 400).sum()  # Error count (4xx and above)
    }).rename(columns={'method': 'request_count', 'status_code': 'error_count'})

    # Create subplots
    fig = make_subplots(
        rows=4, cols=1,
        subplot_titles=[
            'Requests per Hour',
            'Data Transfer (Request vs Response)',
            'Average Response Time',
            # The plotted series is a count of errors, not a rate.
            'Error Count Over Time'
        ],
        vertical_spacing=0.08
    )

    # 1. Requests per hour
    fig.add_trace(
        go.Scatter(
            x=hourly_data.index,
            y=hourly_data['request_count'],
            mode='lines+markers',
            name='Requests/Hour',
            line=dict(color='blue', width=2)
        ),
        row=1, col=1
    )

    # 2. Data transfer
    fig.add_trace(
        go.Scatter(
            x=hourly_data.index,
            y=hourly_data['req_content_length'] / 1024**2,  # Convert to MB
            mode='lines',
            name='Request Data (MB)',
            line=dict(color='green')
        ),
        row=2, col=1
    )

    fig.add_trace(
        go.Scatter(
            x=hourly_data.index,
            y=hourly_data['resp_content_length'] / 1024**2,  # Convert to MB
            mode='lines',
            name='Response Data (MB)',
            line=dict(color='orange')
        ),
        row=2, col=1
    )

    # 3. Response time
    fig.add_trace(
        go.Scatter(
            x=hourly_data.index,
            y=hourly_data['request_duration'],
            mode='lines+markers',
            name='Avg Response Time (ms)',
            line=dict(color='purple', width=2)
        ),
        row=3, col=1
    )

    # 4. Error count
    # NOTE(review): this is the only trace in its subplot, so 'tozeroy' may be
    # the intended fill mode - confirm before changing.
    fig.add_trace(
        go.Scatter(
            x=hourly_data.index,
            y=hourly_data['error_count'],
            mode='lines+markers',
            name='Errors/Hour',
            line=dict(color='red', width=2),
            fill='tonexty'
        ),
        row=4, col=1
    )

    # Update layout
    fig.update_layout(
        height=1000,
        title_text="Time Series Analysis of Network Traffic",
        title_x=0.5,
        showlegend=True
    )

    # Label the time axis on each of the four rows
    for i in range(1, 5):
        fig.update_xaxes(title_text="Time", row=i, col=1)

    fig.update_yaxes(title_text="Request Count", row=1, col=1)
    fig.update_yaxes(title_text="Data Transfer (MB)", row=2, col=1)
    fig.update_yaxes(title_text="Response Time (ms)", row=3, col=1)
    fig.update_yaxes(title_text="Error Count", row=4, col=1)

    fig.show()

    # Traffic pattern analysis
    daily_pattern = df.groupby('hour')['method'].count()
    peak_hour = daily_pattern.idxmax()
    peak_requests = daily_pattern.max()

    weekly_pattern = df.groupby('day_of_week')['method'].count()
    busiest_day = weekly_pattern.idxmax()

    print(f"\nüìä Traffic Pattern Analysis:")
    # int(): 'hour' can be a float column, which would print as e.g. "13.0:00".
    print(f"   ‚Ä¢ Peak Hour: {int(peak_hour)}:00 with {peak_requests} requests")
    print(f"   ‚Ä¢ Busiest Day: {busiest_day} with {weekly_pattern.max()} requests")
    print(f"   ‚Ä¢ Total Data Transferred: {(df['req_content_length'].sum() + df['resp_content_length'].sum()) / 1024**3:.2f} GB")
    print(f"   ‚Ä¢ Average Request Duration: {df['request_duration'].mean():.2f} ms")

    return hourly_data

# Run the time series analysis and keep the hourly aggregates for later use.
# `hourly_data` is only defined when there was data to analyse.
if not df.empty:
    hourly_data = create_time_series_analysis(df)

## 6. Security and Anomaly Detection

In [None]:
def security_analysis(df):
    """
    Summarise security-relevant signals in the traffic and chart them.

    Counts a fixed set of indicators (oversized bodies, plain-HTTP traffic,
    4xx/5xx responses, methods other than GET/POST/PUT/DELETE), aggregates
    requests, bytes and HTTPS share per domain, shows a 2x2 Plotly dashboard,
    then prints the indicator totals, requests above the 95th percentile of
    request size, and user agents that occur exactly once.

    Args:
        df: Processed log DataFrame.

    Returns:
        A ``(domain_stats, suspicious_indicators)`` tuple: the per-domain
        aggregate DataFrame sorted by request count (descending) and the
        dict of indicator counts. ``None`` when ``df`` is empty.
    """
    if df.empty:
        print("‚ùå No data available for security analysis")
        return

    print("üîí Performing security analysis and anomaly detection...")

    status = df['status_code']
    common_methods = ['GET', 'POST', 'PUT', 'DELETE']

    # Indicator name -> number of matching requests (insertion order is the bar order).
    suspicious_indicators = {
        'Large Requests': df['is_large_request'].sum(),
        'Large Responses': df['is_large_response'].sum(),
        'Non-HTTPS Traffic': (~df['is_secure']).sum(),
        'Client Errors (4xx)': ((status >= 400) & (status < 500)).sum(),
        'Server Errors (5xx)': (status >= 500).sum(),
        'Unusual Methods': df[~df['method'].isin(common_methods)]['method'].count()
    }

    # Per-domain request count, byte totals and fraction of requests over HTTPS.
    per_domain = df.groupby('domain').agg({
        'method': 'count',
        'req_content_length': 'sum',
        'resp_content_length': 'sum',
        'is_secure': lambda x: x.sum() / len(x)  # HTTPS ratio
    })
    per_domain = per_domain.rename(columns={'method': 'requests', 'is_secure': 'https_ratio'})
    domain_stats = per_domain.sort_values('requests', ascending=False)

    dashboard = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            'Security Indicators',
            'Top Domains by Request Volume',
            'Status Code Distribution',
            'HTTPS vs HTTP by Domain (Top 10)'
        ],
        specs=[[{"type": "bar"}, {"type": "bar"}],
               [{"type": "pie"}, {"type": "bar"}]]
    )

    # Panel 1: indicator counts, red when anything was found.
    indicator_colors = ['red' if val > 0 else 'green' for val in suspicious_indicators.values()]
    dashboard.add_trace(
        go.Bar(
            x=list(suspicious_indicators.keys()),
            y=list(suspicious_indicators.values()),
            marker_color=indicator_colors,
            name="Security Indicators"
        ),
        row=1, col=1
    )

    # Panel 2: the ten domains with the most requests.
    leading_domains = domain_stats.head(10)
    dashboard.add_trace(
        go.Bar(
            x=leading_domains.index,
            y=leading_domains['requests'],
            name="Domain Requests"
        ),
        row=1, col=2
    )

    # Panel 3: the ten most frequent raw status codes.
    frequent_codes = df['status_code'].value_counts().head(10)
    dashboard.add_trace(
        go.Pie(
            labels=[f"{code}" for code in frequent_codes.index],
            values=frequent_codes.values,
            name="Status Codes"
        ),
        row=2, col=1
    )

    # Panel 4: HTTPS share for the same ten domains, traffic-light coloured.
    https_by_domain = domain_stats.head(10)
    https_colors = []
    for ratio in https_by_domain['https_ratio']:
        if ratio > 0.8:
            https_colors.append('green')
        elif ratio > 0.5:
            https_colors.append('orange')
        else:
            https_colors.append('red')
    dashboard.add_trace(
        go.Bar(
            x=https_by_domain.index,
            y=https_by_domain['https_ratio'] * 100,
            marker_color=https_colors,
            name="HTTPS %"
        ),
        row=2, col=2
    )

    dashboard.update_layout(
        height=800,
        title_text="Security Analysis Dashboard",
        title_x=0.5,
        showlegend=False
    )

    # Slant the category labels on the three bar panels.
    dashboard.update_xaxes(tickangle=45, row=1, col=1)
    dashboard.update_xaxes(tickangle=45, row=1, col=2)
    dashboard.update_xaxes(tickangle=45, row=2, col=2)
    dashboard.update_yaxes(title_text="Count", row=1, col=1)
    dashboard.update_yaxes(title_text="Requests", row=1, col=2)
    dashboard.update_yaxes(title_text="HTTPS %", row=2, col=2)

    dashboard.show()

    # Text report: overall total, then every indicator that fired.
    print(f"\nüö® Security Analysis Results:")
    print(f"   ‚Ä¢ Total Security Incidents: {sum(suspicious_indicators.values())}")
    for indicator, count in suspicious_indicators.items():
        if count > 0:
            print(f"   ‚Ä¢ {indicator}: {count}")

    # Requests whose body size is above the dataset's 95th percentile.
    size_cutoff = df['req_content_length'].quantile(0.95)
    large_requests = df[df['req_content_length'] > size_cutoff]
    if not large_requests.empty:
        print(f"\nüì§ Potential Data Exfiltration:")
        print(f"   ‚Ä¢ Large requests detected: {len(large_requests)}")
        print(f"   ‚Ä¢ Top targets: {large_requests['host'].value_counts().head(3).to_dict()}")

    # User agents that were seen on exactly one request.
    ua_counts = df['user_agent'].value_counts()
    rare_uas = ua_counts[ua_counts == 1]
    if len(rare_uas) > 0:
        print(f"\nü§ñ Unusual User Agents:")
        print(f"   ‚Ä¢ Unique user agents: {len(rare_uas)}")
        print(f"   ‚Ä¢ Sample: {list(rare_uas.index[:3])}")

    return domain_stats, suspicious_indicators

# Run the security analysis and keep its per-domain table and indicator
# counts. The guard matters: security_analysis returns None for an empty
# frame, which could not be unpacked into two names.
if not df.empty:
    domain_stats, security_indicators = security_analysis(df)

## 7. Interactive Traffic Filtering and Search

In [None]:
def create_interactive_dashboard(df):
    """
    Display an ipywidgets filter panel with a live-updating traffic summary.

    The panel offers a date range slider, HTTP method and status class
    multi-selects, a host dropdown (the 20 most requested hosts plus 'All')
    and a free-text search over host, path and full URL. Every change
    re-filters ``df`` and redraws a 2x2 Plotly figure plus a ten-row sample
    table inside an output widget.

    Args:
        df: Processed log DataFrame; it is only read, never modified.

    Returns:
        None. Prints a notice and returns immediately when ``df`` is empty
        or has no usable dates.
    """
    if df.empty:
        print("‚ùå No data available for interactive dashboard")
        return

    available_dates = sorted(df['date'].dropna().unique())
    if not available_dates:
        print("‚ùå No data available for interactive dashboard")
        return

    print("üéõÔ∏è Creating interactive dashboard...")

    # Status class label -> half-open [low, high) range of status codes.
    status_ranges = {
        '2xx': (200, 300),
        '3xx': (300, 400),
        '4xx': (400, 500),
        '5xx': (500, 600),
    }
    known_methods = df['method'].unique().tolist()

    # Create filter widgets
    date_range = widgets.SelectionRangeSlider(
        options=[(day.strftime('%Y-%m-%d'), day) for day in available_dates],
        index=(0, len(available_dates) - 1),
        description='Date Range',
        disabled=False
    )

    method_filter = widgets.SelectMultiple(
        options=known_methods,
        value=known_methods,
        description='HTTP Methods',
        disabled=False
    )

    host_filter = widgets.Dropdown(
        options=['All'] + df['host'].value_counts().head(20).index.tolist(),
        value='All',
        description='Host Filter',
    )

    status_filter = widgets.SelectMultiple(
        options=list(status_ranges),
        value=list(status_ranges),
        description='Status Codes',
        disabled=False
    )

    search_box = widgets.Text(
        value='',
        placeholder='Search in URLs or hosts...',
        description='Search:',
        disabled=False
    )

    output = widgets.Output()

    def update_dashboard(*args):
        """Re-apply every filter and redraw the figure and sample table."""
        with output:
            clear_output(wait=True)

            # Date filter. Boolean indexing already yields a new frame, so no
            # up-front copy of the full dataset is needed.
            start_date, end_date = date_range.value
            filtered_df = df[(df['date'] >= start_date) & (df['date'] <= end_date)]

            # Method filter
            filtered_df = filtered_df[filtered_df['method'].isin(method_filter.value)]

            # Host filter
            if host_filter.value != 'All':
                filtered_df = filtered_df[filtered_df['host'] == host_filter.value]

            # Status filter: keep rows in any selected class. As before, an
            # empty selection applies no status filtering at all.
            if status_filter.value:
                codes = filtered_df['status_code']
                status_mask = pd.Series(False, index=filtered_df.index)
                for label in status_filter.value:
                    low, high = status_ranges[label]
                    status_mask |= (codes >= low) & (codes < high)
                filtered_df = filtered_df[status_mask]

            # Search filter. regex=False: the box holds literal text, so
            # characters such as '(' or '?' must not be parsed as a pattern.
            needle = search_box.value
            if needle:
                search_mask = (
                    filtered_df['host'].str.contains(needle, case=False, na=False, regex=False) |
                    filtered_df['path'].str.contains(needle, case=False, na=False, regex=False) |
                    filtered_df['full_url'].str.contains(needle, case=False, na=False, regex=False)
                )
                filtered_df = filtered_df[search_mask]

            if filtered_df.empty:
                print("üîç No data matches the current filters")
                return

            # Create filtered visualizations
            print(f"üìä Filtered Results: {len(filtered_df):,} entries")

            # Requests per one-hour bin. An offset object is used instead of
            # the 'H' string alias, which newer pandas releases deprecate.
            time_series = filtered_df.set_index('datetime').resample(pd.offsets.Hour()).size()

            fig = make_subplots(
                rows=2, cols=2,
                subplot_titles=[
                    'Traffic Over Time (Filtered)',
                    'Method Distribution (Filtered)',
                    'Top Hosts (Filtered)',
                    'Status Codes (Filtered)'
                ],
                # The pie trace needs a pie-capable cell; the default cell
                # type is cartesian and rejects it.
                specs=[[{"type": "scatter"}, {"type": "pie"}],
                       [{"type": "bar"}, {"type": "bar"}]]
            )

            # Time series
            fig.add_trace(
                go.Scatter(x=time_series.index, y=time_series.values, mode='lines+markers'),
                row=1, col=1
            )

            # Methods
            method_counts = filtered_df['method'].value_counts()
            fig.add_trace(
                go.Pie(labels=method_counts.index, values=method_counts.values),
                row=1, col=2
            )

            # Hosts
            host_counts = filtered_df['host'].value_counts().head(10)
            fig.add_trace(
                go.Bar(x=host_counts.index, y=host_counts.values),
                row=2, col=1
            )

            # Status codes (as strings so the axis is categorical)
            status_counts = filtered_df['status_code'].value_counts().head(10)
            fig.add_trace(
                go.Bar(x=[str(x) for x in status_counts.index], y=status_counts.values),
                row=2, col=2
            )

            fig.update_layout(height=600, showlegend=False, title_text="Filtered Traffic Analysis")
            fig.update_xaxes(tickangle=45, row=2, col=1)

            fig.show()

            # Show sample of filtered data
            sample_columns = ['datetime', 'method', 'host', 'path', 'status_code', 'req_content_length', 'resp_content_length']
            display(HTML(f"<h4>Sample of Filtered Data ({len(filtered_df)} total entries):</h4>"))
            display(filtered_df[sample_columns].head(10))

    # Connect widgets to update function
    for control in (date_range, method_filter, host_filter, status_filter, search_box):
        control.observe(update_dashboard, names='value')

    # Display widgets and output
    filter_box = widgets.VBox([
        widgets.HTML('<h3>üéõÔ∏è Interactive Traffic Filter Dashboard</h3>'),
        widgets.HBox([date_range, method_filter]),
        widgets.HBox([host_filter, status_filter]),
        search_box,
        widgets.HTML('<hr>')
    ])

    display(filter_box)
    display(output)

    # Initial update
    update_dashboard()

# Show the interactive filter dashboard. Guarded because an empty frame has
# no dates, methods or hosts to populate the filter widgets with.
if not df.empty:
    create_interactive_dashboard(df)