In [2]:
# visualization_world_map_4countries_enhanced.py
# Enhanced world map with 4 countries representing different patterns + data quality visualization

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

# Set high quality export settings
pio.kaleido.scope.default_width = 1200
pio.kaleido.scope.default_height = 800
pio.kaleido.scope.default_scale = 2

def analyze_data_quality(df):
    """Analyze and report data quality metrics"""
    
    print("🔍 Analyzing Data Quality...")
    
    # Data completeness analysis
    completeness = {}
    for column in ['HDI', 'Suicide_rate', 'GDP_per_capita', 'income_group_auto', 'continent']:
        if column in df.columns:
            completeness[column] = {
                'total': len(df),
                'non_null': df[column].notna().sum(),
                'null_count': df[column].isna().sum(),
                'completeness_rate': df[column].notna().sum() / len(df) * 100
            }
    
    # Year coverage analysis
    if 'Year' in df.columns:
        year_coverage = df['Year'].value_counts().sort_index()
        countries_per_year = df.groupby('Year')['Country Name'].nunique()
    else:
        year_coverage = pd.Series()
        countries_per_year = pd.Series()
    
    # Data distribution analysis
    distribution_stats = {}
    numeric_columns = ['HDI', 'Suicide_rate', 'GDP_per_capita']
    for col in numeric_columns:
        if col in df.columns:
            distribution_stats[col] = {
                'mean': df[col].mean(),
                'median': df[col].median(),
                'std': df[col].std(),
                'min': df[col].min(),
                'max': df[col].max(),
                'q25': df[col].quantile(0.25),
                'q75': df[col].quantile(0.75)
            }
    
    # Print data quality report
    print("\n📊 DATA QUALITY REPORT:")
    print("=" * 50)
    print("Completeness Analysis:")
    for col, stats in completeness.items():
        print(f"  {col}: {stats['completeness_rate']:.1f}% ({stats['non_null']}/{stats['total']})")
    
    if not year_coverage.empty:
        print(f"\nYear Coverage: {year_coverage.index.min()} - {year_coverage.index.max()}")
        print(f"Countries with most complete data: {countries_per_year.max()} countries in {countries_per_year.idxmax()}")
    
    print("\nData Distribution:")
    for col, stats in distribution_stats.items():
        print(f"  {col}: mean={stats['mean']:.3f}, std={stats['std']:.3f}, range=[{stats['min']:.3f}, {stats['max']:.3f}]")
    
    return completeness, year_coverage, countries_per_year, distribution_stats

def create_data_quality_dashboard(df, completeness, year_coverage, countries_per_year):
    """Create comprehensive data quality visualization dashboard"""
    
    # Create subplots for data quality overview
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            'Data Completeness by Variable',
            'Country Coverage Over Time',
            'Data Distribution - HDI',
            'Data Distribution - Suicide Rates'
        ],
        specs=[
            [{"type": "bar"}, {"type": "scatter"}],
            [{"type": "histogram"}, {"type": "histogram"}]
        ],
        vertical_spacing=0.12,
        horizontal_spacing=0.08
    )
    
    # 1. Data completeness bar chart
    variables = list(completeness.keys())
    completeness_rates = [completeness[var]['completeness_rate'] for var in variables]
    
    fig.add_trace(
        go.Bar(
            x=variables,
            y=completeness_rates,
            marker_color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd'],
            text=[f'{rate:.1f}%' for rate in completeness_rates],
            textposition='auto',
        ),
        row=1, col=1
    )
    
    # 2. Country coverage over time
    if not year_coverage.empty:
        fig.add_trace(
            go.Scatter(
                x=countries_per_year.index,
                y=countries_per_year.values,
                mode='lines+markers',
                line=dict(color='#2ca02c', width=3),
                marker=dict(size=8),
                name='Countries with Data'
            ),
            row=1, col=2
        )
    
    # 3. HDI distribution
    if 'HDI' in df.columns:
        fig.add_trace(
            go.Histogram(
                x=df['HDI'].dropna(),
                nbinsx=30,
                marker_color='#1f77b4',
                opacity=0.7,
                name='HDI Distribution'
            ),
            row=2, col=1
        )
    
    # 4. Suicide rate distribution
    if 'Suicide_rate' in df.columns:
        fig.add_trace(
            go.Histogram(
                x=df['Suicide_rate'].dropna(),
                nbinsx=30,
                marker_color='#d62728',
                opacity=0.7,
                name='Suicide Rate Distribution'
            ),
            row=2, col=2
        )
    
    # Update layout
    fig.update_layout(
        title_text="Data Quality Dashboard: Global Development and Mental Health Dataset",
        height=800,
        showlegend=False,
        annotations=[
            dict(
                x=0.5, y=-0.1,
                xref="paper", yref="paper",
                text="Data Source: World Bank, WHO, UNDP | Analysis: Global Development-Mental Health Study",
                showarrow=False,
                font=dict(size=10, color="gray")
            )
        ]
    )
    
    # Update axes labels
    fig.update_xaxes(title_text="Variables", row=1, col=1)
    fig.update_yaxes(title_text="Completeness Rate (%)", row=1, col=1, range=[0, 100])
    
    if not year_coverage.empty:
        fig.update_xaxes(title_text="Year", row=1, col=2)
        fig.update_yaxes(title_text="Number of Countries", row=1, col=2)
    
    fig.update_xaxes(title_text="HDI Value", row=2, col=1)
    fig.update_yaxes(title_text="Frequency", row=2, col=1)
    
    fig.update_xaxes(title_text="Suicide Rate (per 100,000)", row=2, col=2)
    fig.update_yaxes(title_text="Frequency", row=2, col=2)
    
    return fig

def create_4country_comparison_map(df):
    """Create world map highlighting 4 countries representing different patterns with data quality indicators"""
    
    # Filter for latest year
    if 'Year' in df.columns:
        latest_year = df['Year'].max()
        map_data = df[df['Year'] == latest_year].copy()
    else:
        map_data = df.copy()
    
    print(f"🌍 Creating world map with {len(map_data)} countries")
    
    # Data quality assessment for each country
    map_data['data_quality_score'] = map_data[['HDI', 'Suicide_rate', 'GDP_per_capita']].notna().sum(axis=1)
    map_data['data_quality_category'] = pd.cut(
        map_data['data_quality_score'],
        bins=[0, 1, 2, 3],
        labels=['Low', 'Medium', 'High'],
        right=False
    )
    
    print(f"📊 Data Quality Distribution: {map_data['data_quality_category'].value_counts().to_dict()}")
    
    # Define thresholds
    hdi_high_threshold = 0.75
    hdi_low_threshold = 0.55
    suicide_high_threshold = map_data['Suicide_rate'].quantile(0.75)
    suicide_low_threshold = map_data['Suicide_rate'].quantile(0.25)
    
    print(f"📊 Thresholds - HDI: High>{hdi_high_threshold:.3f}, Low<{hdi_low_threshold:.3f}")
    print(f"📊 Thresholds - Suicide: High>{suicide_high_threshold:.2f}, Low<{suicide_low_threshold:.2f}")
    
    # Categorize countries into 4 groups
    categories = {
        'High HDI, Low Suicide': {
            'condition': (map_data['HDI'] >= hdi_high_threshold) & (map_data['Suicide_rate'] <= suicide_low_threshold),
            'color': 'green',
            'description': 'Successful Development<br>High development with good mental health'
        },
        'High HDI, High Suicide': {
            'condition': (map_data['HDI'] >= hdi_high_threshold) & (map_data['Suicide_rate'] >= suicide_high_threshold),
            'color': 'red',
            'description': 'Development Paradox<br>High development but mental health challenges'
        },
        'Low HDI, High Suicide': {
            'condition': (map_data['HDI'] <= hdi_low_threshold) & (map_data['Suicide_rate'] >= suicide_high_threshold),
            'color': 'orange',
            'description': 'Double Burden<br>Low development with mental health challenges'
        },
        'Low HDI, Low Suicide': {
            'condition': (map_data['HDI'] <= hdi_low_threshold) & (map_data['Suicide_rate'] <= suicide_low_threshold),
            'color': 'blue',
            'description': 'Resilience<br>Low development but good mental health outcomes'
        }
    }
    
    # Select one representative country from each category with good data quality
    selected_countries = {}
    country_explanations = {}
    
    for category, config in categories.items():
        category_data = map_data[config['condition']]
        
        if len(category_data) > 0:
            # Prioritize countries with high data quality
            high_quality_data = category_data[category_data['data_quality_score'] == 3]
            
            if len(high_quality_data) > 0:
                candidate_data = high_quality_data
            else:
                candidate_data = category_data[category_data['data_quality_score'] >= 2]
                if len(candidate_data) == 0:
                    candidate_data = category_data
            
            # Select representative country
            if 'High HDI' in category:
                if 'Low Suicide' in category:
                    selected = candidate_data.nlargest(3, 'HDI').iloc[0]
                else:
                    selected = candidate_data.nlargest(3, 'Suicide_rate').iloc[0]
            else:
                if 'Low Suicide' in category:
                    selected = candidate_data.nsmallest(3, 'Suicide_rate').iloc[0]
                else:
                    selected = candidate_data.nlargest(3, 'Suicide_rate').iloc[0]
            
            selected_countries[category] = selected
            country_explanations[selected['Country Name']] = {
                'category': category,
                'color': config['color'],
                'description': config['description'],
                'data_quality': selected['data_quality_category'],
                'details': f"HDI: {selected['HDI']:.3f}, Suicide: {selected['Suicide_rate']:.1f}, Data Quality: {selected['data_quality_category']}"
            }
    
    print("🎯 Selected countries for comparison:")
    for category, country in selected_countries.items():
        data_quality = country['data_quality_category']
        print(f"   - {category}: {country['Country Name']} (Data Quality: {data_quality})")
    
    # Create the world map with data quality overlay
    fig = px.choropleth(
        map_data,
        locations="ISO3",
        color="Suicide_rate",
        hover_name="Country Name",
        hover_data={
            'HDI': ':.3f',
            'Suicide_rate': ':.1f',
            'GDP_per_capita': ':,.0f',
            'income_group_auto': True,
            'continent': True,
            'data_quality_category': True
        },
        color_continuous_scale="Reds",
        title="Global Suicide Rates: 4-Country Development-Mental Health Comparison<br><sub>Data Quality Indicators: Circle size represents completeness of data (HDI, Suicide Rate, GDP)</sub>",
        projection="natural earth",
        width=1200,
        height=800
    )
    
    # Add data quality indicators as bubble layer
    fig.add_trace(
        go.Scattergeo(
            lon=map_data['Longitude'] if 'Longitude' in map_data.columns else [0] * len(map_data),
            lat=map_data['Latitude'] if 'Latitude' in map_data.columns else [0] * len(map_data),
            text=map_data['Country Name'] + '<br>Data Quality: ' + map_data['data_quality_category'].astype(str),
            mode='markers',
            marker=dict(
                size=map_data['data_quality_score'] * 8,  # Size based on data completeness
                color=map_data['data_quality_score'],
                colorscale=['red', 'yellow', 'green'],
                cmin=1,
                cmax=3,
                opacity=0.6,
                line=dict(width=0.5, color='gray'),
                colorbar=dict(
                    title="Data Quality<br>Score",
                    thickness=15,
                    len=0.35,
                    y=0.85,
                    yanchor='top'
                )
            ),
            hoverinfo='text',
            name='Data Quality'
        )
    )
    
    # Highlight selected countries with different colors for each category
    for category, country_data in selected_countries.items():
        config = categories[category]
        fig.add_trace(
            go.Choropleth(
                locations=[country_data['ISO3']],
                z=[10],
                colorscale=[[0, config['color']], [1, config['color']]],
                showscale=False,
                hoverinfo='skip',
                marker_line_color='black',
                marker_line_width=4,
                name=category
            )
        )
    
    # Create detailed annotations for each country
    annotations = []
    
    # Position annotations in four corners
    annotation_positions = [
        (0.02, 0.98, 'top left'),
        (0.98, 0.98, 'top right'),  
        (0.02, 0.02, 'bottom left'),
        (0.98, 0.02, 'bottom right')
    ]
    
    for i, (category, country_data) in enumerate(selected_countries.items()):
        if i < len(annotation_positions):
            x_pos, y_pos, anchor = annotation_positions[i]
            config = categories[category]
            
            # Data quality indicator
            dq_color = {'High': 'green', 'Medium': 'orange', 'Low': 'red'}.get(str(country_data['data_quality_category']), 'gray')
            dq_symbol = {'High': '✅', 'Medium': '⚠️', 'Low': '❌'}.get(str(country_data['data_quality_category']), '❓')
            
            annotation_text = (
                f"<b>{category}</b><br>"
                f"<b>{country_data['Country Name']}</b> {dq_symbol}<br>"
                f"HDI: {country_data['HDI']:.3f}<br>"
                f"Suicide: {country_data['Suicide_rate']:.1f}<br>"
                f"Income: {country_data.get('income_group_auto', 'N/A')}<br>"
                f"Continent: {country_data.get('continent', 'N/A')}<br>"
                f"Data Quality: <span style='color:{dq_color}'>{country_data['data_quality_category']}</span><br>"
                f"<i>{config['description']}</i>"
            )
            
            annotations.append(
                dict(
                    x=x_pos,
                    y=y_pos,
                    xref="paper",
                    yref="paper",
                    text=annotation_text,
                    showarrow=False,
                    bgcolor="white",
                    bordercolor=config['color'],
                    borderwidth=2,
                    borderpad=8,
                    font=dict(size=11, color="black"),
                    align="left"
                )
            )
    
    # Add quadrant explanation with data quality context
    quadrant_text = (
        "<b>Development-Mental Health Matrix:</b><br>"
        "• 🟢 High HDI, Low Suicide: Successful development<br>"
        "• 🔴 High HDI, High Suicide: Development paradox<br>"  
        "• 🟠 Low HDI, High Suicide: Double burden<br>"
        "• 🔵 Low HDI, Low Suicide: Resilience<br><br>"
        "<b>Data Quality Indicators:</b><br>"
        "• 🔴 Small circle: Missing 2+ variables<br>"
        "• 🟡 Medium circle: Missing 1 variable<br>"
        "• 🟢 Large circle: Complete data"
    )
    
    annotations.append(
        dict(
            x=0.5,
            y=0.95,
            xref="paper",
            yref="paper",
            text=quadrant_text,
            showarrow=False,
            bgcolor="white",
            bordercolor="black",
            borderwidth=1,
            borderpad=8,
            font=dict(size=11, color="black"),
            align="center"
        )
    )
    
    fig.update_layout(
        annotations=annotations,
        coloraxis_colorbar=dict(
            title="Suicide Rate<br>(per 100,000)",
            thickness=20,
            len=0.75
        )
    )
    
    return fig, selected_countries, map_data

def create_comparison_scatter_plot(df, selected_countries):
    """Create a scatter plot highlighting the 4 selected countries with data quality indicators"""
    
    # Filter for latest year
    if 'Year' in df.columns:
        latest_year = df['Year'].max()
        plot_data = df[df['Year'] == latest_year].copy()
    else:
        plot_data = df.copy()
    
    # Calculate data quality score
    plot_data['data_quality_score'] = plot_data[['HDI', 'Suicide_rate', 'GDP_per_capita']].notna().sum(axis=1)
    
    # Create the scatter plot with data quality as symbol
    symbol_map = {1: 'circle-open', 2: 'circle', 3: 'circle-dot'}
    size_map = {1: 8, 2: 10, 3: 12}
    
    fig = px.scatter(
        plot_data,
        x='HDI',
        y='Suicide_rate',
        color='continent',
        size='data_quality_score',
        size_max=15,
        symbol='data_quality_score',
        symbol_map=symbol_map,
        hover_name='Country Name',
        hover_data={
            'GDP_per_capita': ':,.0f',
            'Suicide_rate': ':.2f',
            'HDI': ':.3f',
            'income_group_auto': True,
            'data_quality_score': True
        },
        title="HDI vs Suicide Rate: 4-Country Development Patterns Comparison<br><sub>Symbol size and style indicate data quality (1=Low, 2=Medium, 3=High)</sub>",
        labels={
            'HDI': 'Human Development Index (HDI)',
            'Suicide_rate': 'Suicide Rate (per 100,000)',
            'continent': 'Continent',
            'data_quality_score': 'Data Quality Score'
        },
        opacity=0.7
    )
    
    # Add quadrant lines
    hdi_threshold = 0.65
    suicide_median = plot_data['Suicide_rate'].median()
    
    fig.add_hline(y=suicide_median, line_dash="dash", line_color="gray", opacity=0.7)
    fig.add_vline(x=hdi_threshold, line_dash="dash", line_color="gray", opacity=0.7)
    
    # Highlight the 4 selected countries
    colors = {'High HDI, Low Suicide': 'green', 
              'High HDI, High Suicide': 'red',
              'Low HDI, High Suicide': 'orange', 
              'Low HDI, Low Suicide': 'blue'}
    
    for category, country_data in selected_countries.items():
        dq_score = country_data.get('data_quality_score', 3)
        dq_symbol = {1: '❌', 2: '⚠️', 3: '✅'}.get(dq_score, '❓')
        
        fig.add_trace(
            go.Scatter(
                x=[country_data['HDI']],
                y=[country_data['Suicide_rate']],
                mode='markers+text',
                marker=dict(
                    size=20,
                    color=colors[category],
                    symbol='star',
                    line=dict(color='black', width=3)
                ),
                text=f"{dq_symbol} {country_data['Country Name']}",
                textposition="top center",
                name=f"{category} ({dq_symbol})",
                hovertemplate=(
                    f"<b>{country_data['Country Name']}</b> {dq_symbol}<br>"
                    f"Category: {category}<br>"
                    f"HDI: {country_data['HDI']:.3f}<br>"
                    f"Suicide Rate: {country_data['Suicide_rate']:.1f}<br>"
                    f"GDP per capita: ${country_data['GDP_per_capita']:,.0f}<br>"
                    f"Income Group: {country_data.get('income_group_auto', 'N/A')}<br>"
                    f"Data Quality: {country_data.get('data_quality_category', 'N/A')}"
                )
            )
        )
    
    # Add quadrant annotations
    quadrants = [
        (0.8, 0.9, "High HDI<br>High Suicide", "red"),
        (0.8, 0.1, "High HDI<br>Low Suicide", "green"),
        (0.3, 0.9, "Low HDI<br>High Suicide", "orange"),
        (0.3, 0.1, "Low HDI<br>Low Suicide", "blue")
    ]
    
    for x, y, text, color in quadrants:
        fig.add_annotation(
            x=x, y=y,
            xref="paper", yref="paper",
            text=text,
            showarrow=False,
            bgcolor=color,
            opacity=0.8,
            font=dict(color="white", size=12),
            bordercolor="black",
            borderwidth=1
        )
    
    # Add data quality legend
    fig.add_annotation(
        x=0.02, y=0.98,
        xref="paper", yref="paper",
        text="<b>Data Quality Legend:</b><br>✅ Complete data (3/3)<br>⚠️ Missing 1 variable<br>❌ Missing 2+ variables",
        showarrow=False,
        bgcolor="white",
        bordercolor="black",
        borderwidth=1,
        font=dict(size=10)
    )
    
    fig.update_layout(
        width=1200,
        height=700,
        showlegend=True
    )
    
    return fig

def create_country_comparison_table(selected_countries):
    """Create a detailed comparison table for the 4 countries with data quality assessment"""
    
    comparison_data = []
    for category, country_data in selected_countries.items():
        dq_score = country_data.get('data_quality_score', 3)
        dq_indicator = {1: '❌ Low', 2: '⚠️ Medium', 3: '✅ High'}.get(dq_score, '❓ Unknown')
        
        comparison_data.append({
            'Category': category,
            'Country': country_data['Country Name'],
            'HDI': f"{country_data['HDI']:.3f}",
            'Suicide Rate': f"{country_data['Suicide_rate']:.1f}",
            'GDP per Capita': f"${country_data['GDP_per_capita']:,.0f}",
            'Income Group': country_data.get('income_group_auto', 'N/A'),
            'Continent': country_data.get('continent', 'N/A'),
            'Data Quality': dq_indicator,
            'Missing Data': f"{3 - dq_score} variables"
        })
    
    df_comparison = pd.DataFrame(comparison_data)
    
    # Create table visualization with color coding for data quality
    colors = []
    for dq in df_comparison['Data Quality']:
        if '✅' in dq:
            colors.append('lightgreen')
        elif '⚠️' in dq:
            colors.append('lightyellow')
        else:
            colors.append('lightcoral')
    
    fig = go.Figure(data=[go.Table(
        header=dict(
            values=list(df_comparison.columns),
            fill_color='paleturquoise',
            align='left',
            font=dict(size=12, color='black')
        ),
        cells=dict(
            values=[df_comparison[col] for col in df_comparison.columns],
            fill_color=[colors * len(df_comparison.columns)],  # Apply colors to all columns
            align='left',
            font=dict(size=11)
        )
    )])
    
    fig.update_layout(
        title="Detailed Comparison: 4-Country Development-Mental Health Profiles with Data Quality Assessment",
        width=1200,
        height=400
    )
    
    return fig, df_comparison

def generate_insights_report(selected_countries, completeness, distribution_stats):
    """Generate comprehensive insights report"""
    
    print("\n" + "="*80)
    print("📈 COMPREHENSIVE INSIGHTS REPORT")
    print("="*80)
    
    # Overall data quality insights
    print("\n🔍 DATA QUALITY INSIGHTS:")
    total_countries = completeness['HDI']['total']
    complete_data = completeness['HDI']['non_null']
    print(f"• Dataset covers {total_countries} countries")
    print(f"• {completeness['HDI']['completeness_rate']:.1f}% have HDI data")
    print(f"• {completeness['Suicide_rate']['completeness_rate']:.1f}% have suicide rate data")
    print(f"• {completeness['GDP_per_capita']['completeness_rate']:.1f}% have GDP data")
    
    # Selected countries analysis
    print("\n🎯 SELECTED COUNTRIES ANALYSIS:")
    for category, country_data in selected_countries.items():
        dq = country_data.get('data_quality_category', 'Unknown')
        print(f"\n• {category}:")
        print(f"  └ {country_data['Country Name']} (Data Quality: {dq})")
        print(f"    HDI: {country_data['HDI']:.3f} | Suicide: {country_data['Suicide_rate']:.1f}")
    
    # Development patterns insights
    print("\n🌍 DEVELOPMENT PATTERNS INSIGHTS:")
    
    # Calculate averages for comparison
    hdi_values = [c['HDI'] for c in selected_countries.values()]
    suicide_values = [c['Suicide_rate'] for c in selected_countries.values()]
    
    print(f"• HDI Range: {min(hdi_values):.3f} - {max(hdi_values):.3f}")
    print(f"• Suicide Rate Range: {min(suicide_values):.1f} - {max(suicide_values):.1f}")
    
    # Key findings
    print("\n💡 KEY FINDINGS:")
    print("1. Development Paradox: High-income countries can still face mental health challenges")
    print("2. Resilience Patterns: Some lower-income countries achieve good mental health outcomes")
    print("3. Data Limitations: Interpretation should consider data quality variations")
    print("4. Policy Implications: Different patterns require tailored intervention strategies")
    
    # Recommendations
    print("\n🎯 RECOMMENDATIONS:")
    print("• For High HDI/High Suicide: Focus on mental health infrastructure and stigma reduction")
    print("• For Low HDI/High Suicide: Address both development and mental health simultaneously")
    print("• For Low HDI/Low Suicide: Study resilience factors for replication")
    print("• Data Improvement: Prioritize data collection in underrepresented regions")

def main():
    """Main function to generate enhanced 4-country comparison visualizations"""
    
    print("🚀 Loading data and generating enhanced 4-country comparison visualizations...")
    
    # Load data
    try:
        df = pd.read_csv("../../Final/final_clean_dataset.csv")
        print(f"✅ Data loaded successfully: {len(df)} rows, {df['Country Name'].nunique()} countries")
    except Exception as e:
        print(f"❌ Error loading data: {e}")
        return
    
    # Analyze data quality
    completeness, year_coverage, countries_per_year, distribution_stats = analyze_data_quality(df)
    
    # Generate data quality dashboard
    print("📊 Generating Data Quality Dashboard...")
    fig_dq_dashboard = create_data_quality_dashboard(df, completeness, year_coverage, countries_per_year)
    fig_dq_dashboard.write_image("data_quality_dashboard.png", scale=2)
    fig_dq_dashboard.write_html("data_quality_dashboard.html")
    print("✅ Data quality dashboard saved!")
    
    # Generate enhanced visualizations
    print("🌍 Generating Enhanced World Map with Data Quality...")
    fig_map, selected_countries, map_data = create_4country_comparison_map(df)
    fig_map.write_image("world_map_4countries_enhanced.png", scale=2)
    fig_map.write_html("world_map_4countries_enhanced.html")
    print("✅ Enhanced world map saved!")
    
    print("📊 Generating Enhanced Comparison Scatter Plot...")
    fig_scatter = create_comparison_scatter_plot(df, selected_countries)
    fig_scatter.write_image("scatter_4countries_enhanced.png", scale=2)
    fig_scatter.write_html("scatter_4countries_enhanced.html")
    print("✅ Enhanced scatter plot saved!")
    
    print("📋 Generating Enhanced Comparison Table...")
    fig_table, df_table = create_country_comparison_table(selected_countries)
    fig_table.write_image("table_4countries_enhanced.png", scale=2)
    fig_table.write_html("table_4countries_enhanced.html")
    df_table.to_csv("comparison_table_4countries_enhanced.csv", index=False)
    print("✅ Enhanced comparison table saved!")
    
    # Generate insights report
    generate_insights_report(selected_countries, completeness, distribution_stats)
    
    # Print summary
    print("\n📁 FILES CREATED:")
    print("   - data_quality_dashboard.png/.html")
    print("   - world_map_4countries_enhanced.png/.html")
    print("   - scatter_4countries_enhanced.png/.html") 
    print("   - table_4countries_enhanced.png/.html")
    print("   - comparison_table_4countries_enhanced.csv")
    
    print("\n🎉 Enhanced 4-country comparison visualizations completed!")
    print("💡 All visualizations now include data quality indicators and comprehensive insights")

if __name__ == "__main__":
    main()



Use of plotly.io.kaleido.scope.default_width is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_width instead.




Use of plotly.io.kaleido.scope.default_height is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_height instead.




Use of plotly.io.kaleido.scope.default_scale is deprecated and support will be removed after September 2025.
Please use plotly.io.defaults.default_scale instead.




🚀 Loading data and generating enhanced 4-country comparison visualizations...
✅ Data loaded successfully: 985 rows, 205 countries
🔍 Analyzing Data Quality...

📊 DATA QUALITY REPORT:
Completeness Analysis:
  HDI: 100.0% (985/985)
  Suicide_rate: 100.0% (985/985)
  GDP_per_capita: 100.0% (985/985)
  income_group_auto: 96.9% (954/985)
  continent: 96.9% (954/985)

Year Coverage: 2019 - 2023
Countries with most complete data: 205 countries in 2020

Data Distribution:
  HDI: mean=0.726, std=0.151, range=[0.380, 0.972]
  Suicide_rate: mean=10.986, std=37.816, range=[0.770, 837.000]
  GDP_per_capita: mean=13550.174, std=18481.220, range=[258.422, 107599.576]
📊 Generating Data Quality Dashboard...
✅ Data quality dashboard saved!
🌍 Generating Enhanced World Map with Data Quality...
🌍 Creating world map with 192 countries
📊 Data Quality Distribution: {'Low': 0, 'Medium': 0, 'High': 0}
📊 Thresholds - HDI: High>0.750, Low<0.550
📊 Thresholds - Suicide: High>12.33, Low<4.89
🎯 Selected countries for 