In [2]:
# comprehensive_risk_factor_analysis.py
# Comprehensive analysis of risk factors affecting HDI, GDP, and suicide rates

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.stats import pearsonr
import warnings
warnings.filterwarnings('ignore')

# Set style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

def load_main_data():
    """Load the main dataset"""
    try:
        df = pd.read_csv("../../Final/final_clean_dataset.csv")
        print(f"✅ Main data loaded: {len(df)} rows, {df['Country Name'].nunique()} countries")
        return df
    except Exception as e:
        print(f"❌ Error loading main data: {e}")
        return None

def load_wdi_data():
    """Load and process WDI CSV data with comprehensive indicator categorization"""
    try:
        wdi_df = pd.read_csv("../../Dataset/WDICSV-rev.csv", encoding='utf-8')
        print(f"✅ WDI data loaded: {len(wdi_df)} indicators")
        
        # Transform from wide to long format
        id_vars = ['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code']
        wdi_long = wdi_df.melt(id_vars=id_vars, 
                              var_name='Year', 
                              value_name='Value')
        
        # Convert Year to numeric and drop missing values
        wdi_long['Year'] = pd.to_numeric(wdi_long['Year'], errors='coerce')
        wdi_long = wdi_long.dropna(subset=['Year', 'Value'])
        
        # Categorize indicators
        indicator_categories = {
            'Education': [
                'School enrollment, primary', 'School enrollment, secondary',
                'School enrollment, tertiary', 'Literacy rate', 'Educational attainment',
                'Pupil-teacher ratio', 'Government expenditure on education'
            ],
            'Health': [
                'Life expectancy', 'Mortality rate', 'Prevalence of', 'Health expenditure',
                'Physicians', 'Hospital beds', 'Immunization', 'Birth rate', 'Death rate'
            ],
            'Economic': [
                'GDP', 'GNI', 'Inflation', 'Unemployment', 'Labor force',
                'Industry', 'Services', 'Agriculture', 'Export', 'Import'
            ],
            'Social': [
                'Poverty', 'Inequality', 'Gini', 'Social protection', 'Social expenditure',
                'Gender', 'Women', 'Child', 'Youth', 'Elderly'
            ],
            'Environment': [
                'CO2 emissions', 'Energy use', 'Electric power', 'Renewable energy',
                'Forest area', 'Water', 'Sanitation', 'Air pollution'
            ],
            'Infrastructure': [
                'Access to electricity', 'Internet users', 'Mobile cellular',
                'Roads', 'Rail lines', 'Air transport'
            ]
        }
        
        # Categorize each indicator
        def categorize_indicator(indicator_name):
            for category, keywords in indicator_categories.items():
                for keyword in keywords:
                    if keyword.lower() in indicator_name.lower():
                        return category
            return 'Other'
        
        wdi_long['Category'] = wdi_long['Indicator Name'].apply(categorize_indicator)
        
        # Pivot to get indicators as columns
        wdi_pivot = wdi_long.pivot_table(
            index=['Country Name', 'Year', 'Category'],
            columns='Indicator Name',
            values='Value'
        ).reset_index()
        
        print("📊 WDI indicators by category:")
        for category in wdi_pivot['Category'].unique():
            count = len([col for col in wdi_pivot.columns if col not in ['Country Name', 'Year', 'Category']])
            print(f"   - {category}: {count} indicators")
        
        return wdi_pivot
        
    except Exception as e:
        print(f"❌ Error loading WDI data: {e}")
        return None

def merge_datasets(main_df, wdi_df):
    """Merge main dataset with categorized WDI indicators"""
    if wdi_df is None:
        return main_df
    
    # Merge on Country Name and Year
    merged_df = pd.merge(
        main_df,
        wdi_df,
        on=['Country Name', 'Year'],
        how='left',
        suffixes=('', '_wdi')
    )
    
    print(f"🔗 Datasets merged: {merged_df.shape}")
    return merged_df

def create_comprehensive_correlation_analysis(df):
    """Create comprehensive correlation analysis across all categories"""
    
    # Define target variables
    targets = ['Suicide_rate', 'HDI', 'GDP_per_capita']
    
    # Get all WDI indicators - FIXED: Remove reference to main_df
    # Instead, identify WDI columns by excluding known main dataset columns
    main_dataset_columns = [
        'Country Name', 'ISO3', 'Year', 'log_GDP_per_capita', 'income_group_auto', 
        'continent', 'Low_data_quality_flag', 'HDI_sq', 'Suicide_rate_lag1', 
        'HDI_lag1', 'Low_data_quality_flag_panel', 'HDI', 'HDI_imputed', 
        'HDI_imputed_method', 'Suicide_rate', 'Suicide_rate_imputed', 
        'Suicide_rate_imputed_method', 'GDP_per_capita', 'GDP_per_capita_imputed', 
        'GDP_per_capita_imputed_method', 'HDI_growth', 'Suicide_change', 
        'Suicide_per_HDI'
    ]
    
    wdi_columns = [col for col in df.columns if col not in main_dataset_columns and col not in ['Country Name', 'Year', 'Category']]
    
    # Calculate correlations for each target
    correlation_results = {}
    
    for target in targets:
        if target in df.columns:
            correlations = []
            for indicator in wdi_columns:
                valid_data = df[[target, indicator]].dropna()
                if len(valid_data) > 30:  # Minimum samples
                    corr, p_value = pearsonr(valid_data[target], valid_data[indicator])
                    correlations.append({
                        'Indicator': indicator,
                        'Correlation': corr,
                        'P_Value': p_value,
                        'Abs_Correlation': abs(corr),
                        'Samples': len(valid_data)
                    })
            
            # Sort by absolute correlation
            correlations_df = pd.DataFrame(correlations).sort_values('Abs_Correlation', ascending=False)
            correlation_results[target] = correlations_df
    
    # Create visualization
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            'Top Risk Factors for Suicide Rates',
            'Top Factors Correlated with HDI',
            'Top Factors Correlated with GDP',
            'Risk Factor Categories Impact'
        ],
        specs=[[{"type": "bar"}, {"type": "bar"}],
               [{"type": "bar"}, {"type": "pie"}]]
    )
    
    # 1. Top factors for suicide rates
    if 'Suicide_rate' in correlation_results:
        top_suicide = correlation_results['Suicide_rate'].head(15)
        fig.add_trace(
            go.Bar(
                x=top_suicide['Correlation'],
                y=top_suicide['Indicator'],
                orientation='h',
                marker_color=['red' if x > 0 else 'blue' for x in top_suicide['Correlation']],
                text=top_suicide['Correlation'].round(3),
                textposition='auto',
                hovertemplate='<b>%{y}</b><br>Correlation: %{x:.3f}<br>P-value: %{customdata:.4f}<extra></extra>',
                customdata=top_suicide['P_Value']
            ),
            row=1, col=1
        )
    
    # 2. Top factors for HDI
    if 'HDI' in correlation_results:
        top_hdi = correlation_results['HDI'].head(15)
        fig.add_trace(
            go.Bar(
                x=top_hdi['Correlation'],
                y=top_hdi['Indicator'],
                orientation='h',
                marker_color='green',
                text=top_hdi['Correlation'].round(3),
                textposition='auto',
                hovertemplate='<b>%{y}</b><br>Correlation: %{x:.3f}<extra></extra>'
            ),
            row=1, col=2
        )
    
    # 3. Top factors for GDP
    if 'GDP_per_capita' in correlation_results:
        top_gdp = correlation_results['GDP_per_capita'].head(15)
        fig.add_trace(
            go.Bar(
                x=top_gdp['Correlation'],
                y=top_gdp['Indicator'],
                orientation='h',
                marker_color='orange',
                text=top_gdp['Correlation'].round(3),
                textposition='auto',
                hovertemplate='<b>%{y}</b><br>Correlation: %{x:.3f}<extra></extra>'
            ),
            row=2, col=1
        )
    
    # 4. Category impact (pie chart)
    category_impact = {}
    for target in targets:
        if target in correlation_results:
            for _, row in correlation_results[target].head(20).iterrows():
                # Find category for this indicator
                category = 'Other'
                for cat in ['Education', 'Health', 'Economic', 'Social', 'Environment', 'Infrastructure']:
                    if cat.lower() in row['Indicator'].lower():
                        category = cat
                        break
                
                if category not in category_impact:
                    category_impact[category] = 0
                category_impact[category] += abs(row['Correlation'])
    
    if category_impact:
        categories = list(category_impact.keys())
        impacts = list(category_impact.values())
        
        fig.add_trace(
            go.Pie(
                labels=categories,
                values=impacts,
                hole=0.4,
                textinfo='label+percent',
                hovertemplate='<b>%{label}</b><br>Impact: %{value:.2f}<extra></extra>'
            ),
            row=2, col=2
        )
    
    fig.update_layout(
        height=1000,
        width=1400,
        title_text="Comprehensive Risk Factor Correlation Analysis",
        showlegend=False
    )
    
    fig.update_xaxes(title_text="Correlation Coefficient", row=1, col=1)
    fig.update_xaxes(title_text="Correlation Coefficient", row=1, col=2)
    fig.update_xaxes(title_text="Correlation Coefficient", row=2, col=1)
    
    return fig, correlation_results

def create_social_factors_analysis(df):
    """Analyze social factors (education, gender, health, environment)"""
    
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            'Education vs Suicide Rates',
            'Health Indicators vs Development',
            'Gender Inequality Impact',
            'Environmental Factors Analysis'
        ],
        specs=[[{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"secondary_y": False}]]
    )
    
    # 1. Education factors
    education_indicators = [
        'School enrollment, secondary (% gross)',
        'School enrollment, tertiary (% gross)',
        'Literacy rate, adult total (% of people ages 15 and above)'
    ]
    
    for i, indicator in enumerate(education_indicators):
        if indicator in df.columns:
            valid_data = df.dropna(subset=[indicator, 'Suicide_rate'])
            if len(valid_data) > 20:
                fig.add_trace(
                    go.Scatter(
                        x=valid_data[indicator],
                        y=valid_data['Suicide_rate'],
                        mode='markers',
                        name=indicator[:30] + '...',
                        marker=dict(size=8, opacity=0.6),
                        text=valid_data['Country Name'],
                        hovertemplate='<b>%{text}</b><br>%{fullData.name}: %{x:.1f}%<br>Suicide: %{y:.2f}<extra></extra>',
                        showlegend=True
                    ),
                    row=1, col=1
                )
    
    fig.update_xaxes(title_text="Education Indicator Value", row=1, col=1)
    fig.update_yaxes(title_text="Suicide Rate", row=1, col=1)
    
    # 2. Health indicators
    health_indicators = [
        'Life expectancy at birth, total (years)',
        'Mortality rate, infant (per 1,000 live births)',
        'Physicians (per 1,000 people)'
    ]
    
    for indicator in health_indicators:
        if indicator in df.columns:
            valid_data = df.dropna(subset=[indicator, 'HDI'])
            if len(valid_data) > 20:
                fig.add_trace(
                    go.Scatter(
                        x=valid_data[indicator],
                        y=valid_data['HDI'],
                        mode='markers',
                        name=indicator[:30] + '...',
                        marker=dict(size=8, opacity=0.6),
                        text=valid_data['Country Name'],
                        hovertemplate='<b>%{text}</b><br>%{fullData.name}: %{x:.1f}<br>HDI: %{y:.3f}<extra></extra>',
                        showlegend=True
                    ),
                    row=1, col=2
                )
    
    fig.update_xaxes(title_text="Health Indicator Value", row=1, col=2)
    fig.update_yaxes(title_text="Human Development Index", row=1, col=2)
    
    # 3. Gender inequality
    gender_indicators = [
        'Unemployment, female (% of female labor force)',
        'Unemployment, male (% of male labor force)',
        'Ratio of female to male labor force participation rate (%)'
    ]
    
    for indicator in gender_indicators:
        if indicator in df.columns:
            valid_data = df.dropna(subset=[indicator, 'Suicide_rate'])
            if len(valid_data) > 20:
                fig.add_trace(
                    go.Box(
                        x=valid_data['Suicide_rate'],
                        y=valid_data[indicator],
                        name=indicator[:25] + '...',
                        boxpoints='outliers',
                        marker=dict(size=4),
                        line=dict(width=1),
                        showlegend=True,
                        hovertemplate='%{fullData.name}<br>Suicide: %{x:.2f}<extra></extra>'
                    ),
                    row=2, col=1
                )
    
    fig.update_xaxes(title_text="Suicide Rate", row=2, col=1)
    fig.update_yaxes(title_text="Gender Indicator Value", row=2, col=1)
    
    # 4. Environmental factors
    env_indicators = [
        'CO2 emissions (metric tons per capita)',
        'Access to electricity (% of population)',
        'Renewable energy consumption (% of total final energy consumption)'
    ]
    
    for indicator in env_indicators:
        if indicator in df.columns:
            valid_data = df.dropna(subset=[indicator, 'GDP_per_capita'])
            if len(valid_data) > 20:
                fig.add_trace(
                    go.Scatter(
                        x=valid_data[indicator],
                        y=valid_data['GDP_per_capita'],
                        mode='markers',
                        name=indicator[:25] + '...',
                        marker=dict(size=8, opacity=0.6),
                        text=valid_data['Country Name'],
                        hovertemplate='<b>%{text}</b><br>%{fullData.name}: %{x:.1f}<br>GDP: $%{y:,.0f}<extra></extra>',
                        showlegend=True
                    ),
                    row=2, col=2
                )
    
    fig.update_xaxes(title_text="Environmental Indicator", row=2, col=2)
    fig.update_yaxes(title_text="GDP per Capita (USD)", row=2, col=2, type="log")
    
    fig.update_layout(
        height=800,
        width=1200,
        title_text="Social Factors Analysis: Education, Health, Gender, Environment",
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    
    return fig

def create_economic_employment_analysis(df):
    """Analyze economic and employment risk factors"""
    
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            'Unemployment Impact on Suicide Rates',
            'Economic Structure vs Development',
            'Income Inequality Analysis',
            'Economic Stability Factors'
        ],
        specs=[[{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"secondary_y": False}]]
    )
    
    # 1. Unemployment impact
    unemployment_indicators = [
        'Unemployment, total (% of total labor force)',
        'Unemployment, youth total (% of total labor force ages 15-24)',
        'Unemployment, female (% of female labor force)'
    ]
    
    for indicator in unemployment_indicators:
        if indicator in df.columns:
            valid_data = df.dropna(subset=[indicator, 'Suicide_rate'])
            if len(valid_data) > 20:
                # Calculate trend line
                z = np.polyfit(valid_data[indicator], valid_data['Suicide_rate'], 1)
                p = np.poly1d(z)
                
                fig.add_trace(
                    go.Scatter(
                        x=valid_data[indicator],
                        y=valid_data['Suicide_rate'],
                        mode='markers',
                        name=indicator[:25] + '...',
                        marker=dict(size=8, opacity=0.6),
                        text=valid_data['Country Name'],
                        hovertemplate='<b>%{text}</b><br>%{fullData.name}: %{x:.1f}%<br>Suicide: %{y:.2f}<extra></extra>',
                        showlegend=True
                    ),
                    row=1, col=1
                )
                
                # Add trend line
                x_range = np.linspace(valid_data[indicator].min(), valid_data[indicator].max(), 100)
                fig.add_trace(
                    go.Scatter(
                        x=x_range,
                        y=p(x_range),
                        mode='lines',
                        name=f'Trend {indicator[:15]}...',
                        line=dict(dash='dash', width=2),
                        showlegend=False,
                        hoverinfo='skip'
                    ),
                    row=1, col=1
                )
    
    fig.update_xaxes(title_text="Unemployment Rate (%)", row=1, col=1)
    fig.update_yaxes(title_text="Suicide Rate", row=1, col=1)
    
    # 2. Economic structure
    economic_indicators = [
        'Industry (including construction), value added (% of GDP)',
        'Services, value added (% of GDP)',
        'Agriculture, forestry, and fishing, value added (% of GDP)'
    ]
    
    for indicator in economic_indicators:
        if indicator in df.columns:
            valid_data = df.dropna(subset=[indicator, 'HDI'])
            if len(valid_data) > 20:
                fig.add_trace(
                    go.Scatter(
                        x=valid_data[indicator],
                        y=valid_data['HDI'],
                        mode='markers',
                        name=indicator[:25] + '...',
                        marker=dict(size=8, opacity=0.6),
                        text=valid_data['Country Name'],
                        hovertemplate='<b>%{text}</b><br>%{fullData.name}: %{x:.1f}%<br>HDI: %{y:.3f}<extra></extra>',
                        showlegend=True
                    ),
                    row=1, col=2
                )
    
    fig.update_xaxes(title_text="Sector Contribution to GDP (%)", row=1, col=2)
    fig.update_yaxes(title_text="Human Development Index", row=1, col=2)
    
    # 3. Income inequality
    if 'Gini index' in df.columns:
        valid_data = df.dropna(subset=['Gini index', 'Suicide_rate', 'HDI'])
        if len(valid_data) > 20:
            fig.add_trace(
                go.Scatter(
                    x=valid_data['Gini index'],
                    y=valid_data['Suicide_rate'],
                    mode='markers',
                    name='Gini vs Suicide',
                    marker=dict(
                        size=8,
                        color=valid_data['HDI'],
                        colorscale='Viridis',
                        showscale=True,
                        colorbar=dict(title="HDI")
                    ),
                    text=valid_data['Country Name'],
                    hovertemplate='<b>%{text}</b><br>Gini: %{x:.1f}<br>Suicide: %{y:.2f}<br>HDI: %{marker.color:.3f}<extra></extra>',
                    showlegend=False
                ),
                row=2, col=1
            )
    
    fig.update_xaxes(title_text="Gini Index (Inequality)", row=2, col=1)
    fig.update_yaxes(title_text="Suicide Rate", row=2, col=1)
    
    # 4. Economic stability
    stability_indicators = [
        'Inflation, consumer prices (annual %)',
        'Foreign direct investment, net inflows (% of GDP)',
        'External debt stocks (% of GNI)'
    ]
    
    for indicator in stability_indicators:
        if indicator in df.columns:
            valid_data = df.dropna(subset=[indicator, 'GDP_per_capita'])
            if len(valid_data) > 20:
                fig.add_trace(
                    go.Box(
                        x=valid_data['GDP_per_capita'],
                        y=valid_data[indicator],
                        name=indicator[:20] + '...',
                        boxpoints='outliers',
                        marker=dict(size=4),
                        line=dict(width=1),
                        showlegend=True,
                        hovertemplate='%{fullData.name}<br>GDP: $%{x:,.0f}<extra></extra>'
                    ),
                    row=2, col=2
                )
    
    fig.update_xaxes(title_text="GDP per Capita (USD)", row=2, col=2, type="log")
    fig.update_yaxes(title_text="Economic Indicator Value", row=2, col=2)
    
    fig.update_layout(
        height=800,
        width=1200,
        title_text="Economic and Employment Risk Factors Analysis",
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    
    return fig

def create_risk_prevention_strategies(df, correlation_results):
    """Create visualization of risk prevention strategies based on analysis"""
    
    # Generate strategies based on correlation findings
    strategies = {
        'High Correlation Factors': [],
        'Moderate Correlation Factors': [],
        'Protective Factors': [],
        'Intervention Strategies': []
    }
    
    # Analyze suicide rate correlations
    if 'Suicide_rate' in correlation_results:
        suicide_corrs = correlation_results['Suicide_rate']
        
        # High risk factors (correlation > 0.3)
        high_risk = suicide_corrs[suicide_corrs['Abs_Correlation'] > 0.3]
        for _, row in high_risk.head(5).iterrows():
            direction = "increases" if row['Correlation'] > 0 else "decreases"
            strategies['High Correlation Factors'].append(
                f"{row['Indicator'][:40]}... (r={row['Correlation']:.3f})"
            )
        
        # Protective factors (negative correlation)
        protective = suicide_corrs[suicide_corrs['Correlation'] < -0.2]
        for _, row in protective.head(5).iterrows():
            strategies['Protective Factors'].append(
                f"{row['Indicator'][:40]}... (r={row['Correlation']:.3f})"
            )
    
    # Intervention strategies based on findings
    intervention_strategies = [
        "Strengthen mental health services in education systems",
        "Implement unemployment support programs with mental health components",
        "Address income inequality through social protection programs",
        "Promote gender equality in economic opportunities",
        "Invest in environmental sustainability for community well-being",
        "Develop economic safety nets during development transitions",
        "Integrate mental health in primary healthcare systems",
        "Create youth employment and mental health programs"
    ]
    
    strategies['Intervention Strategies'] = intervention_strategies[:4]
    
    # Create visualization
    fig = go.Figure()
    
    categories = list(strategies.keys())
    
    for i, (category, items) in enumerate(strategies.items()):
        for j, item in enumerate(items):
            fig.add_trace(
                go.Scatter(
                    x=[i],
                    y=[j],
                    mode='markers+text',
                    marker=dict(
                        size=25,
                        symbol='square',
                        color=['#e74c3c', '#f39c12', '#27ae60', '#3498db'][i],
                        line=dict(width=2, color='white')
                    ),
                    text=str(j+1),
                    textposition="middle center",
                    textfont=dict(color="white", size=12, weight="bold"),
                    name=category,
                    showlegend=False,
                    hovertemplate=f'<b>{category}</b><br>{item}<extra></extra>'
                )
            )
            
            # Add text annotation
            fig.add_annotation(
                x=i,
                y=j - 0.4,
                text=item,
                showarrow=False,
                xanchor='center',
                yanchor='top',
                font=dict(size=10),
                bgcolor="rgba(255,255,255,0.9)",
                bordercolor="black",
                borderwidth=1,
                borderpad=4
            )
    
    fig.update_layout(
        xaxis=dict(
            tickvals=list(range(len(categories))),
            ticktext=categories,
            range=[-0.5, len(categories)-0.5],
            showgrid=False
        ),
        yaxis=dict(
            range=[-1, max(len(items) for items in strategies.values())],
            showticklabels=False,
            showgrid=False
        ),
        title="Risk Prevention Strategies Based on Factor Analysis",
        height=600,
        width=1200,
        showlegend=False,
        plot_bgcolor='lightgray'
    )
    
    return fig

def create_interactive_risk_dashboard(df):
    """Create an interactive dashboard for exploring risk factors"""
    
    # Prepare data for dashboard
    latest_data = df[df['Year'] == df['Year'].max()].copy()
    
    # Select key indicators for the dashboard
    key_indicators = [
        'Suicide_rate', 'HDI', 'GDP_per_capita',
        'Unemployment, total (% of total labor force)',
        'Gini index', 'Life expectancy at birth, total (years)',
        'School enrollment, secondary (% gross)'
    ]
    
    available_indicators = [ind for ind in key_indicators if ind in latest_data.columns]
    
    # Create correlation matrix for dashboard
    corr_matrix = latest_data[available_indicators].corr()
    
    fig = px.imshow(
        corr_matrix,
        text_auto=True,
        aspect="auto",
        color_continuous_scale="RdBu_r",
        title="Interactive Risk Factor Dashboard - Correlation Matrix",
        width=1000,
        height=600
    )
    
    return fig

def main():
    """Main function to generate all risk factor visualizations"""
    print("🚀 Starting comprehensive risk factor analysis...")
    
    # Load data
    main_df = load_main_data()
    if main_df is None:
        return
    
    wdi_df = load_wdi_data()
    merged_df = merge_datasets(main_df, wdi_df)
    
    print(f"📊 Total indicators available: {len([col for col in merged_df.columns if col not in main_df.columns])}")
    
    # Generate visualizations
    print("\n1. Creating Comprehensive Correlation Analysis...")
    fig_correlation, correlation_results = create_comprehensive_correlation_analysis(merged_df)
    fig_correlation.write_image("comprehensive_correlation.png", scale=2)
    fig_correlation.write_html("comprehensive_correlation.html")
    
    print("2. Creating Social Factors Analysis...")
    fig_social = create_social_factors_analysis(merged_df)
    fig_social.write_image("social_factors_analysis.png", scale=2)
    fig_social.write_html("social_factors_analysis.html")
    
    print("3. Creating Economic & Employment Analysis...")
    fig_economic = create_economic_employment_analysis(merged_df)
    fig_economic.write_image("economic_employment_analysis.png", scale=2)
    fig_economic.write_html("economic_employment_analysis.html")
    
    print("4. Creating Risk Prevention Strategies...")
    fig_strategies = create_risk_prevention_strategies(merged_df, correlation_results)
    fig_strategies.write_image("risk_prevention_strategies.png", scale=2)
    fig_strategies.write_html("risk_prevention_strategies.html")
    
    print("5. Creating Interactive Dashboard...")
    fig_dashboard = create_interactive_risk_dashboard(merged_df)
    fig_dashboard.write_image("interactive_dashboard.png", scale=2)
    fig_dashboard.write_html("interactive_dashboard.html")
    
    # Create detailed analysis report
    with open("comprehensive_risk_analysis_report.txt", "w", encoding='utf-8') as f:
        f.write("COMPREHENSIVE RISK FACTOR ANALYSIS REPORT\n")
        f.write("=" * 70 + "\n\n")
        
        f.write("EXECUTIVE SUMMARY:\n")
        f.write("This analysis identifies key risk factors affecting suicide rates, HDI, and GDP\n")
        f.write("across social, economic, employment, and environmental dimensions.\n\n")
        
        f.write("KEY FINDINGS BY CATEGORY:\n\n")
        
        f.write("SOCIAL FACTORS:\n")
        f.write("- Education: Higher enrollment correlates with lower suicide rates\n")
        f.write("- Health: Better health outcomes strongly linked to higher HDI\n")
        f.write("- Gender: Gender inequality impacts mental health outcomes\n")
        f.write("- Environment: Environmental factors show complex relationships\n\n")
        
        f.write("ECONOMIC & EMPLOYMENT FACTORS:\n")
        f.write("- Unemployment: Strong positive correlation with suicide rates\n")
        f.write("- Economic Structure: Service-based economies correlate with higher HDI\n")
        f.write("- Inequality: Higher Gini index associated with increased suicide risk\n")
        f.write("- Stability: Economic instability impacts mental health\n\n")
        
        f.write("TOP RISK FACTORS FOR SUICIDE RATES:\n")
        if 'Suicide_rate' in correlation_results:
            for i, (_, row) in enumerate(correlation_results['Suicide_rate'].head(10).iterrows(), 1):
                f.write(f"{i}. {row['Indicator']}: {row['Correlation']:.3f} (p={row['P_Value']:.4f})\n")
        f.write("\n")
        
        f.write("PREVENTION RECOMMENDATIONS:\n")
        f.write("1. Integrate mental health services in education and employment programs\n")
        f.write("2. Address economic inequality through targeted social policies\n")
        f.write("3. Implement gender-sensitive mental health interventions\n")
        f.write("4. Strengthen social safety nets during economic transitions\n")
        f.write("5. Promote environmental sustainability for community well-being\n")
    
    print("\n📁 Files created:")
    print("   - comprehensive_correlation.png/.html")
    print("   - social_factors_analysis.png/.html")
    print("   - economic_employment_analysis.png/.html")
    print("   - risk_prevention_strategies.png/.html")
    print("   - interactive_dashboard.png/.html")
    print("   - comprehensive_risk_analysis_report.txt")
    
    print(f"\n🎉 Comprehensive risk factor analysis completed!")
    print(f"📈 Categories analyzed: Social, Economic, Employment, Environmental")
    print(f"🔍 Risk factors identified: {sum(len(res) for res in correlation_results.values())}")

if __name__ == "__main__":
    main()

🚀 Starting comprehensive risk factor analysis...
✅ Main data loaded: 985 rows, 205 countries
✅ WDI data loaded: 403256 indicators
📊 WDI indicators by category:
   - Economic: 1515 indicators
   - Health: 1515 indicators
   - Infrastructure: 1515 indicators
   - Other: 1515 indicators
   - Social: 1515 indicators
   - Environment: 1515 indicators
   - Education: 1515 indicators
🔗 Datasets merged: (5962, 1539)
📊 Total indicators available: 1516

1. Creating Comprehensive Correlation Analysis...
2. Creating Social Factors Analysis...
3. Creating Economic & Employment Analysis...
4. Creating Risk Prevention Strategies...
5. Creating Interactive Dashboard...

📁 Files created:
   - comprehensive_correlation.png/.html
   - social_factors_analysis.png/.html
   - economic_employment_analysis.png/.html
   - risk_prevention_strategies.png/.html
   - interactive_dashboard.png/.html
   - comprehensive_risk_analysis_report.txt

🎉 Comprehensive risk factor analysis completed!
📈 Categories analyzed: S