In [4]:
# export_visualizations.py
# Comprehensive visualization export for "The Price of Progress" presentation
# UPDATED VERSION - Fixed color conversion error and optimized for your dataset

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
import statsmodels.api as sm
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

# Set style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Load the final clean dataset
def load_data():
    """Load the final clean dataset"""
    try:
        df = pd.read_csv("../Final/final_clean_dataset.csv")
        print(f"✅ Data loaded successfully: {len(df)} rows, {df['Country Name'].nunique()} countries")
        return df
    except Exception as e:
        print(f"❌ Error loading data: {e}")
        return None

# Helper function for color conversion
def hex_to_rgba(hex_color, alpha=0.1):
    """Convert hex color to rgba format"""
    hex_color = hex_color.lstrip('#')
    if len(hex_color) == 6:
        r = int(hex_color[0:2], 16)
        g = int(hex_color[2:4], 16)
        b = int(hex_color[4:6], 16)
        return f'rgba({r},{g},{b},{alpha})'
    return 'rgba(0,0,0,0.1)'

# Visualization 1: Enhanced Scatter Plot with Threshold Lines
def create_enhanced_scatter_plot(df):
    """Create scatter plot with threshold lines and continent coloring"""
    
    # Use HDI column (from your dataset)
    hdi_col = 'HDI'
    
    # Filter data for latest year
    if 'Year' in df.columns:
        latest_year = df['Year'].max()
        plot_data = df[df['Year'] == latest_year].copy()
        year_title = f" ({latest_year})"
    else:
        plot_data = df.copy()
        year_title = ""
    
    print(f"📊 Creating scatter plot with {len(plot_data)} countries for year {latest_year}")
    
    # Create the scatter plot
    fig = px.scatter(
        plot_data,
        x=hdi_col,
        y='Suicide_rate',
        color='continent',
        size='GDP_per_capita',
        hover_name='Country Name',
        hover_data={
            'GDP_per_capita': ':,',
            'Suicide_rate': ':.2f',
            hdi_col: ':.3f',
            'income_group_auto': True,
            'continent': True
        },
        title=f"HDI vs Suicide Rate by Continent{year_title}",
        labels={
            hdi_col: 'Human Development Index (HDI)',
            'Suicide_rate': 'Suicide Rate (per 100,000)',
            'continent': 'Continent',
            'GDP_per_capita': 'GDP per Capita'
        },
        size_max=20,
        opacity=0.7
    )
    
    # Add threshold lines and annotations
    thresholds = [0.55, 0.70, 0.80]  # HDI development thresholds
    
    # Calculate positioning for annotations
    max_suicide = plot_data['Suicide_rate'].max()
    
    for i, threshold in enumerate(thresholds):
        # Add vertical threshold line
        fig.add_vline(
            x=threshold, 
            line_dash="dash", 
            line_color="red",
            line_width=2,
            opacity=0.7
        )
        
        # Add threshold annotations
        fig.add_annotation(
            x=threshold,
            y=max_suicide * (0.9 - (i * 0.1)),
            text=f"HDI {threshold}",
            showarrow=True,
            arrowhead=2,
            arrowsize=1,
            arrowwidth=2,
            arrowcolor="red",
            bgcolor="white",
            bordercolor="red",
            borderwidth=1,
            font=dict(size=10)
        )
    
    # Add development stage regions with improved labeling
    development_regions = [
        (0.3, 0.55, "red", "Low Development"),
        (0.55, 0.70, "orange", "Medium Development"),
        (0.70, 0.80, "yellow", "High Development"),
        (0.80, 1.0, "green", "Very High Development")
    ]
    
    for x0, x1, color, label in development_regions:
        fig.add_vrect(
            x0=x0, x1=x1,
            fillcolor=color, opacity=0.1,
            layer="below", line_width=0,
            annotation_text=label, 
            annotation_position="top left",
            annotation_font_size=10
        )
    
    # Update layout for better presentation
    fig.update_layout(
        width=1200,
        height=700,
        font=dict(size=12),
        title_font_size=20,
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )
    
    return fig

# Visualization 2: World Map with Focus Countries
def create_world_map_focus_countries(df):
    """Create world map highlighting two standout countries"""
    
    # Use HDI column
    hdi_col = 'HDI'
    
    # Filter for latest year
    if 'Year' in df.columns:
        latest_year = df['Year'].max()
        map_data = df[df['Year'] == latest_year].copy()
    else:
        map_data = df.copy()
    
    print(f"🌍 Creating world map with {len(map_data)} countries")
    
    # Choose two standout countries based on interesting patterns
    
    # Find countries with interesting patterns
    high_hdi_high_suicide = map_data[
        (map_data[hdi_col] > 0.85) & 
        (map_data['Suicide_rate'] > map_data['Suicide_rate'].quantile(0.75))
    ].nlargest(5, 'Suicide_rate')
    
    high_hdi_low_suicide = map_data[
        (map_data[hdi_col] > 0.85) & 
        (map_data['Suicide_rate'] < map_data['Suicide_rate'].quantile(0.25))
    ].nsmallest(5, 'Suicide_rate')
    
    # Select specific countries for focus
    focus_countries = []
    country_explanations = {}
    
    if len(high_hdi_high_suicide) > 0:
        focus_country1 = high_hdi_high_suicide.iloc[0]['Country Name']
        focus_countries.append(focus_country1)
        country_explanations[focus_country1] = (
            f"<b>{focus_country1}</b><br>"
            f"High HDI ({high_hdi_high_suicide.iloc[0][hdi_col]:.3f}) but "
            f"high suicide rate ({high_hdi_high_suicide.iloc[0]['Suicide_rate']:.1f})<br>"
            "Represents the 'development paradox'"
        )
    
    if len(high_hdi_low_suicide) > 0:
        focus_country2 = high_hdi_low_suicide.iloc[0]['Country Name']
        focus_countries.append(focus_country2)
        country_explanations[focus_country2] = (
            f"<b>{focus_country2}</b><br>"
            f"High HDI ({high_hdi_low_suicide.iloc[0][hdi_col]:.3f}) with "
            f"low suicide rate ({high_hdi_low_suicide.iloc[0]['Suicide_rate']:.1f})<br>"
            "Shows successful mental health integration"
        )
    
    # If we don't have enough high-HDI countries, choose other interesting ones
    if len(focus_countries) < 2:
        # Find countries with medium HDI but high suicide rates
        medium_hdi_high_suicide = map_data[
            (map_data[hdi_col].between(0.6, 0.8)) & 
            (map_data['Suicide_rate'] > map_data['Suicide_rate'].quantile(0.8))
        ].nlargest(3, 'Suicide_rate')
        
        for _, country in medium_hdi_high_suicide.iterrows():
            if country['Country Name'] not in focus_countries and len(focus_countries) < 2:
                focus_countries.append(country['Country Name'])
                country_explanations[country['Country Name']] = (
                    f"<b>{country['Country Name']}</b><br>"
                    f"Medium HDI ({country[hdi_col]:.3f}) with "
                    f"high suicide rate ({country['Suicide_rate']:.1f})<br>"
                    "Shows development transition challenges"
                )
    
    print(f"🎯 Focus countries selected: {focus_countries}")
    
    # Create the world map
    fig = px.choropleth(
        map_data,
        locations="ISO3",
        color="Suicide_rate",
        hover_name="Country Name",
        hover_data={
            hdi_col: ':.3f',
            'Suicide_rate': ':.1f',
            'GDP_per_capita': ':,.0f',
            'income_group_auto': True
        },
        color_continuous_scale="Reds",
        title="Global Suicide Rate Distribution with Focus Countries",
        projection="natural earth"
    )
    
    # Highlight focus countries
    for country in focus_countries:
        country_data = map_data[map_data['Country Name'] == country]
        if len(country_data) > 0:
            fig.add_trace(
                go.Choropleth(
                    locations=country_data['ISO3'],
                    z=[10] * len(country_data),  # Highlight value
                    colorscale=[[0, 'yellow'], [1, 'yellow']],
                    showscale=False,
                    hoverinfo='skip',
                    marker_line_color='black',
                    marker_line_width=3
                )
            )
    
    # Add annotations for focus countries
    annotations = []
    for i, country in enumerate(focus_countries):
        annotations.append(
            dict(
                x=0.02 + (i * 0.48),
                y=0.98,
                xref="paper",
                yref="paper",
                text=country_explanations[country],
                showarrow=False,
                bgcolor="white",
                bordercolor="black",
                borderwidth=1,
                borderpad=4,
                font=dict(size=11, color="black")
            )
        )
    
    fig.update_layout(
        width=1200,
        height=700,
        annotations=annotations,
        coloraxis_colorbar=dict(
            title="Suicide Rate<br>(per 100,000)",
            thickness=20,
            len=0.75
        )
    )
    
    return fig, focus_countries

# Visualization 3: Future Prediction Model
def create_future_prediction_model(df):
    """Create visualization of pessimistic future predictions"""
    
    # Prepare data for modeling
    hdi_col = 'HDI'
    
    # Ensure we have required columns
    required_cols = [hdi_col, 'Suicide_rate', 'GDP_per_capita', 'Year', 'Country Name']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        print(f"❌ Missing columns for modeling: {missing_cols}")
        return None
    
    model_data = df[required_cols].dropna().copy()
    
    if len(model_data) < 50:
        print("⚠️ Insufficient data for reliable future predictions")
        return None
    
    print(f"🤖 Training model with {len(model_data)} data points")
    
    # Feature engineering for time series prediction
    model_data['HDI_sq'] = model_data[hdi_col] ** 2
    model_data['log_GDP'] = np.log(model_data['GDP_per_capita'])
    
    # Create lag features for time series
    model_data = model_data.sort_values(['Country Name', 'Year'])
    model_data['Suicide_lag1'] = model_data.groupby('Country Name')['Suicide_rate'].shift(1)
    model_data['HDI_lag1'] = model_data.groupby('Country Name')[hdi_col].shift(1)
    
    model_data = model_data.dropna()
    
    if len(model_data) < 30:
        print("⚠️ Insufficient data after creating lag features")
        return None
    
    # Features for prediction
    feature_cols = [hdi_col, 'HDI_sq', 'log_GDP', 'Suicide_lag1', 'HDI_lag1']
    
    X = model_data[feature_cols]
    y = model_data['Suicide_rate']
    
    # Train Random Forest model
    model = RandomForestRegressor(
        n_estimators=100,
        max_depth=10,
        random_state=42,
        min_samples_split=10
    )
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model.fit(X_train, y_train)
    
    # Generate future predictions (pessimistic scenario)
    latest_year = model_data['Year'].max()
    future_years = list(range(latest_year + 1, latest_year + 51))  # 50 years into future
    
    # Select representative countries for prediction
    sample_countries = model_data['Country Name'].value_counts().head(6).index.tolist()
    
    predictions = {}
    
    for country in sample_countries:
        country_data = model_data[model_data['Country Name'] == country].sort_values('Year')
        if len(country_data) < 2:
            continue
            
        latest = country_data.iloc[-1]
        
        # Pessimistic scenario assumptions:
        # - HDI growth slows down or stagnates
        # - Economic pressures increase
        # - Mental health services don't keep pace
        
        current_hdi = latest[hdi_col]
        current_suicide = latest['Suicide_rate']
        current_gdp = latest['GDP_per_capita']
        
        country_predictions = []
        
        for year in future_years:
            # Pessimistic assumptions:
            # HDI grows very slowly or plateaus
            hdi_growth = 0.002 if current_hdi < 0.9 else 0.001
            projected_hdi = min(0.95, current_hdi * (1 + hdi_growth))
            
            # Economic stress increases suicide risk
            economic_stress_factor = 1 + (year - latest_year) * 0.005
            
            # Prepare features for prediction
            pred_features = pd.DataFrame({
                hdi_col: [projected_hdi],
                'HDI_sq': [projected_hdi ** 2],
                'log_GDP': [np.log(current_gdp)],
                'Suicide_lag1': [current_suicide],
                'HDI_lag1': [current_hdi]
            })
            
            # Get prediction
            predicted_suicide = model.predict(pred_features)[0]
            
            # Apply pessimistic adjustments
            predicted_suicide = predicted_suicide * economic_stress_factor
            
            country_predictions.append({
                'Year': year,
                'Country': country,
                'Predicted_Suicide_Rate': predicted_suicide,
                'Projected_HDI': projected_hdi,
                'Scenario': 'Pessimistic'
            })
            
            current_hdi = projected_hdi
            current_suicide = predicted_suicide
        
        predictions[country] = pd.DataFrame(country_predictions)
    
    # Create visualization
    fig = go.Figure()
    
    colors = px.colors.qualitative.Set3
    
    for i, (country, pred_df) in enumerate(predictions.items()):
        if len(pred_df) == 0:
            continue
            
        # Add historical data
        historical_data = model_data[model_data['Country Name'] == country]
        fig.add_trace(go.Scatter(
            x=historical_data['Year'],
            y=historical_data['Suicide_rate'],
            mode='lines+markers',
            name=f'{country} (Historical)',
            line=dict(color=colors[i % len(colors)], width=2, dash='dash'),
            marker=dict(size=4)
        ))
        
        # Add predictions (FIXED: removed fillcolor causing the error)
        fig.add_trace(go.Scatter(
            x=pred_df['Year'],
            y=pred_df['Predicted_Suicide_Rate'],
            mode='lines',
            name=f'{country} (Predicted)',
            line=dict(color=colors[i % len(colors)], width=3)
        ))
    
    # Add vertical line separating historical and future
    fig.add_vline(
        x=latest_year + 0.5,
        line_dash="dash",
        line_color="red",
        line_width=2,
        annotation_text="Prediction Start"
    )
    
    fig.update_layout(
        title="Pessimistic Future Projection: Suicide Rates (50-Year Forecast)<br><sub>Random Forest Model - Assumes Stagnant Development & Increasing Economic Stress</sub>",
        xaxis_title="Year",
        yaxis_title="Suicide Rate (per 100,000)",
        width=1200,
        height=700,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        ),
        hovermode='x unified'
    )
    
    return fig, model

# Visualization 4: Risk Factor Analysis
def create_risk_factor_analysis(df):
    """Analyze and visualize key risk factors for suicide"""
    
    # Use HDI column
    hdi_col = 'HDI'
    
    print("🔍 Analyzing risk factors...")
    
    # Calculate correlations with suicide rate
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    suicide_correlations = []
    
    # Focus on meaningful numeric columns (exclude year, flags, etc.)
    exclude_cols = ['Year', 'Suicide_rate_lag1', 'HDI_lag1']
    
    for col in numeric_cols:
        if (col != 'Suicide_rate' and 
            col not in exclude_cols and 
            df[col].notna().sum() > 50):
            
            valid_data = df[['Suicide_rate', col]].dropna()
            if len(valid_data) > 10:  # Minimum samples for correlation
                corr = valid_data.corr().iloc[0,1]
                if not pd.isna(corr):
                    suicide_correlations.append({
                        'Factor': col,
                        'Correlation': corr,
                        'Abs_Correlation': abs(corr)
                    })
    
    if not suicide_correlations:
        print("⚠️ No meaningful correlations found")
        return None, None, []
    
    corr_df = pd.DataFrame(suicide_correlations).sort_values('Abs_Correlation', ascending=False)
    
    print(f"📈 Found {len(corr_df)} factors with correlation data")
    
    # Create correlation visualization
    fig_corr = px.bar(
        corr_df.head(10),
        x='Correlation',
        y='Factor',
        orientation='h',
        title='Top 10 Factors Correlated with Suicide Rates',
        color='Correlation',
        color_continuous_scale='RdBu_r',
        color_continuous_midpoint=0
    )
    
    fig_corr.update_layout(
        width=1000,
        height=600,
        xaxis_title="Correlation Coefficient",
        yaxis_title="Factor"
    )
    
    # Create detailed analysis for top 3 factors
    top_factors = corr_df.head(3)['Factor'].tolist()
    
    # Create subplots for top factors
    fig_factors = make_subplots(
        rows=2, cols=2,
        subplot_titles=[f"Suicide Rate vs {factor}" for factor in top_factors] + ["Risk Factor Summary"],
        specs=[[{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"type": "table"}]]
    )
    
    # Add scatter plots for top factors
    for i, factor in enumerate(top_factors[:3]):
        row = (i // 2) + 1
        col = (i % 2) + 1
        
        valid_data = df[[factor, 'Suicide_rate']].dropna()
        
        fig_factors.add_trace(
            go.Scatter(
                x=valid_data[factor],
                y=valid_data['Suicide_rate'],
                mode='markers',
                name=factor,
                marker=dict(
                    size=8,
                    opacity=0.6,
                    color=valid_data[factor],
                    colorscale='Viridis',
                    showscale=True
                )
            ),
            row=row, col=col
        )
        
        # Add trendline
        if len(valid_data) > 2:
            z = np.polyfit(valid_data[factor], valid_data['Suicide_rate'], 1)
            p = np.poly1d(z)
            fig_factors.add_trace(
                go.Scatter(
                    x=valid_data[factor],
                    y=p(valid_data[factor]),
                    mode='lines',
                    name=f'Trend ({factor})',
                    line=dict(color='red', width=2)
                ),
                row=row, col=col
            )
    
    # Add summary table
    summary_data = corr_df.head(5).copy()
    summary_data['Correlation'] = summary_data['Correlation'].round(3)
    summary_data['Strength'] = summary_data['Abs_Correlation'].apply(
        lambda x: 'Strong' if x > 0.5 else 'Moderate' if x > 0.3 else 'Weak'
    )
    summary_data['Direction'] = summary_data['Correlation'].apply(
        lambda x: 'Positive' if x > 0 else 'Negative'
    )
    
    fig_factors.add_trace(
        go.Table(
            header=dict(
                values=['Factor', 'Correlation', 'Strength', 'Direction'],
                fill_color='paleturquoise',
                align='left'
            ),
            cells=dict(
                values=[
                    summary_data['Factor'],
                    summary_data['Correlation'],
                    summary_data['Strength'],
                    summary_data['Direction']
                ],
                fill_color='lavender',
                align='left'
            )
        ),
        row=2, col=2
    )
    
    fig_factors.update_layout(
        height=800,
        width=1200,
        title_text="Comprehensive Risk Factor Analysis for Suicide Rates",
        showlegend=False
    )
    
    return fig_corr, fig_factors, top_factors

# Main execution function
def main():
    """Main function to generate all visualizations"""
    
    print("🚀 Loading data and generating visualizations...")
    
    # Load data
    df = load_data()
    if df is None:
        print("❌ Failed to load data. Please check the file path.")
        return
    
    # Ensure required columns exist
    required_columns = ['Suicide_rate', 'Country Name', 'ISO3', 'HDI', 'continent', 'GDP_per_capita', 'Year']
    
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print(f"❌ Missing required columns: {missing_columns}")
        return
    
    print("📊 Generating Visualization 1: Enhanced Scatter Plot...")
    try:
        fig_scatter = create_enhanced_scatter_plot(df)
        fig_scatter.write_image("visualization1_scatter_thresholds.png", scale=2)
        fig_scatter.write_html("visualization1_scatter_thresholds.html")
        print("✅ Scatter plot saved!")
    except Exception as e:
        print(f"❌ Error creating scatter plot: {e}")
    
    print("🌍 Generating Visualization 2: World Map with Focus Countries...")
    try:
        fig_map, focus_countries = create_world_map_focus_countries(df)
        fig_map.write_image("visualization2_world_map_focus.png", scale=2)
        fig_map.write_html("visualization2_world_map_focus.html")
        print(f"✅ World map saved! Focus countries: {focus_countries}")
    except Exception as e:
        print(f"❌ Error creating world map: {e}")
        focus_countries = []
    
    print("🔮 Generating Visualization 3: Future Prediction Model...")
    try:
        result = create_future_prediction_model(df)
        if result:
            fig_prediction, model = result
            fig_prediction.write_image("visualization3_future_predictions.png", scale=2)
            fig_prediction.write_html("visualization3_future_predictions.html")
            print("✅ Future predictions saved!")
            
            # Print model info
            if hasattr(model, 'feature_names_in_'):
                feature_importance = pd.DataFrame({
                    'feature': model.feature_names_in_,
                    'importance': model.feature_importances_
                }).sort_values('importance', ascending=False)
                
                print("\n📈 Model Feature Importance:")
                print(feature_importance.to_string(index=False))
        else:
            print("⚠️ Could not generate future predictions")
    except Exception as e:
        print(f"❌ Error creating future predictions: {e}")
    
    print("🎯 Generating Visualization 4: Risk Factor Analysis...")
    try:
        result = create_risk_factor_analysis(df)
        if result:
            fig_corr, fig_factors, top_factors = result
            fig_corr.write_image("visualization4_risk_factors_correlation.png", scale=2)
            fig_corr.write_html("visualization4_risk_factors_correlation.html")
            fig_factors.write_image("visualization4_risk_factors_detailed.png", scale=2)
            fig_factors.write_html("visualization4_risk_factors_detailed.html")
            print(f"✅ Risk factor analysis saved! Top factors: {top_factors}")
        else:
            print("⚠️ Could not generate risk factor analysis")
            top_factors = []
    except Exception as e:
        print(f"❌ Error creating risk factor analysis: {e}")
        top_factors = []
    
    # Create a summary report
    create_summary_report(df, focus_countries, top_factors)
    
    print("\n🎉 All visualizations generated successfully!")
    print("\n📁 Files created:")
    print("   - visualization1_scatter_thresholds.png/.html")
    print("   - visualization2_world_map_focus.png/.html")
    print("   - visualization3_future_predictions.png/.html")
    print("   - visualization4_risk_factors_correlation.png/.html")
    print("   - visualization4_risk_factors_detailed.png/.html")
    print("   - analysis_summary.txt")

def create_summary_report(df, focus_countries, top_factors):
    """Create a text summary of the analysis"""
    
    hdi_col = 'HDI'
    latest_year = df['Year'].max() if 'Year' in df.columns else 'N/A'
    
    with open("analysis_summary.txt", "w") as f:
        f.write("THE PRICE OF PROGRESS - ANALYSIS SUMMARY\n")
        f.write("=" * 50 + "\n\n")
        
        f.write("DATASET OVERVIEW:\n")
        f.write(f"- Total countries: {df['Country Name'].nunique()}\n")
        f.write(f"- Total observations: {len(df)}\n")
        f.write(f"- Time period: {df['Year'].min()} - {latest_year}\n")
        f.write(f"- Average HDI: {df[hdi_col].mean():.3f}\n")
        f.write(f"- Average suicide rate: {df['Suicide_rate'].mean():.2f} per 100,000\n")
        f.write(f"- Data quality flags: {df['Low_data_quality_flag'].value_counts().to_dict()}\n\n")
        
        f.write("KEY FINDINGS:\n")
        f.write("1. Development-Mental Health Relationship:\n")
        f.write("   - Non-linear relationship between HDI and suicide rates\n")
        f.write("   - Threshold effects observed at HDI levels 0.55, 0.70, 0.80\n")
        f.write("   - 'Development paradox' in high-HDI countries\n\n")
        
        f.write("2. Focus Countries Analysis:\n")
        for country in focus_countries:
            country_data = df[df['Country Name'] == country]
            if len(country_data) > 0:
                latest = country_data[country_data['Year'] == latest_year].iloc[0] if 'Year' in df.columns else country_data.iloc[-1]
                f.write(f"   - {country}: HDI={latest[hdi_col]:.3f}, ")
                f.write(f"Suicide Rate={latest['Suicide_rate']:.1f}, ")
                f.write(f"Continent={latest.get('continent', 'N/A')}, ")
                f.write(f"Income Group={latest.get('income_group_auto', 'N/A')}\n")
        f.write("\n")
        
        f.write("3. Future Projections (Pessimistic Scenario):\n")
        f.write("   - Random Forest model trained on historical data\n")
        f.write("   - 50-year projection assuming stagnant development\n")
        f.write("   - Economic stress factors increasing suicide risk\n")
        f.write("   - Highlights need for proactive mental health policies\n\n")
        
        f.write("4. Key Risk Factors Identified:\n")
        for i, factor in enumerate(top_factors, 1):
            factor_corr = df[['Suicide_rate', factor]].corr().iloc[0,1] if factor in df.columns else 0
            f.write(f"   {i}. {factor} (correlation: {factor_corr:.3f})\n")
        f.write("\n")
        
        f.write("POLICY RECOMMENDATIONS:\n")
        f.write("- Integrate mental health monitoring in development programs\n")
        f.write("- Target interventions during development transitions (HDI 0.55-0.80)\n")
        f.write("- Address economic stressors in mental health policies\n")
        f.write("- Learn from countries successfully managing development-mental health balance\n")
        f.write("- Invest in mental health infrastructure during rapid development phases\n")

if __name__ == "__main__":
    main()

🚀 Loading data and generating visualizations...
✅ Data loaded successfully: 985 rows, 205 countries
📊 Generating Visualization 1: Enhanced Scatter Plot...
📊 Creating scatter plot with 192 countries for year 2023
✅ Scatter plot saved!
🌍 Generating Visualization 2: World Map with Focus Countries...
🌍 Creating world map with 192 countries
🎯 Focus countries selected: ['Lithuania', 'Oman']
✅ World map saved! Focus countries: ['Lithuania', 'Oman']
🔮 Generating Visualization 3: Future Prediction Model...
🤖 Training model with 985 data points
✅ Future predictions saved!

📈 Model Feature Importance:
     feature  importance
Suicide_lag1    0.990818
     log_GDP    0.004516
         HDI    0.002257
      HDI_sq    0.001428
    HDI_lag1    0.000982
🎯 Generating Visualization 4: Risk Factor Analysis...
🔍 Analyzing risk factors...
📈 Found 7 factors with correlation data
✅ Risk factor analysis saved! Top factors: ['Suicide_per_HDI', 'log_GDP_per_capita', 'HDI_growth']


UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 24: character maps to <undefined>