In [1]:
# comprehensive_analysis_visualizations.py
# Comprehensive analysis with time series, box plots, country profiles, and prevention strategies

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
from scipy.stats import pearsonr
import warnings
warnings.filterwarnings('ignore')

# Set style
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

def load_main_data():
    """Load the main dataset"""
    try:
        df = pd.read_csv("../../Final/final_clean_dataset.csv")
        print(f"Main data loaded: {len(df)} rows, {df['Country Name'].nunique()} countries")
        return df
    except Exception as e:
        print(f"Error loading main data: {e}")
        return None

def create_time_series_quadratic_analysis(df):
    """Create time series scatterplot with quadratic fit for 2019-2023"""
    
    # Filter data for 2019-2023
    df_time = df[df['Year'].between(2019, 2023)].copy()
    
    # Create the visualization
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            'HDI vs Suicide Rate: Annual Trends (2019-2023)',
            'Quadratic Fit Evolution Over Time',
            'HDI Growth Patterns by Continent',
            'Suicide Rate Trends by Development Level'
        ],
        specs=[[{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"secondary_y": False}]],
        vertical_spacing=0.12,
        horizontal_spacing=0.1
    )
    
    # 1. Main scatterplot with quadratic fit for each year
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']  # Professional color palette
    years = sorted(df_time['Year'].unique())
    
    for i, year in enumerate(years):
        year_data = df_time[df_time['Year'] == year].dropna(subset=['HDI', 'Suicide_rate'])
        
        if len(year_data) > 10:  # Only plot years with sufficient data
            # Add scatter points
            fig.add_trace(
                go.Scatter(
                    x=year_data['HDI'],
                    y=year_data['Suicide_rate'],
                    mode='markers',
                    name=f'{year}',
                    marker=dict(
                        size=10,
                        color=colors[i % len(colors)],
                        opacity=0.7,
                        line=dict(width=1, color='white')
                    ),
                    text=year_data['Country Name'] + '<br>Continent: ' + year_data['continent'].fillna('Unknown'),
                    hovertemplate='<b>%{text}</b><br>Year: ' + str(year) + '<br>HDI: %{x:.3f}<br>Suicide Rate: %{y:.2f}<extra></extra>',
                    showlegend=True
                ),
                row=1, col=1
            )
            
            # Fit quadratic model for this year
            try:
                year_data['HDI_sq'] = year_data['HDI'] ** 2
                model = smf.ols('Suicide_rate ~ HDI + HDI_sq', data=year_data).fit()
                
                # Generate prediction line
                hdi_range = np.linspace(year_data['HDI'].min(), year_data['HDI'].max(), 100)
                hdi_sq_range = hdi_range ** 2
                predictions = model.predict(pd.DataFrame({'HDI': hdi_range, 'HDI_sq': hdi_sq_range}))
                
                # Add quadratic fit line
                fig.add_trace(
                    go.Scatter(
                        x=hdi_range,
                        y=predictions,
                        mode='lines',
                        name=f'Quadratic Fit {year}',
                        line=dict(color=colors[i % len(colors)], width=3, dash='solid'),
                        showlegend=False,
                        hoverinfo='skip'
                    ),
                    row=1, col=1
                )
                
                # Add model R² as annotation
                r_squared = model.rsquared
                fig.add_annotation(
                    x=0.02,
                    y=0.98 - (i * 0.05),
                    xref="paper",
                    yref="paper",
                    text=f"{year} R² = {r_squared:.3f}",
                    showarrow=False,
                    bgcolor=colors[i % len(colors)],
                    bordercolor="black",
                    borderwidth=1,
                    borderpad=2,
                    font=dict(color="white", size=10),
                    row=1, col=1
                )
                
            except Exception as e:
                print(f"Could not fit quadratic model for {year}: {e}")
                continue
    
    fig.update_xaxes(title_text="Human Development Index (HDI)", row=1, col=1, range=[0.3, 1.0])
    fig.update_yaxes(title_text="Suicide Rate (per 100,000)", row=1, col=1)
    
    # 2. Quadratic fit evolution
    fit_evolution_data = []
    for year in years:
        year_data = df_time[df_time['Year'] == year].dropna(subset=['HDI', 'Suicide_rate'])
        if len(year_data) > 10:
            try:
                year_data['HDI_sq'] = year_data['HDI'] ** 2
                model = smf.ols('Suicide_rate ~ HDI + HDI_sq', data=year_data).fit()
                
                # Calculate tipping point
                params = model.params
                if 'HDI_sq' in params and params['HDI_sq'] != 0:
                    tipping_point = -params['HDI'] / (2 * params['HDI_sq'])
                    if 0.3 <= tipping_point <= 1.0:
                        fit_evolution_data.append({
                            'Year': year,
                            'Tipping_Point': tipping_point,
                            'R_Squared': model.rsquared,
                            'Countries': len(year_data)
                        })
            except:
                continue
    
    if fit_evolution_data:
        evolution_df = pd.DataFrame(fit_evolution_data)
        fig.add_trace(
            go.Scatter(
                x=evolution_df['Year'],
                y=evolution_df['Tipping_Point'],
                mode='lines+markers+text',
                name='HDI Tipping Point',
                line=dict(color='red', width=4),
                marker=dict(size=12, color='red'),
                text=evolution_df['Tipping_Point'].round(3),
                textposition="top center",
                hovertemplate='Year: %{x}<br>Tipping Point: %{y:.3f}<br>R²: %{customdata:.3f}<extra></extra>',
                customdata=evolution_df['R_Squared']
            ),
            row=1, col=2
        )
    
    fig.update_xaxes(title_text="Year", row=1, col=2)
    fig.update_yaxes(title_text="HDI Tipping Point", row=1, col=2)
    
    # 3. HDI Growth by Continent
    continent_growth = df_time.groupby(['Year', 'continent'])['HDI'].mean().reset_index()
    
    for continent in continent_growth['continent'].unique():
        continent_data = continent_growth[continent_growth['continent'] == continent]
        fig.add_trace(
            go.Scatter(
                x=continent_data['Year'],
                y=continent_data['HDI'],
                mode='lines+markers',
                name=continent,
                line=dict(width=3),
                marker=dict(size=8),
                showlegend=False,
                hovertemplate=f'{continent}<br>Year: %{{x}}<br>Average HDI: %{{y:.3f}}<extra></extra>'
            ),
            row=2, col=1
        )
    
    fig.update_xaxes(title_text="Year", row=2, col=1)
    fig.update_yaxes(title_text="Average HDI", row=2, col=1)
    
    # 4. Suicide Rate Trends by Development Level
    df_time['Development_Level'] = pd.cut(
        df_time['HDI'],
        bins=[0, 0.55, 0.70, 0.80, 1.0],
        labels=['Low Development', 'Medium Development', 'High Development', 'Very High Development']
    )
    
    dev_trends = df_time.groupby(['Year', 'Development_Level'])['Suicide_rate'].mean().reset_index()
    
    for level in dev_trends['Development_Level'].unique():
        level_data = dev_trends[dev_trends['Development_Level'] == level]
        fig.add_trace(
            go.Scatter(
                x=level_data['Year'],
                y=level_data['Suicide_rate'],
                mode='lines+markers',
                name=level,
                line=dict(width=3),
                marker=dict(size=8),
                showlegend=False,
                hovertemplate=f'{level}<br>Year: %{{x}}<br>Suicide Rate: %{{y:.2f}}<extra></extra>'
            ),
            row=2, col=2
        )
    
    fig.update_xaxes(title_text="Year", row=2, col=2)
    fig.update_yaxes(title_text="Average Suicide Rate", row=2, col=2)
    
    # Update layout
    fig.update_layout(
        height=1000,
        width=1400,
        title_text="Time Series Analysis: HDI and Suicide Rate Relationships (2019-2023)",
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
        font=dict(size=12)
    )
    
    return fig

def create_boxplot_analysis(df):
    """Create comprehensive boxplot analysis by income group and continent"""
    
    # Filter data for 2019-2023
    df_time = df[df['Year'].between(2019, 2023)].copy()
    
    # Create subplots
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            'Suicide Rate Distribution by Income Group (2019-2023)',
            'Suicide Rate Distribution by Continent (2019-2023)',
            'Income Group Trends Over Time',
            'Continental Trends Over Time'
        ],
        specs=[[{"type": "box"}, {"type": "box"}],
               [{"type": "scatter"}, {"type": "scatter"}]]
    )
    
    # 1. Income Group Boxplot
    income_order = ['Low', 'Lower-Middle', 'Upper-Middle', 'High']
    
    for income_group in income_order:
        group_data = df_time[df_time['income_group_auto'] == income_group]['Suicide_rate'].dropna()
        if len(group_data) > 0:
            fig.add_trace(
                go.Box(
                    y=group_data,
                    name=income_group,
                    boxpoints='outliers',
                    marker=dict(size=4, color='#1f77b4'),
                    line=dict(width=2),
                    hovertemplate='<b>%{x}</b><br>Suicide Rate: %{y:.2f}<extra></extra>'
                ),
                row=1, col=1
            )
    
    fig.update_xaxes(title_text="Income Group", row=1, col=1)
    fig.update_yaxes(title_text="Suicide Rate (per 100,000)", row=1, col=1)
    
    # 2. Continent Boxplot
    for continent in df_time['continent'].dropna().unique():
        continent_data = df_time[df_time['continent'] == continent]['Suicide_rate'].dropna()
        if len(continent_data) > 0:
            fig.add_trace(
                go.Box(
                    y=continent_data,
                    name=continent,
                    boxpoints='outliers',
                    marker=dict(size=4, color='#ff7f0e'),
                    line=dict(width=2),
                    showlegend=False,
                    hovertemplate='<b>%{x}</b><br>Suicide Rate: %{y:.2f}<extra></extra>'
                ),
                row=1, col=2
            )
    
    fig.update_xaxes(title_text="Continent", row=1, col=2)
    fig.update_yaxes(title_text="Suicide Rate (per 100,000)", row=1, col=2)
    
    # 3. Income Group Trends Over Time
    income_trends = df_time.groupby(['Year', 'income_group_auto'])['Suicide_rate'].mean().reset_index()
    
    for income_group in income_order:
        group_data = income_trends[income_trends['income_group_auto'] == income_group]
        if len(group_data) > 0:
            fig.add_trace(
                go.Scatter(
                    x=group_data['Year'],
                    y=group_data['Suicide_rate'],
                    mode='lines+markers',
                    name=income_group,
                    line=dict(width=3),
                    marker=dict(size=8),
                    hovertemplate=f'{income_group} Income<br>Year: %{{x}}<br>Suicide Rate: %{{y:.2f}}<extra></extra>'
                ),
                row=2, col=1
            )
    
    fig.update_xaxes(title_text="Year", row=2, col=1)
    fig.update_yaxes(title_text="Average Suicide Rate", row=2, col=1)
    
    # 4. Continental Trends Over Time
    continent_trends = df_time.groupby(['Year', 'continent'])['Suicide_rate'].mean().reset_index()
    
    for continent in continent_trends['continent'].unique():
        continent_data = continent_trends[continent_trends['continent'] == continent]
        fig.add_trace(
            go.Scatter(
                x=continent_data['Year'],
                y=continent_data['Suicide_rate'],
                mode='lines+markers',
                name=continent,
                line=dict(width=3),
                marker=dict(size=8),
                showlegend=False,
                hovertemplate=f'{continent}<br>Year: %{{x}}<br>Suicide Rate: %{{y:.2f}}<extra></extra>'
            ),
            row=2, col=2
        )
    
    fig.update_xaxes(title_text="Year", row=2, col=2)
    fig.update_yaxes(title_text="Average Suicide Rate", row=2, col=2)
    
    fig.update_layout(
        height=900,
        width=1400,
        title_text="Comprehensive Distribution Analysis: Suicide Rates by Income Group and Continent (2019-2023)",
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    
    return fig

def create_country_profiles_with_data_quality(df):
    """Create 4-country development profiles with detailed data quality analysis"""
    
    # Use latest year data
    latest_year = df['Year'].max()
    latest_data = df[df['Year'] == latest_year].copy()
    
    # Define thresholds
    hdi_high_threshold = 0.75
    hdi_low_threshold = 0.55
    suicide_high_threshold = latest_data['Suicide_rate'].quantile(0.75)
    suicide_low_threshold = latest_data['Suicide_rate'].quantile(0.25)
    
    # Categorize countries
    categories = {
        'High HDI, Low Suicide': {
            'condition': (latest_data['HDI'] >= hdi_high_threshold) & (latest_data['Suicide_rate'] <= suicide_low_threshold),
            'color': '#2ca02c',  # Green
            'description': 'Successful Development: High development with good mental health outcomes'
        },
        'High HDI, High Suicide': {
            'condition': (latest_data['HDI'] >= hdi_high_threshold) & (latest_data['Suicide_rate'] >= suicide_high_threshold),
            'color': '#d62728',  # Red
            'description': 'Development Paradox: High development but mental health challenges'
        },
        'Low HDI, High Suicide': {
            'condition': (latest_data['HDI'] <= hdi_low_threshold) & (latest_data['Suicide_rate'] >= suicide_high_threshold),
            'color': '#ff7f0e',  # Orange
            'description': 'Double Burden: Low development with mental health challenges'
        },
        'Low HDI, Low Suicide': {
            'condition': (latest_data['HDI'] <= hdi_low_threshold) & (latest_data['Suicide_rate'] <= suicide_low_threshold),
            'color': '#1f77b4',  # Blue
            'description': 'Resilience: Low development but good mental health outcomes'
        }
    }
    
    # Select one representative country from each category
    selected_countries = {}
    
    for category, config in categories.items():
        category_data = latest_data[config['condition']]
        
        if len(category_data) > 0:
            # Select based on data quality and representativeness
            if 'High HDI' in category:
                # For high HDI groups, prioritize countries with good data quality
                good_quality = category_data[~category_data['Low_data_quality_flag'].str.contains('Low', na=False)]
                if len(good_quality) > 0:
                    selected = good_quality.nlargest(3, 'HDI' if 'Low Suicide' in category else 'Suicide_rate').iloc[0]
                else:
                    selected = category_data.nlargest(3, 'HDI' if 'Low Suicide' in category else 'Suicide_rate').iloc[0]
            else:
                # For low HDI groups
                good_quality = category_data[~category_data['Low_data_quality_flag'].str.contains('Low', na=False)]
                if len(good_quality) > 0:
                    selected = good_quality.nsmallest(3, 'Suicide_rate' if 'Low Suicide' in category else 'HDI').iloc[0]
                else:
                    selected = category_data.nsmallest(3, 'Suicide_rate' if 'Low Suicide' in category else 'HDI').iloc[0]
            
            selected_countries[category] = selected
    
    # Create comprehensive visualization
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=[
            'Development-Mental Health Country Profiles',
            'Data Quality Assessment by Category',
            'Country Indicators Comparison',
            'Data Reliability Analysis'
        ],
        specs=[[{"type": "scatter"}, {"type": "bar"}],
               [{"type": "bar"}, {"type": "box"}]]
    )
    
    # 1. Development-Mental Health Profiles
    all_countries_data = latest_data.dropna(subset=['HDI', 'Suicide_rate'])
    
    # Add all countries as background
    fig.add_trace(
        go.Scatter(
            x=all_countries_data['HDI'],
            y=all_countries_data['Suicide_rate'],
            mode='markers',
            name='All Countries',
            marker=dict(
                size=6,
                color='lightgray',
                opacity=0.4
            ),
            text=all_countries_data['Country Name'],
            hovertemplate='<b>%{text}</b><br>HDI: %{x:.3f}<br>Suicide Rate: %{y:.2f}<extra></extra>',
            showlegend=False
        ),
        row=1, col=1
    )
    
    # Add selected countries
    for category, country_data in selected_countries.items():
        config = categories[category]
        
        # Data quality indicator
        data_quality = "Good" if 'Sufficient' in str(country_data['Low_data_quality_flag']) else "Low"
        quality_color = 'green' if data_quality == "Good" else 'red'
        
        fig.add_trace(
            go.Scatter(
                x=[country_data['HDI']],
                y=[country_data['Suicide_rate']],
                mode='markers+text',
                name=category,
                marker=dict(
                    size=20,
                    color=config['color'],
                    symbol='star',
                    line=dict(color='black', width=2)
                ),
                text=country_data['Country Name'],
                textposition="top center",
                hovertemplate=f'<b>{country_data["Country Name"]}</b><br>Category: {category}<br>HDI: {country_data["HDI"]:.3f}<br>Suicide Rate: {country_data["Suicide_rate"]:.2f}<br>Data Quality: {data_quality}<extra></extra>',
                showlegend=True
            ),
            row=1, col=1
        )
    
    # Add quadrant lines
    fig.add_hline(y=suicide_high_threshold, line_dash="dash", line_color="gray", opacity=0.7, row=1, col=1)
    fig.add_hline(y=suicide_low_threshold, line_dash="dash", line_color="gray", opacity=0.7, row=1, col=1)
    fig.add_vline(x=hdi_high_threshold, line_dash="dash", line_color="gray", opacity=0.7, row=1, col=1)
    fig.add_vline(x=hdi_low_threshold, line_dash="dash", line_color="gray", opacity=0.7, row=1, col=1)
    
    fig.update_xaxes(title_text="Human Development Index (HDI)", row=1, col=1)
    fig.update_yaxes(title_text="Suicide Rate (per 100,000)", row=1, col=1)
    
    # 2. Data Quality by Category
    quality_by_category = {}
    for category, config in categories.items():
        category_data = latest_data[config['condition']]
        if len(category_data) > 0:
            sufficient_count = len(category_data[~category_data['Low_data_quality_flag'].str.contains('Low', na=False)])
            low_count = len(category_data[category_data['Low_data_quality_flag'].str.contains('Low', na=False)])
            quality_by_category[category] = {'Sufficient': sufficient_count, 'Low': low_count}
    
    for quality_type in ['Sufficient', 'Low']:
        values = [quality_by_category.get(cat, {}).get(quality_type, 0) for cat in categories.keys()]
        fig.add_trace(
            go.Bar(
                x=list(categories.keys()),
                y=values,
                name=f'{quality_type} Data Quality',
                text=values,
                textposition='auto',
                hovertemplate='<b>%{x}</b><br>%{fullData.name}: %{y} countries<extra></extra>'
            ),
            row=1, col=2
        )
    
    fig.update_xaxes(title_text="Development-Mental Health Category", row=1, col=2, tickangle=45)
    fig.update_yaxes(title_text="Number of Countries", row=1, col=2)
    
    # 3. Country Indicators Comparison
    comparison_data = []
    for category, country_data in selected_countries.items():
        comparison_data.append({
            'Category': category,
            'Country': country_data['Country Name'],
            'HDI': country_data['HDI'],
            'Suicide_Rate': country_data['Suicide_rate'],
            'GDP_per_Capita': country_data['GDP_per_capita'],
            'Data_Quality': 'Good' if 'Sufficient' in str(country_data['Low_data_quality_flag']) else 'Low'
        })
    
    if comparison_data:
        comp_df = pd.DataFrame(comparison_data)
        
        # Normalize for comparison
        metrics = ['HDI', 'Suicide_Rate', 'GDP_per_Capita']
        for metric in metrics:
            fig.add_trace(
                go.Bar(
                    x=comp_df['Country'],
                    y=comp_df[metric],
                    name=metric.replace('_', ' '),
                    text=comp_df[metric].round(3) if metric != 'GDP_per_Capita' else comp_df[metric].round(0),
                    textposition='auto',
                    hovertemplate='<b>%{x}</b><br>%{fullData.name}: %{y}<extra></extra>'
                ),
                row=2, col=1
            )
    
    fig.update_xaxes(title_text="Country", row=2, col=1, tickangle=45)
    fig.update_yaxes(title_text="Indicator Value", row=2, col=1)
    
    # 4. Data Reliability Analysis
    reliability_analysis = []
    for category, config in categories.items():
        category_data = latest_data[config['condition']]
        if len(category_data) > 0:
            reliability_analysis.append({
                'Category': category,
                'Total_Countries': len(category_data),
                'Good_Quality_Count': len(category_data[~category_data['Low_data_quality_flag'].str.contains('Low', na=False)]),
                'Good_Quality_Pct': len(category_data[~category_data['Low_data_quality_flag'].str.contains('Low', na=False)]) / len(category_data) * 100,
                'Avg_HDI': category_data['HDI'].mean(),
                'Avg_Suicide_Rate': category_data['Suicide_rate'].mean()
            })
    
    if reliability_analysis:
        rel_df = pd.DataFrame(reliability_analysis)
        
        fig.add_trace(
            go.Bar(
                x=rel_df['Category'],
                y=rel_df['Good_Quality_Pct'],
                name='Data Quality Percentage',
                marker_color=['#2ca02c', '#d62728', '#ff7f0e', '#1f77b4'],
                text=rel_df['Good_Quality_Pct'].round(1),
                textposition='auto',
                hovertemplate='<b>%{x}</b><br>Good Data Quality: %{y:.1f}%<br>Total Countries: %{customdata}<extra></extra>',
                customdata=rel_df['Total_Countries']
            ),
            row=2, col=2
        )
    
    fig.update_xaxes(title_text="Category", row=2, col=2, tickangle=45)
    fig.update_yaxes(title_text="Percentage with Good Data Quality (%)", row=2, col=2)
    
    fig.update_layout(
        height=1000,
        width=1400,
        title_text="Four-Country Development-Mental Health Profiles with Data Quality Assessment",
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    
    return fig, selected_countries

def create_risk_prevention_strategies_advanced(df):
    """Create advanced risk prevention strategies based on correlation analysis"""
    
    # Calculate correlations with key variables
    risk_factors = []
    targets = ['Suicide_rate', 'HDI', 'GDP_per_capita']
    
    # Potential risk factors from the dataset
    potential_factors = ['HDI', 'GDP_per_capita', 'HDI_growth', 'Suicide_change']
    
    for target in targets:
        for factor in potential_factors:
            if factor in df.columns and target in df.columns:
                valid_data = df[[target, factor]].dropna()
                if len(valid_data) > 30:
                    corr, p_value = pearsonr(valid_data[target], valid_data[factor])
                    risk_factors.append({
                        'Factor': factor,
                        'Target': target,
                        'Correlation': corr,
                        'Abs_Correlation': abs(corr),
                        'P_Value': p_value,
                        'Direction': 'Positive' if corr > 0 else 'Negative'
                    })
    
    risk_df = pd.DataFrame(risk_factors)
    
    # Create strategies based on correlations
    strategies = {
        'High Positive Correlation Factors (Increase Risk)': [],
        'High Negative Correlation Factors (Decrease Risk)': [],
        'Economic Intervention Strategies': [],
        'Social and Health Interventions': []
    }
    
    # High positive correlations (increase risk)
    high_positive = risk_df[(risk_df['Correlation'] > 0.3) & (risk_df['Target'] == 'Suicide_rate')]
    for _, row in high_positive.iterrows():
        strategies['High Positive Correlation Factors (Increase Risk)'].append(
            f"{row['Factor']}: r = {row['Correlation']:.3f}"
        )
    
    # High negative correlations (decrease risk)
    high_negative = risk_df[(risk_df['Correlation'] < -0.2) & (risk_df['Target'] == 'Suicide_rate')]
    for _, row in high_negative.iterrows():
        strategies['High Negative Correlation Factors (Decrease Risk)'].append(
            f"{row['Factor']}: r = {row['Correlation']:.3f}"
        )
    
    # Economic interventions
    economic_factors = risk_df[risk_df['Factor'].str.contains('GDP', na=False)]
    for _, row in economic_factors.iterrows():
        if row['Target'] == 'Suicide_rate':
            action = "Strengthen" if row['Correlation'] < 0 else "Monitor"
            strategies['Economic Intervention Strategies'].append(
                f"{action} economic stability programs (r = {row['Correlation']:.3f})"
            )
    
    # Social and health interventions
    strategies['Social and Health Interventions'] = [
        "Implement mental health screening in primary healthcare",
        "Develop community-based suicide prevention programs",
        "Strengthen social safety nets and support systems",
        "Promote mental health literacy in educational systems"
    ]
    
    # Create visualization
    fig = go.Figure()
    
    categories = list(strategies.keys())
    colors = ['#d62728', '#2ca02c', '#ff7f0e', '#1f77b4']
    
    for i, (category, items) in enumerate(strategies.items()):
        for j, item in enumerate(items):
            fig.add_trace(
                go.Scatter(
                    x=[i],
                    y=[j],
                    mode='markers+text',
                    marker=dict(
                        size=20,
                        symbol='square',
                        color=colors[i],
                        line=dict(width=2, color='white')
                    ),
                    text=str(j+1),
                    textposition="middle center",
                    textfont=dict(color="white", size=10, weight="bold"),
                    name=category,
                    showlegend=False,
                    hovertemplate=f'<b>{category}</b><br>{item}<extra></extra>'
                )
            )
            
            # Add text annotation
            fig.add_annotation(
                x=i,
                y=j - 0.3,
                text=item,
                showarrow=False,
                xanchor='center',
                yanchor='top',
                font=dict(size=9),
                bgcolor="rgba(255,255,255,0.95)",
                bordercolor="black",
                borderwidth=1,
                borderpad=3
            )
    
    fig.update_layout(
        xaxis=dict(
            tickvals=list(range(len(categories))),
            ticktext=categories,
            range=[-0.5, len(categories)-0.5],
            showgrid=False
        ),
        yaxis=dict(
            range=[-1, max(len(items) for items in strategies.values())],
            showticklabels=False,
            showgrid=False
        ),
        title="Risk Prevention Strategies Based on Correlation Analysis<br>Minimize Positive Correlations, Strengthen Negative Correlations",
        height=600,
        width=1200,
        showlegend=False,
        plot_bgcolor='lightgray'
    )
    
    return fig, risk_df

def main():
    """Main function to generate all visualizations"""
    print("Starting comprehensive analysis...")
    
    # Load data
    df = load_main_data()
    if df is None:
        return
    
    print(f"Dataset covers years: {df['Year'].min()} to {df['Year'].max()}")
    print(f"Countries: {df['Country Name'].nunique()}")
    
    # Generate visualizations
    print("\n1. Creating Time Series Quadratic Analysis...")
    fig_time_series = create_time_series_quadratic_analysis(df)
    fig_time_series.write_image("time_series_quadratic_analysis.png", scale=2)
    fig_time_series.write_html("time_series_quadratic_analysis.html")
    
    print("2. Creating Boxplot Analysis...")
    fig_boxplot = create_boxplot_analysis(df)
    fig_boxplot.write_image("boxplot_analysis.png", scale=2)
    fig_boxplot.write_html("boxplot_analysis.html")
    
    print("3. Creating Country Profiles with Data Quality...")
    fig_profiles, selected_countries = create_country_profiles_with_data_quality(df)
    fig_profiles.write_image("country_profiles_data_quality.png", scale=2)
    fig_profiles.write_html("country_profiles_data_quality.html")
    
    print("4. Creating Risk Prevention Strategies...")
    fig_strategies, risk_df = create_risk_prevention_strategies_advanced(df)
    fig_strategies.write_image("risk_prevention_strategies.png", scale=2)
    fig_strategies.write_html("risk_prevention_strategies.html")
    
    # Create summary report
    with open("comprehensive_analysis_report.txt", "w", encoding='utf-8') as f:
        f.write("COMPREHENSIVE ANALYSIS REPORT\n")
        f.write("=" * 60 + "\n\n")
        
        f.write("ANALYSIS OVERVIEW:\n")
        f.write(f"- Time period analyzed: 2019-2023\n")
        f.write(f"- Countries included: {df['Country Name'].nunique()}\n")
        f.write(f"- Total observations: {len(df)}\n\n")
        
        f.write("KEY INSIGHTS:\n")
        f.write("1. Time Series Analysis shows evolving HDI-Suicide relationships\n")
        f.write("2. Boxplot analysis reveals income and continental patterns\n")
        f.write("3. Country profiles identify development-mental health paradoxes\n")
        f.write("4. Risk prevention strategies target key correlation factors\n\n")
        
        f.write("SELECTED COUNTRY PROFILES:\n")
        for category, country_data in selected_countries.items():
            data_quality = "Good" if 'Sufficient' in str(country_data['Low_data_quality_flag']) else "Low"
            f.write(f"- {category}: {country_data['Country Name']} (Data Quality: {data_quality})\n")
            f.write(f"  HDI: {country_data['HDI']:.3f}, Suicide Rate: {country_data['Suicide_rate']:.2f}\n")
        f.write("\n")
        
        f.write("RISK PREVENTION PRIORITIES:\n")
        f.write("- Minimize factors with strong positive correlations to suicide rates\n")
        f.write("- Strengthen factors with negative correlations to suicide rates\n")
        f.write("- Focus on economic stability and social support systems\n")
        f.write("- Implement targeted mental health interventions\n")
    
    print("\nFiles created:")
    print("   - time_series_quadratic_analysis.png/.html")
    print("   - boxplot_analysis.png/.html")
    print("   - country_profiles_data_quality.png/.html")
    print("   - risk_prevention_strategies.png/.html")
    print("   - comprehensive_analysis_report.txt")
    
    print("\nAnalysis completed successfully!")

if __name__ == "__main__":
    main()

Starting comprehensive analysis...
Main data loaded: 985 rows, 205 countries
Dataset covers years: 2019 to 2023
Countries: 205

1. Creating Time Series Quadratic Analysis...
2. Creating Boxplot Analysis...
3. Creating Country Profiles with Data Quality...
4. Creating Risk Prevention Strategies...


ValueError: shapes (985,2) and (985,2) not aligned: 2 (dim 1) != 985 (dim 0)