In [6]:
# ==============================================================================
# 0. SETUP & INSTALLATION
# ==============================================================================
import sys
import subprocess
import importlib
import importlib.util  # `import importlib` alone does not guarantee the `util` submodule is loaded

# Packages are listed by their pip distribution name (what `pip install` expects).
required_libs = ['pandas', 'numpy', 'plotly', 'scikit-learn', 'ipywidgets', 'statsmodels']

# Distribution name -> importable module name, for the cases where they differ.
# Without this, find_spec('scikit-learn') is always None and pip is re-run on
# every execution even though `sklearn` is already installed.
import_names = {'scikit-learn': 'sklearn'}

for lib in required_libs:
    if importlib.util.find_spec(import_names.get(lib, lib)) is None:
        print(f"Installing missing library: {lib}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", lib])

# Make packages installed just above visible to the import system of this
# already-running interpreter.
importlib.invalidate_caches()

print("‚úÖ Libraries verified.")

# ==============================================================================
# 1. IMPORTS
# ==============================================================================
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from ipywidgets import interact, widgets
import statsmodels.api as sm

# ==============================================================================
# 2. DATA LOADING & PROCESSING
# ==============================================================================
# Read the combined 2008-2020 dataset; a missing file is reported and then
# re-raised so the rest of the cell does not run on undefined data.
try:
    df = pd.read_csv('../../data_cleaned/combined_urbanization_life_quality_2008_2020.csv')
except FileNotFoundError:
    print("‚ùå Error: 'combined_urbanization_life_quality_2008_2020.csv' not found.")
    raise
else:
    print(f"‚úÖ Data Loaded: {len(df)} rows.")

# -------------------------------------------------------------------------
# CRITICAL FIX: REPLICATE "ALL INDICATORS" CLUSTERING
# -------------------------------------------------------------------------
# 1. Feature set: every numeric column except the metadata identifiers.
numeric_cols = list(df.select_dtypes(include=[np.number]).columns)
cols_to_drop = ['Year', 'Country_Code']
clustering_features = [name for name in numeric_cols if name not in cols_to_drop]

# 2. One profile row per country: the mean of each feature over all of its rows.
per_country_means = df.groupby('Country')[clustering_features].mean()
country_profiles = per_country_means.reset_index()

# 3. Only countries with a value for every feature take part in the clustering.
cluster_data = country_profiles.dropna(subset=clustering_features).copy()
print(f"‚ÑπÔ∏è  Clustering based on {len(cluster_data)} countries using {len(clustering_features)} indicators.")

# 4. Standardize the features before clustering.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(cluster_data[clustering_features])

# 5. Two-cluster K-Means with a fixed seed so the assignment is reproducible.
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
cluster_data['Cluster'] = kmeans.fit(X_scaled).labels_

# 6. Name the clusters: the one with the lower mean 'overall score' is "Stable".
c0_score = cluster_data.loc[cluster_data['Cluster'] == 0, 'overall score'].mean()
c1_score = cluster_data.loc[cluster_data['Cluster'] == 1, 'overall score'].mean()

stable_id = 0 if c0_score < c1_score else 1
mapping = {
    stable_id: 'Stable Urbanizers',
    1 - stable_id: 'Volatile Urbanizers',
}

cluster_data['Cluster_Label'] = cluster_data['Cluster'].map(mapping)

# ==============================================================================
# WCAG-COMPLIANT COLOR PALETTES
# ==============================================================================
# Three shared hex colors; both palettes below are built from them so the
# blue/red pairing stays identical across the two charts.
_PALETTE_BLUE = '#377EB8'
_PALETTE_RED = '#E41A1C'    # described as "Orange-Red" in the printed legend
_PALETTE_GREEN = '#4DAF4A'

# Cluster label -> color (scatter chart).
cluster_color_map = dict([
    ('Stable Urbanizers', _PALETTE_BLUE),
    ('Volatile Urbanizers', _PALETTE_RED),
])

# Urbanization band -> color (grouped bar chart), listed low to high.
urban_color_map = dict([
    ('Low (<50%)', _PALETTE_BLUE),
    ('Medium (50-75%)', _PALETTE_RED),
    ('High (>75%)', _PALETTE_GREEN),
])

# 7. Attach cluster labels to the country profiles. The inner join keeps only
#    countries that were actually clustered.
profile_columns = country_profiles[['Country', *clustering_features]]
label_columns = cluster_data[['Country', 'Cluster_Label']]
df_final = profile_columns.merge(label_columns, on='Country', how='inner')

# 8. Bucket countries by mean urban population share. pd.cut intervals are
#    right-closed by default: (-1, 50], (50, 75], (75, 101].
urban_order = ['Low (<50%)', 'Medium (50-75%)', 'High (>75%)']
df_final['Urban_Group'] = pd.cut(
    df_final['urban_pop_perc'],
    bins=[-1, 50, 75, 101],
    labels=urban_order,
)

# ==============================================================================
# 3. COLUMN MAPPING (Strict Sequence)
# ==============================================================================
# Display names in the exact order they should appear in the chart and dropdown.
_indicator_sequence = [
    'Militarization_Index',
    'weapons exports',
    'weapons imports',
    'nuclear and heavy weapons',
    'overall score',
    'ongoing conflict',
    'Neighbouring countries relations',
    'Political instability',
    'intensity of internal conflict',
    'Instability_Index',
]

# Only these display names differ from the CSV column they are read from.
_csv_overrides = {
    'Militarization_Index': 'militarisation',
    'Instability_Index': 'internal peace',
}

# Display name -> CSV column, in display order.
indicator_map = {
    display: _csv_overrides.get(display, display)
    for display in _indicator_sequence
}
ordered_display_names = [*indicator_map]
display_to_csv = indicator_map  # alias of the same dict, not a copy
csv_to_display = dict(zip(indicator_map.values(), indicator_map.keys()))

# ==============================================================================
# 4. VISUALIZATION 1 PREP
# ==============================================================================
# Keep only the mapped CSV columns that are present in the merged frame.
available_cols = [csv_col for csv_col in indicator_map.values() if csv_col in df_final.columns]

vis1_data = df_final.loc[:, ['Urban_Group', *available_cols]].copy()

# Rescale every indicator to the 0-1 range so they share one axis.
min_max_scaler = MinMaxScaler()
vis1_data[available_cols] = min_max_scaler.fit(vis1_data[available_cols]).transform(vis1_data[available_cols])

# Switch to display names, then reshape to one row per (country, indicator).
vis1_data = vis1_data.rename(columns=csv_to_display)
melted_vis1 = pd.melt(
    vis1_data,
    id_vars='Urban_Group',
    var_name='Indicator',
    value_name='Normalized Score',
)

# Mean normalized score per urban group and indicator.
scores_by_group = melted_vis1.groupby(['Urban_Group', 'Indicator'], observed=True)['Normalized Score']
grouped_vis1 = scores_by_group.mean().reset_index()

# Ordered categoricals make the sort follow the curated indicator and group order.
grouped_vis1['Indicator'] = grouped_vis1['Indicator'].astype(
    pd.CategoricalDtype(categories=ordered_display_names, ordered=True)
)
grouped_vis1['Urban_Group'] = grouped_vis1['Urban_Group'].astype(
    pd.CategoricalDtype(categories=urban_order, ordered=True)
)
grouped_vis1 = grouped_vis1.sort_values(by=['Indicator', 'Urban_Group'])

# ==============================================================================
# 5. DASHBOARD GENERATION
# ==============================================================================
# Offer only indicators that made it into the aggregated chart data, in the
# curated display order. The set is built once instead of once per name.
plotted_indicators = set(grouped_vis1['Indicator'].unique())
dropdown_options = [name for name in ordered_display_names if name in plotted_indicators]

# Dropdown rejects a `value` that is not among `options`, so fall back to the
# first available indicator (or no selection) if 'overall score' is missing
# from the data.
if 'overall score' in dropdown_options:
    default_indicator = 'overall score'
elif dropdown_options:
    default_indicator = dropdown_options[0]
else:
    default_indicator = None

dropdown = widgets.Dropdown(
    options=dropdown_options,
    value=default_indicator,
    description='Select Detail:',
    style={'description_width': 'initial'}
)

def render_dashboard(selected_display_name):
    """Render the overview bar chart and the deep-dive scatter for one indicator.

    Args:
        selected_display_name: Display name of the indicator to analyse; it
            must be a key of ``display_to_csv``.

    Shows two Plotly figures: a grouped bar chart of normalized indicator
    means per urban group, and a scatter of urban population share against
    the selected indicator with per-cluster and global OLS trend lines.
    """
    csv_col_name = display_to_csv[selected_display_name]

    def _corr_with_urban_share(frame):
        # Correlation between urban population share and the chosen indicator.
        return frame['urban_pop_perc'].corr(frame[csv_col_name])

    # --- Correlations: all countries, then each cluster separately ---
    is_stable = df_final['Cluster_Label'] == 'Stable Urbanizers'
    is_volatile = df_final['Cluster_Label'] == 'Volatile Urbanizers'
    corr_global = _corr_with_urban_share(df_final)
    corr_stable = _corr_with_urban_share(df_final[is_stable])
    corr_volatile = _corr_with_urban_share(df_final[is_volatile])

    # --- Chart 1: overview of all indicators by urbanization level ---
    fig1 = px.bar(
        grouped_vis1,
        x='Indicator',
        y='Normalized Score',
        color='Urban_Group',
        barmode='group',
        title='<b>1. Landscape Overview: Security Indicators</b><br><i>(Normalized Scores 0-1 across Urbanization Levels)</i>',
        color_discrete_map=urban_color_map,
        category_orders={'Urban_Group': urban_order},
        height=500
    )
    # Pin the x axis to the curated indicator order.
    fig1.update_xaxes(categoryorder='array', categoryarray=ordered_display_names)
    fig1.update_layout(
        xaxis_tickangle=-45,
        legend_title="Urban Level",
        font=dict(size=12)
    )

    # --- Chart 2: deep dive on the selected indicator, colored by cluster ---
    axis_labels = {
        'urban_pop_perc': 'Urban Population (%)',
        csv_col_name: f'{selected_display_name} (Score)',
    }
    fig2 = px.scatter(
        df_final,
        x='urban_pop_perc',
        y=csv_col_name,
        color='Cluster_Label',
        trendline='ols',
        hover_name='Country',
        title=f"<b>2. Deep Dive: {selected_display_name}</b><br><i>(Global Correlation: {corr_global:+.2f})</i>",
        color_discrete_map=cluster_color_map,
        labels=axis_labels,
        height=500
    )

    # Put each cluster's correlation in its legend entry and give the
    # per-cluster trend lines a compact hover text.
    for cluster_trace in fig2.data:
        if cluster_trace.name == "Stable Urbanizers":
            cluster_trace.name = f"Stable (r={corr_stable:+.2f})"
            if cluster_trace.mode == 'lines':
                cluster_trace.hovertemplate = f"<b>Stable Trend</b><br>Correlation: {corr_stable:+.3f}<extra></extra>"
        elif cluster_trace.name == "Volatile Urbanizers":
            cluster_trace.name = f"Volatile (r={corr_volatile:+.2f})"
            if cluster_trace.mode == 'lines':
                cluster_trace.hovertemplate = f"<b>Volatile Trend</b><br>Correlation: {corr_volatile:+.3f}<extra></extra>"

    # Global trend line: fit OLS on all countries in a throwaway figure and
    # copy its second trace (the fitted line) into chart 2.
    uncolored_traces = px.scatter(df_final, x='urban_pop_perc', y=csv_col_name, trendline='ols').data
    if len(uncolored_traces) > 1:
        global_line = uncolored_traces[1]
        global_line.update(
            line=dict(color='#2C3E50', dash='dash', width=2.5),
            name=f'Global Trend (r={corr_global:+.2f})',
            showlegend=True,
            hovertemplate=f"<b>Global Trend</b><br>Correlation: {corr_global:+.3f}<extra></extra>",
        )
        fig2.add_trace(global_line)

    fig2.update_layout(legend_title="Correlation Analysis")

    for figure in (fig1, fig2):
        figure.show()

# Console legend describing the colors used in both charts, one entry per
# printed line.
_rule = "=" * 70
_banner_lines = [
    "‚ú® Generating Dashboard (Using FULL Clustering Logic with WCAG Colors)...",
    "\n" + _rule,
    "WCAG-COMPLIANT COLOR SCHEME APPLIED",
    _rule,
    "\nüìä Chart 1 - Urban Groups (3-Group Palette):",
    "   Low (<50%):        Blue (#377EB8)",
    "   Medium (50-75%):   Orange-Red (#E41A1C)",
    "   High (>75%):       Green (#4DAF4A)",
    "\nüìä Chart 2 - Cluster Analysis (2-Group Palette):",
    "   Stable Urbanizers:    Blue (#377EB8)",
    "   Volatile Urbanizers:  Orange-Red (#E41A1C)",
    "   Global Trend:         Dark Gray (#2C3E50)",
    _rule + "\n",
]
for _line in _banner_lines:
    print(_line)

# Re-render the dashboard whenever the dropdown selection changes. The
# trailing semicolon keeps the notebook from echoing the return value.
interact(render_dashboard, selected_display_name=dropdown);

Installing missing library: scikit-learn...
‚úÖ Libraries verified.
‚úÖ Data Loaded: 767 rows.
‚ÑπÔ∏è  Clustering based on 59 countries using 46 indicators.
‚ú® Generating Dashboard (Using FULL Clustering Logic with WCAG Colors)...

WCAG-COMPLIANT COLOR SCHEME APPLIED

üìä Chart 1 - Urban Groups (3-Group Palette):
   Low (<50%):        Blue (#377EB8)
   Medium (50-75%):   Orange-Red (#E41A1C)
   High (>75%):       Green (#4DAF4A)

üìä Chart 2 - Cluster Analysis (2-Group Palette):
   Stable Urbanizers:    Blue (#377EB8)
   Volatile Urbanizers:  Orange-Red (#E41A1C)
   Global Trend:         Dark Gray (#2C3E50)



interactive(children=(Dropdown(description='Select Detail:', index=4, options=('Militarization_Index', 'weapon‚Ä¶