In [None]:
# Kaggle Environment Setup
from pathlib import Path


# Data paths - supports both Kaggle and local environments
kaggle_path = Path('/kaggle/input/european-citizens-initiatives-2026')
local_path = Path('./csv_files')

# Use the first candidate directory that exists; the Kaggle mount is checked
# before the local folder, so it wins when both are present.
KAGGLE_INPUT = next(
    (candidate for candidate in (kaggle_path, local_path) if candidate.exists()),
    None,
)
if KAGGLE_INPUT is None:
    raise FileNotFoundError(
        'Data directory not found. Expected one of:\n'
        f'  - Kaggle: {kaggle_path}\n'
        f'  - Local: {local_path}\n'
        'Please ensure CSV files are available in one of these locations.'
    )

# Only announce the data source when the local folder is in use
if KAGGLE_INPUT == local_path:
    print(f'📁 Using data from: {KAGGLE_INPUT}')


<a id='introduction'></a>
# <p style="padding:15px;background-color:#fff798;margin:10px 0;color:#435672;font-family:'Arial',sans-serif;text-align:center;border-radius:15px 50px;overflow:hidden;font-weight:600;font-size:150%;">🇪🇺✍️ European Citizens' Initiatives: Signature Collection</p>

<div align="center">
  <img src="https://raw.githubusercontent.com/Luk-kar/eu-citizens-initiatives-tracker/main/ECI_initiatives/exploratory_data_analysis/initiatives_campaigns/images/eci_take_initiative_banner.png" alt="ECI Material" width="600">
</div>

<p style="text-align:center;font-size:150%;">
  <i>Source: European Citizens' Initiative | European Commission (CC BY 4.0)</i>
</p>

In [None]:
# ==============================================================================
# WARNINGS CONFIGURATION - Must be FIRST
# ==============================================================================
import warnings
# Blanket filter: suppresses every warning for the rest of the session
warnings.filterwarnings('ignore')
# Narrower filter for the Plotly/Kaleido version notice (already covered by the blanket filter)
warnings.filterwarnings('ignore', message='.*Plotly version.*Kaleido.*')


# ==============================================================================
# PLOTLY RENDERER SETUP
# ==============================================================================

import plotly.io as pio
# Make "kaggle" the default renderer used when figures are shown.
# NOTE(review): set unconditionally, also when the data comes from the local folder — confirm it renders outside Kaggle
pio.renderers.default = "kaggle"

# ==============================================================================
# SETUP: IMPORT LIBRARIES AND LOAD DATA
# ==============================================================================

# ------------------------------------------------------------------------------
# Standard Library Imports
# ------------------------------------------------------------------------------
from collections import Counter
from datetime import datetime
import json
import os
from typing import Tuple
import warnings

# ------------------------------------------------------------------------------
# Data Science
# ------------------------------------------------------------------------------
import numpy as np
import pandas as pd

# ------------------------------------------------------------------------------
# Visualization
# ------------------------------------------------------------------------------
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

# ------------------------------------------------------------------------------
# Configuration
# ------------------------------------------------------------------------------
# Repeats the warning filters installed at the top of this cell (kept as-is)
warnings.filterwarnings('ignore')
# Default figure template for every Plotly figure created afterwards
pio.templates.default = "plotly_white"
warnings.filterwarnings('ignore', message='.*Plotly version.*Kaleido.*')

# Set colors
# Plotly's sequential Viridis palette, kept under a short name
viridis_colors = px.colors.sequential.Viridis

# # ------------------------------------------------------------------------------
# # Dev data
# # ------------------------------------------------------------------------------

# # Load the dataset
# data_folder = "../data/2025-09-18_16-33-57"
# data_file = f'{data_folder}/eci_initiatives_2025-11-04_11-59-38.csv'
# df = pd.read_csv(data_file)

# # Extract and print dates from file paths
# folder_date_str = data_folder.split('/')[-1]  # "2025-09-18_16-33-57"
# folder_date = datetime.strptime(folder_date_str, "%Y-%m-%d_%H-%M-%S")

# file_name = os.path.basename(data_file)  # "eci_initiatives_2025-11-04_11-59-38.csv"
# file_date_str = file_name.split('_')[2] + '_' + file_name.split('_')[3].replace('.csv', '')  # "2025-11-04_11-59-38"
# file_date = datetime.strptime(file_date_str, "%Y-%m-%d_%H-%M-%S")

# ------------------------------------------------------------------------------
# Data Loading Functions
# ------------------------------------------------------------------------------


    # Find latest timestamped folder
    # Locate CSV file
    # Load dataframe
    # Extract file date from filename
# ------------------------------------------------------------------------------
# Execute Data Loading
# ------------------------------------------------------------------------------
# NOTE(review): root_path and data_directory are not read anywhere in the visible part of the notebook —
# possibly leftovers of the removed loader; confirm before deleting
root_path = Path("../../")
data_directory = "data"


# Load the initiatives snapshot; the timestamped file name is hard-coded
df = pd.read_csv(KAGGLE_INPUT / 'eci_initiatives_2026-02-22_10-43-37.csv')

# ------------------------------------------------------------------------------
# Introduction: ECI Signature Collection Analysis Overview
# ------------------------------------------------------------------------------

from IPython.display import Markdown as md
import pandas as pd
import re

# Create a copy for intro analysis transformations
# (columns added to df_intro below therefore do not appear on df)
df_intro = df.copy()

# Parse registration year from timeline_registered
def parse_year(date_str):
    """Return the first run of four digits in ``date_str`` as an int.

    Returns ``None`` when the value is missing (per ``pd.isna``) or when its
    string form contains no four consecutive digits.
    """
    if pd.isna(date_str):
        return None
    # Input is expected as DD/MM/YYYY, where the only 4-digit run is the year
    found = re.search(r'\d{4}', str(date_str))
    return int(found.group()) if found else None

# Parse numeric signature count from signatures_collected
def parse_signatures(sig_str):
    """Extract the first comma-grouped integer from ``sig_str``.

    Parameters:
    - sig_str: Raw signature count (e.g. "1,234,567"); may be missing

    Returns:
    - The count as an int, or 0 when the value is missing or holds no digits
    """
    if pd.isna(sig_str):
        return 0
    # The match must start with a digit: a pattern that also accepts a leading
    # comma can match punctuation alone (e.g. "approx., 1,234") and then
    # int('') raises ValueError.
    match = re.search(r'\d[\d,]*', str(sig_str))
    if match:
        return int(match.group().replace(',', ''))
    return 0

# Parse threshold count from signatures_threshold_met
def parse_threshold(threshold_str):
    """Convert the raw "thresholds met" value to a float.

    Parameters:
    - threshold_str: Raw value (number or numeric string); may be missing

    Returns:
    - float(threshold_str), or 0 when the value is missing or not convertible
    """
    if pd.isna(threshold_str):
        return 0
    try:
        return float(threshold_str)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures fall back to 0; anything else
        # (e.g. KeyboardInterrupt) is no longer swallowed.
        return 0

# Registration timestamp (raw format DD/MM/YYYY) and its calendar year
df_intro['registration_date'] = pd.to_datetime(df_intro['timeline_registered'], format='%d/%m/%Y')
df_intro['registration_year'] = df_intro['registration_date'].dt.year

# Numeric versions of the raw signature and threshold columns
df_intro['signatures_numeric'] = df_intro['signatures_collected'].apply(parse_signatures)
df_intro['signatures_threshold_met_numeric'] = df_intro['signatures_threshold_met'].apply(parse_threshold)

# Formal requirements: at least 1 million signatures and at least 7 thresholds met
df_intro['reached_signatures'] = df_intro['signatures_numeric'].ge(1_000_000)
df_intro['met_country_threshold'] = df_intro['signatures_threshold_met_numeric'].ge(7)

# Successful ECIs satisfy both requirements
ecis_successful = df_intro.loc[
    df_intro['reached_signatures'] & df_intro['met_country_threshold']
]

# Values interpolated into the intro text ("Month Year" for the date range)
min_date = df_intro['registration_date'].min().strftime('%B %Y')  # e.g., "May 2012"
max_date = df_intro['registration_date'].max().strftime('%B %Y')  # e.g., "February 2026"
total_initiatives, successful_count = len(df_intro), len(ecis_successful)

font_size = 14

# Render markdown with dynamic values
# (last expression of the cell, so the notebook displays the Markdown object).
# The em dashes and emoji below were stored as mis-decoded bytes; restored here.
md(f"""
<span style="font-size: {font_size}px;">
Examines <a href="https://commission.europa.eu/get-involved/engage-eu-policymaking/european-citizens-initiative_en">European Citizens' Initiative proposals</a> submitted between {min_date} and {max_date} to identify patterns of success and failure in signatures collection step. An ECI is a formal mechanism allowing EU citizens to propose legislation if they collect 1 million signatures from at least 7 different member states within (in most cases) 12 months. The dataset tracks {total_initiatives} initiatives from registration through signature collection to European Commission response.

<strong>After meeting the signature threshold, the Commission has sole discretion to decide whether to propose legislation, reject the initiative, or take alternative action.</strong> If the Commission chooses to propose legislation, the proposal enters the normal EU legislative process requiring approval by the European Parliament and Council of the EU.

This analysis focuses exclusively on initiatives that the European Commission successfully registered. It does not cover the registration approval process itself, including which proposed ECIs were <a href="https://citizens-initiative.europa.eu/find-refused-requests-for-registration_en">refused registration</a> or how to prepare a successful registration application, <a href="https://citizens-initiative.europa.eu/how-it-works_en">more about this</a>.

Additionally, this analysis measures success solely by whether an ECI met the formal requirements (1 million signatures and 7-country threshold), not by the content of Commission responses or whether the Commission accepted or rejected the policy proposals.<br>

For detailed analysis of what happens <strong>after</strong> ECIs meet signature thresholds—including Commission response types, implementation timelines, and follow-up actions—see the companion study <a href="https://www.kaggle.com/code/lukkardata/eci-commission-response"><strong>🇪🇺🏛️ European Citizens' Initiatives: After the Signatures</strong></a>.

The data was <a href="https://github.com/Luk-kar/eu-citizens-initiatives-tracker">scraped</a> directly from the <a href="https://citizens-initiative.europa.eu/find-initiative_en">initiatives site</a>.
""")

<a id='table-of-contents'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">🧭 Table of Contents</p>

[🌟 **Introduction**](#introduction)

[❓ **Questions to Ask:**](#question-1)
- [1. How Many ECIs Succeed vs. Fail?](#question-1)
- [2. How Many ECIs Progress Through Each Stage?](#question-2)
- [3. How Many Signatures Do ECIs Collect?](#question-3)
- [4. Which Countries Drive Participation?](#question-4)
- [5. How Do Success Rates Evolve by Year?](#question-5)
- [6. How Long Do ECIs Take at Each Stage?](#question-6)
- [7. Which Policy Areas Succeed Most Often?](#question-7)
- [8. How Long Do ECIs Wait for Commission Responses?](#question-8)
- [9. How Much Funding Do ECIs Receive?](#question-9)
- [10. What Predicts Whether an ECI Will Succeed?](#question-10)
- [11. Key Findings](#question-11)

[**✉️ Contact**](#contact)

<a id='setup'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">⚙️ Setup: Import Libraries and Load Data</p>

In [None]:
# ------------------------------------------------------------------------------
# Code moved to the first code block, due to dynamic intro values calculations
# ------------------------------------------------------------------------------

# Report which directory the CSV files were read from
# (the check mark was stored as mis-decoded bytes; restored here)
print(f"✓ Data loaded from: {KAGGLE_INPUT}")


In [None]:
# ------------------------------------------------------------------------------
# Code moved to the first code block, due to dynamic intro values calculations
# ------------------------------------------------------------------------------

# Sanity check on the loaded table: number of rows and number of columns
print(f"Dataset loaded: {len(df)} initiatives")
print(f"Columns: {len(df.columns)}")


[↑ Table of Contents ↑](#table-of-contents)

<a id='data-cleaning'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">🧹 Data Cleaning and Feature Engineering</p>

This section converts raw dates and text into useful measurements for comparing initiatives.

In [None]:
# ==============================================================================
# DATA CLEANING AND FEATURE ENGINEERING
# ==============================================================================

# ------------------------------------------------------------------------------
# Date Parsing Functions and Timeline Date Conversion
# ------------------------------------------------------------------------------

# Parse date strings from DD/MM/YYYY to datetime
def parse_date(date_str):
    """Parse a ``DD/MM/YYYY`` string into a pandas Timestamp.

    Parameters:
    - date_str: Raw date value; may be missing or an empty string

    Returns:
    - pd.Timestamp for a valid date, pd.NaT for missing, empty or
      unparseable input
    """
    if pd.isna(date_str) or date_str == '':
        return pd.NaT
    try:
        return pd.to_datetime(date_str, format='%d/%m/%Y')
    except (ValueError, TypeError, OverflowError):
        # Only parsing failures map to NaT; unrelated exceptions
        # (e.g. KeyboardInterrupt) are no longer swallowed.
        return pd.NaT

# Parse main timeline dates
# Each raw DD/MM/YYYY text column gets a datetime twin; parse_date yields NaT
# for missing or unparseable values.
df['registered_date'] = df['timeline_registered'].apply(parse_date)
df['collection_start_date'] = df['timeline_collection_start_date'].apply(parse_date)
df['collection_closed_date'] = df['timeline_collection_closed'].apply(parse_date)
df['verification_start_date'] = df['timeline_verification_start'].apply(parse_date)
df['verification_end_date'] = df['timeline_verification_end'].apply(parse_date)
df['commission_response_date'] = df['timeline_response_commission_date'].apply(parse_date)

# Extract year from registration
df['registration_year'] = df['registered_date'].dt.year


# ------------------------------------------------------------------------------
# Duration Calculations
# ------------------------------------------------------------------------------

# Calculate durations
# Whole-day differences between stage dates; a NaT on either side gives NaN.
# Note that the Commission response time is measured from registration.
df['collection_duration_days'] = (df['collection_closed_date'] - df['collection_start_date']).dt.days
df['verification_duration_days'] = (df['verification_end_date'] - df['verification_start_date']).dt.days
df['time_to_commission_response_days'] = (df['commission_response_date'] - df['registered_date']).dt.days
df['registration_to_collection_days'] = (df['collection_start_date'] - df['registered_date']).dt.days


# ------------------------------------------------------------------------------
# Signature Data Parsing and Conversion
# ------------------------------------------------------------------------------

# Parse signatures (handle commas and convert to numeric)
def parse_signatures(sig):
    """Convert a raw signature count to float.

    Missing values become ``np.nan``; strings have their thousands commas
    removed before conversion; any other value is passed to ``float`` as-is.
    """
    if pd.isna(sig):
        return np.nan
    cleaned = sig.replace(',', '') if isinstance(sig, str) else sig
    return float(cleaned)

# Numeric signature total (NaN when missing, via parse_signatures)
df['signatures_numeric'] = df['signatures_collected'].apply(parse_signatures)
# Thresholds-met value as a number; values that cannot be converted become NaN
df['signatures_threshold_met_numeric'] = pd.to_numeric(df['signatures_threshold_met'], errors='coerce')


# ------------------------------------------------------------------------------
# Funding Data Parsing and Conversion
# ------------------------------------------------------------------------------

# Parse funding (handle commas and convert to numeric)
def parse_funding(funding):
    """Convert a raw funding amount to a number.

    Missing values count as 0 (an int); strings have their thousands commas
    removed before conversion; everything else goes straight through ``float``.
    """
    if pd.isna(funding):
        return 0
    amount = funding.replace(',', '') if isinstance(funding, str) else funding
    return float(amount)

# Numeric funding total; parse_funding maps missing funding to 0 rather than NaN
df['funding_numeric'] = df['funding_total'].apply(parse_funding)


# ------------------------------------------------------------------------------
# Success Metrics Definition
# ------------------------------------------------------------------------------

# Success flags: both formal requirements must hold for a successful ECI
# (comparisons against NaN evaluate to False)
df['reached_signatures'] = df['signatures_numeric'].ge(1_000_000)
df['met_country_threshold'] = df['signatures_threshold_met_numeric'].ge(7)
df['successful_eci'] = df['reached_signatures'] & df['met_country_threshold']
df['commission_responded'] = df['final_outcome'].eq('Commission Response')


# ------------------------------------------------------------------------------
# Enhanced Outcome Categorization
# ------------------------------------------------------------------------------

# Create enhanced outcome category
def categorize_outcome(row):
    """Map one initiative row to its enhanced outcome label.

    'Withdrawn' and 'Commission Response' are taken over from
    ``final_outcome`` unchanged. Every other row is labelled by its
    ``successful_eci`` flag: 'Collected Signatures' when set, otherwise
    'Unsuccessful Collection'.
    """
    outcome = row['final_outcome']
    # Outcomes that are final regardless of the signature count
    for terminal_label in ('Withdrawn', 'Commission Response'):
        if outcome == terminal_label:
            return terminal_label
    if row['successful_eci']:
        return 'Collected Signatures'
    return 'Unsuccessful Collection'

# Row-wise labelling (axis=1): categorize_outcome needs both final_outcome and successful_eci
df['final_outcome_enhanced'] = df.apply(categorize_outcome, axis=1)


# ------------------------------------------------------------------------------
# Summary Statistics Helper Function and Dataset Overview
# ------------------------------------------------------------------------------

def transpose_summary(df):
    """Turn a summary frame into a two-column metric/value table.

    The frame is transposed, its former column names move into a regular
    column, the two resulting columns are named "metric" and "value", and
    the returned Styler hides the row index.
    """
    flipped = df.T.reset_index()
    labelled = flipped.set_axis(["metric", "value"], axis=1)
    return labelled.style.hide(axis="index")

def define_correlation(abs_corr: float) -> str:
    """Name the strength band of an absolute correlation coefficient.

    Bands (upper bound exclusive): below 0.1 "No correlation", below 0.3
    "Weak", below 0.5 "Moderate", below 0.7 "Strong", up to and including
    1.0 "Very strong".

    Raises:
    - ValueError: if the value falls in none of the bands (above 1.0, or NaN)
    """
    bands = (
        (0.1, "No correlation"),
        (0.3, "Weak"),
        (0.5, "Moderate"),
        (0.7, "Strong"),
    )
    for upper_bound, label in bands:
        if abs_corr < upper_bound:
            return label
    if abs_corr <= 1.0:
        return "Very strong"
    raise ValueError(f"Invalid correlation value: {abs_corr}. Absolute correlation must be between 0 and 1.")
        
# One-row overview table; each column is a headline count over all initiatives.
# Summing a boolean column counts its True values.
summary = pd.DataFrame({
    "Total Initiatives": [len(df)],
    "Reached 1 Million Signatures": [df["reached_signatures"].sum()],
    "Met Country Threshold (7+ countries)": [df["met_country_threshold"].sum()],
    "Successful ECI": [df["successful_eci"].sum()],
    "Commission Responded": [df["commission_responded"].sum()],
    # 'Collected Signatures' = successful, but neither withdrawn nor answered yet
    "Waiting for Response": [(df["final_outcome_enhanced"] == "Collected Signatures").sum()],
})

# ============================================================================
# CATEGORY ASSIGNMENT
# ============================================================================

# --- 1) Load predefined categories and merge into df ---
# NOTE: Manual categorization chosen for simplicity and speed of development.
# Dataset updates are infrequent, so manual maintenance remains practical.
# Alternative: automated classification (ML model) not justified for this scale.
# Only the key and category columns are read, both as pandas "string" dtype
df_cats = pd.read_csv(
    KAGGLE_INPUT / "eci_categories.csv",
    dtype={"registration_number": "string", "categories": "string"},
    usecols=["registration_number", "categories"],
)

# Ensure merge keys are consistent types
df["registration_number"] = df["registration_number"].astype("string")

# Left-join: keep all initiatives; attach predefined category when available
# NOTE(review): a registration_number listed more than once in eci_categories.csv would
# duplicate that initiative's row here — confirm the keys in that file are unique
df = df.merge(df_cats, on="registration_number", how="left")

# Treat empty strings as missing (defensive)
df["categories"] = df["categories"].replace("", pd.NA)

# --- 2) Keyword fallback logic (only used if no predefined category exists) ---
# Keyword lists per policy area, read by categorize_initiative.
# Key order matters: the categorizer walks this dict in insertion order and the
# first matching area becomes the primary fallback category.
policy_keywords = {
    "Education & Culture": [
        "education", "school", "university", "student", "teacher", "learning",
        "culture", "cultural", "heritage", "art", "museum", "language",
        "erasmus", "youth", "training", "literacy", "academic", "curriculum"
    ],
    
    "Digital & Communications": [
        "digital", "internet", "online", "cyber", "data protection", "privacy",
        "technology", "telecommunication", "broadband", "connectivity",
        "artificial intelligence", "ai", "platform", "social media", "network",
        "electronic", "software", "digital rights", "tech"
    ],
    
    "Social Policy": [
        "social", "poverty", "inequality", "welfare", "housing", "homeless",
        "employment", "unemployment", "worker", "labour", "disability",
        "pension", "retirement", "family", "child", "children", "elderly",
        "inclusion", "discrimination", "gender", "equal", "minimum wage"
    ],
    
    "Agriculture & Fisheries & Animal rights": [
        "agriculture", "farming", "farm", "farmer", "crop", "livestock",
        "fisheries", "fishing", "fish", "aquaculture", "animal", "animals",
        "animal welfare", "animal rights", "wildlife", "bee", "bees",
        "pesticide", "herbicide", "rural", "veterinary", "cattle", "meat",
        # NOTE(review): "diary" below looks like a typo of "dairy" (already listed) — confirm it is intentional
        "dairy", "fur", "hunting", "animal testing", "vegan", "slaughter", "diary"
    ],
    
    "Health": [
        "health", "healthcare", "medical", "medicine", "hospital", "patient",
        "disease", "pandemic", "epidemic", "vaccine", "vaccination",
        "pharmaceutical", "drug", "mental health", "healthcare system",
        "doctor", "nurse", "cancer", "diabetes", "public health", "covid",
        "tobacco", "smoking", "alcohol", "addiction", "nutrition", "cannabis",
        "psychedelics"
    ],
    
    "Democracy & Citizens' rights": [
        "democracy", "democratic", "citizen", "citizenship", "rights",
        "human rights", "freedom", "vote", "voting", "election", "referendum",
        "participation", "transparency", "accountability", "rule of law",
        "justice", "court", "legal", "civil rights", "fundamental rights",
        "constitution", "treaty", "sovereignty", "parliament", "representation"
    ],
    
    "Environment & Climate": [
        "environment", "environmental", "climate", "climate change", "global warming",
        "pollution", "air quality", "water", "ocean", "sea", "plastic",
        "waste", "recycling", "biodiversity", "ecosystem", "nature",
        "conservation", "deforestation", "forest", "renewable", "energy",
        "sustainability", "sustainable", "carbon", "emission", "green deal",
        "fossil fuel", "coal", "oil", "gas", "nuclear", "solar", "wind"
    ],
    
    "Transport": [
        "transport", "transportation", "traffic", "road", "highway", "railway",
        "train", "aviation", "aircraft", "flight", "airport", "vehicle",
        "car", "automobile", "bus", "truck", "shipping", "maritime",
        "mobility", "infrastructure", "public transport", "metro", "cycling"
    ],
    
    "Economy & Finance": [
        "economy", "economic", "finance", "financial", "tax", "taxation",
        "budget", "fiscal", "bank", "banking", "investment", "currency",
        "euro", "trade", "market", "business", "industry", "growth",
        "inflation", "debt", "deficit", "monetary", "corporate", "vat",
        "subsidy", "funding", "revenue", "economic policy"
    ],
    
    "Consumer protection": [
        "consumer", "consumer rights", "consumer protection", "product safety",
        "food safety", "labelling", "label", "quality", "warranty",
        "advertising", "marketing", "price", "competition", "monopoly",
        "contract", "purchase", "refund", "complaint", "fraud", "scam"
    ],
    
    # Matches one specific initiative title rather than a topic
    "Joke ECI": [
        "european day of whatever it takes"
    ],
    
    # Empty list: never matched by keyword, only used as the no-match default
    "Other": [],  # Fallback
}


def categorize_initiative(title, objective, keyword_map=None):
    """Assign policy areas to an initiative by keyword matching.

    Keywords are matched as whole words or phrases (case-insensitive, with an
    optional plural "s"/"es"). Plain substring matching produced false hits
    such as "art" inside "party" or "ai" inside "maintain", which pushed
    unrelated initiatives into the first categories of the mapping.

    Parameters:
    - title: Initiative title (any value; converted with str())
    - objective: Initiative objective text (any value; converted with str())
    - keyword_map: Optional dict mapping category name to a list of keywords;
      defaults to the module-level ``policy_keywords``

    Returns:
    - List of matching category names in mapping order, or ["Other"] when
      nothing matches
    """
    import re  # local import keeps the function usable on its own

    if keyword_map is None:
        keyword_map = policy_keywords

    text = (str(title) + " " + str(objective)).lower()
    matched = []
    for category, keywords in keyword_map.items():
        if not keywords:
            # Categories without keywords (the fallback) are never matched
            continue
        alternatives = "|".join(re.escape(keyword.lower()) for keyword in keywords)
        # \b on both sides: the keyword must start and end at a word boundary
        pattern = rf"\b(?:{alternatives})(?:s|es)?\b"
        if re.search(pattern, text):
            matched.append(category)
    return matched if matched else ["Other"]


# Compute fallback categories for all rows
# Keyword-based category list for every row (used only where no predefined category exists)
df["policy_areas_fallback"] = df.apply(
    lambda row: categorize_initiative(row["title"], row["objective"]),
    axis=1,
)
# Primary fallback = first matched category in the list
df["primary_policy_area_fallback"] = df["policy_areas_fallback"].apply(lambda x: x[0] if x else "Other")

# --- 3) Final policy area: predefined when available, else fallback ---
df["primary_policy_area"] = df["categories"].fillna(df["primary_policy_area_fallback"]).fillna("Other")

# Optional: keep a unified list-form column too
# Predefined category wrapped in a one-element list, otherwise the fallback list
df["policy_areas"] = np.where(
    df["categories"].notna(),
    df["categories"].apply(lambda x: [x]),
    df["policy_areas_fallback"],
)

# Create color mapping for policy areas (fixed to match actual column values)
# Hex colour per policy area; the keys are the same names used as keys of policy_keywords
policy_area_colors = {
    'Agriculture & Fisheries & Animal rights': '#8bc34a',  # Green nature/plants
    'Democracy & Citizens\' rights': '#5c6bc0',  # Indigo official/formal
    'Social Policy': '#ec407a',  # Pink community/people
    'Health': '#ef5350',  # Red medical/health
    'Environment & Climate': '#66bb6a',  # Forest green nature
    'Consumer protection': '#ffa726',  # Orange warning/safety
    'Transport': '#42a5f5',  # Blue sky/movement
    'Economy & Finance': '#fdd835',  # Yellow gold/money
    'Digital & Communications': '#7e57c2',  # Purple technology
    'Education & Culture': '#26c6da',  # Cyan knowledge/creativity
    'Joke ECI': '#bdbdbd',  # Grey neutral
    'Other': '#9e9e9e'  # Grey neutral
}

# ==============================================================================
# REUSABLE PLOTTING HELPER FUNCTIONS
# ==============================================================================

def prepare_eci_list_for_hover(ecis, max_items=15):
    """
    Prepare ECI title list for hover tooltips with truncation.

    Parameters:
    - ecis: List of ECI titles
    - max_items: Maximum number of items to show (default: 15)

    Returns:
    - "No ECIs" for an empty list; otherwise an HTML string with one bullet
      line per title joined by <br>, followed by an italic
      "... (and N more)" line when titles were left out
    """
    if not ecis:
        return "No ECIs"

    # The bullet was stored as mis-decoded bytes; written as an escape so the
    # file's encoding cannot corrupt it again.
    bullet = "\u2022"
    shown = ecis[:max_items]
    text = '<br>'.join(f"{bullet} {title}" for title in shown)

    hidden = len(ecis) - len(shown)
    if hidden > 0:
        text += f"<br><i>... (and {hidden} more)</i>"
    return text


def create_pie_chart_with_eci_lists(
    df,
    values_column,
    names_column,
    title,
    filter_column,
    filter_values_dict,
    color_map=None,
    hole=0.1,
    height=600
):
    """
    Build a pie chart whose hover tooltips list the ECIs behind each slice.

    Parameters:
    - df: Source DataFrame containing ECI data (needs a 'title' column)
    - values_column: Part of the signature but not read by the body;
      slice sizes are row counts per names_column value
    - names_column: Column whose distinct values become the slices
    - title: Chart title (rendered in bold)
    - filter_column: Column compared against each slice's filter value to
      collect the titles for the tooltip
    - filter_values_dict: Dict mapping slice names to filter values; names
      missing from the dict are used as the filter value themselves
    - color_map: Dict mapping slice names to colors (optional); names not in
      the map get '#CCCCCC'
    - hole: Size of center hole (default: 0.1)
    - height: Chart height in pixels (default: 600)

    Returns:
    - Plotly figure object
    """
    # One row per slice with its number of initiatives
    slices = df.groupby(names_column).size().reset_index(name='Count')
    slices['Percentage'] = (slices['Count'] / len(df) * 100).round(2)

    def titles_for(slice_name):
        # Titles of the initiatives whose filter_column equals the slice's filter value
        wanted = filter_values_dict.get(slice_name, slice_name)
        return df.loc[df[filter_column] == wanted, 'title'].tolist()

    slices['ECI_List'] = [
        prepare_eci_list_for_hover(titles_for(slice_name))
        for slice_name in slices[names_column]
    ]

    # Explicit slice colors only when a color map was supplied
    slice_colors = None
    if color_map:
        slice_colors = [
            color_map.get(slice_name, '#CCCCCC')
            for slice_name in slices[names_column]
        ]
    marker_style = dict(colors=slice_colors) if slice_colors else {}

    hover_text = (
        '<b>%{label}</b><br>'
        'Count: %{value}<br>'
        'Percentage: %{percent}<br><br>'
        '<b>ECIs:</b><br>%{customdata}'
        '<extra></extra>'
    )

    pie = go.Pie(
        labels=slices[names_column],
        values=slices['Count'],
        hole=hole,
        marker=marker_style,
        customdata=slices['ECI_List'],
        hovertemplate=hover_text,
        textinfo='percent+label',
        textposition='inside',
        textfont=dict(size=12, color='white', family='Arial Black')
    )
    fig = go.Figure(pie)

    fig.update_layout(
        title=f'<b>{title}</b>',
        height=height,
        showlegend=True,
        legend=dict(font=dict(size=14))
    )

    return fig


def create_horizontal_bar_with_ecis(
    df_sorted,
    x_column,
    y_column,
    title,
    xaxis_title,
    color_column=None,
    color_map=None,
    customdata_columns=None,
    hover_template=None,
    height=600,
    show_legend=True
):
    """
    Build a horizontal bar chart, optionally colored per category.

    Parameters:
    - df_sorted: Pre-sorted DataFrame with data to plot
    - x_column: Column for x-axis (bar length)
    - y_column: Column for y-axis (bar labels)
    - title: Chart title (rendered in bold)
    - xaxis_title: X-axis label
    - color_column: Column to determine bar colors (optional)
    - color_map: Dict mapping color_column values to colors (optional);
      values not in the map get '#9e9e9e'
    - customdata_columns: List of columns for custom hover data (optional)
    - hover_template: Custom hover template string (optional)
    - height: Chart height (default: 600)
    - show_legend: Whether to show legend (default: True)

    Returns:
    - Plotly figure object
    """
    fig = go.Figure()

    if not (color_column and color_map):
        # No coloring requested: the whole frame becomes one light-blue trace
        hover_payload = None
        if customdata_columns:
            hover_payload = df_sorted[customdata_columns].values

        fig.add_trace(go.Bar(
            x=df_sorted[x_column],
            y=df_sorted[y_column],
            orientation='h',
            marker_color='lightblue',
            customdata=hover_payload,
            hovertemplate=hover_template
        ))
    else:
        # One trace per bar so every bar can carry its own color; only the
        # first bar of each category contributes a legend entry.
        categories_in_legend = set()
        for _, record in df_sorted.iterrows():
            category = record[color_column]
            first_of_category = category not in categories_in_legend
            categories_in_legend.add(category)

            hover_payload = None
            if customdata_columns:
                hover_payload = [[record[name] for name in customdata_columns]]

            fig.add_trace(go.Bar(
                x=[record[x_column]],
                y=[record[y_column]],
                orientation='h',
                name=category,
                marker_color=color_map.get(category, '#9e9e9e'),
                showlegend=first_of_category and show_legend,
                legendgroup=category,
                customdata=hover_payload,
                hovertemplate=hover_template
            ))

    legend_heading = 'Category' if show_legend else None
    fig.update_layout(
        title=f'<b>{title}</b>',
        xaxis_title=xaxis_title,
        yaxis_title='',
        height=height,
        showlegend=show_legend,
        legend_title_text=legend_heading,
        xaxis=dict(tickformat=',', title=dict(font=dict(size=14))),
        yaxis=dict(title=dict(font=dict(size=14)))
    )

    return fig


def add_eci_lists_to_dataframe(df, data_df, group_column, filter_func=None):
    """
    Attach a hover-ready ECI title list to every row of an aggregated frame.

    Parameters:
    - df: Source DataFrame containing all ECI data (needs a 'title' column)
    - data_df: Target DataFrame; it is modified in place
    - group_column: Column present in both frames; titles are collected from
      the source rows whose value equals the target row's value
    - filter_func: Optional function applied to df first to narrow the source
      rows (e.g., lambda df: df[df['successful_eci']])

    Returns:
    - data_df (the same object) with an added 'ECI_List' column
    """
    candidates = df if not filter_func else filter_func(df)

    data_df['ECI_List'] = [
        prepare_eci_list_for_hover(
            candidates.loc[candidates[group_column] == group_value, 'title'].tolist()
        )
        for group_value in data_df[group_column]
    ]
    return data_df
# Last expression of the cell: displays the overview counts as a metric/value table
transpose_summary(summary)

[↑ Table of Contents ↑](#table-of-contents)

<a id='question-1'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">1. How Many ECIs Succeed vs. Fail?</p>

Categorizes initiatives by final outcome, distinguishing between those that collected enough signatures versus those that fell short or were withdrawn.

In [None]:
# ==============================================================================
# DATA PREPARATION: STATUS AND OUTCOME DISTRIBUTIONS
# ==============================================================================

# Current Status Distribution with cleaned Verification
# Collapse runs of whitespace in the status text and trim both ends
status_series = df['current_status'].str.replace(r'\s+', ' ', regex=True).str.strip()
# Any status mentioning 'Verification' is folded into the single label 'Verification'
status_series = status_series.where(
    ~status_series.str.contains('Verification', na=False), 
    'Verification'
)
status_dist = status_series.value_counts().reset_index()
status_dist.columns = ['Status', 'Count']
status_dist['Percentage'] = (status_dist['Count'] / len(df) * 100).round(2)

# Final Outcome Distribution with renamed category
outcome_series = df['final_outcome_enhanced'].replace(
    'Collected Signatures', 
    'Collected Signatures / Waiting Response'
)
# dropna=False keeps a row for missing outcomes, if there are any
outcome_dist = outcome_series.value_counts(dropna=False).reset_index()
outcome_dist.columns = ['Outcome', 'Count']
outcome_dist['Percentage'] = (outcome_dist['Count'] / len(df) * 100).round(2)

# Title-case the status labels (str.title capitalizes the first letter of every word)
status_dist['Status'] = status_dist['Status'].str.title()


# ==============================================================================
# VISUALIZATION: PIE CHART - FINAL OUTCOMES
# ==============================================================================

# Define custom colors
# Colours keyed by the values of final_outcome_enhanced
custom_colors = {
    'Unsuccessful Collection': '#C34242',
    'Commission Response': '#3CA371',
    'Collected Signatures': '#F5A623',
    'Withdrawn': '#909090'
}

# Create pie chart using reusable function
# The same column is used for slice names and for selecting the hover titles
fig = create_pie_chart_with_eci_lists(
    df=df,
    values_column='final_outcome_enhanced',
    names_column='final_outcome_enhanced',
    title='Final Outcome Distribution of All ECI Initiatives',
    filter_column='final_outcome_enhanced',
    filter_values_dict={},  # No mapping needed
    color_map=custom_colors,
    hole=0.1,
    height=600
)

# Rename display labels
# Done after the figure is built, so colours were already assigned using the original names
fig.data[0].labels = [
    'Waiting Response' if label == 'Collected Signatures' else label 
    for label in fig.data[0].labels
]

fig.show()


# ==============================================================================
# VISUALIZATION: HORIZONTAL BAR CHART - TOP 10 ECIs BY SIGNATURES
# ==============================================================================

# Prepare data
# Ten largest signature totals, ordered ascending for the bar chart
top10_sigs = df.nlargest(10, 'signatures_numeric').copy()
top10_sigs = top10_sigs.sort_values('signatures_numeric', ascending=True)
# Axis labels: keep titles of up to 50 characters unchanged and add the
# ellipsis only where a title was actually cut (previously every title got '...')
top10_sigs['title_short'] = top10_sigs['title'].where(
    top10_sigs['title'].str.len() <= 50,
    top10_sigs['title'].str[:50] + '...',
)

# Format objectives (keep this specific formatting logic)
def format_objective(objective):
    """Turn an objective text into '<br>'-separated lines for a hover tooltip.

    Missing values yield the placeholder "No objective provided". Otherwise
    the text is split on the bullet marker, empty pieces are dropped, every
    piece after the first gets the marker put back in front, and each piece
    is wrapped so that no line holds more than 11 whitespace-separated words.
    """
    if pd.isna(objective):
        return "No objective provided"

    words_per_line = 11
    rendered = []
    for position, chunk in enumerate(str(objective).split('‚Ä¢')):
        chunk = chunk.strip()
        if chunk == '':
            continue
        # Only pieces that followed a bullet marker get the marker restored.
        text = chunk if position == 0 else '‚Ä¢ ' + chunk
        tokens = text.split()
        rows = []
        start = 0
        while start < len(tokens):
            rows.append(' '.join(tokens[start:start + words_per_line]))
            start += words_per_line
        rendered.append('<br>'.join(rows))
    return '<br>'.join(rendered)

# Pre-wrap the objective text so it fits inside the hover tooltip.
top10_sigs['objective_formatted'] = top10_sigs['objective'].apply(format_objective)

# Create bar chart using reusable function (defined elsewhere in the notebook).
# customdata indices used by the hover template:
#   0 = full title, 1 = countries that met their threshold,
#   2 = policy area, 3 = wrapped objective text.
fig = create_horizontal_bar_with_ecis(
    df_sorted=top10_sigs,
    x_column='signatures_numeric',
    y_column='title_short',
    title='Top 10 ECIs by Signatures Collected',
    xaxis_title='Number of Signatures',
    color_column='primary_policy_area',
    color_map=policy_area_colors,
    customdata_columns=['title', 'signatures_threshold_met_numeric', 'primary_policy_area', 'objective_formatted'],
    hover_template='<b>%{customdata[0]}</b><br>' +
                   '<b>Signatures:</b> %{x:,.0f}<br>' +
                   '<b>Countries Threshold Met:</b> %{customdata[1]}<br>' +
                   '<b>Policy Area:</b> %{customdata[2]}<br><br>' +
                   '<b>Objective:</b><br>%{customdata[3]}' +
                   '<extra></extra>',
    height=600,
    show_legend=True
)

# Add threshold line and annotation.
# The line spans all bars: categorical positions run from 0 to n-1, so the
# upper end is derived from the number of rows instead of assuming exactly 10.
threshold_line_top = len(top10_sigs) - 0.5
# 'darkgrey' is the CSS colour name; the spelling with a space is not a CSS
# name and may not be resolved by the browser-side renderer.
fig.add_shape(
    type='line', x0=1000000, x1=1000000, y0=-0.5, y1=threshold_line_top,
    line=dict(color='darkgrey', width=2, dash='dash')
)
fig.add_annotation(
    x=1000000, y=threshold_line_top, text="1M Threshold",
    showarrow=False, yshift=10, font=dict(color='darkgrey', size=12)
)
fig.update_layout(legend_title_text='Policy Area')

fig.show()


# ==============================================================================
# OUTPUT: DISPLAY SUMMARY TABLES
# ==============================================================================

# Print each heading followed by its table; the index is hidden and the
# percentage column is rendered with two decimals.
for heading, table in (
    ("\nCurrent Status:", status_dist),
    ("\nFinal Outcome:", outcome_dist),
):
    print(heading)
    display(table.style.hide(axis="index").format({'Percentage': '{:.2f}'}))

[↑ Table of Contents ↑](#table-of-contents)

<a id='question-2'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">2. How Many ECIs Progress Through Each Stage?</p>

This section tracks how many initiatives advance through each phase of the ECI process:<br>
`Successful Registration` → `Collection Start` → `Collection Completion` → `Meeting Country Threshold` (minimum signatures from at least seven member states) → `Reaching Million-Signature Requirement` → `Successful Qualification` (both criteria met) → `Commission Response`.<br><br>
The biggest drop-off occurs when initiatives fail to reach the one-million-signature threshold needed for success.<br><br>
NOTE:<br>
> - Not all successful ECIs have received a Commission response yet — some are still waiting after meeting both the signature and country thresholds.<br>See [the official guide](https://citizens-initiative.europa.eu/how-it-works_en).


In [None]:
# ==============================================================================
# QUESTION 2: OVERALL SUCCESS FUNNEL ANALYSIS
# ==============================================================================

# ------------------------------------------------------------------------------
# Calculate Conversion Metrics at Each Stage
# ------------------------------------------------------------------------------

# Number of initiatives counted at each stage of the ECI process.
total_registered = len(df)
started_collection = df['collection_start_date'].count()    # non-missing start dates
completed_collection = df['collection_closed_date'].count()  # non-missing closing dates
reached_1m = df['reached_signatures'].sum()
met_threshold = df['met_country_threshold'].sum()
both_criteria = df['successful_eci'].sum()
commission_response = df['commission_responded'].sum()


# ------------------------------------------------------------------------------
# Prepare Funnel Data Structure
# ------------------------------------------------------------------------------

# Stage labels and their counts, listed in funnel order (top to bottom).
funnel_stage_labels = [
    '1. Registered',
    '2. Collection Started',
    '3. Collection Completed',
    '4. Met Country Threshold (7+)',
    '5. Reached 1M Signatures',
    '6. Successful (Both Criteria)',
    '7. Commission Response',
]
funnel_stage_counts = [
    total_registered,
    started_collection,
    completed_collection,
    met_threshold,
    reached_1m,
    both_criteria,
    commission_response,
]
funnel_data = pd.DataFrame({'Stage': funnel_stage_labels, 'Count': funnel_stage_counts})

# Percentages relative to the first stage and to the directly preceding stage
# (the first row has no predecessor, so its conversion value is NaN).
funnel_data['Percentage of Registered'] = (
    funnel_data['Count'].div(total_registered).mul(100).round(2)
)
funnel_data['Conversion from Previous Stage'] = (
    funnel_data['Count'].div(funnel_data['Count'].shift(1)).mul(100).round(2)
)


# ------------------------------------------------------------------------------
# VISUALIZATION: Interactive Funnel Chart
# ------------------------------------------------------------------------------

# One colour per funnel stage, picked from the notebook's `viridis_colors`
# list (defined elsewhere) at the given positions.
colors = [viridis_colors[i] for i in [0, 2, 4, 6, 7, 8, 9]]

# Stage names for the hover text with the leading "N. " numbering removed
# (the y-axis keeps the numbered labels so the stage order stays visible).
stage_names_no_numbers = funnel_data['Stage'].str.replace(r'^\d+\.\s*', '', regex=True)

# Create funnel chart: bar text shows the count and the share of the previous
# stage; the hover adds the share of the first stage.
fig = go.Figure(go.Funnel(
    y = funnel_data['Stage'],
    x = funnel_data['Count'],
    textposition = "inside",
    textinfo = "value+percent previous",
    marker = dict(
        color = colors
    ),
    connector = {
        "fillcolor": "#CEDFF6",  # Light steel blue for connector fill
        "visible": True
    },
    customdata = stage_names_no_numbers,
    hovertemplate = '<b>%{customdata}:</b><br>' +
                    'Count: %{x}<br>' +
                    'Percent of initial: %{percentInitial}<br>' +
                    'Percent of previous: %{percentPrevious}<br>' +
                    '<extra></extra>'
))

fig.update_layout(
    title="<b>ECI Success Funnel: From Registration to Commission Response</b>",
    height=600,
    font=dict(size=14),
    xaxis=dict(title=dict(font=dict(size=14))),
    yaxis=dict(title=dict(font=dict(size=14))),
    legend=dict(
        font=dict(size=14)
    )
)

fig.show()


# ------------------------------------------------------------------------------
# OUTPUT: Display Summary Table and Key Insights
# ------------------------------------------------------------------------------

# Funnel table: index hidden, both percentage columns with two decimals.
funnel_table_format = {
    'Percentage of Registered': '{:.2f}',
    'Conversion from Previous Stage': '{:.2f}',
}
display(funnel_data.style.hide(axis="index").format(funnel_table_format))

# Headline ratios derived from the stage counts computed above.
print("\n\nKey Insights:")
overall_success_pct = commission_response / total_registered * 100
print(f"- Overall success rate (registered ‚Üí commission response): {overall_success_pct:.2f}%")
collected_pct = both_criteria / total_registered * 100
print(f"- Collected signatures rate (registered ‚Üí met both criteria): {collected_pct:.2f}%")
response_pct = commission_response / both_criteria * 100
print(f"- Response rate (collected signatures ‚Üí commission response): {response_pct:.2f}%")


[↑ Table of Contents ↑](#table-of-contents)

<a id='question-3'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">3. How Many Signatures Do ECIs Collect?</p>
Analyzes signature collection performance across all initiatives.

In [None]:
# ==============================================================================
# QUESTION 3: SIGNATURE COLLECTION PERFORMANCE ANALYSIS
# ==============================================================================

# ------------------------------------------------------------------------------
# Data Preparation: Filter Initiatives with Signature Data
# ------------------------------------------------------------------------------

# Work on an independent copy of the rows that have a signature count.
initiatives_with_sigs = df.loc[df['signatures_numeric'].notna()].copy()


# ------------------------------------------------------------------------------
# Summary Statistics: Total Signatures Analysis
# ------------------------------------------------------------------------------

# All-time signature statistics, formatted with thousands separators and no
# decimals; the last row is the plain number of initiatives in the subset.
signature_totals = initiatives_with_sigs['signatures_numeric']
total_sigs_stats = pd.DataFrame({
    'Metric': [
        'Total Signatures (All ECIs)',
        'Average Signatures per ECI',
        'Median Signatures per ECI',
        'Minimum Signatures',
        'Maximum Signatures',
        'Total ECIs with Any Signatures',
    ],
    'Value': [
        *(
            f"{getattr(signature_totals, aggregate)():,.0f}"
            for aggregate in ('sum', 'mean', 'median', 'min', 'max')
        ),
        len(initiatives_with_sigs),
    ],
})

# Statistics on the per-initiative count of countries that passed their
# threshold; the final row is a fixed reference value.
countries_passed = initiatives_with_sigs['signatures_threshold_met_numeric']
threshold_stats = pd.DataFrame({
    'Metric': ['Minimum', 'Maximum', 'Mean', 'Median', 'Max Possible'],
    'Countries Passed Threshold': [
        countries_passed.min(),
        countries_passed.max(),
        countries_passed.mean().round(2),
        countries_passed.median(),
        27,  # EU member states
    ],
})

# Subset that also has a collection duration. No correlation is computed in
# this cell; the frame is only prepared here.
initiatives_with_both = initiatives_with_sigs.dropna(subset=['collection_duration_days'])

# Signature bracket analysis.
# Bins are left-closed / right-open (right=False) so that they match their
# labels: an initiative with 0 signatures falls into '<100K' instead of
# getting no bracket at all, and exactly 1,000,000 signatures counts as '1M+'.
# The last bin is open-ended so arbitrarily large counts are still bracketed.
initiatives_with_sigs['sig_bracket'] = pd.cut(
    initiatives_with_sigs['signatures_numeric'],
    bins=[0, 100000, 250000, 500000, 750000, 1000000, float('inf')],
    labels=['<100K', '100K-250K', '250K-500K', '500K-750K', '750K-1M', '1M+'],
    right=False
)
# Number of initiatives per bracket (counts non-missing registration numbers;
# brackets without any initiative are omitted because observed=True).
sig_bracket_analysis = initiatives_with_sigs.groupby('sig_bracket', observed=True).agg({
    'registration_number': 'count',
}).reset_index()
sig_bracket_analysis.columns = ['Signature Bracket', 'Count']


# ------------------------------------------------------------------------------
# VISUALIZATION PREPARATION: Histogram with Gradient Colors
# ------------------------------------------------------------------------------

# Create bins manually: 50 equal-width bins from 0 to the largest count.
num_bins = 50
bins = np.linspace(0, initiatives_with_sigs['signatures_numeric'].max(), num_bins + 1)

threshold = 1000000

# Split the edges at the threshold. The threshold itself must be an edge of
# BOTH groups: otherwise the regular bin that straddles it belongs to neither
# group and every initiative inside that bin silently disappears from the
# chart (np.histogram ignores values outside the given edges).
if bins[-1] >= threshold:
    below_bins = np.append(bins[bins < threshold], threshold)
    upper_edges = bins[bins > threshold]
    if upper_edges.size == 0:
        # The maximum sits exactly on the threshold: give it one bin of
        # regular width so it is still drawn.
        upper_edges = np.array([threshold + (bins[1] - bins[0])])
    above_bins = np.insert(upper_edges, 0, threshold)
else:
    # Nothing reaches the threshold, so no bin straddles it.
    below_bins = bins[bins < threshold]
    above_bins = bins[bins >= threshold]

# Create histograms (separately, so the two groups can be styled differently)
hist_below, bin_edges_below = np.histogram(
    initiatives_with_sigs[initiatives_with_sigs['signatures_numeric'] < threshold]['signatures_numeric'],
    bins=below_bins
)

hist_above, bin_edges_above = np.histogram(
    initiatives_with_sigs[initiatives_with_sigs['signatures_numeric'] >= threshold]['signatures_numeric'],
    bins=above_bins
)

# Function to get ECI titles for a bin
def get_bin_ecis(bin_start, bin_end):
    """Build the hover text listing ECI titles inside one histogram bin.

    Selects the rows of the module-level ``initiatives_with_sigs`` frame whose
    'signatures_numeric' lies in the closed interval [bin_start, bin_end].
    Returns "No ECIs" for an empty selection; otherwise a '<br>'-joined
    bullet list of at most 15 titles, followed by a note with the number of
    omitted titles when there are more.
    """
    counts = initiatives_with_sigs['signatures_numeric']
    inside_bin = counts.ge(bin_start) & counts.le(bin_end)
    titles = initiatives_with_sigs.loc[inside_bin, 'title'].tolist()

    if len(titles) == 0:
        return "No ECIs"

    max_listed = 15
    listing = '<br>'.join(f"‚Ä¢ {title}" for title in titles[:max_listed])
    omitted = len(titles) - max_listed
    if omitted > 0:
        listing += f"<br><i>... (and {omitted} more)</i>"
    return listing

# Hover text per bin: walk over consecutive (left edge, right edge) pairs.
eci_lists_below = [
    get_bin_ecis(left, right)
    for left, right in zip(bin_edges_below[:-1], bin_edges_below[1:])
]
eci_lists_above = [
    get_bin_ecis(left, right)
    for left, right in zip(bin_edges_above[:-1], bin_edges_above[1:])
]

# Bars are positioned at the midpoint of their bin.
bin_centers_below = (bin_edges_below[:-1] + bin_edges_below[1:]) / 2
bin_centers_above = (bin_edges_above[:-1] + bin_edges_above[1:]) / 2


def _interpolate_rgb(start_rgb, end_rgb, ratio):
    """Return 'rgb(r,g,b)' moved `ratio` of the way from start_rgb to end_rgb.

    Each channel is interpolated linearly and truncated to an integer.
    """
    channels = [
        int(start + (end - start) * ratio)
        for start, end in zip(start_rgb, end_rgb)
    ]
    return 'rgb({},{},{})'.format(*channels)


# Create gradient color arrays.
# Below the threshold: rgb(195,66,66) at zero towards rgb(255,244,79) at the
# threshold, proportional to the bin centre.
colors_below = [
    _interpolate_rgb((195, 66, 66), (255, 244, 79), center / threshold)
    for center in bin_centers_below
]

# At/above the threshold: rgb(184,216,127) towards rgb(60,163,113); the end
# colour is reached at twice the threshold and kept beyond that.
max_sig = initiatives_with_sigs['signatures_numeric'].max()
colors_above = [
    _interpolate_rgb(
        (184, 216, 127), (60, 163, 113),
        min((center - threshold) / threshold, 1.0)
    )
    for center in bin_centers_above
]


# ------------------------------------------------------------------------------
# VISUALIZATION: Histogram of Signature Distribution
# ------------------------------------------------------------------------------

# Both bar groups share the same hover layout.
histogram_hover_template = (
    '<b>Signatures Range:</b> %{x:,.0f}<br>' +
    '<b>Count:</b> %{y}<br><br>' +
    '<b>ECIs:</b><br>%{customdata}' +
    '<extra></extra>'
)

# (legend name, bar centres, bar heights, bar colours, bin edges, hover lists)
# for the bars below and above the threshold, in drawing order.
histogram_groups = [
    ('Below 1M Threshold', bin_centers_below, hist_below,
     colors_below, bin_edges_below, eci_lists_below),
    ('Above 1M Threshold', bin_centers_above, hist_above,
     colors_above, bin_edges_above, eci_lists_above),
]

fig = go.Figure()
for group_name, centers, heights, bar_colors, edges, hover_lists in histogram_groups:
    fig.add_trace(go.Bar(
        x=centers,
        y=heights,
        name=group_name,
        marker=dict(color=bar_colors, line=dict(color='white', width=0.5)),
        width=np.diff(edges),  # each bar is as wide as its bin
        customdata=hover_lists,
        hovertemplate=histogram_hover_template
    ))

# Dashed vertical marker at the one-million-signature requirement.
fig.add_vline(x=1000000, line_dash="dash", line_color="#3AB23F", line_width=3,
              annotation_text="1M Threshold", annotation_position="top right",
              annotation_font_size=14)

# Titles, size and a uniform 14pt font for axes and legend.
histogram_layout = dict(
    title='<b>Distribution of Signature Counts (All ECIs with Collection Step)</b>',
    xaxis_title="Signatures Collected",
    yaxis_title="Number of Initiatives",
    height=500,
    showlegend=True,
    font=dict(size=14),
    bargap=0.05,
    xaxis=dict(title=dict(font=dict(size=14))),
    yaxis=dict(title=dict(font=dict(size=14))),
    legend=dict(
        font=dict(size=14)
    ),
)
fig.update_layout(**histogram_layout)

fig.show()


# ------------------------------------------------------------------------------
# OUTPUT: Display Summary Tables
# ------------------------------------------------------------------------------

print("\nTotal Signatures Summary (All Time):")
display(total_sigs_stats.style.hide(axis="index"))

# The table lists how many initiatives fall into each bracket (it contains no
# success rate), so the heading says exactly that.
print("\n\nECI Count by Signature Bracket:")
display(sig_bracket_analysis.style.hide(axis="index"))

print("\nCountry Threshold Analysis:")
# Pre-format the value column row by row: two decimals for the mean, whole
# numbers for everything else. Doing this per row (instead of looking the row
# up again by its value inside a formatter) stays correct when the mean
# happens to equal another statistic and does not fail on missing values.
threshold_display = threshold_stats.copy()
threshold_display['Countries Passed Threshold'] = [
    'N/A' if pd.isna(value)
    else (f'{value:.2f}' if metric == 'Mean' else f'{int(value)}')
    for metric, value in zip(
        threshold_stats['Metric'],
        threshold_stats['Countries Passed Threshold']
    )
]
display(threshold_display.style.hide(axis="index"))


<a id='question-4'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">4. Which Countries Drive Participation?</p>

Examines which countries contribute the most signatures and meet their national thresholds most frequently.<br>
Also shows in which countries organizers consistently launch successful ECIs and where participation is only sporadic.<br>

NOTE:<br>
> - A country ["passing threshold"](https://citizens-initiative.europa.eu/thresholds_en) (out of 27 possible) means it contributed enough signatures for its population size according to EU requirements (e.g., Germany needs ~69,000, Malta needs ~4,300).<br>
> - The thresholds are not proportional to population size. While Germany needs ~0.08% of its population to pass the threshold, Malta needs ~0.96% — nearly **12 times higher** as a percentage of population. This means smaller countries face significantly greater threshold challenges relative to their population.
> - Note that the United Kingdom was an EU member state until January 31, 2020.

In [None]:
# ==============================================================================
# QUESTION 4: MEMBER STATE PARTICIPATION ANALYSIS
# ==============================================================================

# ------------------------------------------------------------------------------
# Geographic Data: Country Mapping Constants
# ------------------------------------------------------------------------------

# Common mapping data
# Country display name -> three-letter country code. The codes are written to
# the 'ISO' column and passed as `locations` to the choropleth trace in
# create_country_choropleth_map. Contains 28 entries (includes the
# United Kingdom).
COUNTRY_ISO_MAP = {
    'Germany': 'DEU', 'Spain': 'ESP', 'Netherlands': 'NLD', 'Belgium': 'BEL',
    'France': 'FRA', 'Slovakia': 'SVK', 'Ireland': 'IRL', 'Denmark': 'DNK',
    'Croatia': 'HRV', 'Austria': 'AUT', 'Italy': 'ITA', 'Finland': 'FIN',
    'Hungary': 'HUN', 'Greece': 'GRC', 'Lithuania': 'LTU', 'Poland': 'POL',
    'Luxembourg': 'LUX', 'Sweden': 'SWE', 'Bulgaria': 'BGR', 'Slovenia': 'SVN',
    'Portugal': 'PRT', 'Romania': 'ROU', 'Latvia': 'LVA', 'Malta': 'MLT',
    'Czechia': 'CZE', 'Estonia': 'EST', 'United Kingdom': 'GBR', 'Cyprus': 'CYP'
}

# Country display name -> (latitude, longitude) pair. Element 0 feeds the
# 'Lat' column and element 1 the 'Lon' column, which position the text labels
# drawn on the map. Same keys as COUNTRY_ISO_MAP.
COUNTRY_COORDS = {
    'Germany': (51.0, 10.5), 'Spain': (40.0, -4.0), 'Netherlands': (52.3, 5.3), 
    'Belgium': (50.6, 4.6), 'France': (46.6, 2.3), 'Slovakia': (48.7, 19.5),
    'Ireland': (53.0, -8.0), 'Denmark': (56.0, 10.0), 'Croatia': (45.5, 16.0),
    'Austria': (47.5, 14.5), 'Italy': (42.8, 12.8), 'Finland': (64.0, 26.0),
    'Hungary': (47.0, 19.5), 'Greece': (39.0, 22.0), 'Lithuania': (55.3, 23.9),
    'Poland': (52.0, 19.0), 'Luxembourg': (49.8, 6.1), 'Sweden': (62.0, 15.0),
    'Bulgaria': (42.7, 25.5), 'Slovenia': (46.1, 15.0), 'Portugal': (39.5, -8.0),
    'Romania': (46.0, 25.0), 'Latvia': (57.0, 25.0), 'Malta': (35.9, 14.4),
    'Czechia': (49.8, 15.5), 'Estonia': (59.0, 26.0), 'United Kingdom': (54.0, -2.5),
    'Cyprus': (35.0, 33.0)
}


# ------------------------------------------------------------------------------
# Helper Function: Choropleth Map Creation for European Countries
# ------------------------------------------------------------------------------

def create_country_choropleth_map(
    country_df,
    z_column,
    label_column,
    customdata_columns,
    hover_template,
    title,
    colorbar_title,
    colorscale='Viridis',
    height=700
):
    """
    Build a coloured map of European countries with a text label per country.

    Side effect: missing 'ISO', 'Lat' and 'Lon' columns are added to
    `country_df` itself (looked up from COUNTRY_ISO_MAP / COUNTRY_COORDS).
    Countries without known coordinates are placed at (0, 0).

    Parameters:
    - country_df: DataFrame with one row per country (needs a 'Country' column)
    - z_column: Name of the column that drives the fill colour
    - label_column: Name of the column whose values are written on the map
    - customdata_columns: Column names exposed to the hover template
    - hover_template: Hover tooltip template for the coloured areas
    - title: Figure title
    - colorbar_title: Caption of the colour bar
    - colorscale: Plotly colorscale name (default: 'Viridis')
    - height: Figure height in pixels (default: 700)

    Returns:
    - Plotly figure object
    """
    # Fill in the lookup columns only where the caller has not supplied them.
    if 'ISO' not in country_df.columns:
        country_df['ISO'] = country_df['Country'].map(COUNTRY_ISO_MAP)
    for position, coord_column in enumerate(('Lat', 'Lon')):
        if coord_column not in country_df.columns:
            country_df[coord_column] = country_df['Country'].map(
                lambda name, position=position: COUNTRY_COORDS.get(name, (0, 0))[position]
            )

    # Base layer: countries filled according to z_column.
    fig = go.Figure(data=[go.Choropleth(
        locations=country_df['ISO'],
        z=country_df[z_column],
        text=country_df['Country'],
        customdata=country_df[customdata_columns],
        colorscale=colorscale,
        colorbar_title=colorbar_title,
        hovertemplate=hover_template,
        showscale=True
    )])

    # Settings shared by every text layer.
    label_layer_options = dict(
        text=country_df[label_column],
        mode='text',
        hoverinfo='skip',
        showlegend=False
    )

    # Outline effect: the label is drawn eight times in dark purple, each copy
    # shifted slightly in a different direction ...
    offsets = [(-0.15, 0), (0.15, 0), (0, -0.15), (0, 0.15),
               (-0.1, -0.1), (-0.1, 0.1), (0.1, -0.1), (0.1, 0.1)]
    for dx, dy in offsets:
        fig.add_trace(go.Scattergeo(
            lon=country_df['Lon'] + dx,
            lat=country_df['Lat'] + dy,
            textfont=dict(size=11, color='#4d297f', family='Arial Black'),
            **label_layer_options
        ))

    # ... and once more in white, unshifted, on top of the outline copies.
    fig.add_trace(go.Scattergeo(
        lon=country_df['Lon'],
        lat=country_df['Lat'],
        textfont=dict(size=11, color='white', family='Arial Black'),
        **label_layer_options
    ))

    # Restrict the visible area to the given latitude/longitude window and
    # style land, coastlines and borders.
    geo_options = dict(
        scope='world',
        projection_type='natural earth',
        showland=True,
        landcolor='rgb(243, 243, 243)',
        coastlinecolor='rgb(204, 204, 204)',
        showcountries=True,
        countrycolor='rgb(204, 204, 204)',
        lataxis_range=[34, 72],
        lonaxis_range=[-14, 36]
    )
    fig.update_geos(**geo_options)

    fig.update_layout(
        title=title,
        height=height,
        margin=dict(l=0, r=0, t=50, b=0)
    )

    return fig


# ==============================================================================
# DATA PROCESSING SECTION
# ==============================================================================

# ------------------------------------------------------------------------------
# Data Extraction: Total Signatures by Country
# ------------------------------------------------------------------------------

# Parse signatures_collected_by_country and aggregate by country
country_signatures = {}    # country -> total signatures across all initiatives
country_eci_details = {}   # country -> list of per-initiative detail dicts


def _parse_formatted_count(raw):
    """Convert a formatted count such as '1,234*' to an int.

    Thousands separators (',') and footnote asterisks ('*') are removed.
    Raises ValueError when the remaining text is not an integer.
    """
    return int(str(raw).replace(',', '').replace('*', ''))


for idx, row in df.iterrows():
    raw_json = row['signatures_collected_by_country']
    if pd.isna(raw_json):
        continue

    # Best effort: rows whose JSON cannot be decoded are skipped.
    try:
        sig_data = json.loads(raw_json)
    except (TypeError, ValueError):
        continue
    if not isinstance(sig_data, dict):
        continue

    for country, data in sig_data.items():
        # Parse BOTH numbers before touching the aggregates, so a malformed
        # entry can never be half-recorded (added to the total but missing
        # from the details). Only the malformed entry is skipped; the other
        # countries of the same initiative are still counted.
        try:
            signatures = _parse_formatted_count(data['statements_of_support'])
            country_threshold = _parse_formatted_count(data['threshold'])
        except (KeyError, TypeError, ValueError):
            continue

        # Aggregate total signatures
        country_signatures[country] = country_signatures.get(country, 0) + signatures

        # Store ECI details for hover
        country_eci_details.setdefault(country, []).append({
            'title': row['title'],
            'signatures': signatures,
            'threshold_met': signatures >= country_threshold
        })

# Create DataFrame, largest contributor first
country_signatures_df = pd.DataFrame({
    'Country': list(country_signatures.keys()),
    'Total Signatures': list(country_signatures.values())
}).sort_values('Total Signatures', ascending=False)

# Format signatures for display (e.g., "1.2M")
def format_sig_display(sig):
    """Abbreviate a signature count for display on the map.

    Values of at least one million become e.g. "1.2M" (one decimal), values
    of at least one thousand become e.g. "35K" (no decimals); anything else
    is returned via str() unchanged.
    """
    # (lower bound, decimals, suffix), checked from the largest unit down.
    units = ((1_000_000, 1, 'M'), (1_000, 0, 'K'))
    for lower_bound, decimals, suffix in units:
        if sig >= lower_bound:
            return f"{sig / lower_bound:.{decimals}f}{suffix}"
    return str(sig)

# Short label (e.g. "1.2M") for every country total.
country_signatures_df['Signatures Display'] = (
    country_signatures_df['Total Signatures'].map(format_sig_display)
)

# Hover text per country: its initiatives ranked by signatures collected in
# that country, limited to the first 10 with a note about the remainder.
max_listed_ecis = 10
country_eci_hover = []
for country in country_signatures_df['Country']:
    ranked = sorted(
        country_eci_details[country],
        key=lambda entry: entry['signatures'],
        reverse=True
    )
    if not ranked:
        country_eci_hover.append("No ECIs")
        continue

    hover_text = '<br>'.join(
        f"‚Ä¢ {entry['title']}: {entry['signatures']:,}"
        for entry in ranked[:max_listed_ecis]
    )
    not_shown = len(ranked) - max_listed_ecis
    if not_shown > 0:
        hover_text += f"<br><i>... (and {not_shown} more ECIs)</i>"
    country_eci_hover.append(hover_text)

country_signatures_df['ECI_List'] = country_eci_hover


# ------------------------------------------------------------------------------
# Data Extraction: Countries Meeting Signature Thresholds
# ------------------------------------------------------------------------------

# Function to extract countries that met threshold
def extract_countries_met_threshold(row):
    """Return the countries whose 'percentage' is at least 100 for one ECI.

    `row` must provide 'signatures_collected_by_country': a JSON object
    mapping a country name to a dict with a 'percentage' string such as
    "123.4%". Countries are returned in the order they appear in the JSON.

    Parsing is best effort: a missing value, undecodable JSON or a JSON value
    that is not an object yields an empty list; an individual country entry
    that is malformed (not a dict, no string 'percentage', not a number) is
    skipped without discarding the other countries of the same row.
    """
    raw_json = row['signatures_collected_by_country']
    if pd.isna(raw_json):
        return []

    try:
        country_data = json.loads(raw_json)
    except (TypeError, ValueError):  # json.JSONDecodeError is a ValueError
        return []
    if not isinstance(country_data, dict):
        return []

    countries_met = []
    for country, data in country_data.items():
        if not isinstance(data, dict):
            continue
        pct_raw = data.get('percentage')
        if not isinstance(pct_raw, str):
            continue
        try:
            pct = float(pct_raw.rstrip('%'))
        except ValueError:
            continue
        if pct >= 100.0:
            countries_met.append(country)
    return countries_met

df['countries_met_threshold_list'] = df.apply(extract_countries_met_threshold, axis=1)

# Successful initiatives are filtered once and reused below instead of
# re-filtering the full frame for every country.
successful_ecis = df[df['successful_eci']]

# Count countries in successful ECIs
all_countries = []
for countries in successful_ecis['countries_met_threshold_list']:
    all_countries.extend(countries)

country_counts = Counter(all_countries)
country_participation = pd.DataFrame(country_counts.items(), columns=['Country', 'Times Met Threshold'])
country_participation = country_participation.sort_values('Times Met Threshold', ascending=False)

# Share of successful initiatives in which the country met its threshold.
country_participation['Participation Rate (%)'] = (
    country_participation['Times Met Threshold'] / len(successful_ecis) * 100
).round(2)

# Prepare ECI lists for hover.
# One pass over the successful initiatives builds country -> titles (in row
# order); dict.fromkeys de-duplicates a row's countries so each title is
# listed at most once per country.
titles_by_country = {}
for title, countries in zip(
    successful_ecis['title'], successful_ecis['countries_met_threshold_list']
):
    for country in dict.fromkeys(countries):
        titles_by_country.setdefault(country, []).append(title)

country_eci_lists = []
for country in country_participation['Country']:
    country_ecis = titles_by_country.get(country, [])

    if len(country_ecis) == 0:
        eci_text = "No ECIs"
    elif len(country_ecis) <= 10:
        eci_text = '<br>'.join(f"‚Ä¢ {title}" for title in country_ecis)
    else:
        # Show the first 10 titles and note how many were left out.
        eci_text = '<br>'.join(f"‚Ä¢ {title}" for title in country_ecis[:10])
        eci_text += f"<br><i>... (and {len(country_ecis) - 10} more)</i>"
    country_eci_lists.append(eci_text)

country_participation['ECI_List'] = country_eci_lists


# ------------------------------------------------------------------------------
# Data Extraction: Organizer Country Analysis
# ------------------------------------------------------------------------------

# Organizer country analysis
def extract_organizer_countries(row):
    """
    Extract the countries of residence listed for an ECI's organizer representatives.

    Parameters:
    - row: Mapping-like record (e.g. a DataFrame row) whose
      'organizer_representative' entry is expected to hold a JSON string

    Returns:
    - List of the keys of the JSON object's 'countries_of_residence' mapping,
      or an empty list when the field is missing, is not valid JSON, or does
      not have the expected structure
    """
    try:
        org_data = json.loads(row['organizer_representative'])
        if 'countries_of_residence' in org_data:
            return list(org_data['countries_of_residence'].keys())
        return []
    except (KeyError, TypeError, ValueError, AttributeError):
        # Best-effort parsing: a missing column (KeyError), a non-string value
        # such as NaN (TypeError), malformed JSON (JSONDecodeError is a
        # ValueError) or an unexpected JSON shape (TypeError/AttributeError)
        # all mean "no organizer countries". Anything else, including
        # KeyboardInterrupt, is deliberately allowed to propagate.
        return []

df['organizer_countries'] = df.apply(extract_organizer_countries, axis=1)

# Count organizer countries
all_org_countries = []
for countries in df['organizer_countries']:
    all_org_countries.extend(countries)

org_country_counts = Counter(all_org_countries)
org_participation = pd.DataFrame(org_country_counts.items(), columns=['Country', 'Initiatives Organized'])
org_participation = org_participation.sort_values('Initiatives Organized', ascending=False)

# Successful organizer countries
successful_org_countries = []
for countries in df[df['successful_eci']]['organizer_countries']:
    successful_org_countries.extend(countries)

successful_org_counts = Counter(successful_org_countries)
successful_org_participation = pd.DataFrame(successful_org_counts.items(), columns=['Country', 'Successful Initiatives'])
successful_org_participation = successful_org_participation.sort_values('Successful Initiatives', ascending=False)

# Prepare data for organizer comparison chart.
# The top 10 are reversed so the largest value ends up last in the frame.
org_top10 = org_participation.head(10).iloc[::-1]
successful_top10 = successful_org_participation.head(10).iloc[::-1]

# Add space to country names for visual spacing
org_top10 = org_top10.copy()
org_top10['Country'] = org_top10['Country'] + ' '

successful_top10 = successful_top10.copy()
successful_top10['Country'] = successful_top10['Country'] + ' '


def _format_organizer_eci_hover(titles, limit=15):
    """
    Render ECI titles as a '<br>'-separated bullet list for a hover box.

    Parameters:
    - titles: List of ECI titles
    - limit: Maximum number of titles to list before summarising the rest (default: 15)

    Returns:
    - HTML string; an empty string when there are no titles
    """
    hover = '<br>'.join(f"‚Ä¢ {title}" for title in titles[:limit])
    if len(titles) > limit:
        hover += f"<br><i>... (and {len(titles) - limit} more)</i>"
    return hover


# Prepare ECI lists for organizer countries.
# The padded chart labels are stripped again before matching the raw country names.
org_eci_lists = []
for country in org_top10['Country']:
    country_ecis = df[df['organizer_countries'].apply(lambda x: country.strip() in x)]['title'].tolist()
    org_eci_lists.append(_format_organizer_eci_hover(country_ecis))

successful_eci_lists = []
for country in successful_top10['Country']:
    country_ecis = df[(df['successful_eci']) & (df['organizer_countries'].apply(lambda x: country.strip() in x))]['title'].tolist()
    successful_eci_lists.append(_format_organizer_eci_hover(country_ecis))


# ==============================================================================
# VISUALIZATION SECTION
# ==============================================================================

# ------------------------------------------------------------------------------
# VISUALIZATION 1: Total Signatures by Country Map
# ------------------------------------------------------------------------------

# Hover card for the signatures map: country label, formatted total, ECI list
signatures_map_hover = ''.join([
    '<b>%{text}</b><br>',
    'Total Signatures: %{customdata[0]:,.0f}<br><br>',
    '<b>Top ECIs by Signatures:</b><br>%{customdata[1]}',
    '<extra></extra>',
])

# create_country_choropleth_map is defined elsewhere in the notebook; its
# result only needs to support .show() here.
fig1 = create_country_choropleth_map(
    country_df=country_signatures_df,
    z_column='Total Signatures',
    label_column='Signatures Display',
    customdata_columns=['Total Signatures', 'ECI_List'],
    hover_template=signatures_map_hover,
    title='<b>Total ECI Signatures by Country (All Time)</b>',
    colorbar_title='Total<br>Signatures',
    colorscale='Viridis',
)
fig1.show()


# ------------------------------------------------------------------------------
# VISUALIZATION 2: Country Participation Map (Threshold Met)
# ------------------------------------------------------------------------------

# Hover card for the participation map: threshold count, rate, then the ECI list
participation_map_hover = ''.join([
    '<b>%{text}</b><br>',
    'Times Met Threshold: %{customdata[0]}<br>',
    'Participation Rate: %{z:.2f}%<br><br>',
    '<b>ECIs:</b><br>%{customdata[1]}',
    '<extra></extra>',
])

fig2 = create_country_choropleth_map(
    country_df=country_participation,
    z_column='Participation Rate (%)',
    label_column='Times Met Threshold',
    customdata_columns=['Times Met Threshold', 'ECI_List'],
    hover_template=participation_map_hover,
    title='<b>Countries Leading ECI Signature Thresholds (Europe)</b>',
    colorbar_title='Participation<br>Rate (%)',
    colorscale='Viridis',
    height=400,
)
fig2.show()


# ------------------------------------------------------------------------------
# VISUALIZATION 3: Organizer Countries Comparison (Side-by-Side)
# ------------------------------------------------------------------------------

fig3 = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('<b>All Initiatives Organized</b>', '<b>Successful Initiatives Organized</b>'),
    horizontal_spacing=0.15,
)

# One horizontal bar panel per subplot column:
# (source frame, value column, bar colour, hover label, per-bar ECI lists)
organizer_panels = [
    (org_top10, 'Initiatives Organized', 'lightblue', 'Initiatives', org_eci_lists),
    (successful_top10, 'Successful Initiatives', '#3CA371', 'Successful', successful_eci_lists),
]

for panel_col, (panel_df, value_column, bar_color, hover_label, panel_hover) in enumerate(organizer_panels, start=1):
    panel_trace = go.Bar(
        x=panel_df[value_column],
        y=panel_df['Country'],
        orientation='h',
        marker_color=bar_color,
        showlegend=False,
        customdata=panel_hover,
        hovertemplate=(
            '<b>%{y}</b><br>'
            + hover_label + ': %{x}<br><br>'
            + '<b>ECIs:</b><br>%{customdata}'
            + '<extra></extra>'
        ),
    )
    fig3.add_trace(panel_trace, row=1, col=panel_col)

axis_title_style = dict(title=dict(font=dict(size=14)))
fig3.update_layout(
    height=500,
    title_text='Countries Organizing ECIs by Organiser Representative Residence',
    xaxis=dict(axis_title_style),
    yaxis=dict(axis_title_style),
    legend=dict(font=dict(size=14)),
)
fig3.show()


# ------------------------------------------------------------------------------
# OUTPUT: Display Summary Table
# ------------------------------------------------------------------------------

# Show the first 10 rows of the signatures table with thousands separators
top_signature_countries = country_signatures_df[['Country', 'Total Signatures']].head(10)
print("\nTop 10 Countries by Total Signatures:")
display(
    top_signature_countries.style
    .hide(axis='index')
    .format({'Total Signatures': '{:,.0f}'})
)

[↑ Table of Contents ↑](#table-of-contents)

<a id='question-5'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">5. How Do Success Rates Evolve by Year?</p>

Tracks success rates over time to identify whether organizers are getting better at running campaigns or if the process is becoming harder.

In [None]:
# ==============================================================================
# QUESTION 5: TEMPORAL TRENDS AND YEAR-OVER-YEAR ANALYSIS
# ==============================================================================

# ------------------------------------------------------------------------------
# Basic Yearly Statistics Aggregation
# ------------------------------------------------------------------------------

# Display names for the grouping key and each aggregated column
yearly_column_names = {
    'registration_year': 'Year',
    'registration_number': 'Total Registered',
    'reached_signatures': 'Reached 1M',
    'met_country_threshold': 'Met Country Threshold',
    'successful_eci': 'Successful',
    'commission_responded': 'Commission Response',
}

# One row per registration year: number of registrations plus per-flag sums
yearly_stats = (
    df.groupby('registration_year')
    .agg({
        'registration_number': 'count',
        'reached_signatures': 'sum',
        'met_country_threshold': 'sum',
        'successful_eci': 'sum',
        'commission_responded': 'sum',
    })
    .reset_index()
    .rename(columns=yearly_column_names)
)

# Rates are percentages of all initiatives registered in that year
registered_per_year = yearly_stats['Total Registered']
yearly_stats['Success Rate (%)'] = (yearly_stats['Successful'] / registered_per_year * 100).round(2)
yearly_stats['Response Rate (%)'] = (yearly_stats['Commission Response'] / registered_per_year * 100).round(2)

# Derived counts: everything not successful, and successes without a response
yearly_stats['Failed'] = registered_per_year - yearly_stats['Successful']
yearly_stats['Successful No Response'] = yearly_stats['Successful'] - yearly_stats['Commission Response']


# ------------------------------------------------------------------------------
# Detailed Yearly Statistics with In-Progress Tracking
# ------------------------------------------------------------------------------

# Prepare custom hover data with ECI titles for each category and year
years = sorted(df['registration_year'].unique())

# Calculate yearly stats including in-progress initiatives
yearly_stats_detailed = df.groupby('registration_year').agg({
    'registration_number': 'count',
    'reached_signatures': 'sum',
    'met_country_threshold': 'sum',
    'successful_eci': 'sum',
    'commission_responded': 'sum'
}).reset_index()

yearly_stats_detailed.columns = ['Year', 'Total Registered', 'Reached 1M', 'Met Country Threshold', 'Successful', 'Commission Response']

# Calculate in-progress initiatives: rows whose final_outcome is still missing
in_progress_by_year = df[df['final_outcome'].isna()].groupby('registration_year').size().reset_index(name='In Progress')
yearly_stats_detailed = yearly_stats_detailed.merge(in_progress_by_year, left_on='Year', right_on='registration_year', how='left').drop('registration_year', axis=1)

# Calculate truly failed (unsuccessful or withdrawn)
failed_by_year = df[df['final_outcome'].isin(['Unsuccessful Collection', 'Withdrawn'])].groupby('registration_year').size().reset_index(name='Failed')
yearly_stats_detailed = yearly_stats_detailed.merge(failed_by_year, left_on='Year', right_on='registration_year', how='left').drop('registration_year', axis=1)

# A left merge leaves NaN (and a float dtype) for years that have no
# initiative in a category; those are genuine zero counts.
for count_column in ('In Progress', 'Failed'):
    yearly_stats_detailed[count_column] = yearly_stats_detailed[count_column].fillna(0).astype(int)

# Calculate successful without response
yearly_stats_detailed['Successful No Response'] = yearly_stats_detailed['Successful'] - yearly_stats_detailed['Commission Response']


# ------------------------------------------------------------------------------
# Prepare Hover Text Data for Each Category and Year
# ------------------------------------------------------------------------------

def _format_outcome_titles(titles):
    """Render ECI titles as a '<br>'-separated bullet list, or 'None' when the list is empty."""
    return '<br>'.join([f'‚Ä¢ {title}' for title in titles]) if titles else 'None'


# Create hover text for each category
failed_hover = []
in_progress_hover = []
successful_no_response_hover = []
commission_response_hover = []

# Iterate the same 'Year' column that the bar chart uses for its x-axis, so
# entry i of every hover list always describes bar i. (groupby drops missing
# years, whereas unique() keeps them, which could shift the lists.)
for year in yearly_stats_detailed['Year']:
    year_df = df[df['registration_year'] == year]

    # Failed ECIs
    failed_ecis = year_df[year_df['final_outcome'].isin(['Unsuccessful Collection', 'Withdrawn'])]['title'].tolist()
    failed_hover.append(_format_outcome_titles(failed_ecis))

    # In Progress ECIs (no final outcome recorded yet)
    in_progress_ecis = year_df[year_df['final_outcome'].isna()]['title'].tolist()
    in_progress_hover.append(_format_outcome_titles(in_progress_ecis))

    # Successful No Response ECIs
    successful_no_resp = year_df[(year_df['successful_eci']) & (~year_df['commission_responded'])]['title'].tolist()
    successful_no_response_hover.append(_format_outcome_titles(successful_no_resp))

    # Commission Responded ECIs
    commission_resp = year_df[year_df['commission_responded'] == True]['title'].tolist()
    commission_response_hover.append(_format_outcome_titles(commission_resp))


# ------------------------------------------------------------------------------
# VISUALIZATION: Stacked Bar Chart - ECI Outcomes by Year
# ------------------------------------------------------------------------------

# One stacked trace per outcome category, added bottom-to-top:
# (value column, legend name, hover heading, bar colour, per-year hover text)
outcome_traces = [
    ('Failed', 'Failed', 'Failed', '#C34242', failed_hover),
    ('In Progress', 'In Progress', 'In Progress', '#6C9BD1', in_progress_hover),
    ('Successful No Response', 'Successful', 'Successful (No Response)', '#F0B840', successful_no_response_hover),
    ('Commission Response', 'Commission Responded', 'Commission Responded', '#3CA371', commission_response_hover),
]

fig = go.Figure()

for value_column, legend_name, hover_heading, bar_color, hover_titles in outcome_traces:
    fig.add_trace(go.Bar(
        x=yearly_stats_detailed['Year'],
        y=yearly_stats_detailed[value_column],
        name=legend_name,
        marker_color=bar_color,
        customdata=hover_titles,
        hovertemplate=(
            f'<b>{hover_heading}</b>'
            '<br>Year: %{x}<br>Count: %{y}<br><br>ECIs:<br>%{customdata}<extra></extra>'
        ),
    ))

# Legend sits horizontally above the plot area, right-aligned
legend_above_plot = dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1,
)
axis_title_font = dict(title=dict(font=dict(size=14)))

fig.update_layout(
    title='<b>ECI Outcomes by Registration Year</b>',
    xaxis_title='Registration Year',
    yaxis_title='Number of Initiatives',
    barmode='stack',
    height=600,
    font=dict(size=14),
    legend=legend_above_plot,
    xaxis=dict(axis_title_font),
    yaxis=dict(axis_title_font),
)

fig.show()


# ------------------------------------------------------------------------------
# OUTPUT: Trend Analysis Summary
# ------------------------------------------------------------------------------

print("\n\nTrend Analysis by Success Rate:")

# Locate the rows holding the highest and lowest yearly success rate
success_rates = yearly_stats['Success Rate (%)']
best_year = yearly_stats.at[success_rates.idxmax(), 'Year']
worst_year = yearly_stats.at[success_rates.idxmin(), 'Year']

trend_rows = [
    ('Best Year for Success Rate', f"{best_year:.0f} ({success_rates.max():.2f}%)"),
    ('Worst Year for Success Rate', f"{worst_year:.0f} ({success_rates.min():.2f}%)"),
    ('Average Success Rate', f"{success_rates.mean():.2f}%"),
]
trend_analysis = pd.DataFrame({
    'Metric': [metric for metric, _ in trend_rows],
    'Value': [value for _, value in trend_rows],
})

display(trend_analysis.style.hide(axis="index"))


# =============================================================================
# SUCCESSFUL ECIs BY YEAR ANALYSIS
# =============================================================================

# Note: The 'successful' dataframe is created in section 7, so an independent
# copy of the successful ECIs is built here instead.
successful_ecis = df.loc[df['successful_eci'] == True].copy()

# Number of successful ECIs per registration year
successful_by_year = (
    successful_ecis.groupby('registration_year')
    .size()
    .rename('Count')
    .rename_axis('Year')
    .reset_index()
)

print("\n\nSuccessful ECIs by year:")
display(successful_by_year.style.hide(axis='index'))

# Trend analysis
yearly_success_counts = successful_by_year['Count']
top_year = successful_by_year.loc[yearly_success_counts.idxmax(), 'Year']

print(f"\nTotal successful ECIs: {len(successful_ecis)}")
print(f"Best year: {top_year} with {yearly_success_counts.max()} successful ECIs")
print(f"Average successful ECIs per year: {yearly_success_counts.mean():.1f}")

# =============================================================================
# COLLECTION START MONTH CORRELATION ANALYSIS
# =============================================================================

# Extract month from collection start date
df['collection_start_month'] = df['collection_start_date'].dt.month

# Filter ECIs with both collection start date and signatures
ecis_with_dates = df[
    (df['collection_start_month'].notna()) &
    (df['signatures_numeric'].notna())
]

# Calculate correlation (DataFrame.corr default method; off-diagonal entry).
# NOTE(review): month is treated as a linear 1-12 scale here, so December and
# January count as far apart even though they are adjacent in the calendar.
correlation = ecis_with_dates[['collection_start_month', 'signatures_numeric']].corr().iloc[0, 1]

# Interpret correlation strength (define_correlation is defined elsewhere in the notebook)
abs_corr = abs(correlation)
strength = define_correlation(abs_corr)

# Dynamic interpretation based on correlation direction and strength
if pd.isna(correlation):
    # With too few rows or a constant column the correlation is NaN. Every
    # "<" comparison with NaN is False, so without this branch the code would
    # fall through to the "strongly associated" message below.
    interpretation = "There is not enough data to relate collection start month to signature counts"
elif abs_corr < 0.1:
    interpretation = "Collection start month has no meaningful relationship with signature success"
else:
    # Negative correlation: lower month numbers go with more signatures
    favoured_start = "earlier" if correlation < 0 else "later"
    if abs_corr < 0.3:
        interpretation = f"ECIs starting {favoured_start} in the year show slightly better signature collection,\nbut the effect is minimal"
    elif abs_corr < 0.5:
        interpretation = f"ECIs starting {favoured_start} in the year tend to collect more signatures"
    elif correlation < 0:
        interpretation = "Early-year launches are strongly associated with higher signature counts"
    else:
        interpretation = "Late-year launches are strongly associated with higher signature counts"

print(f"\nCorrelation between start month and signatures: {correlation:.3f}")
print(f"Strength: {strength}")
print(f"\nüí° Note:\n{interpretation};\ncampaign quality and topic remain the critical success factors.")

[↑ Table of Contents ↑](#table-of-contents)

<a id='question-6'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">6. How Long Do ECIs Take at Each Stage?</p>

This section measures time durations between key milestones to understand process efficiency and bureaucratic delays. Most initiatives start collection immediately after registration (median 0 days). The Commission takes a median 520 days (1.4 years) to respond after collection ends.<br><br>

NOTE:<br>
> - Collection periods exceeding 365 days occur when initiatives use the allowed extensions due to technical problems or special circumstances. The first registered ECIs (including Right2Water and Fraternité 2020) received nearly 6-month extensions due to [initial problems](https://www.europarl.europa.eu/RegData/etudes/BRIE/2017/614627/EPRS_BRI(2017)614627_EN.pdf) with the online signature collection system. The early software was incompatible with mobile devices and had numerous technical errors that impeded signature collection. Later initiatives [received COVID-19 pandemic extensions](https://eustrat.uni-nke.hu/hirek/2020/04/28/impact-of-the-coronavirus-on-eci-procedures-will-the-commission-extend-deadlines) of up to 12 months total when physical signature gathering became impossible. The fastest collection was ["Ban glyphosate" at 158 days](https://citizens-initiative.europa.eu/initiatives/details/2017/000002_en) due to strong mobilization by environmental and health organizations across Europe, while "Cohesion policy for the equality of the regions and sustainability of the regional cultures" took 731 days (2 years) partly due to pandemic disruptions.<br><br>
> - The longest wait for Commission response was 1,580 days (4.3 years) for the "Cohesion policy for the equality of the regions and sustainability of the regional cultures" initiative. This ECI faced extraordinary delays: the Commission initially [refused registration](https://curia.europa.eu/jcms/upload/docs/application/pdf/2024-02/cp240034en.pdf) in 2013, but the Court of Justice [ruled this refusal unlawful](https://citizens-initiative.europa.eu/news/court-final-judgment-commission-decision-registration-eci-cohesion-policy-equality-regions-and_en) in March 2020, forcing registration. After successfully collecting 1,269,351 signatures with pandemic extensions, organizers [waited over four years](https://citizens-initiative.europa.eu/initiatives/details/2019/000007_en) for the Commission's response. In September 2025, the [Commission finally responded](https://citizens-initiative.europa.eu/cohesion-policy-equality-regions-and-sustainability-regional-cultures_en) by declining to take any legislative action on the minority rights and regional funding proposals. The Commission avoided substantive engagement with the initiative despite its broad citizen support, and when organizers appealed to the European Court of Justice, the Court issued a [technical rejection without addressing the substantive arguments](https://hhrf.org/2025/07/23/european-court-of-justice-dismisses-minority-safepack-appeal/) in June 2025. The excessive delay highlights how organizers of politically controversial ECIs must navigate not only signature collection but also years of legal battles and institutional foot-dragging before receiving substantive responses, and even then, not succeed.


In [None]:
# =============================================================================
# DURATION METRICS: COLUMN CALCULATIONS
# =============================================================================

# Calculate time between key milestones, in whole days:
# new column -> (earlier date column, later date column)
milestone_gaps = {
    'registration_to_collection_days': ('registered_date', 'collection_start_date'),
    'collection_to_response_days': ('collection_closed_date', 'commission_response_date'),
}
for gap_column, (earlier_column, later_column) in milestone_gaps.items():
    df[gap_column] = (df[later_column] - df[earlier_column]).dt.days

# =============================================================================
# FILTER RESPONDED INITIATIVES
# =============================================================================

# Keep only the initiatives flagged as having a Commission response
responded = df.loc[df['commission_responded']]

# =============================================================================
# SUMMARY TABLE: DURATION ANALYSIS
# =============================================================================

# Create consolidated duration analysis DataFrame.
# Each table row summarises one duration series: (row label, series).
duration_sources = [
    ('Registration ‚Üí Collection Start', df['registration_to_collection_days']),
    ('Collection Period Duration', df['collection_duration_days']),
    ('Collection End ‚Üí Commission Response', df['collection_to_response_days']),
    ('Registration ‚Üí Commission Response', responded['time_to_commission_response_days']),
]

# Column name -> statistic computed on each series, in display order
duration_statistics = {
    'Count': lambda series: series.notna().sum(),
    'Median Days': lambda series: series.median(),
    'Mean Days': lambda series: series.mean(),
    'Min Days': lambda series: series.min(),
    'Max Days': lambda series: series.max(),
}

duration_columns = {'Period': [label for label, _ in duration_sources]}
for stat_name, compute_stat in duration_statistics.items():
    duration_columns[stat_name] = [compute_stat(series) for _, series in duration_sources]
duration_analysis = pd.DataFrame(duration_columns)

# Round numeric columns for readability
for stat_name in ('Median Days', 'Mean Days'):
    duration_analysis[stat_name] = duration_analysis[stat_name].round(1)
# Min/Max of a series without any values is NaN; it is shown as 0
for stat_name in ('Min Days', 'Max Days'):
    duration_analysis[stat_name] = duration_analysis[stat_name].fillna(0).astype(int)
duration_analysis['Count'] = duration_analysis['Count'].astype(int)

print("\nDuration Analysis by Milestone:")
duration_formats = {
    'Count': '{:,}',
    'Min Days': '{:,}',
    'Max Days': '{:,}',
    'Median Days': '{:,.0f}',
    'Mean Days': '{:,.1f}',
}
display(duration_analysis.style.hide(axis="index").format(duration_formats))

# =============================================================================
# HISTOGRAM VISUALIZATION UTILITIES
# =============================================================================

# üìä DURATION VISUALIZATION FUNCTION
def create_duration_histogram(df, column_name, title, color_scheme, bins=30):
    """
    Create histogram visualization for duration analysis.

    Rows with a missing duration are ignored. Each bar's hover box shows the
    bin's day range (or the single value when every duration in the bin is
    identical), the count, and up to 15 ECI titles that fall into the bin.

    Parameters:
    - df: DataFrame to analyze; must contain `column_name` and a 'title' column
    - column_name: Column containing the duration data
    - title: Chart title (also shown as the hover heading)
    - color_scheme: Tuple of ((r1,g1,b1), (r2,g2,b2)) for gradient (start_color, end_color)
    - bins: Number of histogram bins (default: 30)

    Returns:
    - plotly Figure with one bar per histogram bin
    """

    # Filter data
    data = df[df[column_name].notna()].copy()
    durations = data[column_name]

    # Create histogram
    hist, edges = np.histogram(durations, bins=bins)
    bin_centers = (edges[:-1] + edges[1:]) / 2
    bin_count = len(bin_centers)

    # Build the range label and the title list of every bin in one pass, so
    # both are always derived from the same set of rows.
    bin_ranges = []
    hover_text = []
    for i, (lower, upper) in enumerate(zip(edges[:-1], edges[1:])):
        if i == bin_count - 1:
            # Last bin includes its upper boundary so the maximum duration is
            # captured (same convention as np.histogram).
            in_bin = (durations >= lower) & (durations <= upper)
        else:
            # Regular bins exclude the upper boundary so a value sitting
            # exactly on an edge is counted in one bin only.
            in_bin = (durations >= lower) & (durations < upper)
        bin_rows = data[in_bin]

        # Bin range label (smart: single value or range)
        unique_values = np.unique(bin_rows[column_name].values)
        if len(unique_values) == 1:
            # All values in bin are the same - show single value
            # Example: values [365, 365, 365] -> "365"
            bin_ranges.append(f"{int(unique_values[0])}")
        else:
            # Several different values, or an empty bin - show the bin edges
            # Example: edges 365 and 400 -> "365-400"
            bin_ranges.append(f"{int(lower)}-{int(upper)}")

        # Limit to 15 titles
        bin_ecis = bin_rows['title'].tolist()
        if not bin_ecis:
            titles_text = "No ECIs"
        elif len(bin_ecis) <= 15:
            titles_text = '<br>'.join(f"‚Ä¢ {title}" for title in bin_ecis)
        else:
            titles_text = '<br>'.join(f"‚Ä¢ {title}" for title in bin_ecis[:15])
            titles_text += f"<br><i>... (and {len(bin_ecis) - 15} more)</i>"

        hover_text.append(titles_text)

    # Create gradient colors: the first bar gets start_color and the last bar
    # gets end_color. Dividing by (bin_count - 1) makes the ratio reach 1.0
    # on the last bar; a single bin simply uses start_color.
    (r1, g1, b1), (r2, g2, b2) = color_scheme
    colors = []
    for i in range(bin_count):
        ratio = i / (bin_count - 1) if bin_count > 1 else 0
        r = int(r1 + (r2 - r1) * ratio)
        g = int(g1 + (g2 - g1) * ratio)
        b = int(b1 + (b2 - b1) * ratio)
        colors.append(f'rgb({r},{g},{b})')

    # Create figure: bars are centred on the bins and exactly as wide as them
    fig = go.Figure(go.Bar(
        x=bin_centers,
        y=hist,
        marker=dict(color=colors, line=dict(color='white', width=0.5)),
        width=np.diff(edges),
        customdata=list(zip(bin_ranges, hover_text)),
        hovertemplate=f'<b>{title}</b><br><b>Days:</b> %{{customdata[0]}}<br><b>Count:</b> %{{y}}<br><br><b>ECIs:</b><br>%{{customdata[1]}}<extra></extra>'
    ))

    fig.update_layout(
        title=title,
        xaxis_title='Days',
        yaxis_title='Number of Initiatives',
        height=400,
        font=dict(size=14),
        xaxis=dict(title=dict(font=dict(size=14))),
        yaxis=dict(title=dict(font=dict(size=14))),
    )

    return fig

# =============================================================================
# STATS TABLE UTILITIES
# =============================================================================

def create_duration_stats_table(df, column_name, period_name):
    """
    Build, display and return a min / median / max example table for a duration metric.

    One representative ECI is picked for each statistic. Distinct ECIs are
    preferred across the three rows, and ECIs with a non-NaN
    ``signatures_collected`` value are preferred over those without.

    Parameters:
    - df: DataFrame to analyze; must contain ``column_name`` plus the
      ``title``, ``signatures_collected`` and ``successful_eci`` columns
    - column_name: Column containing the duration data
    - period_name: Descriptive name for the period, used in the printed heading
      (e.g., "Registration -> Collection Start")

    Returns:
    - DataFrame with columns: Case, Days, Title, Signatures_Collected, Successful_ECI.
      When the column holds no valid values an empty frame with those columns
      is returned and nothing is printed or displayed.
    """
    table_columns = ['Case', 'Days', 'Title', 'Signatures_Collected', 'Successful_ECI']

    # Filter to only rows with valid duration data
    valid_data = df[df[column_name].notna()].copy()

    if valid_data.empty:
        return pd.DataFrame(columns=table_columns)

    # Calculate statistics
    durations = valid_data[column_name]
    min_val = durations.min()
    median_val = durations.median()
    max_val = durations.max()

    # Get all ECIs for each statistic
    min_ecis = valid_data[durations == min_val]
    median_ecis = valid_data[durations == median_val]
    max_ecis = valid_data[durations == max_val]

    if median_ecis.empty:
        # With an even number of rows the median is the mean of the two middle
        # values, so no row may match it exactly. Fall back to the row(s)
        # closest to the median instead of failing on an empty selection.
        distance = (durations - median_val).abs()
        median_ecis = valid_data[distance == distance.min()]

    # Helper function to select best candidate (prioritize non-NaN signatures and uniqueness)
    def select_best_candidate(candidates, used_indices):
        is_unused = ~candidates.index.isin(used_indices)
        has_signatures = candidates['signatures_collected'].notna()

        # Preference order: unused with signatures, any unused,
        # already used with signatures, then anything at all.
        for selection in (
            candidates[is_unused & has_signatures],
            candidates[is_unused],
            candidates[has_signatures],
        ):
            if len(selection) > 0:
                return selection.iloc[0]
        return candidates.iloc[0]

    # Select unique examples when possible
    used_indices = set()

    min_eci = select_best_candidate(min_ecis, used_indices)
    used_indices.add(min_eci.name)

    median_eci = select_best_candidate(median_ecis, used_indices)
    used_indices.add(median_eci.name)

    max_eci = select_best_candidate(max_ecis, used_indices)

    examples = [min_eci, median_eci, max_eci]

    # Build results DataFrame ("Days" holds the statistic itself, truncated to int)
    stats_table = pd.DataFrame({
        'Case': ['Min', 'Median', 'Max'],
        'Days': [int(min_val), int(median_val), int(max_val)],
        'Title': [eci['title'] for eci in examples],
        'Signatures_Collected': [eci['signatures_collected'] for eci in examples],
        'Successful_ECI': [eci['successful_eci'] for eci in examples],
    })

    print(f"\n{period_name} - Key Examples:")
    display(stats_table.style.hide(axis='index'))

    return stats_table

# =============================================================================
# VISUALIZATION 1: REGISTRATION ‚Üí COLLECTION START
# =============================================================================

# Each section below draws one duration histogram and then prints/displays the
# matching min / median / max example table for the same column.
# color_scheme is a pair of RGB tuples: the start and end of the bar gradient.
fig1 = create_duration_histogram(
    df=df,
    column_name='registration_to_collection_days',
    title='<b>Registration ‚Üí Collection Start</b>',
    color_scheme=((200, 240, 220), (15, 100, 120))  # Light teal to dark teal
)
fig1.show()

stats1 = create_duration_stats_table(
    df=df,
    column_name='registration_to_collection_days',
    period_name='Registration ‚Üí Collection Start'
)


# =============================================================================
# VISUALIZATION 2: COLLECTION PERIOD DURATION
# =============================================================================

fig2 = create_duration_histogram(
    df=df,
    column_name='collection_duration_days',
    title='<b>Collection Period Duration</b>',
    color_scheme=((255, 200, 150), (180, 80, 20))  # Light orange to dark orange
)
fig2.show()

stats2 = create_duration_stats_table(
    df=df,
    column_name='collection_duration_days',
    period_name='Collection Period Duration'
)


# =============================================================================
# VISUALIZATION 3: COLLECTION END -> COMMISSION RESPONSE
# =============================================================================

fig3 = create_duration_histogram(
    df=df,
    column_name='collection_to_response_days',
    title='<b>Collection End ‚Üí Commission Response</b>',
    color_scheme=((200, 250, 150), (20, 120, 50))  # Light green to dark green
)
fig3.show()

stats3 = create_duration_stats_table(
    df=df,
    column_name='collection_to_response_days',
    period_name='Collection End ‚Üí Commission Response'
)


# =============================================================================
# VISUALIZATION 4: REGISTRATION -> COMMISSION RESPONSE
# =============================================================================

# Only initiatives flagged as answered by the Commission; `responded` is a
# module-level name that the question-8 cell further down copies from.
responded = df[df['commission_responded']]
fig4 = create_duration_histogram(
    df=responded,
    column_name='time_to_commission_response_days',
    title='<b>Registration ‚Üí Commission Response</b>',
    color_scheme=((255, 255, 150), (100, 140, 20))  # Light yellow to dark yellow
)
fig4.show()

stats4 = create_duration_stats_table(
    df=responded,
    column_name='time_to_commission_response_days',
    period_name='Registration ‚Üí Commission Response'
)

[↑ Table of Contents ↑](#table-of-contents)

<a id='question-7'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">7. Which Policy Areas Succeed Most Often?</p>

Categorizes initiatives by topic to show which policy domains achieve higher success rates and whether certain types of issues mobilize European citizens more effectively. It also examines whether abstract governance topics (like democracy and transparency) perform differently from concrete, tangible issues (like animal welfare or environmental protection).<br><br>
NOTE:<br>
> - Policy area categories are not mutually exclusive — an initiative about pesticides could be tagged as both "Agriculture" and "Environment," potentially affecting these counts. These categories were assigned using keyword-based classification rather than official EU policy taxonomy, since the European Commission does not provide standardized policy area classifications for ECIs on the official register. While the categorization is somewhat arbitrary, it remains helpful for identifying at first glance which types of issues resonate most strongly with EU citizens.

In [None]:
# --- 4) Analysis output (same as before, but use primary_policy_area) ---
# Share of all initiatives per primary policy area (computed once; the
# original cell built this identical frame twice).
policy_dist = df["primary_policy_area"].value_counts().reset_index()
policy_dist.columns = ["Policy Area", "Count"]
policy_dist["Percentage"] = (policy_dist["Count"] / len(df) * 100).round(2)

print("\n\nSuccess Rate by Policy Area:")
# One row per policy area: totals, success / response counts and signature stats.
policy_success = df.groupby("primary_policy_area").agg({
    "registration_number": "count",
    "successful_eci": "sum",
    "commission_responded": "sum",
    "signatures_numeric": ["mean", "median"]
}).reset_index()
# The dict-of-lists aggregation yields two-level column labels; flatten them.
policy_success.columns = ["Policy Area", "Total", 'Successful', "Commission Response", "Avg Signatures", "Median Signatures"]
policy_success["Success Rate (%)"] = (policy_success['Successful'] / policy_success["Total"] * 100).round(2)

# Round signature columns for readability. A policy area whose signature
# values are all missing has NaN mean and median, and NaN cannot be cast to
# int, so both columns get the same fillna(0) guard.
policy_success["Avg Signatures"] = policy_success["Avg Signatures"].fillna(0).round(0).astype(int)
policy_success["Median Signatures"] = policy_success["Median Signatures"].fillna(0).astype(int)

# --- Pie chart for policy area distribution (reusable helper) ---
fig_pie = create_pie_chart_with_eci_lists(
    df=df,
    values_column='primary_policy_area',
    names_column='primary_policy_area',
    title='Registered by Primary Policy Area',
    filter_column='primary_policy_area',
    filter_values_dict={},
    color_map=policy_area_colors,
    hole=0.1,
    height=600
)
fig_pie.show()

# üìä VISUALIZATION: Grouped Bar Chart - Policy Area Performance

# Prepare custom hover data for each trace separately
def prepare_hover_data(policy_area, trace_type):
    """
    Build the hover payload for one policy area and one bar trace.

    trace_type: 'Total', 'Collected Signatures', or 'Commission Response'

    Returns a tuple (eci_list, count): an HTML bullet list of up to 15 ECI
    titles (followed by an "and X more" line when there are more) and the
    number of ECIs that matched the trace type.
    """
    # Flag column that narrows the selection; 'Total' has none and keeps every row.
    flag_column = {
        'Collected Signatures': 'successful_eci',
        'Commission Response': 'commission_responded',
    }.get(trace_type)

    area_ecis = df[df['primary_policy_area'] == policy_area].copy()
    if flag_column is not None:
        area_ecis = area_ecis[area_ecis[flag_column] == True]

    matched = len(area_ecis)

    # Bullet list of the first 15 titles
    eci_list = '<br>'.join(f"‚Ä¢ {title}" for title in area_ecis['title'].head(15))

    if matched > 15:
        eci_list += f'<br><i>... (and {matched - 15} more)</i>'

    return eci_list, matched

# Grouped bar chart: total / successful / answered initiatives per policy area
fig = go.Figure()

# One hover list per trace; every entry is (eci_list, count, type label)
total_hover, waiting_hover, commission_hover = [], [], []
hover_targets = (
    (total_hover, 'Total', 'Total Initiatives'),
    (waiting_hover, 'Collected Signatures', 'Collected Signatures'),
    (commission_hover, 'Commission Response', 'Commission Response'),
)

for policy_area in policy_success['Policy Area']:
    for hover_rows, trace_type, type_label in hover_targets:
        eci_list, eci_count = prepare_hover_data(policy_area, trace_type)
        hover_rows.append((eci_list, eci_count, type_label))

# All three traces share the same hover layout
bar_hovertemplate = (
    '<b>%{x}</b><br>'
    '<b>Count:</b> %{customdata[1]}<br>'
    '<b>Type:</b> %{customdata[2]}<br><br>'
    '<b>ECIs:</b><br>%{customdata[0]}'
    '<extra></extra>'
)

# (value column, legend name, bar colour, hover rows) in drawing order
bar_specs = (
    ('Total', 'Total Initiatives', 'lightblue', total_hover),
    ('Successful', 'Collected Signatures', '#F5A623', waiting_hover),
    ('Commission Response', 'Commission Response', '#3CA371', commission_hover),
)

for value_column, trace_name, bar_color, hover_rows in bar_specs:
    fig.add_trace(go.Bar(
        x=policy_success['Policy Area'],
        y=policy_success[value_column],
        name=trace_name,
        marker_color=bar_color,
        customdata=hover_rows,
        hovertemplate=bar_hovertemplate,
    ))

fig.update_layout(
    title='<b>ECI Performance by Policy Area</b>',
    xaxis_title='Policy Area',
    yaxis_title='Number of Initiatives',
    barmode='group',
    height=600,
    xaxis={'categoryorder': 'total descending'},
)

fig.show()

# VISUALIZATION: Success Rate bar chart by Policy Area
policy_sorted = policy_success.sort_values('Success Rate (%)', ascending=True)

# Hover text per policy area: up to 15 bullet-listed titles plus an overflow note
policy_eci_lists = []
for policy in policy_sorted['Policy Area']:
    ecis = df.loc[df['primary_policy_area'] == policy, 'title'].tolist()
    eci_text = '<br>'.join(f"‚Ä¢ {title}" for title in ecis[:15])
    hidden_count = len(ecis) - 15
    if hidden_count > 0:
        eci_text += f"<br><i>... (and {hidden_count} more)</i>"
    policy_eci_lists.append(eci_text)

fig2 = px.bar(
    policy_sorted,
    x='Success Rate (%)',
    y='Policy Area',
    orientation='h',
    title='<b>Success Rate (%) by Policy Area</b>',
    color='Success Rate (%)',
    color_continuous_scale='RdYlGn',
    text='Success Rate (%)',
    custom_data=[policy_eci_lists],
)

fig2.update_traces(
    texttemplate='%{text:.1f}%',
    textposition='outside',
    hovertemplate=(
        '<b>%{y}</b><br>'
        '<b>Success Rate:</b> %{x:.2f}%<br><br>'
        '<b>ECIs:</b><br>%{customdata[0]}'
        '<extra></extra>'
    ),
)
fig2.update_layout(
    height=500,
    xaxis=dict(title=dict(font=dict(size=14))),
    yaxis=dict(title=dict(font=dict(size=14))),
    legend=dict(font=dict(size=14)),
)
fig2.show()

# =============================================================================
# üìä VISUALIZATION: SCATTER PLOT OF SUCCESSFUL ECIs
# =============================================================================

# Prepare data for scatter plot: keep only initiatives flagged as successful
successful = df[df['successful_eci'] == True].copy()

# Number of successful ECIs per registration year
# NOTE(review): not referenced again in this section — confirm it is still needed.
successful_by_year = successful.groupby('registration_year').size().reset_index()
successful_by_year.columns = ['Year', 'Count']

# Number of successful ECIs per policy area (displayed as the summary table below)
successful_by_policy = successful['primary_policy_area'].value_counts().reset_index()
successful_by_policy.columns = ['Policy Area', 'Count']

# Detail listing sorted by signature count, with display-friendly column names
# NOTE(review): not referenced again in this section — confirm it is still needed.
successful_detail = successful[['registration_number', 'title', 'registration_year', 
                                  'signatures_numeric', 'signatures_threshold_met_numeric',
                                  'primary_policy_area', 'commission_responded']].copy()
successful_detail = successful_detail.sort_values('signatures_numeric', ascending=False)
successful_detail.columns = ['Reg #', 'Title', 'Year', 'Signatures', 'Countries', 'Policy Area', 'Commission Responded']

# Working copy for plotting; title_short is the first 50 characters plus '...'
successful_plot = successful.copy()
successful_plot['title_short'] = successful_plot['title'].str[:50] + '...'

# Apply power scaling (signatures^4.3) so marker sizes differ more strongly
successful_plot['size_scaled'] = successful_plot['signatures_numeric'] ** 4.3

# Get policy area order by count (most to least); used to order the legend
policy_order = successful_plot['primary_policy_area'].value_counts().index.tolist()

# =============================================================================
# COLOR MAP FOR POLICY AREAS
# =============================================================================

# Policy area -> hex colour.
# NOTE(review): the scatter below passes `policy_area_colors` as its
# color_discrete_map, not this `color_map` — confirm which palette is intended.
color_map = {
    'Agriculture & Fisheries & Animal rights': '#8bc34a',  # Green (nature/plants)
    'Democracy & Citizens\' rights': '#5c6bc0',             # Indigo (official/formal)
    'Social Policy': '#ec407a',                             # Pink (community/people)
    'Health': '#ef5350',                                    # Red (medical/health)
    'Environment & Climate': '#66bb6a',                     # Forest green (nature)
    'Consumer protection': '#ffa726',                       # Orange (warning/safety)
    'Transport': '#42a5f5',                                 # Blue (sky/movement)
    'Economy & Finance': '#fdd835',                         # Yellow (gold/money)
    'Digital & Communications': '#7e57c2',                  # Purple (technology)
    'Education & Culture': '#26c6da',                       # Cyan (knowledge/creativity)
    'Joke ECI': '#bdbdbd',                                  # Grey (neutral)
    'Other': '#9e9e9e'                                      # Grey (neutral)
}

# =============================================================================
# BUILD SCATTER FIGURE
# =============================================================================

# x: countries meeting the threshold, y: total signatures,
# marker size: power-scaled signatures, colour: policy area.
fig = px.scatter(
    successful_plot, 
    x='signatures_threshold_met_numeric', 
    y='signatures_numeric',
    size='size_scaled',
    color='primary_policy_area',
    custom_data=['title', 'registration_year', 'primary_policy_area'],  # Explicitly set custom data
    title='<b>Successful ECIs: Policy Area and Signatures vs Countries Meeting Threshold</b>',
    labels={
        'signatures_threshold_met_numeric': 'Number of Countries Meeting Threshold',
        'signatures_numeric': 'Total Signatures Collected',
        'primary_policy_area': 'Policy Area'
    },
    category_orders={'primary_policy_area': policy_order},  # Order legend
    color_discrete_map=policy_area_colors,
    size_max=50
)

# Custom hover template; customdata indices follow the custom_data list above
# (0 = title, 1 = registration year, 2 = policy area)
fig.update_traces(
    hovertemplate=(
        '<b>%{customdata[0]}:</b><br>'
        '<b>%{customdata[2]}</b><br>'
        '<b>Year:</b> %{customdata[1]}<br>'
        '<b>Signatures Collected:</b> %{y:,.0f}<br>'
        '<b>Countries Meeting Threshold:</b> %{x}<br>'
        '<extra></extra>'
    )
)

fig.update_layout(
    height=600, 
    xaxis=dict(title=dict(font=dict(size=14))),
    yaxis=dict(title=dict(font=dict(size=14))),
)
fig.show()

# =============================================================================
# SUMMARY TABLE FOR SUCCESSFUL ECIs BY POLICY AREA
# =============================================================================

print(f"\n\nSuccessful ECIs by policy area:")
display(successful_by_policy.style.hide(axis='index'))


[↑ Table of Contents ↑](#table-of-contents)

<a id='question-8'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">8. How Long Do ECIs Wait for Commission Responses?</p>

Examines the successful initiatives that received Commission responses to measure bureaucratic processing time.

NOTE:<br>
> The wait of more than six years for "Cohesion policy" suggests that the Commission can delay its response to complex or politically sensitive proposals.

In [None]:
# =============================================================================
# üìä VISUALIZATION: TIMELINE TO COMMISSION RESPONSE
# =============================================================================

# Plotting copy of the responded initiatives with shortened titles
responded_plot = responded.copy()
responded_plot['title_short'] = responded_plot['title'].str.slice(stop=40) + '...'

# Response time in days, plus what is left after removing whole 365-day years
_response_days = responded_plot['time_to_commission_response_days']
_days_past_full_years = _response_days % 365

responded_plot['years_to_response'] = _response_days / 365

# =============================================================================
# TIME COMPONENTS (YEARS / MONTHS / DAYS)
# =============================================================================

# A "year" is 365 days and a "month" is 30 days in this breakdown
responded_plot['years_full'] = (_response_days // 365).astype(int)
responded_plot['months_full'] = (_days_past_full_years // 30).astype(int)
responded_plot['days_full'] = (_days_past_full_years % 30).astype(int)

# Create formatted time string with proper singular/plural
def format_time(row):
    """
    Render a row's years_full / months_full / days_full as readable text.

    Zero-valued years and months are left out. Days are left out only when
    zero and at least one other part is present, so the result is never
    empty. Units are pluralised unless the count is exactly 1.
    """
    def with_unit(count, unit):
        suffix = '' if count == 1 else 's'
        return f"{count} {unit}{suffix}"

    pieces = [
        with_unit(row[key], unit)
        for key, unit in (('years_full', 'year'), ('months_full', 'month'))
        if row[key] > 0
    ]

    day_count = row['days_full']
    if day_count > 0 or not pieces:
        pieces.append(with_unit(day_count, 'day'))

    return ' '.join(pieces)

# =============================================================================
# OBJECTIVE FORMATTING (LINE BREAKS / BULLETS)
# =============================================================================

# Format objective with line breaks every 11 words
def format_objective(objective):
    """
    Wrap an objective text for hover display.

    The text is split on the bullet marker; every non-empty section is
    wrapped with a '<br>' after each 11 words, and sections are joined with
    '<br>'. Sections that followed a marker get the marker put back in front.
    Missing values yield "No objective provided".
    """
    if pd.isna(objective):
        return "No objective provided"

    wrapped_sections = []
    for position, chunk in enumerate(str(objective).split('‚Ä¢')):
        chunk = chunk.strip()
        if not chunk:
            continue

        # Text before the first marker (position 0) was never bulleted
        if position > 0:
            chunk = '‚Ä¢ ' + chunk

        # Break the section into lines of at most 11 words
        tokens = chunk.split()
        wrapped_sections.append('<br>'.join(
            ' '.join(tokens[start:start + 11])
            for start in range(0, len(tokens), 11)
        ))

    return '<br>'.join(wrapped_sections)

# Pre-render the hover strings: readable duration and wrapped objective
responded_plot['time_formatted'] = responded_plot.apply(format_time, axis=1)
responded_plot['objective_formatted'] = responded_plot['objective'].apply(format_objective)

# =============================================================================
# BUILD BAR FIGURE
# =============================================================================

# Horizontal bars, built from rows sorted by response time
_responded_by_wait = responded_plot.sort_values('time_to_commission_response_days')

fig = px.bar(
    _responded_by_wait,
    x='years_to_response',
    y='title_short',
    orientation='h',
    title='<b>Time to Commission Response (Years) for Each Initiative</b>',
    labels={'years_to_response': 'Years to Response', 'title_short': 'Initiative'},
    color='years_to_response',
    color_continuous_scale='Viridis_r',
    custom_data=['title', 'time_formatted', 'objective_formatted'],
)

# Hover shows the full title, the formatted duration and the wrapped objective
fig.update_traces(
    hovertemplate=(
        '<b>%{customdata[0]}</b><br>'
        '<b>Time to Response:</b> %{customdata[1]}<br>'
        '<b>Objective:</b><br>'
        '%{customdata[2]}<br>'
        '<extra></extra>'
    )
)

fig.update_layout(
    height=600,
    showlegend=False,
    xaxis=dict(title=dict(font=dict(size=14))),
    yaxis=dict(title=dict(font=dict(size=14))),
)
fig.show()

# =============================================================================
# SUMMARY STATS AND TABLES FOR RESPONDED INITIATIVES
# =============================================================================

# Initiatives that received a Commission response (independent copy)
responded = df[df['commission_responded'] == True].copy()

print(f"\nTotal initiatives with Commission response: {len(responded)}")
# Share of successful ECIs (those flagged successful_eci) that were answered
print(f"Response rate (of ECIs with minimal collected signatures): {(len(responded)/df['successful_eci'].sum()*100):.2f}%")

# Convert to years for display (a year is taken as 365 days)
time_in_years = responded['time_to_commission_response_days'] / 365

print("\n\nTime to receive Commission response (from registration) in years:")
time_stats = pd.DataFrame({
    'Statistic': ['mean', 'median', 'min', 'max'],
    'Years': [
        time_in_years.mean(),
        time_in_years.median(),
        time_in_years.min(),
        time_in_years.max()
    ]
})
time_stats['Years'] = time_stats['Years'].round(2)
display(time_stats.style.hide(axis='index').format({'Years': '{:.2f}'}))

print("\n\nResponded initiatives by registration year:")
responded_by_year = responded.groupby('registration_year').size().reset_index()
responded_by_year.columns = ['Year', 'Count']
# Hide the index with the documented axis name, as every other table in this
# notebook does; axis=False only worked because False compares equal to 0.
display(responded_by_year.style.hide(axis='index'))


[↑ Table of Contents ↑](#table-of-contents)

<a id='question-9'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">9. How Much Funding Do ECIs Receive?</p>

Examines whether financial resources play a significant role in ECI success by analyzing funding patterns across all initiatives. 

In [None]:
# =============================================================================
# üìä VISUALIZATION: SCATTER PLOT - FUNDING VS SIGNATURES
# =============================================================================

# Keep only initiatives that report both a funding amount and a signature count
plot_data = df.dropna(subset=['funding_numeric', 'signatures_numeric']).copy()

# =============================================================================
# VALUE FORMATTING HELPERS
# =============================================================================

# Format funding as M (2 decimals), K (1 decimal) or the raw value
def format_currency(value):
    """
    Return a short euro-prefixed label for a funding amount.

    Amounts of at least one million are shown in millions with two decimals,
    amounts of at least one thousand in thousands with one decimal, and
    anything else is printed as-is.
    """
    if value >= 1_000_000:
        millions = value / 1_000_000
        return f"‚Ç¨{millions:.2f}M"
    if value >= 1_000:
        thousands = value / 1_000
        return f"‚Ç¨{thousands:.1f}K"
    return f"‚Ç¨{value}"

# Hover helpers: short funding label and Yes/No success text
# (values of successful_eci other than True/False map to NaN)
plot_data['funding_formatted'] = plot_data['funding_numeric'].apply(format_currency)
plot_data['successful_text'] = plot_data['successful_eci'].map({True: 'Yes', False: 'No'})

# =============================================================================
# BUILD SCATTER + TRENDLINE
# =============================================================================

# x: signatures, y: funding; marker size also encodes funding and colour
# encodes success. trendline='ols' asks Plotly Express for a regression line.
fig = px.scatter(
    plot_data,
    y='funding_numeric',
    x='signatures_numeric',
    color='successful_eci',
    size='funding_numeric',
    custom_data=['title', 'registration_year', 'funding_formatted', 'successful_text'],
    title='<b>Funding vs Signatures Collected (Correlation Analysis)</b>',
    labels={
        'funding_numeric': 'Funding (EUR)',
        'signatures_numeric': 'Signatures Collected',
        'successful_eci': 'Successful'
    },
    color_discrete_map={True: '#2ecc71', False: '#e74c3c'},
    trendline='ols',
    size_max=50
)

# Custom hover template; customdata indices follow the custom_data list above
fig.update_traces(
    hovertemplate='<b>%{customdata[0]}</b><br>' +
                  '<b>Year:</b> %{customdata[1]}<br>' +
                  '<b>EUR:</b> %{customdata[2]}<br>' +
                  '<b>Signatures:</b> %{x:,.0f}<br>' +
                  '<b>Successful:</b> %{customdata[3]}<br>' +
                  '<extra></extra>',
    selector=dict(mode='markers')  # Only apply to scatter points, not trendline
)

fig.update_layout(
    height=600,
    xaxis=dict(title=dict(font=dict(size=14))),
    yaxis=dict(title=dict(font=dict(size=14))),
)
fig.show()

# =============================================================================
# FUNDING COMPARISON: SUCCESSFUL VS UNSUCCESSFUL
# =============================================================================

print("\n\nFunding comparison: Successful vs Unsuccessful:")
# Split on the success flag; rows where the flag is neither True nor False
# land in neither group.
successful_data = df[df['successful_eci'] == True]
unsuccessful_data = df[df['successful_eci'] == False]
successful_funding = successful_data['funding_numeric']
unsuccessful_funding = unsuccessful_data['funding_numeric']

# 'Count' is the number of rows with a non-missing funding value
funding_comparison = pd.DataFrame({
    'Category': ['Successful ECIs', 'Unsuccessful ECIs'],
    'Count': [successful_funding.notna().sum(), unsuccessful_funding.notna().sum()],
    'Mean Funding': [successful_funding.mean(), unsuccessful_funding.mean()],
    'Median Funding': [successful_funding.median(), unsuccessful_funding.median()],
    'Max Funding': [successful_funding.max(), unsuccessful_funding.max()]
})
display(funding_comparison.style.hide(axis='index').format({
    'Mean Funding': '{:,.2f}',
    'Median Funding': '{:,.2f}',
    'Max Funding': '{:,.2f}'
}))

# =============================================================================
# FUNDING BY OUTCOME (INCLUDING WAITING RESPONSE)
# =============================================================================

print("\n\nFunding by outcome:")
funding_by_outcome = df.groupby('final_outcome_enhanced').agg({
    'funding_numeric': ['count', 'mean', 'median', 'min', 'max']
}).round(2)
# Drop the outer 'funding_numeric' column level, then apply display names
funding_by_outcome.columns = funding_by_outcome.columns.droplevel(0)
funding_by_outcome.columns = ['Count', 'Mean', 'Median', 'Min', 'Max']
funding_by_outcome.index.name = 'Final Outcome'
funding_by_outcome = funding_by_outcome.reset_index()
display(funding_by_outcome.style.hide(axis="index").format({
    'Mean': '{:,.2f}',
    'Median': '{:,.2f}',
    'Min': '{:,.2f}',
    'Max': '{:,.2f}',
}))

# =============================================================================
# FUNDING AVAILABILITY STATS
# =============================================================================

# Statistics: ECIs with and without funding
# "With funding data" means funding_numeric > 0; zero and missing values are
# both counted as "without funding data".
print("\nFunding availability (‚Ç¨):*")
funding_availability = pd.DataFrame({
    'Category': ['ECIs with funding data', 'ECIs without funding data'],
    'Count': [
        (df['funding_numeric'] > 0).sum(),
        ((df['funding_numeric'] == 0) | (df['funding_numeric'].isna())).sum()
    ]
})
display(funding_availability.style.hide(axis='index'))
print("* EU Regulation requires ECI organisers to declare all funding")
print("  and support exceeding ‚Ç¨500 per sponsor in the official register.")

# =============================================================================
# CORRELATION COEFFICIENT + INTERPRETATION
# =============================================================================

# Correlation needs rows where both funding and signatures are known
initiatives_with_both = df.dropna(subset=['funding_numeric', 'signatures_numeric'])
if len(initiatives_with_both) > 0:
    print("\n\nCorrelation: Funding vs Signatures:*")
    correlation = initiatives_with_both[['funding_numeric', 'signatures_numeric']].corr()
    corr_value = correlation.loc['funding_numeric', 'signatures_numeric']

    # Sign of the coefficient; anything neither above nor below zero reads "None"
    direction = "None"
    if corr_value > 0:
        direction = "Positive (+)"
    elif corr_value < 0:
        direction = "Negative (-)"

    # Strength label comes from the shared helper, based on the magnitude only
    abs_corr = abs(corr_value)
    strength = define_correlation(abs_corr)

    # Two-column (Metric, Value) interpretation table
    summary_rows = [
        ('Correlation Coefficient', f"{corr_value:.4f}"),
        ('Direction', direction),
        ('Strength', strength),
        ('Interpretation', f"{strength} {direction.lower()} relationship between funding and signatures"),
    ]
    correlation_summary = pd.DataFrame(summary_rows, columns=['Metric', 'Value'])

    display(correlation_summary.style.hide(axis='index'))
    print("* Remember, correlation is not causation!")


[↑ Table of Contents ↑](#table-of-contents)

<a id='question-10'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">10. What Predicts Whether an ECI Will Succeed?</p>

Identifies which factors most strongly predict whether an ECI will meet the dual success criteria: at least 1 million signatures overall and the minimum signature threshold reached in at least seven member states.

In [None]:
# =============================================================================
# CORRELATION ANALYSIS DATASET
# =============================================================================

# Create analysis dataset
# Working copy with only the columns relevant to the correlation analysis
analysis_df = df[[
    'successful_eci',
    'commission_responded',
    'signatures_numeric',
    'signatures_threshold_met_numeric',
    'funding_numeric',
    'collection_duration_days',
    'registration_to_collection_days',
    'registration_year'
]].copy()

# Convert boolean to numeric so the flags can take part in the correlation
# NOTE(review): responded_numeric is created here but is not part of
# corr_columns below — confirm whether it should be included.
analysis_df['successful_numeric'] = analysis_df['successful_eci'].astype(int)
analysis_df['responded_numeric'] = analysis_df['commission_responded'].astype(int)

# Columns that enter the correlation matrix, in display order
corr_columns = [
    'successful_numeric',
    'signatures_numeric',
    'signatures_threshold_met_numeric',
    'funding_numeric',
    'collection_duration_days',
    'registration_to_collection_days',
    'registration_year'
]

# Pairwise correlations using DataFrame.corr() defaults
correlation_matrix = analysis_df[corr_columns].corr()

# =============================================================================
# üìä VISUALIZATION: CORRELATION HEATMAP
# =============================================================================

# Column name -> human-readable label used on both heatmap axes
corr_labels = {
    'successful_numeric': 'Success',
    'signatures_numeric': 'Signatures',
    'signatures_threshold_met_numeric': 'Countries Thresholds Met',
    'funding_numeric': 'Funding',
    'collection_duration_days': 'Collection Days',
    'registration_to_collection_days': 'Reg ‚Üí Start Collection',
    'registration_year': 'Year'
}

# Rename for display
corr_display = correlation_matrix.rename(columns=corr_labels, index=corr_labels)

# Keep only lower triangle (remove diagonal and upper triangle):
# np.triu of an all-True matrix marks the diagonal and everything above it,
# and DataFrame.mask blanks those cells to NaN.
mask = np.triu(np.ones_like(corr_display, dtype=bool))
corr_display_masked = corr_display.mask(mask)

# Create strength matrix for hover (blank string for masked cells).
# DataFrame.map is tried first; applymap is the fallback for pandas versions
# that do not provide it yet.
try:
    strength_matrix = corr_display_masked.map(
        lambda x: define_correlation(abs(x)) if pd.notna(x) else ''
    )
except AttributeError:
    strength_matrix = corr_display_masked.applymap(
        lambda x: define_correlation(abs(x)) if pd.notna(x) else ''
    )

# Diverging colour scale centred on zero and fixed to the [-1, 1] range
fig = px.imshow(
    corr_display_masked,
    text_auto='.2f',
    aspect='auto',
    color_continuous_scale='RdBu_r',
    color_continuous_midpoint=0,
    title='<b>Correlation Matrix: Key Success Factors</b>',
    zmin=-1, zmax=1
)

# Update hover template; customdata carries the strength label of each cell
fig.update_traces(
    customdata=strength_matrix.values,
    hovertemplate=(
        'x: <b>%{x}</b><br>'
        'y: <b>%{y}</b><br>'
        'strength: <b>%{customdata}</b><br>'
        'correlation: <b>%{z:.2f}</b><extra></extra>'
    )
)

# Add space to y-axis tick labels for margin
y_labels_with_space = [label + ' ' for label in corr_display_masked.index]

# Tick positions are the row positions 0..n-1, relabelled with the padded text
fig.update_layout(
    height=600,
    xaxis=dict(title=dict(font=dict(size=14))),
    yaxis=dict(
        title=dict(font=dict(size=14)),
        tickmode='array',
        tickvals=list(range(len(y_labels_with_space))),
        ticktext=y_labels_with_space
    ),
)
fig.show()

[↑ Table of Contents ↑](#table-of-contents)

<a id='question-11'></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">11. Key Findings</p>

Putting it all together

In [None]:
# =============================================================================
# OVERALL ECI LANDSCAPE
# =============================================================================

print("\n### OVERALL ECI LANDSCAPE ###")

# Span of registration years covered by the dataset (inclusive on both ends)
first_year = df['registration_year'].min()
last_year = df['registration_year'].max()
total_registered = len(df)
years_covered = last_year - first_year + 1

# One (metric, value) pair per table row
landscape_rows = [
    ('Total ECIs registered (all time)', total_registered),
    ('Time period', f"{first_year:.0f} - {last_year:.0f}"),
    ('Average Registered initiatives per year', f"{total_registered / years_covered:.1f}"),
]
landscape_df = pd.DataFrame({
    'Metric': [metric for metric, _ in landscape_rows],
    'Value': [value for _, value in landscape_rows],
})

# Hide the positional index: it carries no information in a summary table
display(landscape_df.style.hide(axis='index'))

# =============================================================================
# SUCCESS RATES
# =============================================================================

print("\n\n### SUCCESS RATES ###")

# Headline counts, computed once and reused for both table columns
total_ecis = len(df)
reached_count = df['reached_signatures'].sum()
threshold_count = df['met_country_threshold'].sum()
successful_count = df['successful_eci'].sum()
responded_count = df['commission_responded'].sum()


def _pct(part, whole):
    """Format part/whole as a one-decimal percentage, or 'N/A' when whole is 0."""
    return f"{(part / whole * 100):.1f}%" if whole else "N/A"


success_rates_df = pd.DataFrame({
    'Metric': [
        'Initiatives reaching 1M signatures',
        'Initiatives meeting country threshold',
        'Successful ECIs (both criteria)',
        'Commission responses received',
        'Response rate for successful ECIs'
    ],
    # 'Count' holds counts only; the last row shows the number of responses,
    # which is the numerator of the response rate
    'Count': [
        reached_count,
        threshold_count,
        successful_count,
        responded_count,
        responded_count
    ],
    # The first four rows are shares of all registered ECIs; the last row is
    # responses relative to successful ECIs (it previously sat in 'Count')
    'Percentage': [
        _pct(reached_count, total_ecis),
        _pct(threshold_count, total_ecis),
        _pct(successful_count, total_ecis),
        _pct(responded_count, total_ecis),
        _pct(responded_count, successful_count)
    ]
})
display(success_rates_df.style.hide(axis='index'))

# =============================================================================
# KEY BARRIERS
# =============================================================================

print("\n\n### KEY BARRIERS ###")

# Initiatives that ended without completing, split by recorded final outcome
unsuccessful = df.loc[df['final_outcome'].eq('Unsuccessful Collection')]
withdrawn = df.loc[df['final_outcome'].eq('Withdrawn')]

# Row label -> number of initiatives; the attrition row is the sum of the two above
barrier_counts = {
    'Unsuccessful collections': len(unsuccessful),
    'Withdrawn initiatives': len(withdrawn),
}
barrier_counts['Attrition rate (did not complete)'] = sum(barrier_counts.values())

barriers_df = pd.DataFrame({
    'Metric': list(barrier_counts),
    'Count': list(barrier_counts.values()),
    # share of all registered ECIs, one decimal place
    'Percentage': [f"{(count / len(df) * 100):.1f}%" for count in barrier_counts.values()],
})
display(barriers_df.style.hide(axis='index'))

# =============================================================================
# SIGNATURES REQUIRED
# =============================================================================

print("\n\n### SIGNATURES REQUIRED ###")

# Signature totals of the initiatives flagged as successful
successful_sigs = df.loc[df['successful_eci'], 'signatures_numeric']

# Row label -> name of the Series statistic to report
signature_stats = {
    'Minimum signatures among successful': 'min',
    'Average signatures for successful': 'mean',
    'Median signatures for successful': 'median',
    'Maximum signatures achieved': 'max',
}

signatures_df = pd.DataFrame({
    'Metric': list(signature_stats),
    # thousands separators, no decimals
    'Signatures': [
        f"{getattr(successful_sigs, stat_name)():,.0f}"
        for stat_name in signature_stats.values()
    ],
})
display(signatures_df.style.hide(axis='index'))

# =============================================================================
# COUNTRY THRESHOLD PATTERNS
# =============================================================================

print("\n\n### COUNTRY THRESHOLD PATTERNS ###")

# Per-initiative value of the threshold-met column: once for every ECI,
# once restricted to the successful ones
all_countries = df['signatures_threshold_met_numeric']
successful_countries = all_countries[df['successful_eci']]

# One (metric, value) pair per table row; the minimum is shown as the fixed value "7"
country_rows = [
    ('Minimum countries needed', "7"),
    ('Average countries met (all ECIs)', f"{all_countries.mean():.1f}"),
    ('Median countries met in successful', f"{successful_countries.median():.1f}"),
    ('Maximum countries met', f"{int(successful_countries.max())}"),
]
countries_df = pd.DataFrame({
    'Metric': [metric for metric, _ in country_rows],
    'Countries': [value for _, value in country_rows],
})
display(countries_df.style.hide(axis='index'))

# =============================================================================
# TIME EXPECTATIONS
# =============================================================================

print("\n\n### TIME EXPECTATIONS ###")

# Calculate time metrics
# NOTE: median_collection_days and avg_resp_days are computed but not shown below
response_days = responded['time_to_commission_response_days']
median_collection_days = df['collection_duration_days'].median()
avg_resp_days = response_days.mean()
min_resp_days = response_days.min()
max_resp_days = response_days.max()

# First row matching each extreme, used to look up the initiative's title
min_resp_row = responded[response_days == min_resp_days].iloc[0]
max_resp_row = responded[response_days == max_resp_days].iloc[0]

# Commission response extremes (with ECI titles)
print("\nCommission response time examples:")
response_extremes = [
    ('Fastest Commission response (from registration)', min_resp_days, min_resp_row),
    ('Slowest Commission response (from registration)', max_resp_days, max_resp_row),
]
time_examples_df = pd.DataFrame({
    'Metric': [label for label, _, _ in response_extremes],
    'Days': [days for _, days, _ in response_extremes],
    # days converted to years using a 365-day year
    'Years': [days / 365 for _, days, _ in response_extremes],
    'ECI Title': [row['title'] for _, _, row in response_extremes],
})

time_table = time_examples_df.style.hide(axis='index')
display(time_table.format({
    'Days': '{:,.0f}',
    'Years': '{:.2f}'
}))

# =============================================================================
# FUNDING INSIGHTS
# =============================================================================

print("\n\n### FUNDING INSIGHTS ###")

# Average funding per group, computed once and reused below
successful_avg_funding = successful_data['funding_numeric'].mean()
unsuccessful_avg_funding = unsuccessful_data['funding_numeric'].mean()

# The ratio is only meaningful with a usable, non-zero denominator; otherwise
# report "N/A" instead of printing "inf" or "nan"
if pd.notna(unsuccessful_avg_funding) and unsuccessful_avg_funding != 0:
    funding_ratio = successful_avg_funding / unsuccessful_avg_funding
else:
    funding_ratio = float('nan')
funding_ratio_text = f"{funding_ratio:.1f}x" if pd.notna(funding_ratio) else "N/A"

funding_df = pd.DataFrame({
    'Metric': [
        'Successful ECIs avg funding',
        'Unsuccessful ECIs avg funding',
        'How much more funding successful ECIs get on avg',
        'Correlation (funding vs signatures)'
    ],
    'Value': [
        f"€{successful_avg_funding:,.0f}",
        f"€{unsuccessful_avg_funding:,.0f}",
        funding_ratio_text,
        f"{corr_value:.3f}"
    ]
})
display(funding_df.style.hide(axis='index'))

# Dynamic note based on correlation strength.
# Each band is (exclusive upper bound on |r|, label, interpretation).
abs_corr = abs(corr_value)
correlation_bands = [
    (0.3, "weak", "Funding shows little relationship with signature success"),
    (0.5, "moderate", "Funding tends to follow signature success"),
    (0.7, "strong", "Funding is strongly linked to signature success"),
    (float('inf'), "very strong", "Funding and signatures are highly correlated"),
]

if pd.isna(abs_corr):
    # NaN fails every "<" comparison, so without this branch an undefined
    # correlation would be reported as "very strong"
    strength = "undefined"
    interpretation = "The funding vs signatures correlation could not be computed"
    print(f"💡 Note: {interpretation}; signatures remain the critical success factor.")
else:
    strength, interpretation = next(
        (label, text) for upper_bound, label, text in correlation_bands
        if abs_corr < upper_bound
    )
    print(f"💡 Note: {interpretation} ({strength} correlation, {corr_value:.3f}); signatures remain the critical success factor.")

# =============================================================================
# TOPIC AREAS WITH HIGHEST SUCCESS
# =============================================================================

print("\n\n### TOPIC AREAS WITH HIGHEST SUCCESS ###")

# Only rank policy areas with at least 5 initiatives
has_enough_initiatives = policy_success['Total'] >= 5
ranked_topics = policy_success[has_enough_initiatives].sort_values(
    'Success Rate (%)', ascending=False
)
top_topics = ranked_topics.head(5)

topic_columns = ['Policy Area', 'Total', 'Successful', 'Success Rate (%)']
topics_table = top_topics[topic_columns].style.hide(axis='index')
display(topics_table.format({
    'Success Rate (%)': '{:.2f}',
}))

# =============================================================================
# TEMPORAL TRENDS
# =============================================================================

print("\n\n### TEMPORAL TRENDS ###")

# Years with very few registrations produce extreme success rates, so both
# rankings use the same minimum sample size. Previously only the "worst"
# ranking was filtered, which let a low-volume year top the "best" list.
MIN_REGISTRATIONS = 5
trend_columns = ['Year', 'Total Registered', 'Successful', 'Success Rate (%)']
rankable_years = yearly_stats[yearly_stats['Total Registered'] >= MIN_REGISTRATIONS]

print(f"Best performing years (with >= {MIN_REGISTRATIONS} registrations):")
best_years = rankable_years.nlargest(3, 'Success Rate (%)')
display(best_years[trend_columns].style.hide(axis='index').format({
    'Success Rate (%)': '{:.2f}',
}))

print(f"\nWorst performing years (with >= {MIN_REGISTRATIONS} registrations):")
worst_years = rankable_years.nsmallest(3, 'Success Rate (%)')
display(worst_years[trend_columns].style.hide(axis='index').format({
    'Success Rate (%)': '{:.2f}',
}))

# =============================================================================
# GEOGRAPHIC INSIGHTS
# =============================================================================

print("\n\n### GEOGRAPHIC INSIGHTS ###")

print("Top 5 countries by threshold achievement in successful ECIs:")
# NOTE(review): takes the first five rows as-is, so this assumes
# country_participation is already sorted by achievement - confirm upstream
threshold_columns = ['Country', 'Times Met Threshold', 'Participation Rate (%)']
threshold_leaders = country_participation[threshold_columns].head(5)
display(threshold_leaders.style.hide(axis='index').format({
    'Participation Rate (%)': '{:.2f}',
}))

print("\nTop 5 countries organizing successful initiatives, by representative residency:")
# NOTE(review): likewise relies on the existing row order of
# successful_org_participation - confirm upstream
organizer_leaders = successful_org_participation.head(5)
display(organizer_leaders.style.hide(axis='index'))


[↑ Table of Contents ↑](#table-of-contents)

<a id="contact"></a>
## <p style="padding:10px;background-color:#fff798;margin:0;color:#435672;font-family:newtimesroman;text-align:center;border-radius: 15px 50px;overflow:hidden;font-weight:500;font-size:150%;">✉️ Contact</p>

<b>Karol Łukaszczyk:</b><br>
- Via e-mail: **karol.lukaszczyk.contact@proton.me**<br>
- Explore Github: **https://github.com/Luk-kar**