# Statistical Analysis of Clustering Results
This notebook performs statistical analysis on the clustering results, including ANOVA for 3-cluster solutions and t-tests for 2-cluster solutions. The analysis is performed both with and without dummy variables to ensure robust results.

## 1. Setup and Data Loading

- Import required libraries
- Load clustering results
- Configure visualization settings

In [1]:
# Import required libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import f_oneway, ttest_ind
import numpy as np
import warnings
import os
from scipy import stats
warnings.filterwarnings('ignore')

# Set global visualization parameters
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['font.size'] = 12
sns.set_palette("Set2")

plt.ioff()  # Turn off interactive plotting

# Helper functions
def perform_statistical_analysis(df, n_clusters=3):
    """
    Performs statistical analysis for either 2 or 3 clusters
    Args:
        df: DataFrame with cluster assignments
        n_clusters: Number of clusters to analyze
    Returns:
        DataFrame of significant variables
    """
    if n_clusters == 3:
        return perform_anova_analysis(df)
    return perform_ttest_analysis(df)

def perform_anova_analysis(df):
    """
    Performs ANOVA analysis for 3 clusters
    Args:
        df: DataFrame with cluster assignments
    Returns:
        DataFrame with ANOVA results
    """
    groups = df['KMeans_Cluster'].unique()
    p_values = {}
    
    for column in df.columns:
        if column != 'KMeans_Cluster':
            group_data = [df[df['KMeans_Cluster'] == group][column] for group in groups]
            f_statistic, p_value = f_oneway(*group_data)
            p_values[column] = p_value
            
    return process_results(df, p_values)

def perform_ttest_analysis(df):
    """
    Performs t-test analysis for 2 clusters
    Args:
        df: DataFrame with cluster assignments
    Returns:
        DataFrame with t-test results
    """
    group_X = df[df['KMeans_Cluster'] == 0]
    group_Y = df[df['KMeans_Cluster'] == 1]
    
    results = []
    for col in df.columns:
        if col != 'KMeans_Cluster':
            t_stat, p_value = stats.ttest_ind(group_X[col], group_Y[col], equal_var=False)
            results.append((col, p_value))
            
    return process_results(df, dict(results))

def es_dummy(col):
    """
    Check if a column contains only binary values (0/1)
    Args:
        col: DataFrame column to check
    Returns:
        bool: True if column contains only binary values
    """
    return col.dropna().isin([0, 1]).all()

label_mapping = {
    'roi4': 'Negative ROI',
    'roi_yes': 'ROI Measured',
    'ecp_extredeployment': 'External Redeployment',
    'p_effect_reverse': 'Program Effectiveness',
    'evp_network': 'Cross-departmental Networks',
    'roi5': 'Positive ROI',
    'sum_tr_sk': 'Sum of Trained Skills',
    'sha_b_sk_n_digital': 'Share Needed Digital Skills',
    'stat_government': 'Help Org Use Gov Subsidy',
    'ecp_intredeployment': 'Internal Redeployment',
    'stat_csr': 'Fulfilling CSR Requirement',
    'reason_dei': 'Reason DEI',
    'sk_selected': 'Total Nr skills needed',
    'stand2': 'Mix Standardization customization',
    'f_union_1 - 25%': 'Union Share 1-25%',
    'inc_mgr_nofin': 'Manager: Non-financial Incentive',
    'invest_cont': 'Continued Investment',
    'p_fund_gov': 'Funded by Government',
    'f_medium': 'Medium Firm Size (100-999)',
    'roi2': 'Not yet but intend to calculate ROI',
    'roi3': 'Tried to but unable to',
    'roi1': 'No attempt to calculate',
    'p_eligibility': 'Participation Eligibility',
    'p_targetfunc_it_Not_Selected': 'Target Function: IT not selected',
    'p_cont_investment_Very likely': 'Cont. Investment Very Likely',
    'p_adv_hr': 'Advocate HR',
    'p_participated_2023_1000 - 9999': 'Participated in 2023 (1000 - 9999)',
    'p_program_length': 'Program Length (Years)',
    'p_fund_org': 'Funded by Org',
    'p_criteria_jobtitle': 'Selection Criteria: Job Title',
    'p_criteria_assmskills': 'Selection Criteria: Assessment of Skills',
    'p_target_emp': 'Target Group: Employees',
    'p_challenge_progcompl': 'Challenge: Program Completion',
    'p_year_end_2023': 'Program End: 2023',
    'p_part': 'Number of participants',
    'p_fund_wrk': 'Funded by Worker',
    'sha_b_sk_n_soft': 'Needed share soft skill',
    'sha_b_sk_n_man': 'Needed share mgmt skill',
    'p_part_exp': 'Expected Participation',
    'tot_kpi_tracked': 'Total Nr of KPIs',
    'f_union_1___25%': 'Union share 1-25%',  # Updated label
    'p_targetfunc_it': 'Target function IT',  # Updated label
}

def create_statistical_table(df, group_col, results, n_groups):
    """Create statistical table with means and standard deviations"""
    groups = df[group_col].unique()
    
    # Create LaTeX table
    if n_groups == 3:
        latex_table = "\\begin{tabular}{lcccccc}\n"
        latex_table += "Feature & Mean G1 & SD G1 & Mean G2 & SD G2 & Mean G3 & SD G3 & p-value \\\\\n"
    else:
        latex_table = "\\begin{tabular}{lcccc}\n"
        latex_table += "Feature & Mean G1 & SD G1 & Mean G2 & SD G2 & p-value \\\\\n"
    
    latex_table += "\\hline\n"
    
    # Create pandas DataFrame for visualization
    table_data = []
    columns = ['Feature']
    for i in range(n_groups):
        columns.extend([f'Mean G{i+1}', f'SD G{i+1}'])
    columns.append('p-value')
    
    for variable, p_value in results:
        variable_label = label_mapping.get(variable, variable)
        means = []
        sds = []
        # row_data = [variable]
        row_data = [variable_label]
        for group in range(n_groups):
            group_data = df[df[group_col] == group][variable]
            mean = group_data.mean()
            sd = group_data.std()
            means.append(mean)
            sds.append(sd)
            row_data.extend([f"{mean:.2f}", f"{sd:.2f}"])
        
        # Add to LaTeX table
        stats_str = " & ".join([f"{m:.2f} & {s:.2f}" for m, s in zip(means, sds)])
        #latex_table += f"{variable} & {stats_str} & {p_value:.4f} \\\\\n"
        latex_table += f"{variable_label} & {stats_str} & {p_value:.4f} \\\\\n"
        
        # Add to pandas data
        row_data.append(f"{p_value:.4f}")
        table_data.append(row_data)
    
    latex_table += "\\end{tabular}"
    
    # Create pandas DataFrame
    pd_table = pd.DataFrame(table_data, columns=columns)
    
    return latex_table, pd_table

def plot_density_distributions(df, variables, group_col, title_prefix="", n_clusters=None, analysis_type="all_vars"):
    """
    Create and save density plots for specified variables
    Args:
        df: DataFrame containing the data
        variables: List of variables to plot
        group_col: Column name for cluster assignments
        title_prefix: Prefix for plot titles
        n_clusters: Number of clusters (2 or 3)
        analysis_type: Type of analysis ("all_vars" or "no_dummies")
    """
    # Create base output directory if it doesn't exist
    base_dir = "../Output/Figures"
    cluster_dir = f"{base_dir}/{n_clusters}_clusters"
    analysis_dir = f"{cluster_dir}/{analysis_type}"
    
    # Create nested directories if they don't exist
    for dir_path in [base_dir, cluster_dir, analysis_dir]:
        os.makedirs(dir_path, exist_ok=True)
    
    for var in variables:
        plt.figure(figsize=(10, 6))
        for group in sorted(df[group_col].unique()):
            group_data = df[df[group_col] == group][var]
            sns.kdeplot(data=group_data, label=f'Cluster {group}', fill=True)
        
        plt.title(f'{title_prefix} {var}')
        plt.xlabel(var)
        plt.ylabel('Density')
        plt.legend()
        plt.tight_layout()
        
        # Save plot
        safe_var_name = "".join(x for x in var if x.isalnum() or x in ['-', '_'])
        plt.savefig(f"{analysis_dir}/density_{safe_var_name}.png")
        plt.close()
        
def process_results(df, p_values, alpha=0.05):
    """
    Processes statistical results and generates visualizations
    Args:
        df: DataFrame with data
        p_values: Dictionary of p-values by variable
        alpha: Significance level
    Returns:
        DataFrame of top significant variables
    """
    p_values_df = pd.DataFrame(list(p_values.items()), columns=['Variable', 'p_value'])
    significant_vars = p_values_df[p_values_df['p_value'] < alpha]
    top_vars = significant_vars.sort_values('p_value').head(20)
    
    create_density_plots(df, top_vars['Variable'].tolist())
    return top_vars

def create_density_plots(df, variables, n_clusters=None, analysis_type="all_vars"):
    """
    Creates density plots for significant variables
    Args:
        df: DataFrame containing the data
        variables: List of variables to plot
        n_clusters: Number of clusters (2 or 3)
        analysis_type: Type of analysis ("all_vars" or "no_dummies")
    """
    # Create base output directory if it doesn't exist
    base_dir = "../Output/Figures"
    cluster_dir = f"{base_dir}/{n_clusters}_clusters"
    analysis_dir = f"{cluster_dir}/{analysis_type}"
    
    # Create nested directories if they don't exist
    for dir_path in [base_dir, cluster_dir, analysis_dir]:
        os.makedirs(dir_path, exist_ok=True)
    
    for var in variables:
        plt.figure(figsize=(10, 6))
        g = sns.FacetGrid(df, hue='KMeans_Cluster', height=6, aspect=1.5)
        g.map(sns.kdeplot, var, fill=True)
        g.add_legend()
        plt.title(f'Density Plot for {var}')
        plt.tight_layout()
        
        # Save plot
        safe_var_name = "".join(x for x in var if x.isalnum() or x in ['-', '_'])
        plt.savefig(f"{analysis_dir}/density_facet_{safe_var_name}.png")
        plt.close()

## 2. Three-Cluster Analysis

- ANOVA testing for all variables
- Generation of statistical tables
- Density plots for significant variables
- Analysis without dummy variables

In [2]:
def analyze_clusters(df, n_clusters):
    """
    Main analysis function for clusters
    Args:
        df: DataFrame with cluster assignments
        n_clusters: Number of clusters to analyze
    """
    # Drop unnecessary columns if they exist
    if n_clusters == 3:
        df.drop(["tr_part", "tr_eligibility", "tr_part_exp"], axis=1, inplace=True)
    
    # Perform statistical analysis
    top_vars = perform_statistical_analysis(df, n_clusters)
    
    # Create plots and save them
    create_density_plots(df, top_vars['Variable'].tolist(), 
                        n_clusters=n_clusters, 
                        analysis_type="all_vars")
    
    # Generate and print LaTeX table and pandas table
    latex_table, pd_table = create_statistical_table(df, 'KMeans_Cluster', 
                                                   zip(top_vars['Variable'], top_vars['p_value']), 
                                                   n_clusters)
    print(f"\nLaTeX Table for {n_clusters}-Cluster Analysis:")
    print(latex_table)
    print(f"\nVisualized Table for {n_clusters}-Cluster Analysis:")
    display(pd_table)
    
    return top_vars

# Execute analysis for 3 clusters
df_3clusters = pd.read_csv("../Output/Results/clusters_3_construction.csv")
top_vars_3clusters = analyze_clusters(df_3clusters, 3)


LaTeX Table for 3-Cluster Analysis:
\begin{tabular}{lcccccc}
Feature & Mean G1 & SD G1 & Mean G2 & SD G2 & Mean G3 & SD G3 & p-value \\
\hline
Number of participants & 38.93 & 16.63 & 76.59 & 9.12 & 74.02 & 10.99 & 0.0000 \\
Participation Eligibility & 43.77 & 13.06 & 74.36 & 10.24 & 74.80 & 14.18 & 0.0000 \\
Expected Participation & 52.67 & 17.49 & 83.01 & 11.17 & 83.46 & 9.87 & 0.0000 \\
Funded by Org & 57.48 & 30.59 & 23.63 & 6.68 & 56.30 & 18.62 & 0.0000 \\
Funded by Government & 14.25 & 12.24 & 23.10 & 10.23 & 12.40 & 9.44 & 0.0000 \\
tr_sk_n_f_finance & 0.23 & 0.43 & 0.11 & 0.32 & 0.54 & 0.50 & 0.0000 \\
p_fund_other & 3.25 & 8.47 & 10.19 & 10.79 & 2.10 & 5.45 & 0.0000 \\
Funded by Worker & 12.33 & 12.11 & 22.87 & 10.91 & 15.20 & 10.97 & 0.0000 \\
presp2 & 0.32 & 0.47 & 0.74 & 0.44 & 0.64 & 0.48 & 0.0000 \\
p_resp_hr & 0.32 & 0.47 & 0.74 & 0.44 & 0.64 & 0.48 & 0.0000 \\
p_responsibility_HR Leaders & 0.32 & 0.47 & 0.74 & 0.44 & 0.64 & 0.48 & 0.0000 \\
p_responsibility_hier_HR & 0

Unnamed: 0,Feature,Mean G1,SD G1,Mean G2,SD G2,Mean G3,SD G3,p-value
0,Number of participants,38.93,16.63,76.59,9.12,74.02,10.99,0.0
1,Participation Eligibility,43.77,13.06,74.36,10.24,74.8,14.18,0.0
2,Expected Participation,52.67,17.49,83.01,11.17,83.46,9.87,0.0
3,Funded by Org,57.48,30.59,23.63,6.68,56.3,18.62,0.0
4,Funded by Government,14.25,12.24,23.1,10.23,12.4,9.44,0.0
5,tr_sk_n_f_finance,0.23,0.43,0.11,0.32,0.54,0.5,0.0
6,p_fund_other,3.25,8.47,10.19,10.79,2.1,5.45,0.0
7,Funded by Worker,12.33,12.11,22.87,10.91,15.2,10.97,0.0
8,presp2,0.32,0.47,0.74,0.44,0.64,0.48,0.0
9,p_resp_hr,0.32,0.47,0.74,0.44,0.64,0.48,0.0


<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

In [3]:
import umap
from sklearn.cluster import KMeans

def analyze_without_dummies(df, n_clusters):
    """
    Analysis excluding dummy variables
    Args:
        df: Original DataFrame
        n_clusters: Number of clusters to analyze
    """
    # Prepare data
    df_numeric = df.apply(pd.to_numeric, errors='coerce')
    df_numeric = df_numeric.dropna(axis=1, how='all')
    df_no_dummies = df_numeric.loc[:, ~df_numeric.apply(es_dummy)]
    df_no_year = df_no_dummies.loc[:, ~df_no_dummies.columns.str.contains('year', case=False)]
    
    # Perform clustering
    reducer = umap.UMAP(random_state=42)
    embedding = reducer.fit_transform(df_no_year.dropna())
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    
    # Prepare final dataset
    df_analysis = df_no_year.dropna().copy()
    df_analysis['KMeans_Cluster'] = kmeans.fit_predict(embedding)
    
    # Analyze and create visualizations
    top_vars = analyze_clusters(df_analysis, n_clusters)
    create_density_plots(df_analysis, top_vars['Variable'].tolist(),
                        n_clusters=n_clusters,
                        analysis_type="no_dummies")
    
    return df_analysis, top_vars


# Execute analysis without dummies for 3 clusters
df = pd.read_stata("../Data/V1_qualflags_analysis2_ML.dta")
df_analysis_3, top_vars_3 = analyze_without_dummies(df, 3)


LaTeX Table for 3-Cluster Analysis:
\begin{tabular}{lcccccc}
Feature & Mean G1 & SD G1 & Mean G2 & SD G2 & Mean G3 & SD G3 & p-value \\
\hline
Funded by Government & 21.61 & 14.97 & 0.96 & 3.66 & 21.70 & 11.87 & 0.0000 \\
p_fund_union & 15.99 & 13.20 & 0.42 & 2.97 & 19.22 & 8.59 & 0.0000 \\
Funded by Worker & 16.82 & 15.87 & 1.18 & 4.72 & 20.96 & 12.80 & 0.0000 \\
Funded by Org & 41.41 & 20.64 & 97.23 & 7.01 & 28.92 & 13.86 & 0.0000 \\
Expected Participation & 58.71 & 18.00 & 54.93 & 25.39 & 84.07 & 9.81 & 0.0000 \\
Number of participants & 48.99 & 18.37 & 41.43 & 23.72 & 76.22 & 11.31 & 0.0000 \\
Participation Eligibility & 54.23 & 17.49 & 54.18 & 23.38 & 74.04 & 12.65 & 0.0000 \\
p_fund_other & 4.16 & 8.53 & 0.21 & 1.82 & 9.21 & 10.32 & 0.0000 \\
p_participated_coarse & 1.96 & 0.77 & 1.76 & 0.80 & 2.30 & 0.77 & 0.0000 \\
Total Nr of KPIs & 3.51 & 1.82 & 4.00 & 1.85 & 3.33 & 1.42 & 0.0000 \\
Program Length (Years) & 1.67 & 1.12 & 1.91 & 1.18 & 1.49 & 1.03 & 0.0001 \\
f_size_n & 4.39 

Unnamed: 0,Feature,Mean G1,SD G1,Mean G2,SD G2,Mean G3,SD G3,p-value
0,Funded by Government,21.61,14.97,0.96,3.66,21.7,11.87,0.0
1,p_fund_union,15.99,13.2,0.42,2.97,19.22,8.59,0.0
2,Funded by Worker,16.82,15.87,1.18,4.72,20.96,12.8,0.0
3,Funded by Org,41.41,20.64,97.23,7.01,28.92,13.86,0.0
4,Expected Participation,58.71,18.0,54.93,25.39,84.07,9.81,0.0
5,Number of participants,48.99,18.37,41.43,23.72,76.22,11.31,0.0
6,Participation Eligibility,54.23,17.49,54.18,23.38,74.04,12.65,0.0
7,p_fund_other,4.16,8.53,0.21,1.82,9.21,10.32,0.0
8,p_participated_coarse,1.96,0.77,1.76,0.8,2.3,0.77,0.0
9,Total Nr of KPIs,3.51,1.82,4.0,1.85,3.33,1.42,0.0


<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

## 3. Two-Cluster Analysis

- T-test analysis for all variables
- Statistical tables generation
- Density plots for significant differences
- Analysis without dummy variables

In [4]:
# Execute analysis for 2 clusters
df_2clusters = pd.read_csv("../Output/Results/clusters_2_construction.csv")
top_vars_2clusters = analyze_clusters(df_2clusters, 2)
create_density_plots(df_2clusters, top_vars_2clusters['Variable'].tolist())

# Execute analysis without dummies for 2 clusters
df = pd.read_stata("../Data/V1_qualflags_analysis2_ML.dta")
df_analysis_2, top_vars_2 = analyze_without_dummies(df, 2)


LaTeX Table for 2-Cluster Analysis:
\begin{tabular}{lcccc}
Feature & Mean G1 & SD G1 & Mean G2 & SD G2 & p-value \\
\hline
Number of participants & 45.95 & 19.66 & 77.22 & 8.83 & 0.0000 \\
tr_part & 45.95 & 19.66 & 77.22 & 8.83 & 0.0000 \\
tr_eligibility & 50.96 & 17.83 & 74.94 & 12.05 & 0.0000 \\
Participation Eligibility & 50.96 & 17.83 & 74.94 & 12.05 & 0.0000 \\
Expected Participation & 59.84 & 20.18 & 83.57 & 10.78 & 0.0000 \\
tr_part_exp & 59.84 & 20.18 & 83.57 & 10.78 & 0.0000 \\
Funded by Org & 61.30 & 28.42 & 30.14 & 12.66 & 0.0000 \\
p_fund_union & 10.95 & 12.98 & 20.00 & 8.67 & 0.0000 \\
Funded by Worker & 12.12 & 12.28 & 21.31 & 10.53 & 0.0000 \\
p_resp_hr & 0.39 & 0.49 & 0.72 & 0.45 & 0.0000 \\
presp2 & 0.39 & 0.49 & 0.72 & 0.45 & 0.0000 \\
fsub & 0.66 & 0.48 & 0.93 & 0.26 & 0.0000 \\
Funded by Government & 13.06 & 11.87 & 20.47 & 10.53 & 0.0000 \\
p_fund_other & 2.56 & 7.49 & 8.08 & 10.22 & 0.0000 \\
p_ongoing & 0.55 & 0.50 & 0.27 & 0.45 & 0.0001 \\
p_target_top & 0.53 &

Unnamed: 0,Feature,Mean G1,SD G1,Mean G2,SD G2,p-value
0,Number of participants,45.95,19.66,77.22,8.83,0.0
1,tr_part,45.95,19.66,77.22,8.83,0.0
2,tr_eligibility,50.96,17.83,74.94,12.05,0.0
3,Participation Eligibility,50.96,17.83,74.94,12.05,0.0
4,Expected Participation,59.84,20.18,83.57,10.78,0.0
5,tr_part_exp,59.84,20.18,83.57,10.78,0.0
6,Funded by Org,61.3,28.42,30.14,12.66,0.0
7,p_fund_union,10.95,12.98,20.0,8.67,0.0
8,Funded by Worker,12.12,12.28,21.31,10.53,0.0
9,p_resp_hr,0.39,0.49,0.72,0.45,0.0



LaTeX Table for 2-Cluster Analysis:
\begin{tabular}{lcccc}
Feature & Mean G1 & SD G1 & Mean G2 & SD G2 & p-value \\
\hline
Funded by Org & 33.43 & 17.20 & 91.39 & 18.16 & 0.0000 \\
p_fund_union & 18.77 & 10.48 & 1.00 & 5.21 & 0.0000 \\
Funded by Worker & 19.82 & 13.69 & 2.17 & 8.67 & 0.0000 \\
Number of participants & 66.33 & 16.79 & 38.68 & 23.31 & 0.0000 \\
tr_part & 66.33 & 16.79 & 38.68 & 23.31 & 0.0000 \\
Funded by Government & 20.95 & 12.54 & 4.86 & 12.72 & 0.0000 \\
tr_part_exp & 74.68 & 15.87 & 52.21 & 25.25 & 0.0000 \\
Expected Participation & 74.68 & 15.87 & 52.21 & 25.25 & 0.0000 \\
p_fund_other & 7.03 & 9.84 & 0.58 & 3.63 & 0.0000 \\
Participation Eligibility & 66.99 & 15.85 & 51.23 & 23.34 & 0.0000 \\
tr_eligibility & 66.99 & 15.85 & 51.23 & 23.34 & 0.0000 \\
p_participated_coarse & 2.17 & 0.77 & 1.75 & 0.80 & 0.0000 \\
Total Nr of KPIs & 3.30 & 1.51 & 4.10 & 1.93 & 0.0000 \\
Program Effectiveness & 2.57 & 0.51 & 2.42 & 0.51 & 0.0000 \\
Program Length (Years) & 1.58 & 1.0

Unnamed: 0,Feature,Mean G1,SD G1,Mean G2,SD G2,p-value
0,Funded by Org,33.43,17.2,91.39,18.16,0.0
1,p_fund_union,18.77,10.48,1.0,5.21,0.0
2,Funded by Worker,19.82,13.69,2.17,8.67,0.0
3,Number of participants,66.33,16.79,38.68,23.31,0.0
4,tr_part,66.33,16.79,38.68,23.31,0.0
5,Funded by Government,20.95,12.54,4.86,12.72,0.0
6,tr_part_exp,74.68,15.87,52.21,25.25,0.0
7,Expected Participation,74.68,15.87,52.21,25.25,0.0
8,p_fund_other,7.03,9.84,0.58,3.63,0.0
9,Participation Eligibility,66.99,15.85,51.23,23.34,0.0


<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>