In [None]:
import pandas as pd

# Workbook locations, relative to the notebook's working directory.
file_path_thales = 'THALES.xlsx'
file_path_gr = 'GR.xlsx'

# Open the THALES workbook and read its 'default_1' sheet into a DataFrame.
# The ExcelFile handle stays bound to `data` so other sheets can still be parsed from it.
data = pd.ExcelFile(file_path_thales)
df = data.parse('default_1')  # THALES.xlsx

# Same for the GR workbook.
data_2 = pd.ExcelFile(file_path_gr)
df_2 = data_2.parse('default_1')  # GR.xlsx

# Preview the first rows of both frames to understand their structure.
# Jupyter only renders a cell's *last* expression automatically, so a bare
# `df.head()` in the middle of the cell would be silently discarded; the THALES
# preview is therefore shown explicitly with display().
display(df.head())
df_2.head()

In [None]:
from scipy.stats import ttest_ind, chi2_contingency, spearmanr, pearsonr
import numpy as np

# ---------------------------------------------------------------------------
# I. Gender and Tenure
# Compare 'Years in the function' between rows with Sex == 'M' and Sex == 'F'
# using a two-sample t-test that does not assume equal variances
# (equal_var=False). Missing tenure values are removed first.
# ---------------------------------------------------------------------------
male_tenure = df.loc[df['Sex'] == 'M', 'Years in the function'].dropna()
female_tenure = df.loc[df['Sex'] == 'F', 'Years in the function'].dropna()
t_stat, p_value_gender = ttest_ind(male_tenure, female_tenure, equal_var=False)

# ---------------------------------------------------------------------------
# II. Educational Fields and Governance Roles
# Cross-tabulate 'Education Field' against 'Corporate Governance Body' and run
# a chi-square test of independence on the resulting contingency table.
# ---------------------------------------------------------------------------
education_governance_table = pd.crosstab(
    df['Education Field'],
    df['Corporate Governance Body'],
)
chi2_stat, p_value_edu_gov, dof, expected = chi2_contingency(education_governance_table)

# ---------------------------------------------------------------------------
# III. Age and Tenure
# Pearson correlation between 'Age' and 'Years in the function', restricted to
# rows where both values are present.
# ---------------------------------------------------------------------------
age_tenure_data = df.loc[:, ['Age', 'Years in the function']].dropna(how='any')
pearson_corr, p_value_correlation = pearsonr(
    age_tenure_data['Age'],
    age_tenure_data['Years in the function'],
)

# ---------------------------------------------------------------------------
# Collect the three test outcomes. H0 is rejected when the p-value is <= 0.05.
# ---------------------------------------------------------------------------
results = {}
results["Gender and Tenure (T-test)"] = {
    "T-statistic": t_stat,
    "P-value": p_value_gender,
    "Conclusion": ("Reject H0" if p_value_gender <= 0.05 else "Fail to Reject H0"),
}
results["Educational Fields and Governance Roles (Chi-square)"] = {
    "Chi-square Statistic": chi2_stat,
    "P-value": p_value_edu_gov,
    "Degrees of Freedom": dof,
    "Conclusion": ("Reject H0" if p_value_edu_gov <= 0.05 else "Fail to Reject H0"),
}
results["Age and Tenure (Correlation)"] = {
    "Correlation Coefficient": pearson_corr,
    "P-value": p_value_correlation,
    "Conclusion": ("Reject H0" if p_value_correlation <= 0.05 else "Fail to Reject H0"),
}

results


In [None]:
# Repeat the three hypothesis tests on the GR dataset (df_2).
# Every statistic reported in `results_2` is computed in this cell so that it
# runs on a fresh kernel (Restart & Run All) without relying on leftover state.
# NOTE(review): assumes GR.xlsx carries the same 'Sex' (coded 'M'/'F'),
# 'Education Field' and 'Corporate Governance Body' columns as THALES.xlsx — confirm.

# Ensure that 'Age' and 'Years in the function' contain numeric values;
# entries that cannot be parsed become NaN (errors='coerce') and are dropped below.
df_2['Age'] = pd.to_numeric(df_2['Age'], errors='coerce')
df_2['Years in the function'] = pd.to_numeric(df_2['Years in the function'], errors='coerce')

# I. Gender and Tenure
# Two-sample t-test without the equal-variance assumption (equal_var=False).
male_tenure_2 = df_2[df_2['Sex'] == 'M']['Years in the function'].dropna()
female_tenure_2 = df_2[df_2['Sex'] == 'F']['Years in the function'].dropna()
t_stat_2, p_value_gender_2 = ttest_ind(male_tenure_2, female_tenure_2, equal_var=False)

# II. Educational Fields and Governance Roles
# Chi-square test of independence on the contingency table.
education_governance_table_2 = pd.crosstab(df_2['Education Field'], df_2['Corporate Governance Body'])
chi2_stat_2, p_value_edu_gov_2, dof_2, expected_2 = chi2_contingency(education_governance_table_2)

# III. Age and Tenure
# Drop rows with missing values in 'Age' or 'Years in the function'
age_tenure_data_2 = df_2[['Age', 'Years in the function']].dropna()

# Pearson correlation on the cleaned numeric columns
pearson_corr_2, p_value_correlation_2 = pearsonr(age_tenure_data_2['Age'], age_tenure_data_2['Years in the function'])

# Summarize results; H0 is rejected when the p-value is <= 0.05
results_2 = {
    "Gender and Tenure (T-test)": {
        "T-statistic": t_stat_2,
        "P-value": p_value_gender_2,
        "Conclusion": "Reject H0" if p_value_gender_2 <= 0.05 else "Fail to Reject H0"
    },
    "Educational Fields and Governance Roles (Chi-square)": {
        "Chi-square Statistic": chi2_stat_2,
        "P-value": p_value_edu_gov_2,
        "Degrees of Freedom": dof_2,
        "Conclusion": "Reject H0" if p_value_edu_gov_2 <= 0.05 else "Fail to Reject H0"
    },
    "Age and Tenure (Correlation)": {
        "Correlation Coefficient": pearson_corr_2,
        "P-value": p_value_correlation_2,
        "Conclusion": "Reject H0" if p_value_correlation_2 <= 0.05 else "Fail to Reject H0"
    }
}

results_2
