In [6]:
import pandas as pd
from scipy.stats import ttest_ind, chi2_contingency, pearsonr, f_oneway, kruskal
import numpy as np

# Load dataset
file_path = "THALES.xlsx"  # Replace with your file path
data = pd.ExcelFile(file_path)
df = data.parse(data.sheet_names[0])

# Ensure numeric columns are numeric
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
df['Years in the function'] = pd.to_numeric(df['Years in the function'], errors='coerce')
df['Years in the company'] = pd.to_numeric(df['Years in the company'], errors='coerce')

# I. Gender and Tenure
# Filter data by gender
male_tenure = df[df['Sex'] == 'M']['Years in the function'].dropna()
female_tenure = df[df['Sex'] == 'F']['Years in the function'].dropna()

# Perform a two-sample t-test
t_stat_gender, p_value_gender = ttest_ind(male_tenure, female_tenure, equal_var=False)

# II. Educational Fields and Governance Roles
# Create contingency table
education_governance_table = pd.crosstab(df['Education Field'], df['Corporate Governance Body'])

# Perform chi-square test of independence
chi2_stat_edu_gov, p_value_edu_gov, dof_edu_gov, _ = chi2_contingency(education_governance_table)

# III. Age and Tenure
# Drop rows with missing values in Age or Years in the function
age_tenure_data = df[['Age', 'Years in the function']].dropna()


# Loop through pairs of categorical variables for chi-square testing
for i, col1 in enumerate(categorical_columns):
    for col2 in categorical_columns[i + 1:]:
        contingency_table = pd.crosstab(df[col1], df[col2])
        chi2_stat, p_value, _, _ = chi2_contingency(contingency_table)
        chi_square_results[f"{col1} vs {col2}"] = {
            "Chi-square Statistic": chi2_stat,
            "P-value": p_value,
            "Conclusion": "Reject H0" if p_value <= 0.05 else "Fail to Reject H0"
        }

# Print Results
print("Hypothesis Testing Results:")

# Gender and Tenure
print("\nI. Gender and Tenure (T-test)")
print(f"T-statistic: {t_stat_gender}")
print(f"P-value: {p_value_gender}")
print(f"Conclusion: {'Reject H0' if p_value_gender <= 0.05 else 'Fail to Reject H0'}")

# Educational Fields and Governance Roles
print("\nII. Educational Fields and Governance Roles (Chi-square Test)")
print(f"Chi-square Statistic: {chi2_stat_edu_gov}")
print(f"P-value: {p_value_edu_gov}")
print(f"Degrees of Freedom: {dof_edu_gov}")
print(f"Conclusion: {'Reject H0' if p_value_edu_gov <= 0.05 else 'Fail to Reject H0'}")

# Age and Tenure
print("\nIII. Age and Tenure (Correlation)")
print(f"Correlation Coefficient: {pearson_corr_age_tenure}")
print(f"P-value: {p_value_corr_age_tenure}")
print(f"Conclusion: {'Reject H0' if p_value_corr_age_tenure <= 0.05 else 'Fail to Reject H0'}")

Hypothesis Testing Results:

I. Gender and Tenure (T-test)
T-statistic: 0.660430518462893
P-value: 0.5144709140883743
Conclusion: Fail to Reject H0

II. Educational Fields and Governance Roles (Chi-square Test)
Chi-square Statistic: 11.913015873015873
P-value: 0.4526905448632246
Degrees of Freedom: 12
Conclusion: Fail to Reject H0

III. Age and Tenure (Correlation)
Correlation Coefficient: 0.5493135638916923
P-value: 0.08006780598493816
Conclusion: Fail to Reject H0


  warn("Workbook contains no default style, apply openpyxl's default")


In [7]:

# Load dataset
file_path = "GR.xlsx"  # Replace with your file path
data = pd.ExcelFile(file_path)
df = data.parse(data.sheet_names[0])

# Ensure numeric columns are numeric
df['Age'] = pd.to_numeric(df['Age'], errors='coerce')
df['Years in the function'] = pd.to_numeric(df['Years in the function'], errors='coerce')
df['Years in the company'] = pd.to_numeric(df['Years in the company'], errors='coerce')

# I. Gender and Tenure
# Filter data by gender
male_tenure = df[df['Sex'] == 'M']['Years in the function'].dropna()
female_tenure = df[df['Sex'] == 'F']['Years in the function'].dropna()

# Perform a two-sample t-test
t_stat_gender, p_value_gender = ttest_ind(male_tenure, female_tenure, equal_var=False)

# II. Educational Fields and Governance Roles
# Create contingency table
education_governance_table = pd.crosstab(df['Education Field'], df['Corporate Governance Body'])

# Perform chi-square test of independence
chi2_stat_edu_gov, p_value_edu_gov, dof_edu_gov, _ = chi2_contingency(education_governance_table)

# III. Age and Tenure
# Drop rows with missing values in Age or Years in the function
age_tenure_data = df[['Age', 'Years in the function']].dropna()


# Print Results
print("Hypothesis Testing Results:")

# Gender and Tenure
print("\nI. Gender and Tenure (T-test)")
print(f"T-statistic: {t_stat_gender}")
print(f"P-value: {p_value_gender}")
print(f"Conclusion: {'Reject H0' if p_value_gender <= 0.05 else 'Fail to Reject H0'}")

# Educational Fields and Governance Roles
print("\nII. Educational Fields and Governance Roles (Chi-square Test)")
print(f"Chi-square Statistic: {chi2_stat_edu_gov}")
print(f"P-value: {p_value_edu_gov}")
print(f"Degrees of Freedom: {dof_edu_gov}")
print(f"Conclusion: {'Reject H0' if p_value_edu_gov <= 0.05 else 'Fail to Reject H0'}")

# Age and Tenure
print("\nIII. Age and Tenure (Correlation)")
print(f"Correlation Coefficient: {pearson_corr_age_tenure}")
print(f"P-value: {p_value_corr_age_tenure}")
print(f"Conclusion: {'Reject H0' if p_value_corr_age_tenure <= 0.05 else 'Fail to Reject H0'}")

  warn("Workbook contains no default style, apply openpyxl's default")


Hypothesis Testing Results:

I. Gender and Tenure (T-test)
T-statistic: -0.16866057481438188
P-value: 0.8673717493977257
Conclusion: Fail to Reject H0

II. Educational Fields and Governance Roles (Chi-square Test)
Chi-square Statistic: 13.09059829059829
P-value: 0.595300210376975
Degrees of Freedom: 15
Conclusion: Fail to Reject H0

III. Age and Tenure (Correlation)
Correlation Coefficient: 0.5493135638916923
P-value: 0.08006780598493816
Conclusion: Fail to Reject H0
