In [None]:
import pandas as pd
import numpy as np
from scipy.stats import kruskal
from itertools import combinations
from statsmodels.stats.multitest import multipletests

# Kruskal-Wallis omnibus test across several columns of an Excel sheet.
# When the omnibus test is significant, every pair of columns is compared
# with a two-group Kruskal-Wallis test and the pairwise p-values are
# Bonferroni-corrected. The resulting table is written to an Excel file in
# both the significant and the non-significant case.

# Input workbook and the columns (one column per group) to compare
file_path = '/content/size outliers removed.xlsx'  # Replace with your file path
datasets = ['s1', 's2', 's3', 'w1','w2','w3']

# Single significance level, used both to gate the pairwise step and as the
# family-wise alpha handed to the Bonferroni correction.
alpha = 0.05

# Destination workbook for the results table
output_path = 'kruskal_wallis_bonferroni_results.xlsx'

# Read data from the Excel file
data = pd.read_excel(file_path)

# Fail early, naming every absent column, instead of surfacing a bare
# KeyError for only the first one from inside the analysis below.
missing_columns = [ds for ds in datasets if ds not in data.columns]
if missing_columns:
    raise KeyError(f"Columns not found in {file_path!r}: {missing_columns}")

# Missing values are dropped per column (not row-wise), so a NaN in one group
# never discards observations from another. Extracted once and reused by both
# the omnibus and the pairwise tests.
samples = {ds: data[ds].dropna().values for ds in datasets}
all_data = [samples[ds] for ds in datasets]

# Conduct the omnibus Kruskal-Wallis test over all groups at once
kruskal_stat, kruskal_p = kruskal(*all_data)

# Print Kruskal-Wallis test results
print(f"Kruskal-Wallis Test Statistic: {kruskal_stat}")
print(f"Kruskal-Wallis p-value: {kruskal_p}")

# One-row frame holding the omnibus result; it is the whole output when the
# test is not significant and the first row of the output otherwise.
omnibus_df = pd.DataFrame({'Kruskal-Wallis Statistic': [kruskal_stat], 'Kruskal-Wallis p-value': [kruskal_p]})

# One dict per pairwise comparison; stays empty when no follow-up is run
results = []

# A NaN p-value compares False here, so it takes the "not significant" path.
significant = kruskal_p < alpha

if significant:
    # Pairwise follow-up: Kruskal-Wallis restricted to the two groups
    for group1, group2 in combinations(datasets, 2):
        stat, p_value = kruskal(samples[group1], samples[group2])
        results.append({'Group1': group1, 'Group2': group2, 'Statistic': stat, 'p-value': p_value})

    # Extract p-values for Bonferroni correction
    p_values = [result['p-value'] for result in results]

    # Apply Bonferroni correction; element [1] of the returned tuple holds
    # the corrected p-values, in the same order as p_values.
    corrected_results = multipletests(p_values, alpha=alpha, method='bonferroni')

    # Add corrected p-values to results
    for result, corrected_p in zip(results, corrected_results[1]):
        result['Corrected p-value'] = corrected_p

    # Stack the omnibus row on top of the pairwise rows. The two parts share
    # no columns, so the omnibus columns are NaN on pairwise rows and the
    # pairwise columns are NaN on the omnibus row.
    results_df = pd.concat([omnibus_df, pd.DataFrame(results)], ignore_index=True)
else:
    print("The Kruskal-Wallis test is not significant; no pairwise comparisons are performed.")

    # Only the omnibus result is reported
    results_df = omnibus_df

# Output results to Excel (same file whichever branch ran)
results_df.to_excel(output_path, index=False)

# Print the full table only when pairwise comparisons were performed
if significant:
    print(results_df)




Kruskal-Wallis Test Statistic: 218.616685991301
Kruskal-Wallis p-value: 2.939540830073813e-45
    Kruskal-Wallis Statistic  Kruskal-Wallis p-value Group1 Group2  \
0                 218.616686            2.939541e-45    NaN    NaN   
1                        NaN                     NaN     s1     s2   
2                        NaN                     NaN     s1     s3   
3                        NaN                     NaN     s1     w1   
4                        NaN                     NaN     s1     w2   
5                        NaN                     NaN     s1     w3   
6                        NaN                     NaN     s2     s3   
7                        NaN                     NaN     s2     w1   
8                        NaN                     NaN     s2     w2   
9                        NaN                     NaN     s2     w3   
10                       NaN                     NaN     s3     w1   
11                       NaN                     NaN     s3     w2