<h1 style='font-size: 45px; color: crimson; font-family: Colonna MT; font-weight: 600; text-align: center'>Kruskal-Wallis Test | Comparing Group Median Differences </h1>

---

<h2 style='color: gray; font-weight: 600; font-size: 20px; text-align: left; font-style: italic;'>1.0. Perform Kruskal-Wallis Test on All Variables Across Group(s)</h2>

In [48]:
from scipy.stats import kruskal
import pandas as pd
import numpy as np

def kruskall_wallis(df, group_columns: "str | list[str]", numerical_columns: "list[str] | None" = None):
    """Run a Kruskal-Wallis H-test for every (grouping column, numeric column) pair.

    For each grouping column the rows of ``df`` are split by that column's
    values, and for each numeric column the per-group samples (with missing
    values dropped) are passed to ``scipy.stats.kruskal``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing both the grouping columns and the numeric columns.
    group_columns : str or list of str
        One column name, or several, to group by. A single name is treated
        as a one-element list.
    numerical_columns : list of str, optional
        Columns to test. Any iterable of names (e.g. a pandas ``Index``) or a
        single name is accepted. When omitted, every numeric column of ``df``
        that is not itself a grouping column is tested.

    Returns
    -------
    pandas.DataFrame
        One row per (group, variable) pair with the columns ``Group``,
        ``Variables``, ``Kruskal-Wallis Statistic``, ``P-value`` and
        ``Significant (α<0.05)`` ('✔' when the p-value is below 0.05,
        otherwise '✖'). The columns are present even when there are no rows.
        Pairs for which ``kruskal`` raises ``ValueError`` get NaN for the
        statistic and p-value and are therefore marked '✖'.
    """
    # A bare string must be wrapped: iterating it would walk its characters
    # and each character would then be used as a column name.
    if isinstance(group_columns, str):
        group_columns = [group_columns]
    else:
        group_columns = list(group_columns)

    if numerical_columns is None:
        # Default to all numeric columns, excluding the grouping columns.
        numerical_columns = [
            col
            for col in df.select_dtypes(include=[np.number]).columns
            if col not in group_columns
        ]
    elif isinstance(numerical_columns, str):
        numerical_columns = [numerical_columns]
    else:
        numerical_columns = list(numerical_columns)

    result_columns = [
        'Group',
        'Variables',
        'Kruskal-Wallis Statistic',
        'P-value',
        'Significant (α<0.05)',
    ]

    results = []
    for group_column in group_columns:
        # The split depends only on the grouping column, so build it once
        # and reuse it for every numeric column.
        grouped = df.groupby(group_column)
        for column in numerical_columns:
            # One array of non-missing values per group level.
            samples = [group[column].dropna().values for _, group in grouped]
            try:
                statistic, p_value = kruskal(*samples)
            except ValueError:
                # kruskal rejected the input for this pair; record the pair
                # as not computable rather than aborting the whole sweep.
                statistic, p_value = np.nan, np.nan
            # NaN < 0.05 is False, so non-computable pairs are marked '✖'.
            interpretation = '✔' if p_value < 0.05 else '✖'
            results.append({
                'Group': group_column,
                'Variables': column,
                'Kruskal-Wallis Statistic': statistic,
                'P-value': p_value,
                'Significant (α<0.05)': interpretation,
            })
    return pd.DataFrame(results, columns=result_columns)


<h2 style='color: gray; font-weight: 600; font-size: 20px; text-align: left; font-style: italic;'>2.0. Dataset | Suppose You Have a DataFrame Like This:</h2>

In [67]:
# Toy dataset: three grouping factors, each with three levels repeated three
# times in a row, followed by six numeric variables (nine observations each).
df = pd.DataFrame({
    'Group 1': [label for label in ('Ashura', 'Barack', 'Colins') for _ in range(3)],
    'Group 2': [label for label in ('Orenge', 'Banana', 'Carott') for _ in range(3)],
    'Group 3': [label for label in ('Alpha', 'Bravo', 'Eagle') for _ in range(3)],
    'Variable 1': [
        12, 14, 13,
        15, 16, 14,
        10, 9, 11,
    ],
    'Variable 2': [
        7, 6, 7,
        8, 9, 10,
        5, 6, 5,
    ],
    'Variable 3': [
        20, 21, 19,
        23, 22, 21,
        18, 17, 19,
    ],
    'Variable 4': [
        124, 145, 137,
        150, 163, 148,
        180, 90, 111,
    ],
    'Variable 5': [
        70, 66, 75,
        80, 92, 100,
        56, 64, 56,
    ],
    'Variable 6': [
        2, 2, 1,
        2, 2, 2,
        1, 1, 1,
    ],
})

# Render the table in the notebook output.
display(df)

Unnamed: 0,Group 1,Group 2,Group 3,Variable 1,Variable 2,Variable 3,Variable 4,Variable 5,Variable 6
0,Ashura,Orenge,Alpha,12,7,20,124,70,2
1,Ashura,Orenge,Alpha,14,6,21,145,66,2
2,Ashura,Orenge,Alpha,13,7,19,137,75,1
3,Barack,Banana,Bravo,15,8,23,150,80,2
4,Barack,Banana,Bravo,16,9,22,163,92,2
5,Barack,Banana,Bravo,14,10,21,148,100,2
6,Colins,Carott,Eagle,10,5,18,180,56,1
7,Colins,Carott,Eagle,9,6,17,90,64,1
8,Colins,Carott,Eagle,11,5,19,111,56,1


<h2 style='color: gray; font-weight: 600; font-size: 20px; text-align: left; font-style: italic;'>3.0. Implementation | Perform Kruskal-Wallis Test on All Variables Across Group(s)</h2>

In [68]:
# Show floats with four decimal places in rendered tables.
pd.set_option('display.float_format', lambda value: '%.4f' % value)

# Categorical factors to group by.
group_columns = ['Group 1', 'Group 2', 'Group 3']

# Every numeric column of the dataset is treated as a variable to compare.
numerical_columns = df.select_dtypes(include=[np.number]).columns

# One Kruskal-Wallis test per (factor, variable) pair.
results = kruskall_wallis(df, group_columns, numerical_columns)
display(results)

Unnamed: 0,Group,Variables,Kruskal-Wallis Statistic,P-value,Significant (α<0.05)
0,Group 1,Variable 1,6.8796,0.0321,✔
1,Group 1,Variable 2,6.9972,0.0302,✔
2,Group 1,Variable 3,6.5311,0.0382,✔
3,Group 1,Variable 4,2.4,0.3012,✖
4,Group 1,Variable 5,7.2605,0.0265,✔
5,Group 1,Variable 6,5.6,0.0608,✖
6,Group 2,Variable 1,6.8796,0.0321,✔
7,Group 2,Variable 2,6.9972,0.0302,✔
8,Group 2,Variable 3,6.5311,0.0382,✔
9,Group 2,Variable 4,2.4,0.3012,✖



<h2 style='color: gray; font-weight: 600; font-size: 20px; text-align: left; font-style: italic;'>4.0. Dataset From External Directory | Perform Kruskal-Wallis Test</h2>

In [70]:
# Location of the CSV dataset; the original note says a URL can be used here too.
filepath = "./Datasets/Eggplant Fusarium Fresistance Data.csv"

# Load the dataset and render it in the notebook output.
df = pd.read_csv(filepath)
display(df)

Unnamed: 0,Variety,Resistance Level,Replication ID,Infection Severity (%),Wilt index,Plant height (cm),Days to wilt symptoms,Survival rate (%),Disease incidence (%)
0,EP-R1,Resistant,1,22.5000,0.7000,88.9000,21,88.8000,23.4000
1,EP-R1,Resistant,2,27.9000,1.2000,82.2000,19,87.7000,21.7000
2,EP-R1,Resistant,3,21.2000,0.0000,74.7000,17,84.9000,27.2000
3,EP-R1,Resistant,4,15.5000,0.1000,93.8000,18,90.3000,15.0000
4,EP-R1,Resistant,5,17.3000,0.9000,78.1000,19,87.0000,23.0000
...,...,...,...,...,...,...,...,...,...
795,EP-S3,Susceptible,96,75.2000,3.6000,68.2000,7,6.4000,85.5000
796,EP-S3,Susceptible,97,74.8000,4.9000,59.5000,4,27.2000,82.0000
797,EP-S3,Susceptible,98,58.1000,3.6000,78.8000,7,30.8000,75.4000
798,EP-S3,Susceptible,99,54.1000,4.1000,63.7000,7,24.1000,81.8000


In [71]:
# Kruskal-Wallis test of every numeric variable across each grouping factor.

# Show floats with four decimal places in rendered tables.
pd.set_option('display.float_format', lambda value: '%.4f' % value)

# Categorical factors to group by.
group_columns = ['Variety', 'Resistance Level']

# Every numeric column of the loaded dataset is treated as a variable to compare.
numerical_columns = df.select_dtypes(include=[np.number]).columns

# One Kruskal-Wallis test per (factor, variable) pair.
results = kruskall_wallis(df, group_columns, numerical_columns)
display(results)

Unnamed: 0,Group,Variables,Kruskal-Wallis Statistic,P-value,Significant (α<0.05)
0,Variety,Replication ID,0.0,1.0,✖
1,Variety,Infection Severity (%),682.4852,0.0,✔
2,Variety,Wilt index,669.3773,0.0,✔
3,Variety,Plant height (cm),597.986,0.0,✔
4,Variety,Days to wilt symptoms,701.1451,0.0,✔
5,Variety,Survival rate (%),698.1517,0.0,✔
6,Variety,Disease incidence (%),692.2784,0.0,✔
7,Resistance Level,Replication ID,0.0,1.0,✖
8,Resistance Level,Infection Severity (%),682.1712,0.0,✔
9,Resistance Level,Wilt index,668.7051,0.0,✔
