<h1 style='font-size: 35px; color: crimson; font-family: Colonna MT; font-weight: 600; text-align: center'>Tukey's Honest Significant Difference (THSD)</h1>

---

In [18]:
# Import Required Libraries
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import pandas as pd

# Perform Tukey's HSD for one variable at a time
# variable = 'Infection Severity (%)'
# group_column = 'Variety'
# turkey = pairwise_tukeyhsd(endog=df[variable], groups=df[group_column], alpha=0.05)
# print(turkey)

# Automate the test over multiple variables and return the results in a formatted DataFrame
def compute_turkey_results(df, Metrics, group_column, alpha=0.05):
    """Run Tukey's HSD for several metrics and collect the pairwise results.

    For each column name in ``Metrics``, ``pairwise_tukeyhsd`` is run on
    ``df[metric]`` grouped by ``df[group_column]``. Every data row of the
    resulting summary table becomes one row of the returned DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Data holding the metric columns and the grouping column.
    Metrics : iterable of str
        Column names of the response variables to test.
    group_column : str
        Column name whose values define the groups being compared. It is
        also stored verbatim in the ``Category`` column of the result.
    alpha : float, default 0.05
        Significance level forwarded to ``pairwise_tukeyhsd``.

    Returns
    -------
    pandas.DataFrame
        One row per (metric, group pair) with the columns ``Category``,
        ``Metric``, ``Group1``, ``Group2``, ``Mean Difference``,
        ``P-Value``, ``Lower CI``, ``Upper CI`` and ``Reject Null``.
        When ``Metrics`` is empty, an empty DataFrame with these columns
        is returned instead of raising.

    Raises
    ------
    KeyError
        If a metric or ``group_column`` is not a column of ``df``.
    """
    columns = [
        'Category', 'Metric', 'Group1', 'Group2', 'Mean Difference',
        'P-Value', 'Lower CI', 'Upper CI', 'Reject Null',
    ]
    results_data = []

    for metric in Metrics:
        turkey_results = pairwise_tukeyhsd(
            endog=df[metric], groups=df[group_column], alpha=alpha
        )
        # Read the table contents once per metric instead of once per row.
        # Index 0 is the header row (group1, group2, meandiff, ...), so the
        # pairwise comparisons start at index 1.
        table_rows = turkey_results.summary().data

        for row in table_rows[1:]:
            results_data.append({
                'Category': group_column,
                'Metric': metric,
                'Group1': row[0],
                'Group2': row[1],
                'Mean Difference': row[2],
                'P-Value': row[3],
                'Lower CI': row[4],
                'Upper CI': row[5],
                'Reject Null': row[6]
            })

    # Build the frame once, after all metrics are processed. Passing the
    # column list keeps the schema stable even when no rows were collected.
    return pd.DataFrame(results_data, columns=columns)

In [16]:
# Load the cleaned and structured dataset from a relative CSV path.
filepath = "./Datasets/Eggplant Fusarium Fertilizer Data.csv"
# `df` is a notebook-level name; the Tukey HSD cells read their columns from it.
df = pd.read_csv(filepath)
# Show the loaded frame as the cell output to check columns and values.
display(df)

Unnamed: 0,Variety,Fertilizer,Replication ID,Infection Severity (%),Wilt index,Plant height (cm),Days to wilt symptoms,Survival rate (%),Disease incidence (%)
0,EP-R1,Inorganic,1,22.500,0.700,88.900,21,88.800,23.400
1,EP-R1,Inorganic,2,27.900,1.200,82.200,19,87.700,21.700
2,EP-R1,Inorganic,3,21.200,0.000,74.700,17,84.900,27.200
3,EP-R1,Inorganic,4,15.500,0.100,93.800,18,90.300,15.000
4,EP-R1,Inorganic,5,17.300,0.900,78.100,19,87.000,23.000
...,...,...,...,...,...,...,...,...,...
795,EP-S3,Control,96,75.200,3.600,68.200,7,6.400,85.500
796,EP-S3,Control,97,74.800,4.900,59.500,4,27.200,82.000
797,EP-S3,Control,98,58.100,3.600,78.800,7,30.800,75.400
798,EP-S3,Control,99,54.100,4.100,63.700,7,24.100,81.800


In [7]:
# Perform Tukey's HSD for one variable at a time
# `variable` is the response column; `group_column` holds the group labels.
variable = 'Infection Severity (%)'
group_column = 'Variety'
# alpha=0.05 is reported as FWER=0.05 in the title of the printed table.
turkey = pairwise_tukeyhsd(endog=df[variable], groups=df[group_column], alpha=0.05)
# Printing the result object emits the pairwise comparison table as plain text.
print(turkey)

 Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj   lower    upper   reject
------------------------------------------------------
 EP-M1  EP-M2     1.27 0.9603  -2.2944   4.8344  False
 EP-M1  EP-R1  -23.478    0.0 -27.0424 -19.9136   True
 EP-M1  EP-R2  -23.354    0.0 -26.9184 -19.7896   True
 EP-M1  EP-R3  -23.273    0.0 -26.8374 -19.7086   True
 EP-M1  EP-S1   31.092    0.0  27.5276  34.6564   True
 EP-M1  EP-S2   29.821    0.0  26.2566  33.3854   True
 EP-M1  EP-S3   29.742    0.0  26.1776  33.3064   True
 EP-M2  EP-R1  -24.748    0.0 -28.3124 -21.1836   True
 EP-M2  EP-R2  -24.624    0.0 -28.1884 -21.0596   True
 EP-M2  EP-R3  -24.543    0.0 -28.1074 -20.9786   True
 EP-M2  EP-S1   29.822    0.0  26.2576  33.3864   True
 EP-M2  EP-S2   28.551    0.0  24.9866  32.1154   True
 EP-M2  EP-S3   28.472    0.0  24.9076  32.0364   True
 EP-R1  EP-R2    0.124    1.0  -3.4404   3.6884  False
 EP-R1  EP-R3    0.205    1.0  -3.3594   3.7694  False
 EP-R1  EP

In [19]:
# Main execution: run Tukey's HSD for every metric, grouped by 'Variety'.
# NOTE: inside a notebook kernel __name__ is "__main__", so this guarded body
# runs whenever the cell is executed.
if __name__ == "__main__":
    # Load data (same CSV path as the earlier loading cell; this rebinds the
    # notebook-level name `df`)
    DATA_PATH = "./Datasets/Eggplant Fusarium Fertilizer Data.csv"
    df = pd.read_csv(DATA_PATH)
    
    # Define metrics to analyze (column names of `df` used as response variables)
    ANALYSIS_METRICS = [
        'Infection Severity (%)',
        'Wilt index', 
        'Plant height (cm)', 
        'Days to wilt symptoms', 
        'Survival rate (%)', 
        'Disease incidence (%)'
    ]
    
    # Perform analysis: one result row per (metric, pair of varieties)
    tukey_results = compute_turkey_results(df, ANALYSIS_METRICS, group_column='Variety')
    
    # Configure display options: render floats with 3 decimal places.
    # set_option is global, so it also affects DataFrames displayed after this cell.
    pd.set_option("display.float_format", "{:.3f}".format)
    
    # Show first 10 results
    display(tukey_results.head(10))

Unnamed: 0,Category,Metric,Group1,Group2,Mean Difference,P-Value,Lower CI,Upper CI,Reject Null
0,Variety,Infection Severity (%),EP-M1,EP-M2,1.27,0.96,-2.294,4.834,False
1,Variety,Infection Severity (%),EP-M1,EP-R1,-23.478,0.0,-27.042,-19.914,True
2,Variety,Infection Severity (%),EP-M1,EP-R2,-23.354,0.0,-26.918,-19.79,True
3,Variety,Infection Severity (%),EP-M1,EP-R3,-23.273,0.0,-26.837,-19.709,True
4,Variety,Infection Severity (%),EP-M1,EP-S1,31.092,0.0,27.528,34.656,True
5,Variety,Infection Severity (%),EP-M1,EP-S2,29.821,0.0,26.257,33.385,True
6,Variety,Infection Severity (%),EP-M1,EP-S3,29.742,0.0,26.178,33.306,True
7,Variety,Infection Severity (%),EP-M2,EP-R1,-24.748,0.0,-28.312,-21.184,True
8,Variety,Infection Severity (%),EP-M2,EP-R2,-24.624,0.0,-28.188,-21.06,True
9,Variety,Infection Severity (%),EP-M2,EP-R3,-24.543,0.0,-28.107,-20.979,True
