In [2]:
import pandas as pd
from scipy import stats

# Load Excel file
file_path = 'morfinaltothehighestlevel.xlsx'
df = pd.read_excel(file_path)

# Clean column names
df.columns = df.columns.str.strip()

# Separate groups
manual_group = df[df['Group'] == 'Manual']
automated_group = df[df['Group'] == 'Automated']

# Identify numeric columns
numeric_columns = df.select_dtypes(include=['number']).columns.tolist()
if 'Seed ID' in numeric_columns:
    numeric_columns.remove('Seed ID')

print("🔍 Statistical Comparison Between Manual and Automated Groups\n")

for col in numeric_columns:
    print(f"📈 Analyzing '{col}':")
    
    # Normality check using Shapiro-Wilk test
    stat_m, p_m = stats.shapiro(manual_group[col])
    stat_a, p_a = stats.shapiro(automated_group[col])
    
    normal_m = p_m > 0.05
    normal_a = p_a > 0.05
    
    print(f"  - Shapiro Manual: W = {stat_m:.4f}, p = {p_m:.4f} → {'Normal' if normal_m else 'Not normal'}")
    print(f"  - Shapiro Auto:   W = {stat_a:.4f}, p = {p_a:.4f} → {'Normal' if normal_a else 'Not normal'}")

    if normal_m and normal_a:
        # Both are normally distributed → use Welch’s t-test
        t_stat, p_val = stats.ttest_ind(manual_group[col], automated_group[col], equal_var=False)
        test_name = "Welch’s t-test"
        print(f"  ✅ {test_name}: t = {t_stat:.4f}, p = {p_val:.4f}")
    else:
        # At least one not normal → use Mann-Whitney U test
        u_stat, p_val = stats.mannwhitneyu(manual_group[col], automated_group[col], alternative='two-sided')
        test_name = "Mann-Whitney U test"
        print(f"  ✅ {test_name}: U = {u_stat:.4f}, p = {p_val:.4f}")
    
    print("-" * 50)


🔍 Statistical Comparison Between Manual and Automated Groups

📈 Analyzing 'Germinated':
  - Shapiro Manual: W = 0.8423, p = 0.0000 → Not normal
  - Shapiro Auto:   W = 0.8648, p = 0.0000 → Not normal
  ✅ Mann-Whitney U test: U = 7632.5000, p = 0.7338
--------------------------------------------------
📈 Analyzing 'GerminationRate':
  - Shapiro Manual: W = 0.9217, p = 0.0000 → Not normal
  - Shapiro Auto:   W = 0.8705, p = 0.0000 → Not normal
  ✅ Mann-Whitney U test: U = 171.5000, p = 0.0000
--------------------------------------------------
📈 Analyzing 'SeedHeight':
  - Shapiro Manual: W = 1.0000, p = 1.0000 → Normal
  - Shapiro Auto:   W = 1.0000, p = 1.0000 → Normal
  ✅ Welch’s t-test: t = nan, p = nan
--------------------------------------------------
📈 Analyzing 'RootLength':
  - Shapiro Manual: W = 0.6719, p = 0.0000 → Not normal
  - Shapiro Auto:   W = 0.7142, p = 0.0000 → Not normal
  ✅ Mann-Whitney U test: U = 13156.0000, p = 0.0000
----------------------------------------------

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)
