In [None]:
import pandas as pd
import numpy as np

# Fix the legacy global NumPy seed so the dummy responses are reproducible.
np.random.seed(0)
# One factor with three levels (A, B, C), 10 consecutive rows per level;
# responses are drawn from a single normal distribution (mean 10, sd 2).
df_oneway = pd.DataFrame({
    'factor': np.repeat(['A', 'B', 'C'], 10),
    'response': np.random.normal(loc=10, scale=2, size=30)
})
# Persist the dummy data next to the notebook (row index not written).
df_oneway.to_excel("dummy_oneway.xlsx", index=False)
# Last expression of the cell: displays the full 30-row frame.
df_oneway

In [None]:
# One-way ANOVA
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# `df` is an alias of df_oneway (same object, not a copy); later cells read `df`.
df = df_oneway
# Example: response ~ factor
# C(...) makes the formula treat `factor` as categorical.
model = ols('response ~ C(factor)', data=df).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
# display(anova_table)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Extract degrees of freedom and variance (MSE) from ANOVA table
# Reads whichever `anova_table` is currently bound in the kernel.
# Get degrees of freedom for error/residual
# Falls back to the last row when no row is labelled 'Residual'.
df_error = anova_table.loc['Residual', 'df'] if 'Residual' in anova_table.index else anova_table.iloc[-1]['df']

# Get Mean Square Error (MSE) for error/residual
# MSE = residual sum of squares / residual df. The conditional expression
# selects between the two complete quotients (division binds tighter than
# `if ... else`).
mse_error = anova_table.loc['Residual', 'sum_sq'] / anova_table.loc['Residual', 'df'] if 'Residual' in anova_table.index else anova_table.iloc[-1]['sum_sq'] / anova_table.iloc[-1]['df']

print(f"Degrees of Freedom (Error): {df_error}")
print(f"Mean Square Error (MSE): {mse_error}")

# You can now use df_error and mse_error for LSD, Tukey HSD, etc.

In [None]:
# LSD Value Calculation
from scipy.stats import t

# Suppose n = number of replicates per group, means = group means
# NOTE(review): n is hard-coded; 10 matches the 10 rows per level generated
# for df_oneway — confirm before reusing with other data.
n = 10  # adjust as per your data
alpha = 0.05
# Two-sided critical t at level alpha, using the error df taken from the
# ANOVA table (`df_error` must already exist in the kernel).
t_critical = t.ppf(1 - alpha/2, df_error)
# LSD = t_crit * sqrt(2 * MSE / n)
lsd = t_critical * (2 * mse_error / n) ** 0.5
print(f"LSD value: {lsd}")

In [None]:
# Mean comparison and significance letters using Tukey HSD
from statsmodels.stats.multicomp import MultiComparison

# Column names used throughout this cell.
factor_col = 'factor'
response_col = 'response'

# 1. Calculate group means
# `df` is whatever frame is currently bound to that name in the kernel.
means = df.groupby(factor_col)[response_col].mean().reset_index()
means.columns = [factor_col, 'Mean']

# 2. Tukey HSD test
# All pairwise comparisons of the factor levels (default alpha of tukeyhsd()).
mc = MultiComparison(df[response_col], df[factor_col])
tukey_result = mc.tukeyhsd()

# 3. Assign significance letters (robust version)
import numpy as np

def get_significance_letters(tukey_result, group_names):
    """Build a compact letter display (CLD) from a Tukey HSD result.

    Two groups share at least one letter if and only if their pairwise
    comparison was NOT rejected. The earlier greedy pass skipped every group
    that had already received a letter, so in a chain such as
    A~B, B~C, A!=C it produced ``A, A, B`` and wrongly implied that the
    second and third groups differ. This version uses the insert-and-absorb
    scheme, which keeps every non-significant pair together.

    Parameters
    ----------
    tukey_result : object
        Object whose ``summary().data`` is a list of rows; the first row is a
        header and each following row is
        ``(group1, group2, meandiff, p-adj, lower, upper, reject)``.
    group_names : sequence
        Group labels; the first label receives letter ``'A'``.

    Returns
    -------
    dict
        Maps each group label to its letter string (e.g. ``'AB'``).

    Raises
    ------
    KeyError
        If a row of the summary names a group that is not in ``group_names``.
    """
    n = len(group_names)
    if n == 0:
        return {}
    group_to_idx = {name: i for i, name in enumerate(group_names)}
    rows = tukey_result.summary().data[1:]  # skip header

    # Each "column" is a set of group indices that will share one letter.
    # Start with a single column holding everyone, then split it for every
    # significantly different pair.
    columns = [frozenset(range(n))]
    for g1, g2, _, _, _, _, reject in rows:
        i, j = group_to_idx[g1], group_to_idx[g2]
        if not reject:
            continue
        split = []
        for col in columns:
            if i in col and j in col:
                # i and j must not share this letter: keep two copies, each
                # without one of them.
                split.append(col - {i})
                split.append(col - {j})
            else:
                split.append(col)
        # Absorb: drop duplicates, empty columns and columns fully contained
        # in another column (they carry no extra information).
        unique = list(dict.fromkeys(split))
        columns = [c for c in unique if c and not any(c < other for other in unique)]

    # Letter the columns so that the column containing the earliest group
    # comes first; each group's letters are therefore already in order.
    letters = [''] * n
    for k, col in enumerate(sorted(columns, key=sorted)):
        letter = chr(ord('A') + k)
        for idx in col:
            letters[idx] += letter
    return dict(zip(group_names, letters))

# Letters are keyed by the group labels stored on the Tukey result.
letters_dict = get_significance_letters(tukey_result, tukey_result.groupsunique)
# Attach each group's letters to its row in the means table.
means['Letter'] = means[factor_col].map(letters_dict)

# 4. Display the table
print(means)

In [2]:
# Fix the legacy global NumPy seed so the dummy responses are reproducible.
np.random.seed(1)
# 2 x 3 factorial layout: factor1 in two blocks of 15 rows, factor2 cycling
# M/N/O in runs of 5 inside each block (5 rows per factor1 x factor2 cell).
df_twoway = pd.DataFrame({
    'factor1': np.repeat(['X', 'Y'], 15),
    'factor2': np.tile(np.repeat(['M', 'N', 'O'], 5), 2),
    'response': np.random.normal(loc=20, scale=3, size=30)
})
df_twoway.to_excel("dummy_twoway.xlsx", index=False)
# Not the last expression of the cell, so this preview is not rendered.
df_twoway.head()
# Disabled alternative: load real data from a machine-specific absolute path.
# df_twoway = pd.read_excel("D:\\Study\\Study and Extras\\Scientific Work\\Spinach Rehan\\Data Graphs\\Sheet1Tabl.xlsx", sheet_name="Stat Sheet")
df_twoway

Unnamed: 0,factor1,factor2,response
0,X,M,24.873036
1,X,M,18.164731
2,X,M,18.415485
3,X,M,16.781094
4,X,M,22.596223
5,X,N,13.095384
6,X,N,25.234435
7,X,N,17.716379
8,X,N,20.957117
9,X,N,19.251889


In [None]:
# Two-way ANOVA
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import t
import re

def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

def get_lsd_letters_cld(means_sorted, lsd):
    """Compact letter display for group means compared with a common LSD.

    Two groups share a letter if and only if their means differ by at most
    ``lsd``. The previous implementation let a later group push the current
    letter onto following groups without checking who already held it, so
    groups further apart than ``lsd`` could share a letter (means 10, 8.5, 7
    with ``lsd=2`` produced ``a, a, a`` instead of ``a, ab, b``).

    Parameters
    ----------
    means_sorted : pandas.DataFrame
        Must have a ``'group'`` column (labels) and a ``'Mean'`` column.
        Callers pass it sorted by ``'Mean'`` descending; the function ranks
        the means itself, so any row order gives the same letters.
    lsd : float
        Least significant difference threshold.

    Returns
    -------
    dict
        Maps each group label to its letters (e.g. ``'ab'``); ``'a'`` goes to
        the run of groups containing the largest mean.
    """
    group_names = means_sorted['group'].tolist()
    means = means_sorted['Mean'].tolist()
    n = len(means)

    # Rank positions from largest to smallest mean (sorted() is stable, so
    # ties keep their input order).
    order = sorted(range(n), key=lambda k: -means[k])
    ranked = [means[k] for k in order]

    letter_sets = [set() for _ in range(n)]
    next_code = ord('a')
    covered_to = -1  # last ranked position already covered by some letter
    end = 0
    for start in range(n):
        # With ranked means, everything within `lsd` of ranked[start] is the
        # contiguous run start..end; `end` never moves backwards.
        end = max(end, start)
        while end + 1 < n and ranked[start] - ranked[end + 1] <= lsd:
            end += 1
        # A run that does not reach past the previous one is contained in it
        # and needs no letter of its own.
        if end > covered_to:
            letter = chr(next_code)
            for pos in range(start, end + 1):
                letter_sets[order[pos]].add(letter)
            next_code += 1
            covered_to = end

    letters = [''.join(sorted(s)) for s in letter_sets]
    return dict(zip(group_names, letters))

def safe_sheet_name(name, suffix=""):
    """Return ``name`` as text that can be used in a worksheet name.

    The characters ``\\ / * ? : [ ]`` are each replaced by ``_``, the result
    is cut to its first 25 characters, and ``suffix`` is appended after the
    cut.
    """
    forbidden = str.maketrans({ch: '_' for ch in '\\/*?:[]'})
    cleaned = str(name).translate(forbidden)
    return "{}{}".format(cleaned[:25], suffix)

def two_way_anova(df, output_path):
    """Run a two-way ANOVA with LSD mean separation for every response column.

    The first two columns of ``df`` are taken as the factors and every
    remaining column as a response. For each response three sheets are
    written to ``output_path``: ``<name>_ANOVA`` (Type II table with p-value
    and significance code), ``<name>_LSD`` (LSD value and its inputs) and
    ``<name>_Means`` (factor-combination means, counts and letters).

    Parameters
    ----------
    df : pandas.DataFrame
        Data with two factor columns followed by one or more response columns.
        Column names are inserted verbatim into the model formula.
    output_path : str or path-like
        Excel workbook to create.
    """
    factor_cols = df.columns[:2].tolist()
    response_cols = df.columns[2:].tolist()
    with pd.ExcelWriter(output_path) as writer:
        for response in response_cols:
            # Use the frame passed in (previously the global `df_twoway` was
            # read here, ignoring the argument). `.copy()` makes df_sub an
            # independent frame so adding the 'group' column below neither
            # warns nor touches the caller's data.
            df_sub = df[factor_cols + [response]].dropna().copy()
            # One label per factor combination, e.g. "X_M".
            df_sub['group'] = df_sub[factor_cols].astype(str).apply(lambda row: '_'.join(row), axis=1)
            formula = f'{response} ~ C({factor_cols[0]})*C({factor_cols[1]})'
            model = ols(formula, data=df_sub).fit()
            anova_table = sm.stats.anova_lm(model, typ=2)
            anova_table['P-value'] = anova_table['PR(>F)']
            anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
            # Error df and mean square; fall back to the last row if no row is
            # labelled 'Residual'.
            df_error = anova_table.loc['Residual', 'df'] if 'Residual' in anova_table.index else anova_table.iloc[-1]['df']
            mse_error = anova_table.loc['Residual', 'sum_sq'] / anova_table.loc['Residual', 'df'] if 'Residual' in anova_table.index else anova_table.iloc[-1]['sum_sq'] / anova_table.iloc[-1]['df']
            means = df_sub.groupby('group')[response].agg(['mean', 'count']).reset_index()
            means.columns = ['group', 'Mean', 'n']
            means = means.sort_values('Mean', ascending=False).reset_index(drop=True)
            # The smallest cell count is used as the replicate number in the
            # LSD formula.
            n_eff = means['n'].min()
            alpha = 0.05
            t_critical = t.ppf(1 - alpha/2, df_error)
            lsd = t_critical * np.sqrt(2 * mse_error / n_eff)
            letters_dict = get_lsd_letters_cld(means, lsd)
            means['Letter'] = means['group'].map(letters_dict)
            # 25-character base + at most 6-character suffix keeps sheet names
            # within 31 characters.
            base = safe_sheet_name(response)
            anova_table.to_excel(writer, sheet_name=f'{base}_ANOVA')
            pd.DataFrame({'LSD_value': [lsd], 'alpha': [alpha], 't_critical': [t_critical], 'df_error': [df_error], 'mse_error': [mse_error]}).to_excel(writer, sheet_name=f'{base}_LSD', index=False)
            means.to_excel(writer, sheet_name=f'{base}_Means', index=False)

In [None]:
# Disabled dummy-data generator for a 2 x 2 x 2 layout (kept for reference).
# np.random.seed(2)
# df_threeway = pd.DataFrame({
#     'f1': np.repeat(['P', 'Q'], 12),
#     'f2': np.tile(np.repeat(['R', 'S'], 6), 2),
#     'f3': np.tile(['U', 'V', 'U', 'V', 'U', 'V'], 4),
#     'response': np.random.normal(loc=30, scale=4, size=24)
# })
# df_threeway.to_excel("dummy_threeway.xlsx", index=False)
# df_threeway.head()
# NOTE(review): machine-specific absolute path; this cell fails on any other
# machine. Later cells read columns 'Treatment', 'Cultivar', 'Stress' and 'RL'
# from this frame.
df_threeway = pd.read_excel("C:\\Users\\mirza\\OneDrive\\Desktop\\Copy of Mungbean_Excel_sheet(1).xlsx", sheet_name="Sheet3")
df_threeway

In [None]:
# Three Way ANOVA
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import t
from statsmodels.stats.multicomp import MultiComparison
import re

# Load your data
# NOTE(review): machine-specific absolute path. This also rebinds the
# notebook-wide name `df`, which the one-way cells use for df_oneway.
file_path = "C:\\Users\\mirza\\OneDrive\\Desktop\\Copy of Mungbean_Excel_sheet(1).xlsx"
sheet_name = "Sheet3"
df = pd.read_excel(file_path, sheet_name=sheet_name)

# Automatically get factor and response columns
# First three columns are the factors; every remaining column is a response.
factor_cols = df.columns[:3].tolist()
response_cols = df.columns[3:].tolist()

def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

def get_lsd_letters_cld(means_sorted, lsd):
    """Compact letter display for group means compared with a common LSD.

    Two groups share a letter if and only if their means differ by at most
    ``lsd``. The previous implementation let a later group push the current
    letter onto following groups without checking who already held it, so
    groups further apart than ``lsd`` could share a letter (means 10, 8.5, 7
    with ``lsd=2`` produced ``a, a, a`` instead of ``a, ab, b``).

    Parameters
    ----------
    means_sorted : pandas.DataFrame
        Must have a ``'group'`` column (labels) and a ``'Mean'`` column.
        Callers pass it sorted by ``'Mean'`` descending; the function ranks
        the means itself, so any row order gives the same letters.
    lsd : float
        Least significant difference threshold.

    Returns
    -------
    dict
        Maps each group label to its letters (e.g. ``'ab'``); ``'a'`` goes to
        the run of groups containing the largest mean.
    """
    group_names = means_sorted['group'].tolist()
    means = means_sorted['Mean'].tolist()
    n = len(means)

    # Rank positions from largest to smallest mean (sorted() is stable, so
    # ties keep their input order).
    order = sorted(range(n), key=lambda k: -means[k])
    ranked = [means[k] for k in order]

    letter_sets = [set() for _ in range(n)]
    next_code = ord('a')
    covered_to = -1  # last ranked position already covered by some letter
    end = 0
    for start in range(n):
        # With ranked means, everything within `lsd` of ranked[start] is the
        # contiguous run start..end; `end` never moves backwards.
        end = max(end, start)
        while end + 1 < n and ranked[start] - ranked[end + 1] <= lsd:
            end += 1
        # A run that does not reach past the previous one is contained in it
        # and needs no letter of its own.
        if end > covered_to:
            letter = chr(next_code)
            for pos in range(start, end + 1):
                letter_sets[order[pos]].add(letter)
            next_code += 1
            covered_to = end

    letters = [''.join(sorted(s)) for s in letter_sets]
    return dict(zip(group_names, letters))

def safe_sheet_name(name, suffix=""):
    """Sanitise ``name`` for use as (the start of) a worksheet name.

    Each of ``\\ / * ? : [ ]`` becomes ``_``; the sanitised text is limited to
    25 characters so that a short suffix still fits, and ``suffix`` is then
    appended unchanged.
    """
    text = str(name)
    sanitized = ''.join('_' if ch in '\\/*?:[]' else ch for ch in text)
    head = sanitized[:25]
    return f"{head}{suffix}"

# For every response column: fit the full three-factor model, derive the LSD,
# letter the factor-combination means and write three sheets to one workbook.
with pd.ExcelWriter('all_responses_anova_results.xlsx') as writer:
    for response in response_cols:
        # Drop NA for this response. `.copy()` makes df_sub an independent
        # frame, so adding the 'group' column below cannot warn about (or
        # write through to) a slice of `df`.
        df_sub = df[factor_cols + [response]].dropna().copy()
        # Create group label, e.g. "T1_CultA_Stress"
        df_sub['group'] = df_sub[factor_cols].astype(str).apply(lambda row: '_'.join(row), axis=1)
        # ANOVA (column names are inserted verbatim into the formula)
        formula = f'{response} ~ C({factor_cols[0]})*C({factor_cols[1]})*C({factor_cols[2]})'
        model = ols(formula, data=df_sub).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)
        anova_table['P-value'] = anova_table['PR(>F)']
        anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
        # Degrees of freedom and MSE (falls back to the last row if no row is
        # labelled 'Residual')
        df_error = anova_table.loc['Residual', 'df'] if 'Residual' in anova_table.index else anova_table.iloc[-1]['df']
        mse_error = anova_table.loc['Residual', 'sum_sq'] / anova_table.loc['Residual', 'df'] if 'Residual' in anova_table.index else anova_table.iloc[-1]['sum_sq'] / anova_table.iloc[-1]['df']
        # LSD
        means_3way = df_sub.groupby('group')[response].agg(['mean', 'count']).reset_index()
        means_3way.columns = ['group', 'Mean', 'n']
        # Remember the pre-sort (alphabetical groupby) position of each group.
        means_3way['Original_Index'] = means_3way.index
        means_3way = means_3way.sort_values('Mean', ascending=False).reset_index(drop=True)
        # The smallest cell count is used as the replicate number.
        n_eff = means_3way['n'].min()
        alpha = 0.05
        t_critical = t.ppf(1 - alpha/2, df_error)
        lsd = t_critical * np.sqrt(2 * mse_error / n_eff)
        # LSD letters
        letters_dict_3way = get_lsd_letters_cld(means_3way, lsd)
        means_3way['Letter'] = means_3way['group'].map(letters_dict_3way)
        # Safe sheet names (25-char base + suffix of at most 6 chars <= 31)
        base = safe_sheet_name(response)
        anova_table.to_excel(writer, sheet_name=f'{base}_ANOVA')
        pd.DataFrame({'LSD_value': [lsd], 'alpha': [alpha], 't_critical': [t_critical], 'df_error': [df_error], 'mse_error': [mse_error]}).to_excel(writer, sheet_name=f'{base}_LSD', index=False)
        means_3way.to_excel(writer, sheet_name=f'{base}_Means', index=False)
print("Exported all ANOVA, LSD, and mean comparison results for all responses.")



In [None]:
# Tukey's HSD

from statsmodels.stats.multicomp import MultiComparison

# Combine all factor columns into a single group label for interaction means
# NOTE: adds/overwrites a 'group' column on df_threeway in place; factor
# column names are hard-coded.
df_threeway['group'] = (
    df_threeway['Treatment'].astype(str) + "_" +
    df_threeway['Cultivar'].astype(str) + "_" +
    df_threeway['Stress'].astype(str)
)

# Calculate means for each interaction group
# Only the 'RL' response is analysed in this cell.
means_3way = df_threeway.groupby('group')['RL'].mean().reset_index()
means_3way.columns = ['group', 'Mean']

# Sort means descending so highest mean gets 'A'
means_3way = means_3way.sort_values('Mean', ascending=False).reset_index(drop=True)

# Tukey HSD test on the interaction groups
mc_3way = MultiComparison(df_threeway['RL'], df_threeway['group'])
tukey_result_3way = mc_3way.tukeyhsd()

# Assign significance letters, starting from highest mean
import numpy as np

def get_significance_letters_highest_first(tukey_result, means_sorted):
    """Compact letter display from a Tukey HSD result, lettered in table order.

    Two groups share at least one letter if and only if their pairwise
    comparison was NOT rejected. The earlier greedy pass skipped every group
    that had already received a letter, so in a chain such as
    A~B, B~C, A!=C it produced ``A, A, B`` and wrongly implied that the
    second and third groups differ. This version uses the insert-and-absorb
    scheme, which keeps every non-significant pair together.

    Parameters
    ----------
    tukey_result : object
        Object whose ``summary().data`` is a list of rows; the first row is a
        header and each following row is
        ``(group1, group2, meandiff, p-adj, lower, upper, reject)``.
    means_sorted : pandas.DataFrame
        Must have a ``'group'`` column; its row order decides the lettering
        (callers sort by mean descending so the highest mean gets ``'A'``).

    Returns
    -------
    dict
        Maps each group label to its letter string (e.g. ``'AB'``).
    """
    group_names = means_sorted['group'].tolist()
    n = len(group_names)
    if n == 0:
        return {}
    group_to_idx = {name: i for i, name in enumerate(group_names)}
    rows = tukey_result.summary().data[1:]  # skip header

    # Each "column" is a set of group indices that will share one letter.
    columns = [frozenset(range(n))]
    for g1, g2, _, _, _, _, reject in rows:
        # Comparisons naming a group absent from means_sorted are ignored.
        if g1 not in group_to_idx or g2 not in group_to_idx:
            continue
        if not reject:
            continue
        i, j = group_to_idx[g1], group_to_idx[g2]
        split = []
        for col in columns:
            if i in col and j in col:
                # i and j must not share this letter: keep two copies, each
                # without one of them.
                split.append(col - {i})
                split.append(col - {j})
            else:
                split.append(col)
        # Absorb: drop duplicates, empty columns and columns fully contained
        # in another column.
        unique = list(dict.fromkeys(split))
        columns = [c for c in unique if c and not any(c < other for other in unique)]

    # The column containing the first (highest-mean) group is lettered first.
    letters = [''] * n
    for k, col in enumerate(sorted(columns, key=sorted)):
        letter = chr(ord('A') + k)
        for idx in col:
            letters[idx] += letter
    return dict(zip(group_names, letters))

# Letter the interaction groups in descending-mean order and attach the
# letters to the means table.
letters_dict_3way = get_significance_letters_highest_first(tukey_result_3way, means_3way)
means_3way['Letter'] = means_3way['group'].map(letters_dict_3way)

print(means_3way)

In [None]:
# LSD Value Calculation for Three-way ANOVA for single letters
import numpy as np
from scipy.stats import t

# Combine all factor columns into a single group label for interaction means
# NOTE: adds/overwrites a 'group' column on df_threeway in place; factor
# column names are hard-coded.
df_threeway['group'] = (
    df_threeway['Treatment'].astype(str) + "_" +
    df_threeway['Cultivar'].astype(str) + "_" +
    df_threeway['Stress'].astype(str)
)

# Calculate means for each interaction group
means_3way = df_threeway.groupby('group')['RL'].mean().reset_index()
means_3way.columns = ['group', 'Mean']

# Sort means descending so highest mean gets 'A'
means_3way = means_3way.sort_values('Mean', ascending=False).reset_index(drop=True)

# Calculate LSD value
# You must have already calculated mse_error and df_error from your ANOVA table
# NOTE(review): mse_error and df_error are whatever the most recently executed
# cell left in the kernel (the three-way export loop leaves the values of its
# LAST response column) — confirm they belong to the 'RL' model before
# trusting this LSD.
# n = number of replicates per group (adjust as needed)
# NOTE(review): n is hard-coded; verify against the actual group sizes.
n = 3
alpha = 0.05
t_critical = t.ppf(1 - alpha/2, df_error)
lsd = t_critical * np.sqrt(2 * mse_error / n)

# Assign significance letters using LSD
def get_lsd_letters(means_sorted, lsd):
    """Assign upper-case significance letters to group means using an LSD.

    Two groups share a letter if and only if their means differ by at most
    ``lsd``. The earlier greedy pass never started a letter from a group that
    already had one, so for means 10, 8.5, 7 with ``lsd=2`` it returned
    ``A, A, B`` although the second and third means are within the LSD; the
    correct display is ``A, AB, B``.

    Parameters
    ----------
    means_sorted : pandas.DataFrame
        Must have a ``'group'`` column (labels) and a ``'Mean'`` column.
        Callers pass it sorted by ``'Mean'`` descending; the function ranks
        the means itself, so any row order gives the same letters.
    lsd : float
        Least significant difference threshold.

    Returns
    -------
    dict
        Maps each group label to its letter string; ``'A'`` goes to the run
        of groups containing the largest mean.
    """
    group_names = means_sorted['group'].tolist()
    means = means_sorted['Mean'].tolist()
    n = len(means)

    # Positions ranked from largest to smallest mean (stable for ties).
    order = sorted(range(n), key=lambda k: -means[k])
    ranked = [means[k] for k in order]

    letters = [''] * n
    current_letter = 'A'
    covered_to = -1  # last ranked position already covered by some letter
    end = 0
    for start in range(n):
        # With ranked means, the groups within `lsd` of ranked[start] form the
        # contiguous run start..end, and `end` never moves backwards.
        end = max(end, start)
        while end + 1 < n and ranked[start] - ranked[end + 1] <= lsd:
            end += 1
        # Only runs that extend past the previous one need a new letter.
        if end > covered_to:
            for pos in range(start, end + 1):
                letters[order[pos]] += current_letter
            current_letter = chr(ord(current_letter) + 1)
            covered_to = end
    return dict(zip(group_names, letters))

# Letter the interaction means with the LSD computed above and attach the
# letters to the means table.
letters_dict_3way = get_lsd_letters(means_3way, lsd)
means_3way['Letter'] = means_3way['group'].map(letters_dict_3way)

print(means_3way)

In [None]:
# Dummy data for the four-way ANOVA: a 2 x 2 x 2 x 2 factorial with two
# replicates per cell (32 rows).
np.random.seed(3)
df_fourway = pd.DataFrame({
    # Each factor alternates at half the period of the previous one, so the
    # four factors are fully crossed. Previously f3 and f4 both alternated on
    # every row and were therefore perfectly confounded, which made the
    # four-way model rank deficient.
    'f1': np.repeat(['A', 'B'], 16),
    'f2': np.tile(np.repeat(['C', 'D'], 8), 2),
    'f3': np.tile(np.repeat(['E', 'F'], 4), 4),
    'f4': np.tile(np.repeat(['G', 'H'], 2), 8),
    'response': np.random.normal(loc=40, scale=5, size=32)
})
# Guard the design: 16 cells, 2 replicates each.
assert df_fourway.groupby(['f1', 'f2', 'f3', 'f4']).size().eq(2).all()
assert df_fourway.groupby(['f1', 'f2', 'f3', 'f4']).ngroups == 16
df_fourway.to_excel("dummy_fourway.xlsx", index=False)
df_fourway.head()

In [5]:
import pandas as pd
import numpy as np
from itertools import product

def generate_dummy_4way_data():
    """Simulate a replicated 2 x 2 x 3 x 2 factorial data set.

    Every combination of the four factors gets three replicate rows. The
    response is a baseline of 10 plus one additive effect per factor level,
    an interaction term ``(A + B) * (C + D) * 0.1`` built from those effects,
    and normal noise (sd 1.5). The global NumPy seed is set to 42 on every
    call, so the result is reproducible and the global random state is reset
    as a side effect.

    Returns
    -------
    pandas.DataFrame
        Columns ``Factor_A``, ``Factor_B``, ``Factor_C``, ``Factor_D``,
        ``Response``; 72 rows.
    """
    # Additive effect of each level, one table per factor; the key order also
    # fixes the order in which combinations are generated.
    effect_tables = (
        {'A1': 0, 'A2': 5},
        {'B1': 0, 'B2': 3},
        {'C1': 0, 'C2': 4, 'C3': -2},
        {'D1': 0, 'D2': 2},
    )
    baseline = 10
    n_replicates = 3

    np.random.seed(42)  # Reproducibility

    records = []
    for combo in product(*(list(table) for table in effect_tables)):
        eff_a, eff_b, eff_c, eff_d = (
            table[level] for table, level in zip(effect_tables, combo)
        )
        coupling = (eff_a + eff_b) * (eff_c + eff_d) * 0.1
        # Noise-free cell value; identical for all replicates of the cell.
        cell_value = baseline + eff_a + eff_b + eff_c + eff_d + coupling
        for _ in range(n_replicates):
            records.append([*combo, cell_value + np.random.normal(0, 1.5)])

    return pd.DataFrame(
        records,
        columns=['Factor_A', 'Factor_B', 'Factor_C', 'Factor_D', 'Response'],
    )

# Generate and save
# The generator reseeds NumPy's global RNG (seed 42) each time it is called.
df_4way = generate_dummy_4way_data()
df_4way.to_excel("dummy_4way_anova_data.xlsx", index=False)
print("Dummy 4-way ANOVA data saved as 'dummy_4way_anova_data.xlsx'")


Dummy 4-way ANOVA data saved as 'dummy_4way_anova_data.xlsx'


In [None]:
# Four-way ANOVA
# Full factorial model: all main effects and interactions of f1..f4.
# `ols` and `sm` are not imported in this cell; they must already be in the
# kernel from an earlier cell.
model = ols('response ~ C(f1)*C(f2)*C(f3)*C(f4)', data=df_fourway).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Fix the legacy global NumPy seed so the dummy responses are reproducible.
np.random.seed(4)
# Four treatments, 8 consecutive rows each (32 rows in total).
df_crd = pd.DataFrame({
    'treatment': np.repeat(['T1', 'T2', 'T3', 'T4'], 8),
    'response': np.random.normal(loc=15, scale=2, size=32)
})
df_crd.to_excel("dummy_crd.xlsx", index=False)
df_crd.head()

In [None]:
# CRD (Completely Randomized Design)
# Same as one-way ANOVA, just use your treatment column
# Single categorical factor; `ols` and `sm` must already be in the kernel
# from an earlier cell.
model = ols('response ~ C(treatment)', data=df_crd).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Fix the legacy global NumPy seed so the dummy responses are reproducible.
np.random.seed(5)
# Five blocks of four rows; each block contains every treatment exactly once.
df_rcbd = pd.DataFrame({
    'treatment': np.tile(['T1', 'T2', 'T3', 'T4'], 5),
    'block': np.repeat(['B1', 'B2', 'B3', 'B4', 'B5'], 4),
    'response': np.random.normal(loc=18, scale=2, size=20)
})
df_rcbd.to_excel("dummy_rcbd.xlsx", index=False)
df_rcbd.head()

In [None]:
# RCBD (Randomized Complete Block Design)
# Additive model: treatment and block main effects, no interaction term.
# `ols` and `sm` must already be in the kernel from an earlier cell.
model = ols('response ~ C(treatment) + C(block)', data=df_rcbd).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Fix the legacy global NumPy seed so the dummy responses are reproducible.
np.random.seed(6)
# Two main-plot levels (12 rows each); the sub-plot level alternates in runs
# of 3, giving 6 rows per mainplot x subplot combination.
df_splitplot = pd.DataFrame({
    'mainplot': np.repeat(['M1', 'M2'], 12),
    'subplot': np.tile(np.repeat(['S1', 'S2'], 3), 4),
    'response': np.random.normal(loc=22, scale=3, size=24)
})
df_splitplot.to_excel("dummy_splitplot.xlsx", index=False)
df_splitplot.head()

In [None]:
# Split-plot (basic)
# Main effects plus interaction of mainplot and subplot, fitted by plain OLS.
# NOTE(review): this uses a single residual error term; a classical split-plot
# analysis tests the main-plot factor against a separate whole-plot error —
# confirm the simplification is intended.
model = ols('response ~ C(mainplot) * C(subplot)', data=df_splitplot).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Fix the legacy global NumPy seed so the dummy responses are reproducible.
np.random.seed(7)
# 2 x 2 x 2 layout: mainplot changes every 8 rows, subplot every 2 rows and
# subsubplot every row, giving 2 rows per combination (16 rows).
df_splitsplit = pd.DataFrame({
    'mainplot': np.repeat(['M1', 'M2'], 8),
    'subplot': np.tile(np.repeat(['S1', 'S2'], 2), 4),
    'subsubplot': np.tile(['SS1', 'SS2'], 8),
    'response': np.random.normal(loc=25, scale=3, size=16)
})
df_splitsplit.to_excel("dummy_splitsplit.xlsx", index=False)
df_splitsplit.head()

In [None]:
# Split-split plot (basic)
# Full factorial of the three plot factors, fitted by plain OLS.
# NOTE(review): a single residual error term is used for all effects; a
# classical split-split-plot analysis uses separate error strata — confirm
# the simplification is intended.
model = ols('response ~ C(mainplot) * C(subplot) * C(subsubplot)', data=df_splitsplit).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Dummy data for a 4 x 4 Latin square (16 rows, one per row x column cell).
np.random.seed(8)
df_latin = pd.DataFrame({
    'row': np.tile(['R1', 'R2', 'R3', 'R4'], 4),
    'column': np.repeat(['C1', 'C2', 'C3', 'C4'], 4),
    # Cyclic arrangement: treatment index = (row index + column index) mod 4,
    # so every treatment appears exactly once in each row and each column.
    # Previously the treatment pattern was identical to the row pattern,
    # which confounded treatment with row completely.
    'treatment': [f'T{(r + c) % 4 + 1}' for c in range(4) for r in range(4)],
    'response': np.random.normal(loc=28, scale=2, size=16)
})
# Guard the Latin-square property.
assert pd.crosstab(df_latin['row'], df_latin['treatment']).eq(1).all().all()
assert pd.crosstab(df_latin['column'], df_latin['treatment']).eq(1).all().all()
df_latin.to_excel("dummy_latin_square.xlsx", index=False)
df_latin.head()

In [None]:
# Latin Square
# Additive model with row, column and treatment main effects only.
# `ols` and `sm` must already be in the kernel from an earlier cell.
model = ols('response ~ C(row) + C(column) + C(treatment)', data=df_latin).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Dummy data for the split-block example: 2 x 2 x 2 layout, two rows per
# block1 x block2 x treatment combination (16 rows).
np.random.seed(9)
df_splitblock = pd.DataFrame({
    'block1': np.repeat(['B1', 'B2'], 8),
    'block2': np.tile(['B3', 'B4'], 8),
    # Treatment changes every 2 rows while block2 changes every row, so the
    # two are crossed. Previously both alternated on every row (T1 always
    # with B3, T2 always with B4), i.e. treatment was completely confounded
    # with block2.
    'treatment': np.tile(np.repeat(['T1', 'T2'], 2), 4),
    'response': np.random.normal(loc=32, scale=2, size=16)
})
# Guard the design: 8 combinations, 2 rows each.
assert df_splitblock.groupby(['block1', 'block2', 'treatment']).size().eq(2).all()
assert df_splitblock.groupby(['block1', 'block2', 'treatment']).ngroups == 8
df_splitblock.to_excel("dummy_splitblock.xlsx", index=False)
df_splitblock.head()

In [None]:
# Split block (basic)
# Additive model with the two blocking factors and treatment, no interactions.
# `ols` and `sm` must already be in the kernel from an earlier cell.
model = ols('response ~ C(block1) + C(block2) + C(treatment)', data=df_splitblock).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Dummy data for the strip-split example: 2 x 2 x 2 layout, two rows per
# strip1 x strip2 x subplot combination (16 rows).
np.random.seed(10)
df_stripsplit = pd.DataFrame({
    'strip1': np.repeat(['S1', 'S2'], 8),
    'strip2': np.tile(['S3', 'S4'], 8),
    # Subplot changes every 2 rows while strip2 changes every row, so the two
    # are crossed. Previously both alternated on every row (SP1 always with
    # S3, SP2 always with S4), which made subplot indistinguishable from
    # strip2 and the three-way model rank deficient.
    'subplot': np.tile(np.repeat(['SP1', 'SP2'], 2), 4),
    'response': np.random.normal(loc=35, scale=2, size=16)
})
# Guard the design: 8 combinations, 2 rows each.
assert df_stripsplit.groupby(['strip1', 'strip2', 'subplot']).size().eq(2).all()
assert df_stripsplit.groupby(['strip1', 'strip2', 'subplot']).ngroups == 8
df_stripsplit.to_excel("dummy_stripsplit.xlsx", index=False)
df_stripsplit.head()

In [None]:
# Strip-split (basic)
# Full factorial of strip1, strip2 and subplot, fitted by plain OLS.
# NOTE(review): a single residual error term is used for all effects —
# confirm this simplification is intended for a strip-split design.
model = ols('response ~ C(strip1) * C(strip2) * C(subplot)', data=df_stripsplit).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Dummy data for the split-plot Latin square example (18 rows): 3 rows x
# 3 columns, two observations (M1, M2) per row x column cell.
np.random.seed(11)
df_splsqr = pd.DataFrame({
    'row': np.tile(['R1', 'R2', 'R3'], 6),
    'column': np.repeat(['C1', 'C2', 'C3'], 6),
    'mainplot': np.tile(['M1', 'M2'], 9),
    # Row index is i % 3 and column index is i // 6, so the sub-plot level
    # (row + column) mod 3 forms a Latin arrangement over rows and columns.
    # Previously the subplot pattern was identical to the row pattern, which
    # confounded subplot with row completely.
    'subplot': [f'S{(i % 3 + i // 6) % 3 + 1}' for i in range(18)],
    'response': np.random.normal(loc=38, scale=2, size=18)
})
# Guard the design: subplot balanced over rows, columns and main plots.
assert pd.crosstab(df_splsqr['row'], df_splsqr['subplot']).eq(2).all().all()
assert pd.crosstab(df_splsqr['column'], df_splsqr['subplot']).eq(2).all().all()
assert pd.crosstab(df_splsqr['mainplot'], df_splsqr['subplot']).eq(3).all().all()
df_splsqr.to_excel("dummy_splitplot_latin_square.xlsx", index=False)
df_splsqr.head()

In [None]:
# Split-plot Latin Square (basic)
# Row and column main effects plus mainplot, subplot and their interaction.
# `ols` and `sm` must already be in the kernel from an earlier cell.
model = ols('response ~ C(row) + C(column) + C(mainplot) * C(subplot)', data=df_splsqr).fit()
# typ=2 requests Type II sums of squares.
anova_table = sm.stats.anova_lm(model, typ=2)
def significance_stars(p):
    """Map a p-value to the conventional significance code.

    Parameters
    ----------
    p : float
        P-value, e.g. one entry of the ``PR(>F)`` column of an ANOVA table.

    Returns
    -------
    str
        ``'***'`` for p < 0.001, ``'**'`` for p < 0.01, ``'*'`` for p < 0.05,
        ``'.'`` for p < 0.1, ``'ns'`` otherwise, and ``''`` when ``p`` is NaN.
    """
    # NaN is the only value that compares unequal to itself. Rows without a
    # test (e.g. the Residual row of an ANOVA table) carry a NaN p-value;
    # every `<` comparison on NaN is False, so without this guard such rows
    # would be labelled 'ns' as if a test had been performed.
    if p != p:
        return ''
    if p < 0.001:
        return '***'
    if p < 0.01:
        return '**'
    if p < 0.05:
        return '*'
    if p < 0.1:
        return '.'
    return 'ns'

# Duplicate the p-value column under a friendlier header, then add the
# significance code derived from the same p-values.
anova_table['P-value'] = anova_table['PR(>F)']
anova_table['Signif.'] = anova_table['PR(>F)'].apply(significance_stars)
display(anova_table)

In [None]:
# Export ANOVA table to text
# Writes whichever `anova_table` is currently bound in the kernel,
# tab-separated, with the effect names as the first column.
anova_table.to_csv("anova_table.txt", sep="\t")

# Export to docx
from docx import Document
doc = Document()
doc.add_heading('ANOVA Table', 0)
# One header row; one extra leading column for the effect (index) names.
# The table is deliberately NOT named `t`: that name is the kernel-global
# scipy.stats.t distribution used by two_way_anova and the LSD cells, and
# rebinding it here would break them until their cell is re-run.
docx_table = doc.add_table(rows=1, cols=len(anova_table.columns)+1)
hdr_cells = docx_table.rows[0].cells
hdr_cells[0].text = 'Source'
for i, col in enumerate(anova_table.columns):
    hdr_cells[i+1].text = str(col)
# One table row per ANOVA source; every value is written as plain text.
for idx, row in anova_table.iterrows():
    row_cells = docx_table.add_row().cells
    row_cells[0].text = str(idx)
    for i, val in enumerate(row):
        row_cells[i+1].text = str(val)
doc.save("anova_table.docx")

In [None]:
# Export to PDF
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
# Render the ANOVA table as a matplotlib table on an otherwise empty axes.
fig, ax = plt.subplots(figsize=(8,2))
ax.axis('off')
tbl = ax.table(cellText=anova_table.values, colLabels=anova_table.columns, rowLabels=anova_table.index, loc='center')
plt.tight_layout()
# The context manager closes (finalises) the PDF even if savefig raises, so
# no half-written, still-open file is left behind.
with PdfPages("anova_table.pdf") as pdf:
    pdf.savefig(fig)

In [None]:
# Export to XML
import re
import xml.etree.ElementTree as ET
root = ET.Element("ANOVATable")
for idx, row in anova_table.iterrows():
    # One <Row> per ANOVA source; the effect name goes into an attribute,
    # where ElementTree escapes it as needed.
    entry = ET.SubElement(root, "Row", source=str(idx))
    for col, val in row.items():
        # ElementTree does not validate tag names, so a column such as
        # 'PR(>F)' used verbatim as a tag produced a file that XML parsers
        # reject. Build a valid tag and keep the exact column name in the
        # `name` attribute.
        tag = re.sub(r'[^A-Za-z0-9_.-]', '_', str(col))
        if not re.match(r'[A-Za-z_]', tag):
            tag = f'_{tag}'  # tags must start with a letter or underscore
        ET.SubElement(entry, tag, name=str(col)).text = str(val)
tree = ET.ElementTree(root)
tree.write("anova_table.xml")