In [5]:
# IMPORTS
import pandas as pd
import numpy as np
from scipy import stats 
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib.pyplot as plt
# HEADERS
# Numeric response columns that are cleaned and analysed for every treatment group.
headers = [
    'Survivorship Wk5',
    'Average # Flowers',
    'Average Height (mm)',
    'Average # Pods',
    'Total Biomass',
]

# DATA FRAMES
# Load the raw table, then take one subset per '# Seeds Planted' treatment label.
df = pd.read_csv('plant_data.csv')
df_high = df.loc[df['# Seeds Planted'].eq('HIGH (10)')]
df_med = df.loc[df['# Seeds Planted'].eq('MED (5)')]
df_low = df.loc[df['# Seeds Planted'].eq('LOW (2)')]


KeyboardInterrupt: 

In [None]:
# HELPER FUNCTION
def clean_data(df, columns=None):
    """Return a copy of ``df`` with the given columns coerced to numeric.

    Values that cannot be parsed as numbers (for example spreadsheet error
    strings such as '#DIV/0!') become NaN because of ``errors='coerce'``.
    The input frame is never modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to convert.
    columns : iterable of str, optional
        Column names to convert. Defaults to the notebook-level ``headers``
        list, which preserves the original single-argument behaviour.

    Returns
    -------
    pandas.DataFrame
        A new frame; only the selected columns are changed.
    """
    # Resolve the default lazily so an explicit `columns` never needs the global.
    target_columns = headers if columns is None else columns
    cleaned = df.copy()
    for column in target_columns:
        cleaned[column] = pd.to_numeric(cleaned[column], errors='coerce')
    return cleaned

In [None]:
# ANOVA CALCULATIONS
def fp_values(header, low, med, high):
    """Run a one-way ANOVA on column ``header`` across the three groups.

    ``low``, ``med`` and ``high`` are indexed with ``header`` to obtain each
    group's sample. Returns the ``(F statistic, p-value)`` pair produced by
    ``scipy.stats.f_oneway``.
    """
    # Samples are passed in high, med, low order.
    samples = [group[header] for group in (high, med, low)]
    anova = stats.f_oneway(*samples)
    return anova[0], anova[1]

In [None]:
# PAIRWISE TUKEY HSD CALCULATIONS
def post_hoc(col):
    """Run a pairwise Tukey HSD test on column ``col`` across the three groups.

    Reads the notebook-level frames ``df_low``, ``df_med`` and ``df_high``,
    stacks their ``col`` values into one long table labelled 'low' / 'med' /
    'high', and returns the result of ``pairwise_tukeyhsd`` at alpha = 0.05.
    """
    labelled_frames = (('low', df_low), ('med', df_med), ('high', df_high))

    # One observation per row, with a parallel label naming its group.
    stacked_values = np.concatenate([frame[col] for _, frame in labelled_frames])
    group_labels = np.array(
        [label for label, frame in labelled_frames for _ in range(len(frame[col]))]
    )
    data = pd.DataFrame({'values': stacked_values, 'group': group_labels})

    return pairwise_tukeyhsd(endog=data['values'], groups=data['group'], alpha=0.05)

In [None]:
# INVALID INPUT CALCULATION (UNCOMMENT TO SEE THE NUMBER OF INVALID INPUTS)
# high = df_high.isna().sum() + (df_high == '#DIV/0!').sum()
# med = df_med.isna().sum() + (df_med == '#DIV/0!').sum()
# low = df_low.isna().sum() + (df_low == '#DIV/0!').sum()
# DISPLAY INVALID VALUES
# display('High: ')
# display(high)
# display('Med: ')
# display(med)
# display('Low: ')
# display(low)

In [None]:
# DATA CLEANING/SETUP
# Replace missing cells and spreadsheet '#DIV/0!' errors with 0 in the full table
# BEFORE slicing it. The group frames are independent copies of `df`, so cleaning
# `df` after the split would never reach them, and clean_data() would then coerce
# '#DIV/0!' to NaN instead of the intended 0.
df = df.fillna(0).replace('#DIV/0!', 0)

# Rebuild each treatment group from the cleaned table, then coerce the analysed
# columns to numeric. Re-running this cell gives the same result.
df_high = clean_data(df[df['# Seeds Planted'] == 'HIGH (10)'])
df_med = clean_data(df[df['# Seeds Planted'] == 'MED (5)'])
df_low = clean_data(df[df['# Seeds Planted'] == 'LOW (2)'])

# DATA TABLE VISUAL
# display(df_high)
# display(df_med)
# display(df_low)

In [None]:
# ANOVA CALCULATIONS
# One-way ANOVA per response column, comparing the low / med / high groups.
for header in headers:
    # Keyword arguments make the group-to-parameter mapping explicit.
    f_val, p_val = fp_values(header, low=df_low, med=df_med, high=df_high)
    print(f'{header}: F-Value: {f_val}, P-Value: {p_val}')

Survivorship Wk5: F-Value: 4.344637254522216, P-Value: 0.013560213015472153
Average # Flowers: F-Value: 20.55386109857788, P-Value: 3.0276985842356508e-09
Average Height (mm): F-Value: 1.1219340653581618, P-Value: 0.3266164240227396
Average # Pods: F-Value: 20.072799642339046, P-Value: 4.695994752707304e-09
Total Biomass: F-Value: 1.6935666818621093, P-Value: 0.18510683706014305


In [None]:
# POST_HOC CALCULATIONS AND MATPLOTLIB VISUALS
# Results go in `post_hoc_results`, NOT `post_hoc`: rebinding `post_hoc` to a dict
# would shadow the helper function, so re-running this cell would fail with
# "TypeError: 'dict' object is not callable".
post_hoc_results = {header: post_hoc(header) for header in headers}
for header, result in post_hoc_results.items():
    # Show the column name followed by its Tukey HSD summary table.
    display(header)
    display(result.summary())
    # Save the simultaneous confidence-interval plot to a PNG named after the column.
    fig = result.plot_simultaneous()
    fig.suptitle(f'Post_Hoc results for {header}')
    fig.savefig(f'{header}_post_hoc.png')
    # Close each figure so figures do not accumulate in memory across the loop.
    plt.close(fig)

'Survivorship Wk5'

group1,group2,meandiff,p-adj,lower,upper,reject
high,low,11.6901,0.0095,2.3591,21.0212,True
high,med,6.1268,0.2713,-3.2043,15.4578,False
low,med,-5.5634,0.3406,-14.8945,3.7677,False


'Average # Flowers'

group1,group2,meandiff,p-adj,lower,upper,reject
high,low,4.6356,0.0,2.9198,6.3515,True
high,med,1.7774,0.0403,0.0616,3.4933,True
low,med,-2.8582,0.0003,-4.5741,-1.1424,True


'Average Height (mm)'

group1,group2,meandiff,p-adj,lower,upper,reject
high,low,46.2594,0.2983,-26.9256,119.4443,False
high,med,28.0756,0.6392,-45.1093,101.2605,False
low,med,-18.1838,0.8286,-91.3687,55.0011,False


'Average # Pods'

group1,group2,meandiff,p-adj,lower,upper,reject
high,low,6.0753,0.0,3.8145,8.336,True
high,med,3.4073,0.0013,1.1466,5.6681,True
low,med,-2.668,0.0158,-4.9287,-0.4072,True


'Total Biomass'

group1,group2,meandiff,p-adj,lower,upper,reject
high,low,-5.2144,0.1624,-11.931,1.5023,False
high,med,-3.1772,0.5068,-9.8939,3.5395,False
low,med,2.0372,0.7557,-4.6795,8.7538,False
