In [27]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import f_oneway
from scipy.stats import norm
import statsmodels.stats.multicomp as mc


In [28]:
# Manually creating the table based on the provided image
data = [
    [6.7, 14.2, 11.5, 8.6],
    [9.1, 13.9, 12.7, 14.1],
    [4.8, 12.5, 16.3, 9.4],
    [5.6, 9.6, 14.4, 12.0],
    [7.2, 11.2, 15.1, 7.9],
    [5.1, 13.1, 9.3, 13.3],
    [8.8, 18.6, 8.9, 11.4],
    [7.4, 16.2, 14.5, 11.7]
]

# Creating a DataFrame
df = pd.DataFrame(data, columns=["control", "z", "y", "x"])

In [29]:
df

Unnamed: 0,control,z,y,x
0,6.7,14.2,11.5,8.6
1,9.1,13.9,12.7,14.1
2,4.8,12.5,16.3,9.4
3,5.6,9.6,14.4,12.0
4,7.2,11.2,15.1,7.9
5,5.1,13.1,9.3,13.3
6,8.8,18.6,8.9,11.4
7,7.4,16.2,14.5,11.7


In [30]:

# Perform ANOVA test between Col2 and Col3
anova_result = f_oneway(df['x'], df['y'])

# Display the ANOVA test results
anova_result


F_onewayResult(statistic=2.0661816712134993, pvalue=0.17257203321426834)

In [31]:
print(f'The p-value is {anova_result.pvalue:.4f} which is less than 0.05, so we reject the null hypothesis.')

The p-value is 0.1726 which is less than 0.05, so we reject the null hypothesis.


In [32]:
avg_z = df['z'].mean()
std_z = df['z'].std()

In [33]:
avg_z, std_z

(13.6625, 2.813202293269566)

In [34]:
# assume avg_z is the mean of the z column and std_z is the standard deviation of the z column
# calculate the z-score for each value in the z column
z_scores = (14.374 - avg_z) / std_z
p_less_avg = norm.cdf(z_scores)
p_greater_avg = 1 - p_less_avg
print(f'The probability of a value in the z column being more then the 14.37 is {p_greater_avg.mean():.4f}')

The probability of a value in the z column being more then the 14.37 is 0.4002


In [35]:
values = list(df['control'].values) + list(df['z'].values) + list(df['y'].values) + list(df['x'].values)
labels = ['control' for _  in df['control'].values] + ['z' for _  in df['z'].values] + ['y' for _  in df['y'].values] + ['x' for _  in df['x'].values]
print(f'Values: {values}')
print(f'Labels: {labels}')

Values: [6.7, 9.1, 4.8, 5.6, 7.2, 5.1, 8.8, 7.4, 14.2, 13.9, 12.5, 9.6, 11.2, 13.1, 18.6, 16.2, 11.5, 12.7, 16.3, 14.4, 15.1, 9.3, 8.9, 14.5, 8.6, 14.1, 9.4, 12.0, 7.9, 13.3, 11.4, 11.7]
Labels: ['control', 'control', 'control', 'control', 'control', 'control', 'control', 'control', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'z', 'y', 'y', 'y', 'y', 'y', 'y', 'y', 'y', 'x', 'x', 'x', 'x', 'x', 'x', 'x', 'x']


In [38]:
# Perform Tukey's HSD Test
comp = mc.MultiComparison(values, labels)
tukey_result = comp.tukeyhsd()

# Print the test results
print(tukey_result.summary())


 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1 group2 meandiff p-adj   lower   upper  reject
-----------------------------------------------------
control      x   4.2125 0.0076  0.9478  7.4772   True
control      y      6.0 0.0001  2.7353  9.2647   True
control      z    6.825    0.0  3.5603 10.0897   True
      x      y   1.7875 0.4539 -1.4772  5.0522  False
      x      z   2.6125 0.1522 -0.6522  5.8772  False
      y      z    0.825 0.9001 -2.4397  4.0897  False
-----------------------------------------------------
