In [None]:
import json
import math
import random
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.stats import f_oneway, shapiro, ttest_ind
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import MultiComparison, pairwise_tukeyhsd
from statsmodels.stats.multitest import multipletests

<h3>Collect means and standard deviations</h3>

In [None]:
# Collect the summary statistics of every group that should be simulated.
# Each entry of `averages` is [mean, stddev, size, min_value, max_value];
# `people` accumulates the total number of observations over all groups.
n = int(input("Enter the number of averages needed: "))
people = 0
averages = []

for i in range(n):
    # Means and standard deviations are rarely whole numbers, so they are
    # parsed as floats; int() would raise ValueError on input such as "23.4".
    mean = float(input("Enter the mean: "))
    stddev = float(input("Enter the standard deviation: "))
    size = int(input("Enter the data size: "))
    min_value = int(input("Enter the minimum value: "))
    max_value = int(input("Enter the maximum value: "))

    # Reject parameters that cannot describe a valid sample before they are
    # stored, so `averages` and `people` only ever contain usable groups.
    if stddev < 0:
        raise ValueError(f"Standard deviation must not be negative, got {stddev}")
    if min_value > max_value:
        raise ValueError(
            f"Minimum value ({min_value}) must not exceed maximum value ({max_value})"
        )

    data = [mean, stddev, size, min_value, max_value]
    averages.append(data)
    people += size

    print(mean, stddev, size, min_value, max_value)
    print(people)

<h3>Generate distribution</h3>

In [None]:
def distribution(mean, stddev, size, min_value, max_value):
    """Draw ``size`` integer samples from a clamped normal distribution.

    Each sample is produced with the Box-Muller transform from two uniform
    draws, scaled to ``mean``/``stddev``, rounded to the nearest integer and
    then clamped into ``[min_value, max_value]``.

    Args:
        mean: Centre of the normal distribution.
        stddev: Standard deviation of the normal distribution.
        size: Number of samples to return; ``0`` or less yields an empty list.
        min_value: Lower clamp, or ``None`` for no lower bound.
        max_value: Upper clamp, or ``None`` for no upper bound.

    Returns:
        A list of ``size`` integers.
    """
    rands = []
    while len(rands) < size:
        random0 = random.uniform(0, 1)
        if random0 <= 0.0:
            # uniform(0, 1) may return exactly 0.0 and log(0) is undefined,
            # so discard that draw instead of raising ValueError.
            continue
        random1 = random.uniform(0, 1)

        # Box-Muller: turn two uniform draws into one standard normal deviate.
        z0 = math.sqrt(-2 * math.log(random0)) * math.cos(2 * math.pi * random1)
        rounded_value = round(mean + z0 * stddev)

        # Out-of-range values are clamped to the bound, not redrawn.
        if min_value is not None and rounded_value < min_value:
            rounded_value = min_value
        if max_value is not None and rounded_value > max_value:
            rounded_value = max_value

        rands.append(rounded_value)
    return rands


<h3>Generating the samples and checking normality</h3>

In [None]:
# Generate one synthetic sample per requested group and keep only samples
# whose Shapiro-Wilk p-value exceeds `alpha` (normality is not rejected).
data_to_export = []
alpha = 0.05
max_attempts = 100  # upper bound on redraws per group

# Iterate over `averages` itself rather than range(n): `n` is rebound by a
# later cell, which would make this cell use the wrong group count on re-run.
for group_number, group_params in enumerate(averages, start=1):
    for _attempt in range(max_attempts):
        normal_distribution = distribution(*group_params)
        stat, p = shapiro(normal_distribution)
        if p > alpha:
            data_to_export.append(normal_distribution)
            break
    else:
        # Silently dropping the group would leave `people`, the group codes
        # and the ANOVA inputs out of step with the data, so fail loudly.
        raise RuntimeError(
            f"Group {group_number} did not pass the Shapiro-Wilk test "
            f"(alpha={alpha}) in {max_attempts} attempts; "
            "check its mean, standard deviation, size and min/max bounds."
        )

# Flat list of every observation, in group order.
sum_data_to_export = [value for group in data_to_export for value in group]


<h3>Collecting additional variables </h3>

In [None]:
# Build the integer codes of the additional (grouping) variables.
# A dedicated name is used for the count so that `n`, which holds the number
# of groups entered earlier, is not overwritten by this cell.
n_variables = int(input("Enter the number of additional variables needed: "))
group_variables = []

for _variable in range(n_variables):
    variables = int(input("Enter the number of variable levels, e.g., 2 for gender: "))
    repetition = int(input("Enter how many times the set of digits should repeat: "))
    group_name = input("Enter the name of the variable group, e.g., gender: ")
    values = []

    for _level in range(variables):
        name = input("Enter the name of the variable: ")
        x = int(input(f'Enter the number of {name}: '))
        digit = int(input(f'Enter the digit representing {name}: '))

        # With repetition == 0 the digit is written x times, otherwise
        # x // repetition times.
        # NOTE(review): the set of digits is never actually repeated
        # `repetition` times, so the codes are shorter than the sum of the
        # entered counts whenever repetition > 1 - confirm the intent.
        count = x if repetition == 0 else x // repetition
        values.extend([digit] * count)

    group_variables.append(values)

# Flatten the per-variable lists into one list of codes.
group_variables = [code for values in group_variables for code in values]
print(group_variables)


<h3>ANOVA</h3>

In [None]:
# One-way ANOVA across every generated group. Unpacking `data_to_export`
# keeps the test correct for any number of groups instead of exactly three.
statistic, pvalue = f_oneway(*data_to_export)
print(pvalue)
print(statistic)
statistic = round(statistic, 3)

# Format the p-value for reporting: values below 0.001 are shown as '<0.001',
# everything else as '=<value rounded to 3 decimals>'. A p-value of exactly
# 0.001 already renders as '=0.001' through the rounding branch.
if pvalue < 0.001:
    pvalue = '<0.001'
else:
    pvalue = f'={round(pvalue, 3)}'

print(f'pvalue: {pvalue}')
print(f'F: {statistic}')

In [None]:
# Degrees of freedom of a one-way ANOVA with k groups and N observations:
#   between groups = k - 1, within groups = N - k, total = N - 1.
# N is counted from the samples actually kept, so it always matches the data
# that enters the ANOVA.
n_groups = len(data_to_export)
n_observations = sum(len(group) for group in data_to_export)

df = n_groups - 1
df_between = df
df_within = n_observations - n_groups
df_total = n_observations - 1

print("df between:", df_between)
print("df within:", df_within)
print("df total:", df_total)

In [None]:
# Between-group and total sums of squares around the grand mean.
mean = np.mean(sum_data_to_export)  # grand mean over every observation
print(mean)

group_means = list(map(np.mean, data_to_export))
group_sizes = list(map(len, data_to_export))

# SS between: size-weighted squared distance of each group mean from the
# grand mean, rounded to 3 decimals.
SSb = round(
    sum(
        [
            size * (group_mean - mean) ** 2
            for size, group_mean in zip(group_sizes, group_means)
        ]
    ),
    3,
)
print("Sum of squares between (SS_B):", SSb)

# SS total: squared distance of every observation from the grand mean,
# rounded to 3 decimals.
SSt = round(sum([(value - mean) ** 2 for value in sum_data_to_export]), 3)
print("Total sum of squares (SSt):", SSt)


In [None]:
# Eta-squared effect size: SS between divided by SS total, rounded to 3
# decimals.
if SSt == 0:
    # No variation at all: the ratio is undefined. Without this guard the
    # NaN result would fail both comparisons below and be labelled "large".
    eta2 = float('nan')
    interpretation = "undefined"
else:
    eta2 = round(SSb / SSt, 3)
    # Cut-offs 0.06 and 0.14 - presumably Cohen's conventional benchmarks;
    # the label is derived from the rounded value that is reported.
    if eta2 < 0.06:
        interpretation = "small"
    elif eta2 < 0.14:
        interpretation = "medium"
    else:
        interpretation = "large"

print("Eta-squared (η²):", eta2)
print("Effect size:", interpretation)


In [None]:
# Post-hoc pairwise comparisons with Sidak correction.
mc = MultiComparison(sum_data_to_export, group_variables)

# Tukey HSD table, printed for reference only.
result = mc.tukeyhsd()
print(result)

# multipletests expects uncorrected p-values. Tukey HSD p-values are already
# adjusted for multiple comparisons, so correcting them again would correct
# twice. The Sidak adjustment is therefore applied to raw p-values from
# independent two-sample t-tests on every pair of groups.
# NOTE(review): ttest_ind (pooled variance per pair) is an assumption about
# the intended raw test - confirm against the reporting requirements.
first_index, second_index = mc.pairindices
pairs = list(zip(first_index, second_index))
p_values = [ttest_ind(mc.datali[a], mc.datali[b]).pvalue for a, b in pairs]
sidak_adjusted = multipletests(p_values, method='sidak')[1]

# Values below 0.001 are reported as '<0.001', the rest rounded to 3 decimals.
formatted_sidak_adjusted = [
    '<0.001' if p < 0.001 else round(p, 3) for p in sidak_adjusted
]

# One entry per pair, labelled with the actual group codes, so the report
# works for any number of groups and any coding scheme.
labels = mc.groupsunique
report = ', '.join(
    f'group {labels[a]}-{labels[b]}: {formatted}'
    for (a, b), formatted in zip(pairs, formatted_sidak_adjusted)
)
print(f'Post hoc (Sidak): {report}')


<h3>Export</h3>

In [None]:
# Export the group codes and the simulated values as two columns of an Excel
# sheet (no index, no header row).
excel_data = [group_variables, sum_data_to_export]
print(excel_data)

# NOTE: `df` is also the name used for the degrees of freedom in an earlier
# cell; after this cell it refers to the exported DataFrame.
df = pd.DataFrame(excel_data).T
file_name = input("Name the database file: ")
print(file_name)

# '/content' is kept as the target when it exists (presumably the Google
# Colab working directory); elsewhere the file goes to the current working
# directory instead of failing on a missing folder.
output_dir = '/content' if Path('/content').is_dir() else str(Path.cwd())
output_path = f'{output_dir}/{file_name}.xlsx'
df.to_excel(output_path, index=False, header=False)