# Power Analysis for ANOVA

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import f_oneway
from numpy.random import multivariate_normal
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Simulation parameters
np.random.seed(42)  # for reproducibility
n_groups = 3  # Number of groups
n_samples = 50  # Number of samples per group
n_simulations = 1000  # Number of simulations
alpha = 0.05  # Significance level

# Define the means and covariance matrix for the groups
means = [0, 1, 2]  # Mean for each group
cov_matrix = np.array([[1, 0.5, 0.2],
                       [0.5, 1, 0.3],
                       [0.2, 0.3, 1]])  # Covariance matrix

# Function to simulate data
def simulate_data(means, cov_matrix, n_samples, n_groups):
    data = []
    group_labels = []
    for i in range(n_groups):
        group_data = multivariate_normal(mean=[means[i]]*n_samples, cov=cov_matrix, size=n_samples).T
        data.append(group_data)
        group_labels += [i] * n_samples
    return np.concatenate(data, axis=0), group_labels

# Function to perform ANOVA test
def run_anova(data, labels):
    df = pd.DataFrame({"data": data, "group": labels})
    model = ols('data ~ C(group)', data=df).fit()
    anova_table = sm.stats.anova_lm(model, typ=2)
    p_value = anova_table['PR(>F)'][0]
    return p_value

# Function to conduct power analysis
def power_analysis(n_simulations, means, cov_matrix, n_samples, n_groups, alpha):
    false_negative_count = 0
    for _ in range(n_simulations):
        data, labels = simulate_data(means, cov_matrix, n_samples, n_groups)
        p_value = run_anova(data, labels)
        
        # If there is a difference in group means but ANOVA doesn't detect it, increase false negatives
        if p_value > alpha:
            false_negative_count += 1
    
    power = false_negative_count / n_simulations
    return power

# Run the simulation and power analysis
power = power_analysis(n_simulations, means, cov_matrix, n_samples, n_groups, alpha)
print(f"Power (Percentage of times ANOVA didn't detect a significant difference): {power*100:.2f}%")


ValueError: mean and cov must have same length