# In [None]:
# Import necessary libraries
import numpy as np
import scipy.stats as stats
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Q1: Assumptions required to use ANOVA
# One list entry per numbered assumption; the empty first and last entries
# give the joined text its leading and trailing newline.
assumptions = "\n".join([
    "",
    "1. **Independence:** Observations must be independent of each other.",
    "2. **Normality:** Data within each group should be normally distributed.",
    "3. **Homogeneity of Variance:** Variance within each group should be approximately equal.",
    "",
])

# print() inserts its default single-space separator between the heading
# and the body text.
print(
    "Q1: Assumptions required to use ANOVA\n",
    assumptions,
)

# Q2: Types of ANOVA
# Adjacent string literals are concatenated by the parser; every piece
# carries its own newline so the result starts and ends with "\n".
types_of_anova = (
    "\n"
    "1. **One-Way ANOVA:** Comparing means of three or more independent groups based on one factor.\n"
    "2. **Two-Way ANOVA:** Comparing means across two factors and analyzing their interaction.\n"
    "3. **Repeated Measures ANOVA:** Used when the same subjects are measured multiple times under different conditions.\n"
)

# Heading and body are separate print() arguments (default separator: one space).
print(
    "Q2: Types of ANOVA\n",
    types_of_anova,
)

# Q3: Partitioning of Variance in ANOVA
# An intro sentence followed by the two variance components, joined with
# newlines; the empty first/last entries add the surrounding newlines.
partitioning_variance = "\n".join([
    "",
    "In ANOVA, the total variance is partitioned into:",
    "1. **Between-group variance (Explained Variance):** Due to differences between group means.",
    "2. **Within-group variance (Unexplained Variance):** Due to differences within the groups.",
    "",
])

# Heading and body are separate print() arguments (default separator: one space).
print(
    "Q3: Partitioning of Variance in ANOVA\n",
    partitioning_variance,
)

# Q4: Calculate SST, SSE, and SSR in one-way ANOVA
# Naming used in this section (it differs from some textbooks):
#   SST - total sum of squares around the overall mean
#   SSE - *explained* (between-group) sum of squares
#   SSR - *residual* (within-group) sum of squares
# With these definitions SST = SSE + SSR.
data = {
    'Group1': [10, 12, 13, 15],
    'Group2': [22, 23, 25, 27],
    'Group3': [30, 31, 35, 37]
}

# Flatten the data
all_data = [item for sublist in data.values() for item in sublist]

# Overall mean
overall_mean = np.mean(all_data)

# Mean of each group, computed once so it is not re-evaluated for every
# observation inside the sums below.
group_means = {group: np.mean(values) for group, values in data.items()}

# Calculate SST, SSE, and SSR
sst = sum((x - overall_mean) ** 2 for x in all_data)
sse = sum(
    len(values) * (group_means[group] - overall_mean) ** 2
    for group, values in data.items()
)
ssr = sum(
    sum((x - group_means[group]) ** 2 for x in values)
    for group, values in data.items()
)

print(f"Q4: Total Sum of Squares (SST): {sst}")
print(f"Q4: Explained Sum of Squares (SSE): {sse}")
print(f"Q4: Residual Sum of Squares (SSR): {ssr}")

# Q5: Two-Way ANOVA main effects and interaction effects calculation
# Sample data: 30 rows. 'Group' holds ten consecutive rows each of A, B, C;
# 'Experience' alternates Novice/Experienced row by row, so every
# Group x Experience cell has five observations.
sample_columns = {
    'Group': np.repeat(['A', 'B', 'C'], 10),
    'Experience': np.tile(['Novice', 'Experienced'], 15),
    'Time': [
        22, 24, 23, 21, 20, 19, 25, 26, 27, 28,  # rows labelled Group A
        15, 16, 14, 13, 12, 11, 17, 18, 19, 20,  # rows labelled Group B
        25, 27, 28, 29, 30, 31, 32, 33, 34, 35,  # rows labelled Group C
    ],
}
df = pd.DataFrame(sample_columns)

# Two-way ANOVA using statsmodels: both main effects plus their interaction,
# with each factor wrapped in C(...) so it is treated as categorical.
anova_formula = 'Time ~ C(Group) + C(Experience) + C(Group):C(Experience)'
model = ols(anova_formula, data=df).fit()

# typ=2 asks anova_lm for the Type II ANOVA table.
anova_table = anova_lm(model, typ=2)

print("\nQ5: Two-Way ANOVA results:\n", anova_table)

# Q6: Interpret F-statistic and p-value
f_stat = 5.23
p_value = 0.02
alpha = 0.05  # significance level the p-value is compared against

# Derive the verdict from the numbers instead of hard-coding it, so the
# text cannot drift out of sync with f_stat / p_value.
if p_value < alpha:
    conclusion = "there are significant differences between the groups"
else:
    conclusion = "there are no significant differences between the groups"

interpretation = f"""
Q6: Interpretation of F-statistic and p-value:
- An F-statistic of {f_stat} with a p-value of {p_value} indicates that {conclusion} at the {alpha} significance level.
"""

print(interpretation)
