In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from scipy.stats import shapiro

## Normality of Residuals

In [None]:
# Simulate three normally distributed groups that share a unit standard
# deviation and differ only in their mean (seed fixed for reproducibility).
np.random.seed(42)
group1, group2, group3 = (
    np.random.normal(loc=center, scale=1, size=30) for center in (5, 5.5, 6)
)

# Long-format frame: one row per observation, labelled with its group
data = pd.DataFrame({
    'value': np.concatenate((group1, group2, group3)),
    'group': [label for label in ('Group 1', 'Group 2', 'Group 3') for _ in range(30)],
})

In [None]:
# One-way ANOVA expressed as an OLS regression of `value` on the `group` factor
model = sm.OLS.from_formula(formula='value ~ group', data=data).fit()

# Per-observation residuals of the fitted model
residuals = model.resid

In [None]:
# Q-Q plot of the residuals against the normal distribution.
# sm.qqplot returns the figure it draws on, so title and save go through it.
qq_fig = sm.qqplot(residuals, line='s')
qq_fig.axes[0].set_title('Q-Q Plot of Residuals (Normal Data)')
qq_fig.savefig('qqplot_good.jpg')
plt.show()

In [None]:
# Shapiro-Wilk normality test on the residuals; only the p-value is reported
shapiro_result = shapiro(residuals)
stat = shapiro_result[0]
p_value = shapiro_result[1]
print(f"Shapiro-Wilk Test: p-value = {p_value}")

### Negative Example - Normality of Residuals

In [None]:
# Simulate three exponentially distributed (right-skewed) groups with
# increasing scale; the seed is fixed so the example is reproducible.
np.random.seed(42)
group1, group2, group3 = (
    np.random.exponential(scale=spread, size=30) for spread in (1, 1.5, 2)
)

# Long-format frame: one row per observation, labelled with its group
data = pd.DataFrame({
    'value': np.concatenate((group1, group2, group3)),
    'group': [label for label in ('Group 1', 'Group 2', 'Group 3') for _ in range(30)],
})

# One-way ANOVA as an OLS fit on the group factor; keep its residuals
model = sm.OLS.from_formula(formula='value ~ group', data=data).fit()
residuals = model.resid

# Q-Q plot of the residuals, titled and saved through the returned figure
qq_fig = sm.qqplot(residuals, line='s')
qq_fig.axes[0].set_title('Q-Q Plot of Residuals (Non-normal Data)')
qq_fig.savefig('qqplot_bad.svg')
plt.show()

# Shapiro-Wilk normality test on the residuals; only the p-value is reported
shapiro_result = shapiro(residuals)
stat = shapiro_result[0]
p_value = shapiro_result[1]
print(f"Shapiro-Wilk Test: p-value = {p_value}")

## Homogeneity of Variances

In [None]:
# Homoscedastic reference example.
# Regenerate equal-variance normal data and refit inside this cell. Otherwise
# `model` and `residuals` would still hold the fit on the exponential data from
# the preceding negative example, whose groups have unequal spread, and the
# "good" plot would not show homoscedastic residuals.
np.random.seed(42)
group1 = np.random.normal(loc=5, scale=1, size=30)
group2 = np.random.normal(loc=5.5, scale=1, size=30)
group3 = np.random.normal(loc=6, scale=1, size=30)

# Combine data into a DataFrame
data = pd.DataFrame({
    'value': np.concatenate([group1, group2, group3]),
    'group': ['Group 1'] * 30 + ['Group 2'] * 30 + ['Group 3'] * 30
})

# Fit an ANOVA model
model = sm.OLS.from_formula('value ~ group', data).fit()
residuals = model.resid

# Fitted values
fitted_values = model.fittedvalues

# Residual plot on a fresh figure so nothing from an earlier cell is drawn over
fig, ax = plt.subplots()
ax.scatter(fitted_values, residuals)
ax.axhline(0, color='red', linestyle='--')
ax.set_title('Residuals vs Fitted Values (Homoscedasticity Data)')
ax.set_xlabel('Fitted Values')
ax.set_ylabel('Residuals')
# Same y-range as the heteroscedastic plot so the two spreads are comparable
ax.set_ylim(-7, 7)
fig.savefig('residual_plot_good.jpg')
plt.show()

### Negative Example - Homogeneity of Variances

In [None]:
# Simulate three normal groups whose standard deviation grows from 1 to 3
# (seed fixed for reproducibility); the draws happen in group order.
np.random.seed(42)
group1, group2, group3 = (
    np.random.normal(loc=center, scale=spread, size=30)
    for center, spread in ((5, 1), (5.5, 2), (6, 3))
)

# Long-format frame: one row per observation, labelled with its group
data = pd.DataFrame({
    'value': np.concatenate((group1, group2, group3)),
    'group': [label for label in ('Group 1', 'Group 2', 'Group 3') for _ in range(30)],
})

# One-way ANOVA as an OLS fit on the group factor
model = sm.OLS.from_formula(formula='value ~ group', data=data).fit()
residuals = model.resid
fitted_values = model.fittedvalues

# Residuals against fitted values, drawn on the current axes
ax = plt.gca()
ax.scatter(fitted_values, residuals)
ax.axhline(0, color='red', linestyle='--')
ax.set_title('Residuals vs Fitted Values (Heteroscedastic Data)')
ax.set_xlabel('Fitted Values')
ax.set_ylabel('Residuals')
ax.set_ylim(-7, 7)
ax.figure.savefig('residual_plot_bad.svg')
plt.show()

## Independence of Observations

In [None]:
# Independent reference example.
# Regenerate i.i.d. equal-variance normal data and refit inside this cell.
# Otherwise `residuals` would still come from the heteroscedastic fit in the
# preceding negative example, and the "good" plot would show spread growing
# with observation order.
np.random.seed(42)
group1 = np.random.normal(loc=5, scale=1, size=30)
group2 = np.random.normal(loc=5.5, scale=1, size=30)
group3 = np.random.normal(loc=6, scale=1, size=30)

# Combine data into a DataFrame
data = pd.DataFrame({
    'value': np.concatenate([group1, group2, group3]),
    'group': ['Group 1'] * 30 + ['Group 2'] * 30 + ['Group 3'] * 30
})

# Fit an ANOVA model
model = sm.OLS.from_formula('value ~ group', data).fit()
residuals = model.resid

# Residuals ordered by observation, on a fresh figure
fig, ax = plt.subplots()
ax.plot(residuals, marker='o', linestyle='--')
ax.set_title('Residuals by Observation Order (Independent Data)')
ax.set_xlabel('Observation Order')
ax.set_ylabel('Residuals')
fig.savefig('residual_plot_order_good.jpg')
plt.show()

### Negative Example - Independence of Observations

In [None]:
# Build a serially dependent series: the running sum of 90 standard-normal
# steps (a random walk), so each value carries over from the previous one.
np.random.seed(42)
temporal_data = np.random.normal(loc=0, scale=1, size=90).cumsum()
groups = [label for label in ('Group 1', 'Group 2', 'Group 3') for _ in range(30)]

# Long-format frame: consecutive blocks of 30 observations form the groups
data = pd.DataFrame({'value': temporal_data, 'group': groups})

# One-way ANOVA as an OLS fit on the group factor; keep its residuals
model = sm.OLS.from_formula(formula='value ~ group', data=data).fit()
residuals = model.resid

# Residuals in observation order, drawn on the current axes
ax = plt.gca()
ax.plot(residuals, marker='o', linestyle='--')
ax.set_title('Residuals by Observation Order (Temporal Dependence)')
ax.set_xlabel('Observation Order')
ax.set_ylabel('Residuals')
ax.figure.savefig('residual_plot_order_bad.svg')
plt.show()