# Statistical Analysis

### Includes a normality test (Shapiro-Wilk), a paired t-test or Wilcoxon signed-rank test, and Cohen's d.

In [3]:
import numpy as np 
import matplotlib.pyplot as plt 
import scipy.stats as stats
from scipy.stats import shapiro, ttest_rel, wilcoxon, norm

In [4]:
#Analyze multiple metrics and plot together
def analyze_metrics(metrics):
    """Test and plot the paired before/after differences of several metrics.

    For every metric the paired differences ``after - before`` are checked
    for normality with the Shapiro-Wilk test. When the differences look
    normal (p > 0.05) a paired t-test is run; otherwise the Wilcoxon
    signed-rank test is used. Both p-values are printed, and the
    distribution of the differences is drawn in one subplot per metric with
    a kernel density estimate, a fitted normal curve and the mean.

    Parameters
    ----------
    metrics : iterable of (before, after, label)
        ``before`` and ``after`` are equally long array-likes of paired
        measurements; ``label`` is the text used in the printout and as the
        subplot title.

    Raises
    ------
    ValueError
        If ``metrics`` is empty.
    """
    metrics = list(metrics)
    if not metrics:
        raise ValueError(
            "metrics must contain at least one (before, after, label) entry"
        )

    # squeeze=False always returns a 2-D array of axes, so a single metric
    # needs no special handling.
    _, axes_grid = plt.subplots(
        1, len(metrics), figsize=(5 * len(metrics), 5), squeeze=False
    )

    for ax, (before, after, label) in zip(axes_grid[0], metrics):
        # Coerce to arrays so plain Python lists can be subtracted.
        before = np.asarray(before, dtype=float)
        after = np.asarray(after, dtype=float)
        diff = after - before

        # Normality test on the paired differences decides which test to use.
        _, p_shapiro = shapiro(diff)
        print(f"\n[{label}] Shapiro-Wilk test p = {p_shapiro:.4f}")

        if p_shapiro > 0.05:
            _, p_value = ttest_rel(after, before)
            test_name = "Paired t-test"
        else:
            _, p_value = wilcoxon(after, before)
            test_name = "Wilcoxon signed-rank test"

        print(f"[{label}] {test_name} p = {p_value:.4f}")

        # Histogram drawn with matplotlib/scipy directly so the function only
        # relies on modules this file imports.
        ax.hist(diff, bins="auto", density=True,
                color='skyblue', edgecolor='black')

        mu = np.mean(diff)
        sigma = np.std(diff)

        # Both curves are undefined for zero-variance data, so skip them then.
        if sigma > 0:
            kde_x = np.linspace(diff.min(), diff.max(), 200)
            ax.plot(kde_x, stats.gaussian_kde(diff)(kde_x), color='steelblue')

            # Normal curve fitted to the differences, spanning +/- 3 sigma.
            x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 100)
            ax.plot(x, norm.pdf(x, mu, sigma), 'r--', label='Normal Curve')

        ax.axvline(mu, color='darkred', linestyle='-',
                   label=f'Mean = {mu:.2f}')

        ax.set_title(f"{label}", fontsize=16)
        ax.set_xlabel("Difference (After - Before)", fontsize=14)
        ax.set_ylabel("Density", fontsize=14)
        ax.legend()

    plt.tight_layout()
    plt.show()


#Cohen's d
def cohen_d_paired(before, after):
    """Return Cohen's d for paired samples.

    The effect size is the mean of the paired differences ``after - before``
    divided by their sample standard deviation (``ddof=1``).

    Parameters
    ----------
    before, after : array-like
        Paired measurements of identical shape.

    Returns
    -------
    float
        Cohen's d. The result is NaN or infinite when the differences have
        zero variance, because the division is left to NumPy.

    Raises
    ------
    ValueError
        If ``before`` and ``after`` do not have the same shape.
    """
    # Coerce to arrays so plain Python lists can be subtracted.
    before = np.asarray(before, dtype=float)
    after = np.asarray(after, dtype=float)
    if before.shape != after.shape:
        # Guard against silent broadcasting of mismatched paired samples.
        raise ValueError("before and after must have the same shape")

    diff = after - before
    return np.mean(diff) / np.std(diff, ddof=1)

#### Normality test (independent)

In [5]:
def check_normal_distribution(values, label=""):
    """Run a Shapiro-Wilk test on *values* and show a histogram and Q-Q plot.

    The two plots are drawn side by side in one figure. The returned dict
    has two entries whose keys are prefixed with *label*: the Shapiro-Wilk
    p-value, and whether that p-value exceeds 0.05.
    """
    samples = np.array(values)

    # Only the p-value of the Shapiro-Wilk result is reported.
    _, p_value = stats.shapiro(samples)

    plt.figure(figsize=(12, 4))

    # Left panel: histogram of the values.
    plt.subplot(1, 2, 1)
    plt.hist(samples, bins=20, alpha=0.7, edgecolor='black')
    plt.title(f'{label} Histogram')

    # Right panel: Q-Q plot against the normal distribution.
    plt.subplot(1, 2, 2)
    stats.probplot(samples, dist="norm", plot=plt)
    plt.title(f'{label} Q-Q Plot')

    plt.tight_layout()
    plt.show()

    summary = {}
    summary[f"{label}_Shapiro_p_value"] = p_value
    summary[f"{label}_Normal_(Shapiro)"] = p_value > 0.05
    return summary