In [None]:
import torch
import numpy as np


# Load both saved correlation collections and convert each to a NumPy array.
# Later cells index these arrays as `[:, i]`, one column per entry in `columns`.
cluster_corrs, random_cluster_corrs = (
    np.array(torch.load(path))
    for path in (
        '../bootstrap_results/corrs.pt',
        '../bootstrap_results/corrs_random.pt',
    )
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# One histogram panel per name in `columns`, laid out on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
columns = ["Tactfulness", "Helpfulness", "Clearness", "Astuteness"]

# Shared styling applied to every histogram.
hist_params = {"bins": 20, "alpha": 0.75, "color": "blue", "edgecolor": "black"}

for i, title in enumerate(columns):
    # Slice the i-th column once and reuse it for the plot and the statistics.
    values = cluster_corrs[:, i]

    # `axes.flat` walks the grid row by row, so index i lands on (i // 2, i % 2).
    ax = axes.flat[i]
    ax.hist(values, **hist_params)
    ax.set_title(title, fontsize=28, fontweight="bold")
    ax.set_xlabel("Correlation Values", fontsize=14)
    ax.set_ylabel("Frequency", fontsize=12)

    mean, std = np.mean(values), np.std(values)
    # The 2.5th and 97.5th percentiles bound the central 95% of the values.
    ci_lower, ci_upper = np.percentile(values, [2.5, 97.5])

    print(f"Column: {title}")
    print(f"Mean: {mean:.2f}")
    print(f"Std: {std:.2f}")
    print(f"95% CI: ({ci_lower:.2f}, {ci_upper:.2f})")
    print()

fig.tight_layout()
fig.savefig("distribution_plots.png", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
import numpy as np
from scipy.stats import ttest_ind
import statsmodels.stats.power as smp

# For every column, compare `cluster_corrs` against `random_cluster_corrs`
# with a two-sample t-test and print the means alongside the test result.
for i, col in enumerate(columns):
    ours = cluster_corrs[:, i]
    baseline = random_cluster_corrs[:, i]

    # equal_var=False selects Welch's t-test, which does not assume that the
    # two samples share a common variance.
    t_stat, p_value = ttest_ind(ours, baseline, equal_var=False)

    mean1 = np.mean(ours)
    mean2 = np.mean(baseline)

    print(col)
    print(f"Our mean: {mean1:.3f}, Random mean: {mean2:.3f}")
    print(f"t-statistic Welch t-test: {t_stat:.2f}")
    # Three significant digits (not three decimals): a fixed ".3f" would print
    # every p-value below 5e-4 as "0.000" and hide its magnitude.
    print(f"p-value Welch t-test: {p_value:.3g}")
    print()

In [None]:
import numpy as np
from scipy.stats import ttest_1samp

def nice_value(value):
    """Format a number as ``"<coefficient> * 10^(<exponent>)"``.

    The coefficient is rendered with two decimals and the exponent as a plain
    integer (no sign padding or leading zeros), e.g. ``1.23 * 10^(-4)``.

    Args:
        value: A real number accepted by the ``.2e`` format specifier.

    Returns:
        The formatted string. Non-finite inputs (NaN, +/-inf) have no
        exponent part, so their plain text form (``"nan"``, ``"inf"``,
        ``"-inf"``) is returned instead of raising.
    """
    formatted_number = f"{value:.2e}"
    # partition() never fails to unpack: when there is no 'e' (nan/inf),
    # the separator comes back empty and we fall through to the raw text.
    coefficient, separator, exponent = formatted_number.partition('e')
    if not separator:
        return formatted_number
    # int() strips the sign padding and leading zeros: "+04" -> 4, "-04" -> -4.
    return f"{coefficient} * 10^({int(exponent)})"

# For every column, test whether the mean of `random_cluster_corrs` differs
# from 0 using a one-sample t-test, and print the result in scientific form.
for i, col in enumerate(columns):
    # The second argument is the population mean under the null hypothesis.
    t_stat, p_value = ttest_1samp(random_cluster_corrs[:, i], 0)

    print(col)
    # Labels name the test actually run: ttest_1samp is a one-sample t-test,
    # not Welch's two-sample test.
    print(f"t-statistic one-sample t-test: {nice_value(t_stat)}")
    print(f"p-value one-sample t-test: {nice_value(p_value)}")
    print()