In [None]:
import pandas as pd

# Location of the dataset (a relative path, resolved against the working directory)
file_path = "data.csv"

# Parse the CSV into a DataFrame
df = pd.read_csv(file_path)

# Quick schema check: list the column labels
print(df.columns)


In [2]:
import pandas as pd
import scipy.stats as stats
import numpy as np

# Chance accuracy: probability of a correct answer when one of the
# 6 emotions is picked at random
chance_level = 1 / 6

# Read the dataset into a DataFrame
file_path = "data.csv"
df = pd.read_csv(file_path)

# Function to compute Cohen's d
def cohens_d(sample, pop_mean):
    """Return the one-sample Cohen's d of ``sample`` relative to ``pop_mean``.

    The effect size is ``(mean(sample) - pop_mean) / sd``, where ``sd`` is the
    sample standard deviation computed with ``ddof=1``.

    Parameters
    ----------
    sample : array-like
        Observed values.
    pop_mean : float
        Reference value the sample mean is compared against.

    Returns
    -------
    float
        The effect size, or ``np.nan`` when the standard deviation is not
        strictly positive (e.g. constant data).
    """
    # One-sample design: this is simply the sample SD, nothing is pooled.
    sd = np.std(sample, ddof=1)
    if sd > 0:
        return (np.mean(sample) - pop_mean) / sd
    # Zero (or NaN) spread: the standardized difference is undefined.
    return np.nan

# Compute t-tests and Cohen's d for each emotion.
# For every 'Emotion' group, the 'Correct' values are compared against
# chance_level with a one-sample t-test, and the matching effect size is
# stored next to the group's mean accuracy.
# NOTE(review): six tests are run without a multiple-comparison correction,
# and rows are treated as independent observations — confirm this matches
# the study design.
t_test_results = {}
for emotion, group in df.groupby('Emotion'):
    correct = group['Correct']  # look the column up once per group
    t_stat, p_value = stats.ttest_1samp(correct, chance_level)
    effect_size = cohens_d(correct, chance_level)
    t_test_results[emotion] = {
        't_stat': t_stat,
        'p_value': p_value,
        'mean_accuracy': correct.mean(),
        'cohens_d': effect_size
    }

# Print t-test results for each emotion.
# p-values are shown with 3 significant digits (".3g") instead of 3 decimals:
# with ".3f" every small p-value is rendered as "0.000", which is not a
# possible p-value and hides the actual magnitude.
print("T-test results (accuracy vs. chance level) per emotion:")
for emotion, result in t_test_results.items():
    print(f"{emotion}: t={result['t_stat']:.3f}, p={result['p_value']:.3g}, "
          f"mean={result['mean_accuracy']:.3f}, d={result['cohens_d']:.3f}")

# Overall t-test collapsed across emotions: all 'Correct' values are
# compared against chance_level in a single one-sample t-test.
t_stat_overall, p_value_overall = stats.ttest_1samp(df['Correct'], chance_level)
effect_size_overall = cohens_d(df['Correct'], chance_level)

# The p-value uses 3 significant digits (".3g"); ".3f" would print any
# small p-value as the impossible "0.000".
print("\nT-test collapsed across emotions:")
print(f"t={t_stat_overall:.3f}, p={p_value_overall:.3g}, "
      f"mean accuracy={df['Correct'].mean():.3f}, d={effect_size_overall:.3f}")

# F-test: one-way ANOVA to check if accuracy differs significantly across emotions.
# The data are grouped once and the per-emotion 'Correct' series are reused
# for both the F-test and the effect size.
correct_by_emotion = [group['Correct'] for _, group in df.groupby('Emotion')]
anova_result = stats.f_oneway(*correct_by_emotion)

# Compute eta-squared effect size for ANOVA: SS_between / SS_total.
grand_mean = df['Correct'].mean()  # loop-invariant, so computed once
ss_between = sum(
    len(scores) * (scores.mean() - grand_mean) ** 2
    for scores in correct_by_emotion
)
# Vectorized sum instead of iterating the Series in Python; skipna=False
# propagates missing values exactly like the previous element-wise sum did.
ss_total = ((df['Correct'] - grand_mean) ** 2).sum(skipna=False)
eta_squared = ss_between / ss_total if ss_total > 0 else np.nan

# The p-value uses 3 significant digits (".3g"); ".3f" would print any
# small p-value as the impossible "0.000".
print("\nF-test (ANOVA) results:")
print(f"F={anova_result.statistic:.3f}, p={anova_result.pvalue:.3g}, η²={eta_squared:.3f}")


T-test results (accuracy vs. chance level) per emotion:
Anger: t=15.242, p=0.000, mean=0.313, d=0.315
Disgust: t=-6.766, p=0.000, mean=0.121, d=-0.141
Fear: t=14.318, p=0.000, mean=0.302, d=0.294
Joy: t=-9.142, p=0.000, mean=0.107, d=-0.195
Neutral: t=25.375, p=0.000, mean=0.426, d=0.524
Sadness: t=7.330, p=0.000, mean=0.231, d=0.153

T-test collapsed across emotions:
t=23.071, p=0.000, mean accuracy=0.252, d=0.196

F-test (ANOVA) results:
F=196.415, p=0.000, η²=0.066
