In [18]:
# Import required libraries
import pandas as pd
from scipy.stats import mannwhitneyu, chisquare, fisher_exact
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
import numpy as np

In [19]:
# Fetching the main analysis CSV file from GitHub
url = "https://raw.githubusercontent.com/AustinHendersonEconomist/CodeExamples/main/TDIE_data_main.csv"
response = requests.get(url)
df = pd.read_csv(io.StringIO(response.text))

In [20]:
# 1. Chi square-test
obs1 = [14, 6, 15, 20, 30, 33]
obs2 = [11, 12, 27, 23, 27, 25]
chisq1, p1 = chisquare(obs1)
chisq2, p2 = chisquare(obs2)
print(f"Chi-squared test for obs1: chisq={round(chisq1, 2)}, p={round(p1, 2)}")
print(f"Chi-squared test for obs2: chisq={round(chisq2, 2)}, p={round(p2, 2)}")

Chi-squared test for obs1: chisq=26.71, p=0.0
Chi-squared test for obs2: chisq=13.1, p=0.02


In [21]:
# 2. Mann-Whitney U-test
grouped_dieroll = [df['dieroll'][df['treatment'] == treatment] for treatment in df['treatment'].unique()]
U_stat, p_value = mannwhitneyu(*grouped_dieroll)
print(f"Mann-Whitney U-test for dieroll by treatment: U={U_stat}, p={p_value}")

Mann-Whitney U-test for dieroll by treatment: U=6473.5, p=0.11611136702850341


In [22]:
# 3. Fisher's exact test
oddsratio, p_value = fisher_exact([[32, 84], [25, 100]])
print(f"Fisher's exact test: Odds Ratio={oddsratio}, p={p_value}")

Fisher's exact test: Odds Ratio=1.5238095238095237, p=0.17554485483474835


In [23]:
# 4. Effect size
n1 = len(grouped_dieroll[0])
n2 = len(grouped_dieroll[1])
effect_size = U_stat / np.sqrt(n1 * n2)
print(f"Effect Size: {effect_size}")

Effect Size: 53.52925695754939


In [30]:
# Fetching the meta-analysis .csv from GitHub
url2 = "https://raw.githubusercontent.com/AustinHendersonEconomist/CodeExamples/main/diemeta.csv"
response2 = requests.get(url2)
df2 = pd.read_csv(io.StringIO(response2.text))

# Computing Cohen's d for each study
df2['cohen_d'] = (df2['mean1'] - df2['mean2']) / np.sqrt((df2['sd1']**2 + df2['sd2']**2) / 2)

# Computing the weight for each study in fixed-effects meta-analysis
df2['weight'] = 1 / ((df2['sd1']**2/df2['n1'] + df2['sd2']**2/df2['n2']) / 2)

# Computing the pooled Cohen's d
pooled_cohen_d = round(np.sum(df2['cohen_d'] * df2['weight']) / np.sum(df2['weight']), 2)

# Computing the standard error of the pooled Cohen's d
se_pooled_cohen_d = round(np.sqrt(1 / np.sum(df2['weight'])), 2)

print(f'Pooled Cohen\'s d: {pooled_cohen_d}')
print(f'Standard error: {se_pooled_cohen_d}')

# 95% Confidence interval
ci_low = round(pooled_cohen_d - 1.96 * se_pooled_cohen_d, 2)
ci_high = round(pooled_cohen_d + 1.96 * se_pooled_cohen_d, 2)

print(f'95% CI: ({ci_low}, {ci_high})')


Pooled Cohen's d: -0.28
Standard error: 0.12
95% CI: (-0.52, -0.04)
