In [48]:
import ast

import numpy as np
from scipy import stats
import pandas as pd

In [14]:
# Load the pilot responses. The `history` and `choices` columns are read as
# strings here and decoded with ast.literal_eval in the following cell.
df = pd.read_csv("../data/responses_pilot.csv")

In [15]:
# Decode the Python literals stored as text in the CSV. Values that are already
# lists/dicts are passed through untouched so that re-running this cell is a
# no-op instead of raising ValueError from ast.literal_eval on parsed data.
df['history'] = df['history'].apply(
    lambda value: value if isinstance(value, (list, dict)) else ast.literal_eval(value)
)
df['choices'] = df['choices'].apply(
    lambda value: value if isinstance(value, (list, dict)) else ast.literal_eval(value)
)

In [18]:
# Length of each row's `history` list.
df["selection_count"] = df["history"].map(len)

In [35]:
def process_choices(choices):
    """
    Collect the source label of the selected headline for every choice entry.

    Parameters:
    -----------
    choices : dict
        Mapping whose values are dicts with a "selected" headline and an
        "options" dict mapping headline -> source label.

    Returns:
    --------
    list
        Source labels of the selected options, in iteration order of
        `choices`. An entry whose "selected" value matches none of its
        option headlines contributes nothing to the list.
    """
    picked_sources = []
    for entry in choices.values():
        winner = entry["selected"]
        picked_sources.extend(
            origin
            for headline, origin in entry["options"].items()
            if headline == winner
        )
    return picked_sources

In [36]:
# Source label of the selected option for each choice, one list per row.
df["preference"] = df["choices"].map(process_choices)

In [42]:
# Share of each row's preferences that are labelled "Generated".
df["proportion_generated"] = df["preference"].map(
    lambda picks: picks.count("Generated") / len(picks)
)

In [44]:
# Preview the first rows only instead of rendering the entire frame.
df.head()

Unnamed: 0,timestamp,user_id,study_id,session_id,history,choices,selection_count,preference,proportion_generated
0,2024-11-19 21:47:48.721095,673a4a9eff231ea869a8c6aa,66e06eccc83287b43e9b0d65,673d0576950b89620364101a,[DeSantis Signs Law Deleting Climate Change Fr...,{'radio_0': {'selected': '#### Arizona and Mis...,32,"[Original, Generated, Generated, Generated, Ge...",0.8
1,2024-11-19 21:48:00.359001,673cc8a4e99e56825ed9e687,66e06eccc83287b43e9b0d65,673d06aa35a2fb474a25c474,[There’s a Reason Trump Has Friends in High Pl...,{'radio_0': {'selected': '#### Ukraine's Surpr...,26,"[Generated, Original, Generated, Generated, Or...",0.6
2,2024-11-19 22:06:41.447576,6737228978f333308e56b2de,66e06eccc83287b43e9b0d65,673d06b9bdc94ce60ac75aa3,[There’s a Reason Trump Has Friends in High Pl...,{'radio_0': {'selected': '#### Ukraine’s Incur...,26,"[Generated, Original, Generated, Original, Gen...",0.6
3,2024-11-19 22:30:40.067589,5f6bd25cce33e015198c2aff,66e06eccc83287b43e9b0d65,673d0f6967c6c8302aabd5a9,"[In New York City’s Subway System, There’s Bea...",{'radio_0': {'selected': '#### Ukraine's Incur...,21,"[Generated, Original, Generated, Original, Ori...",0.4
4,2024-11-19 22:36:03.320968,656f2d44dd59a40e77b977e0,66e06eccc83287b43e9b0d65,673d113b5ab9110c14a6d950,"[In New York City’s Subway System, There’s Bea...",{'radio_0': {'selected': '#### Ukraine's Bold ...,10,"[Generated, Original, Generated, Original, Ori...",0.4


In [45]:
# Pairwise Pearson correlation (the pandas default method) between the two columns.
df.loc[:, ["selection_count", "proportion_generated"]].corr(method="pearson")

Unnamed: 0,selection_count,proportion_generated
selection_count,1.0,0.869657
proportion_generated,0.869657,1.0


In [46]:
def print_pairs(choices):
    """
    Print every option of every choice entry as "<headline> - <source>".

    Parameters:
    -----------
    choices : dict
        Mapping whose values are dicts holding an "options" dict of
        headline -> source label.

    A separator line is printed once after all entries. Returns None.
    """
    for entry in choices.values():
        for headline, origin in entry["options"].items():
            print(headline, origin, sep=" - ")
    print("============")

In [47]:
# print_pairs only prints and returns None, so iterate directly rather than
# using .apply(), which would echo a Series of None as the cell output.
for participant_choices in df["choices"]:
    print_pairs(participant_choices)

#### Arizona and Missouri Will Vote on Abortion in November
**Also, Hamas said it would not participate in new truce talks. Here’s the latest at the end of Tuesday.** - Original
#### Arizona and Missouri to Vote on Abortion Measures This November
**Meanwhile, Ukraine's latest moves have shifted the war's dynamics. Here’s the latest at the end of Sunday.** - Generated
#### Harris And Trump Vie For Control In The Midwest
**Meanwhile, extreme heat puts the Great Barrier Reef at risk. Here’s the latest at the end of Monday.** - Generated
#### Harris and Trump Battled for the Midwest
**Also, rain from Debby pounded the Carolinas. Here’s the latest at the end of Wednesday.** - Original
#### Biden Approved a Secret New Nuclear Strategy
**Meanwhile, in Chicago, Obama aims to resurrect a movement. Here’s the latest at the end of Thursday.** - Generated
#### In Chicago, Obama Aims to Resurrect a Movement
**Also, Biden approved a secret new nuclear strategy. Here’s the latest at the end of Tuesda

0    None
1    None
2    None
3    None
4    None
Name: choices, dtype: object

In [49]:
def analyze_binary_choices(option_a_counts, n_responses, alpha=0.05):
    """
    Analyze binary choice data where participants choose between options A and B.

    Three analyses are run:
      * a pooled two-sided exact binomial test of all choices against p = 0.5,
        with a normal-approximation (Wald) confidence interval clipped to [0, 1];
      * a chi-square test of homogeneity of the option-A rate across questions;
      * a two-sided exact binomial test against p = 0.5 for each question
        (these p-values are not corrected for multiple comparisons).

    Parameters:
    -----------
    option_a_counts : list or array
        Number of times option A was chosen for each question (integers)
    n_responses : int
        Total number of responses per question (the same for every question)
    alpha : float, optional
        Significance level for statistical tests (default: 0.05); also sets
        the confidence level (1 - alpha) of the pooled interval

    Returns:
    --------
    dict
        Dictionary with three keys:
        'pooled_analysis' (total_option_a, total_trials, pooled_proportion,
        p_value, significant, confidence_interval),
        'heterogeneity_test' (chi2_statistic, p_value, df, significant) and
        'individual_tests' (list of per-question dicts with question,
        option_a_count, proportion, p_value, significant).
    """
    # Convert inputs to numpy arrays
    option_a_counts = np.array(option_a_counts)
    n_questions = len(option_a_counts)
    total_trials = n_responses * n_questions

    # Pooled binomial test
    total_option_a = np.sum(option_a_counts)
    pooled_result = stats.binomtest(total_option_a, total_trials, p=0.5)

    # Calculate confidence interval for pooled proportion
    pooled_prop = total_option_a / total_trials
    se = np.sqrt(pooled_prop * (1 - pooled_prop) / total_trials)
    if se > 0:
        ci_lower, ci_upper = stats.norm.interval(1 - alpha, loc=pooled_prop, scale=se)
    else:
        # A pooled proportion of exactly 0 or 1 has zero standard error;
        # norm.interval would return NaN bounds, so use the degenerate interval.
        ci_lower = ci_upper = pooled_prop
    # The Wald interval can extend past the valid range of a proportion.
    ci_lower = np.maximum(ci_lower, 0.0)
    ci_upper = np.minimum(ci_upper, 1.0)

    # Chi-square test for homogeneity
    expected = pooled_prop * n_responses
    if 0 < pooled_prop < 1:
        chi2_stat = np.sum((option_a_counts - expected)**2 / expected +
                          (n_responses - option_a_counts - (n_responses - expected))**2 / (n_responses - expected))
        chi2_p = stats.chi2.sf(chi2_stat, df=n_questions-1)
    else:
        # Every question has the identical all-A or all-B outcome, so an
        # expected count is 0 and the statistic would be 0/0. Report "no
        # observed heterogeneity" instead of NaN.
        chi2_stat = 0.0
        chi2_p = 1.0

    # Individual binomial tests for each question
    individual_tests = []
    for i, count in enumerate(option_a_counts):
        result = stats.binomtest(count, n_responses, p=0.5)
        individual_tests.append({
            'question': i + 1,
            'option_a_count': count,
            'proportion': count / n_responses,
            'p_value': result.pvalue,
            'significant': result.pvalue < alpha
        })

    # Compile results
    results = {
        'pooled_analysis': {
            'total_option_a': total_option_a,
            'total_trials': total_trials,
            'pooled_proportion': pooled_prop,
            'p_value': pooled_result.pvalue,
            'significant': pooled_result.pvalue < alpha,
            'confidence_interval': (ci_lower, ci_upper)
        },
        'heterogeneity_test': {
            'chi2_statistic': chi2_stat,
            'p_value': chi2_p,
            'df': n_questions - 1,
            'significant': chi2_p < alpha
        },
        'individual_tests': individual_tests
    }

    return results

def print_analysis(results, alpha=0.05):
    """
    Print formatted analysis results.

    Parameters:
    -----------
    results : dict
        Results dictionary from analyze_binary_choices
    alpha : float, optional
        Significance level that was passed to analyze_binary_choices
        (default: 0.05). It is used only to label the confidence interval
        as a (1 - alpha) interval; the printed numbers come from `results`.
    """
    print("\nPOOLED ANALYSIS")
    print("===============")
    p = results['pooled_analysis']
    print(f"Total Option A selections: {p['total_option_a']} out of {p['total_trials']}")
    print(f"Pooled proportion: {p['pooled_proportion']:.1%}")
    print(f"P-value: {p['p_value']:.6f}")
    print(f"Statistically significant: {p['significant']}")
    # Derive the label from alpha instead of hard-coding 95%.
    ci_level = (1 - alpha) * 100
    print(f"{ci_level:g}% CI: ({p['confidence_interval'][0]:.1%} to {p['confidence_interval'][1]:.1%})")

    print("\nHETEROGENEITY TEST")
    print("==================")
    h = results['heterogeneity_test']
    print(f"Chi-square statistic: {h['chi2_statistic']:.3f} (df={h['df']})")
    print(f"P-value: {h['p_value']:.6f}")
    print(f"Questions significantly different: {h['significant']}")

    print("\nINDIVIDUAL QUESTIONS")
    print("===================")
    for test in results['individual_tests']:
        print(f"\nQuestion {test['question']}:")
        print(f"Option A selections: {test['option_a_count']}")
        print(f"Proportion: {test['proportion']:.1%}")
        print(f"P-value: {test['p_value']:.6f}")
        print(f"Significant: {test['significant']}")

In [53]:
# Number of "Generated" entries in each row's preference list.
df["count_generated"] = df["preference"].map(lambda picks: picks.count("Generated"))

In [56]:
prefs = df.preference

# For each question position, count the rows whose preference at that position
# is "Generated". The number of questions is taken from the first row by
# position (.iloc) so it does not depend on the index containing label 0.
# NOTE(review): assumes every preference list is at least as long as the
# first one and is aligned by question position — confirm.
question_gen_counts = [
    sum(1 for row_prefs in prefs if row_prefs[question_idx] == "Generated")
    for question_idx in range(len(prefs.iloc[0]))
]

In [60]:
# One response per row of `prefs` for every question.
res = analyze_binary_choices(
    option_a_counts=question_gen_counts,
    n_responses=len(prefs),
)

In [62]:
# Print the pooled, heterogeneity, and per-question sections of `res`.
print_analysis(res)


POOLED ANALYSIS
Total Option A selections: 14 out of 25
Pooled proportion: 56.0%
P-value: 0.690038
Statistically significant: False
95% CI: (36.5% to 75.5%)

HETEROGENEITY TEST
Chi-square statistic: 8.766 (df=4)
P-value: 0.067216
Questions significantly different: False

INDIVIDUAL QUESTIONS

Question 1:
Option A selections: 4
Proportion: 80.0%
P-value: 0.375000
Significant: False

Question 2:
Option A selections: 1
Proportion: 20.0%
P-value: 0.375000
Significant: False

Question 3:
Option A selections: 5
Proportion: 100.0%
P-value: 0.062500
Significant: False

Question 4:
Option A selections: 2
Proportion: 40.0%
P-value: 1.000000
Significant: False

Question 5:
Option A selections: 2
Proportion: 40.0%
P-value: 1.000000
Significant: False
