In [20]:
import pandas as pd
import sqlite3
import io
from scipy.stats import chi2_contingency, binomtest

# --- 1. Data Loading and Preparation ---

# Create an in-memory SQLite database from the image data
db_connection = sqlite3.connect('validation-1.db')
db_cursor = db_connection.cursor()

# Load data from the database into a DataFrame
df_db = pd.read_sql_query("SELECT participant_number, condition, conversation_index FROM participants", db_connection)
db_connection.close()


df_csv = pd.read_csv('validation-1.csv')

# --- 2. Data Merging and Cleaning ---

# Rename columns for clarity and merging
df_csv.rename(columns={
    "Participant Id": "participant_number",
    "Which of the three roles did Forty portray?": "guessed_role"
}, inplace=True)

# Select only the columns needed for the analysis
df_db_subset = df_db[['participant_number', 'condition', 'conversation_index']]
df_csv_subset = df_csv[['participant_number', 'guessed_role']]

# Standardize the 'guessed_role' data to match 'condition' data (lowercase, remove extra words)
df_csv_subset['guessed_role'] = df_csv_subset['guessed_role'].str.lower().str.replace(' role', '')

# Merge the two dataframes on the participant number
merged_data = pd.merge(df_db_subset, df_csv_subset, on="participant_number")

print("--- Merged Data for Analysis ---")
print(merged_data)
print("\\n" + "="*30 + "\\n")


# --- 3. Statistical Analysis ---

# Check if there's enough data to analyze
if len(merged_data) < 2:
    print("Insufficient matched data to perform meaningful statistical analysis.")
else:
    # Build a contingency table
    table = pd.crosstab(merged_data["condition"], merged_data["guessed_role"])
    print("--- Contingency Table ---")
    print(table)
    print("\\n")

    # Chi-square test
    chi2, p, dof, expected = chi2_contingency(table)
    print(f"Chi² = {chi2:.3f}, p = {p:.5f}")

    if p < 0.05:
        print("→ Significant difference (95% confidence that people can tell which bot is which)")
    if p < 0.01:
        print("→ Significant difference (99% confidence that people can tell which bot is which)")
            
    from statsmodels.stats.proportion import proportion_confint

    p_hat = correct / n

    # 95% confidence interval (two-sided)
    ci_low, ci_high = proportion_confint(count=correct, nobs=n, alpha=0.05, method='wilson')
    print(f"Observed accuracy = {p_hat:.3f}")
    print(f"95% CI: [{ci_low:.3f}, {ci_high:.3f}]")

    # 99% confidence interval
    ci_low_99, ci_high_99 = proportion_confint(count=correct, nobs=n, alpha=0.01, method='wilson')
    print(f"99% CI: [{ci_low_99:.3f}, {ci_high_99:.3f}]")

--- Merged Data for Analysis ---
    participant_number     condition  conversation_index  guessed_role
0                    1    supportive                   1    supportive
1                    2  refutational                   1  refutational
2                    3    prebunking                   1    prebunking
3                    4    supportive                   2    supportive
4                    5  refutational                   2  refutational
5                    7    supportive                   3  refutational
6                    9    prebunking                   3    prebunking
7                   10    supportive                   1  refutational
8                   11  refutational                   1  refutational
9                   12    prebunking                   1    prebunking
10                  14  refutational                   2  refutational
11                  15    prebunking                   2  refutational
12                  16    supportive        

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_csv_subset['guessed_role'] = df_csv_subset['guessed_role'].str.lower().str.replace(' role', '')


In [22]:
# --- 3. Run Analysis for Each Conversation Index ---

# Get the unique conversation indices from the merged data
conversation_indices = sorted(merged_data['conversation_index'].unique())

for index in conversation_indices:
    print(f"--- Analysis for Conversation Index: {index} ---")
    
    # Filter the data for the current conversation index
    subset_data = merged_data[merged_data['conversation_index'] == index]
    
    # Check if there's enough data to analyze
    if len(subset_data) < 2:
        print("Insufficient matched data to perform meaningful statistical analysis for this index.\n")
        continue

    # --- Chi-Squared Test ---
    # Build a contingency table
    table = pd.crosstab(subset_data["condition"], subset_data["guessed_role"])
    print("Contingency Table:")
    print(table)
    print("")

    # Perform the test
    chi2, p, dof, expected = chi2_contingency(table)
    print(f"Chi² = {chi2:.3f}, p = {p:.5f}")

    if p < 0.05:
        print("→ Significant difference (95% confidence that people can tell which bot is which)")
    if p < 0.01:
        print("→ Significant difference (99% confidence that people can tell which bot is which)")
    print("-" * 20)

    # --- Accuracy and Confidence Intervals ---
    correct = (subset_data["condition"] == subset_data["guessed_role"]).sum()
    n = len(subset_data)
    
    if n > 0:
        p_hat = correct / n
        
        # 95% confidence interval (two-sided)
        ci_low_95, ci_high_95 = proportion_confint(count=correct, nobs=n, alpha=0.05, method='wilson')
        print(f"Correct Guesses: {correct}/{n}")
        print(f"Observed Accuracy = {p_hat:.3f}")
        print(f"95% CI: [{ci_low_95:.3f}, {ci_high_95:.3f}]")

        # 99% confidence interval
        ci_low_99, ci_high_99 = proportion_confint(count=correct, nobs=n, alpha=0.01, method='wilson')
        print(f"99% CI: [{ci_low_99:.3f}, {ci_high_99:.3f}]")
    else:
        print("No data for accuracy analysis.")
        
    print("\n" + "="*40 + "\n")

--- Analysis for Conversation Index: 1 ---
Contingency Table:
guessed_role  prebunking  refutational  supportive
condition                                         
prebunking             4             1           1
refutational           0             6           0
supportive             0             2           3

Chi² = 15.851, p = 0.00323
→ Significant difference (95% confidence that people can tell which bot is which)
→ Significant difference (99% confidence that people can tell which bot is which)
--------------------
Correct Guesses: 13/17
Observed Accuracy = 0.765
95% CI: [0.527, 0.904]
99% CI: [0.454, 0.927]


--- Analysis for Conversation Index: 2 ---
Contingency Table:
guessed_role  prebunking  refutational  supportive
condition                                         
prebunking             3             1           1
refutational           1             5           0
supportive             1             2           1

Chi² = 5.088, p = 0.27844
--------------------
Correct 