## Survey Questionnaires

In [24]:
import numpy as np, pandas as pd, json, re
import nltk
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import chi2_contingency

## Load summaries

In [7]:
# Directory and file names of the annotated summary CSVs.
annotated_dir = "./data/summaries"
annotated_filenames = [
    "all_apps_codr.csv",
    "all_apps_cod.csv",
    "all_apps_vanilla.csv",
]

# Read the first two files into `codr_df` and `cod_df`, in list order.
codr_df, cod_df = (
    pd.read_csv(f"{annotated_dir}/{filename}")
    for filename in annotated_filenames[:2]
)

# Preview the first three rows.
codr_df.head(3)

Unnamed: 0,All_Entities,Denser_Summary,Iteration turn,Missing_Entities,annotation_id,annotator,app,created_at,id,label,lead_time,prompt,updated_at
0,expensive subscription;photo moderation;fake p...,"This application, known as Bumble, has been th...",1,expensive subscription;photo moderation;fake p...,64,1,bumble,2024-02-16T20:45:41.623686Z,49,"[{""start"":259,""end"":302,""text"":""cost associate...",31.235,CoD,2024-02-16T21:47:18.070438Z
1,expensive subscription;photo moderation;fake p...,"Bumble's users report mixed experiences, highl...",2,gender options;limited swipes;customer service,65,1,bumble,2024-02-16T20:46:16.084118Z,50,"[{""start"":76,""end"":95,""text"":""costly subscript...",32.636,CoD,2024-02-16T20:46:16.084165Z
2,expensive subscription;photo moderation;fake p...,Bumble's user experience is marred by an expen...,3,match algorithm;notification spam,66,1,bumble,2024-02-16T20:46:54.080791Z,51,"[{""start"":51,""end"":67,""text"":""subscription fee...",69.752,CoD,2024-03-13T16:44:55.441121Z


In [8]:
# Preview the first three rows of the second annotated file.
cod_df.head(n=3)

Unnamed: 0,All_Entities,Denser_Summary,Iteration turn,Missing_Entities,annotation_id,annotator,answer,app,created_at,id,label,lead_time,prompt,updated_at
0,safety concerns;photo verification issues;expe...,The app in question seems to have a variety of...,1,safety concerns;photo verification issues;expe...,513,1,,bumble,2024-02-17T03:37:20.162024Z,461,"[{""start"":388,""end"":414,""text"":""photo verifica...",646.987,CoD,2024-02-18T22:19:55.902805Z
1,safety concerns;photo verification issues;expe...,Users are encountering safety concerns and exp...,2,limited swipes;fake profiles;gender options,514,1,,bumble,2024-02-17T03:37:49.228535Z,462,"[{""start"":23,""end"":38,""text"":""safety concerns""...",90.938,CoD,2024-02-18T22:22:07.638895Z
2,safety concerns;photo verification issues;expe...,The app's user base reports persistent safety ...,3,unresponsive matches;unclear guidelines;non-bi...,515,1,,bumble,2024-02-17T03:38:43.556031Z,463,"[{""start"":39,""end"":54,""text"":""safety concerns""...",188.106,CoD,2024-02-18T22:24:27.770025Z


In [10]:
# Read the third listed file (index 2 of `annotated_filenames`) and preview it.
vanilla_df = pd.read_csv("{}/{}".format(annotated_dir, annotated_filenames[2]))
vanilla_df.head(n=3)

Unnamed: 0,annotation_id,annotator,apps,created_at,id,label,lead_time,summary,updated_at
0,497,1,Uber,2024-02-13T22:23:03.865786Z,453,"[{""start"":105,""end"":119,""text"":""app's glitches...",131.809,Users report mixed experiences with Uber's app...,2024-02-19T01:17:30.118175Z
1,498,1,Lyft,2024-02-13T22:23:48.216807Z,454,"[{""start"":191,""end"":206,""text"":""dynamic pricin...",109.652,The Lyft app receives mixed reviews. Some user...,2024-02-19T01:18:37.819951Z
2,499,1,Tinder,2024-02-13T22:24:28.396151Z,455,"[{""start"":72,""end"":98,""text"":""banned without e...",110.942,"Many users express frustration with Tinder, ci...",2024-02-19T01:20:01.240143Z


## Experiment: Compare CoD, CoDr and Vanilla

In [17]:
# Legend for the summary-type indices used in the survey forms:
#   0 -> CoDr    (informative question number is bold)
#   1 -> CoD     (informative question number is italic)
#   2 -> Vanilla (informative question number has no styling)
# NOTE(review): each index is also used as a position in the list of frames
# passed to `create_survey_form` -- confirm the legend matches what callers pass.

# Presentation order of the three summaries in each of the four form copies.
copies_order = [[0, 1, 2], [1, 0, 2], [0, 2, 1], [1, 2, 0]]

# Readability question appended after every summary in a form.
readability_q = (
    "\n"
    "2. How readable is the summary?\n"
    "a) Unreadable \tb) Somewhat readable \tc) Readable \td) Easy to read\n"
)

def create_survey_form(summ_df, app=None, orders=None, question=None):
    """Build and print the readability-survey form copies for one app.

    Each copy starts with an ``App: <Name>`` header, followed by the summaries
    in the order given by one entry of ``orders``. Every summary is prefixed
    with its index in square brackets and followed by ``question``.

    Parameters
    ----------
    summ_df : sequence of pandas.DataFrame
        One frame per summary variant, indexed by the values in ``orders``.
        The first row's ``"Denser_Summary"`` value of each frame is used.
    app : str, optional
        App name for the header. When omitted it is taken from the first row
        of the ``"app"`` column of ``summ_df[0]``, so the function no longer
        depends on a loop variable defined in another cell.
    orders : sequence of sequences of int, optional
        Presentation orders, one per form copy. Defaults to the notebook-level
        ``copies_order``.
    question : str, optional
        Text appended after each summary. Defaults to the notebook-level
        ``readability_q``.

    Returns
    -------
    list of str
        The generated form texts, one per entry in ``orders`` (also printed).

    Raises
    ------
    IndexError
        If a referenced frame is empty or an index in ``orders`` is out of range.
    KeyError
        If a required column is missing from a frame.
    """
    if app is None:
        app = summ_df[0]["app"].iloc[0]
    if orders is None:
        orders = copies_order
    if question is None:
        question = readability_q

    col = "Denser_Summary"
    forms = []

    # One form copy per presentation order.
    for copy_itr, itr_order in enumerate(orders):
        print("copy: ", copy_itr, "\norder:", itr_order)

        copy_content = f"App: {str(app).capitalize()}"

        for i in itr_order:
            summ = summ_df[i][col].iloc[0]
            print("\n summ type: ", i)
            print("summary: ", summ)

            # Plain string formatting instead of re.sub: summary text is
            # free-form, and backslashes in a regex replacement string are
            # treated as escapes / group references (raising or corrupting
            # the text). It also cannot touch earlier summaries.
            copy_content += f"\n[{i}] {summ}\n"
            copy_content += question

        # Forms are only printed here; use the returned list to persist them.
        print("\n\n------form------\n")
        print(copy_content)
        forms.append(copy_content)

    return forms

In [None]:
# Distinct app names in `codr_df`, in order of first appearance.
apps = codr_df["app"].drop_duplicates().tolist()

itr_of_interst = 3

# Print the survey forms for each app from its rows at iteration turns 3, 4 and 5.
# The loop variable must stay named `app`: `create_survey_form` reads it as a global.
for app in apps:
    app_rows = codr_df[codr_df["app"] == app]
    app_df = []
    for itr in range(3, 6):
        app_df.append(app_rows[app_rows["Iteration turn"] == itr])
    create_survey_form(app_df)

## Survey results: 
- parse results from survey forms into a contingency table
- run a Chi-square test to determine if there is a significant association between summary method (iteration) and readability levels

In [25]:
# Columns kept from the survey export: the app plus one answer column per iteration.
cols = ["App", "3rd", "4th", "5th"]

# Load the results, discard rows that are empty in every column, then keep `cols`.
df = pd.read_csv("./data/survey/results.csv").dropna(how="all")[cols]
print(f"len:  {len(df)}")

df

len:  32


Unnamed: 0,App,3rd,4th,5th
0,calm,SOMEWHAT READABLE,READABLE,READABLE
1,calm,READABLE,SOMEWHAT READABLE,SOMEWHAT READABLE
2,calm,SOMEWHAT READABLE,EASY TO READ,READABLE
3,calm,SOMEWHAT READABLE,SOMEWHAT READABLE,EASY TO READ
4,headspace,UNREADABLE,SOMEWHAT READABLE,SOMEWHAT READABLE
5,headspace,SOMEWHAT READABLE,READABLE,EASY TO READ
6,headspace,EASY TO READ,READABLE,EASY TO READ
7,headspace,READABLE,READABLE,EASY TO READ
8,tinder,EASY TO READ,READABLE,EASY TO READ
9,tinder,READABLE,READABLE,READABLE


In [26]:
from collections import Counter

# Answer columns of `df`, one per compared summary iteration.
methods = ["3rd", "4th", "5th"]

# Count how often each answer was given for each method. Missing answers are
# dropped first: otherwise NaN becomes a Counter key and later a column label,
# which breaks the `.capitalize()` call below.
question_method_counts = []
for method in methods:
    col_values = df[method].dropna().tolist()
    col_counter = Counter(col_values)
    col_counter["method"] = method
    question_method_counts.append(col_counter)

# Convert each Counter object to a plain dictionary
list_of_dicts = [dict(counter) for counter in question_method_counts]

# One row per method; an answer never given for a method counts as 0
results_df = pd.DataFrame(list_of_dicts).fillna(0)

# Move the 'method' column to the first position, keeping the other columns
# in order of first appearance
first_column = 'method'
results_df = results_df[[first_column] + [col for col in results_df.columns if col != first_column]]
results_df.columns = [col.capitalize() for col in results_df.columns]

# Index by method and store whole-number counts (the NaN fill leaves floats)
results_df = results_df.set_index('Method').astype(int)
results_df

Unnamed: 0_level_0,Somewhat readable,Readable,Unreadable,Easy to read
Method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3rd,11,12,1.0,8
4th,11,14,0.0,7
5th,5,15,2.0,10


In [30]:
# Perform the chi-square test
chi2, p, dof, expected = chi2_contingency(results_df)

print("Chi-square statistic:", chi2)
print("P-value:", p)
print("Degrees of freedom:", dof)
print("Expected frequencies table:\n", expected)

Chi-square statistic: 5.568130081300813
P-value: 0.4732608053708597
Degrees of freedom: 6
Expected frequencies table:
 [[ 9.         13.66666667  1.          8.33333333]
 [ 9.         13.66666667  1.          8.33333333]
 [ 9.         13.66666667  1.          8.33333333]]
