In [1]:
import pandas as pd
import numpy as np

from utils_io import print_sep, read_json, persist_histograms
from utils_greedy_histogram import create_histogram_humans
from default_vars import BIN_CENTER, BIN_OFFSET, UNCERTAINTY_EXPRESSIONS

# Keyword arguments forwarded to the `create_histogram_humans` calls below:
# the binning constants and expression list imported from `default_vars`,
# plus the column names passed as `number_col` and `unc_col`.
hist_creation_kwargs = {
    "bin_center": BIN_CENTER,
    "bin_offset": BIN_OFFSET,
    "uncertainty_expressions": UNCERTAINTY_EXPRESSIONS,
    "number_col": "response",
    "unc_col": "uncertainty_expression",
}

In this notebook, we post-process the results obtained in the human experiments. The execution of the file will produce the following files:

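- `human+nv+filtered` and `human+nv+unfiltered`: the greedy histograms derived from the filtered and unfiltered pools of participants when annotating the non-verifiable statements (written to the `non_verifiable/humans-2shot-filtered` and `non_verifiable/humans-2shot-unfiltered` folders under the results directory).
- `human+v+filtered` and `human+v+unfiltered`: the greedy histograms derived from the filtered and unfiltered pools of participants when annotating the verifiable statements (written to the `verifiable/humans-2shot-filtered` and `verifiable/humans-2shot-unfiltered` folders under the results directory).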

In [2]:
# Root folder (relative path) used as the prefix of every `results_folder`
# passed to `persist_histograms` in this notebook.
OUTPUT_DIR = "../../results"

## 1. Human + NV

Human participants' results for the 2-shot non-verifiable statements.

In [3]:
# Prepare the human annotations for the non-verifiable statements:
# (1) read the processed results of experiment 1 from a single CSV file;
# (2) derive `statement_type` as the part of `statement_id` before the first
#     underscore, stripped of surrounding whitespace;
# (3) keep a separate copy restricted to the rows whose `include_participant`
#     flag is set (presumably the outcome of the sanity check -- TODO confirm).
humans = pd.read_csv(
    "../../prolific_experiment/experimental_results/humans_experiment1_proc.csv",
    index_col=0,
)
humans["statement_type"] = humans["statement_id"].map(
    lambda statement_id: statement_id.split("_")[0].strip()
)
filtered_humans = humans[humans["include_participant"]].copy()

### 1.1. Population

In [4]:
# Population-level histograms for the non-verifiable statements: first from all
# participants, then from the `filtered_humans` subset, each persisted to its
# own results folder.
for pool, results_folder in (
    (humans, f"{OUTPUT_DIR}/greedy/all/non_verifiable/humans-2shot-unfiltered"),
    (filtered_humans, f"{OUTPUT_DIR}/greedy/all/non_verifiable/humans-2shot-filtered"),
):
    histograms = create_histogram_humans(pool, **hist_creation_kwargs)
    persist_histograms(*histograms, results_folder=results_folder)

### 1.2. By speaker gender

In [5]:
# Per-gender histograms for the non-verifiable statements. For each gender the
# unfiltered pool is processed first, then the filtered one; output files are
# prefixed with the gender value.
for gender in ("male", "female"):
    for pool, results_folder in (
        (humans, f"{OUTPUT_DIR}/greedy/by_gender/non_verifiable/humans-2shot-unfiltered"),
        (filtered_humans, f"{OUTPUT_DIR}/greedy/by_gender/non_verifiable/humans-2shot-filtered"),
    ):
        gender_rows = pool[pool["gender"] == gender].copy()
        # Sanity check: selecting a single gender must drop at least one row.
        assert len(gender_rows) < len(pool)

        histograms = create_histogram_humans(gender_rows, **hist_creation_kwargs)
        persist_histograms(*histograms, results_folder=results_folder, prefix=gender)

### 1.3. By statement type

In [6]:
# Show the distinct values of `statement_type` (the `statement_id` prefix);
# the loop in the following cell iterates over exactly these values.
print(humans["statement_type"].unique())

['preference' 'imperfect' 'future' 'possession']


In [7]:
# Per-statement-type histograms for the non-verifiable statements. The types
# are taken from the unfiltered frame and applied to both pools; output files
# are prefixed with the statement type.
for st_type in humans["statement_type"].unique():
    for pool, results_folder in (
        (humans, f"{OUTPUT_DIR}/greedy/by_statement_type/non_verifiable/humans-2shot-unfiltered"),
        (filtered_humans, f"{OUTPUT_DIR}/greedy/by_statement_type/non_verifiable/humans-2shot-filtered"),
    ):
        type_rows = pool[pool["statement_type"] == st_type]
        # Sanity check: selecting a single type must drop at least one row.
        assert len(type_rows) < len(pool)
        histograms = create_histogram_humans(type_rows, **hist_creation_kwargs)
        persist_histograms(*histograms, results_folder=results_folder, prefix=st_type)

## 2. Human + V

Human participants' results for the 2-shot verifiable statements.

In [8]:
# Prepare the human annotations for the verifiable statements:
# (1) read the processed results of experiment 2 from a single CSV file;
# (2) derive `statement_type` as the part of `statement_id` before the first
#     underscore, stripped of surrounding whitespace;
# (3) derive `statement_truth` as the string "true" when `statement_id`
#     contains the substring "true", and "false" otherwise;
# (4) keep a separate copy restricted to the rows whose `include_participant`
#     flag is set (presumably the outcome of the sanity check -- TODO confirm).
humans_v = pd.read_csv(
    "../../prolific_experiment/experimental_results/humans_experiment2_proc.csv",
    index_col=0,
)
humans_v["statement_type"] = humans_v["statement_id"].map(
    lambda statement_id: statement_id.split("_")[0].strip()
)
humans_v["statement_truth"] = humans_v["statement_id"].map(
    lambda statement_id: str("true" in statement_id).lower()
)

filtered_humans_v = humans_v[humans_v["include_participant"]].copy()

### 2.1. Population

In [9]:
# Population-level histograms for the verifiable statements: first from all
# participants, then from the `filtered_humans_v` subset, each persisted to its
# own results folder.
for pool_v, results_folder in (
    (humans_v, f"{OUTPUT_DIR}/greedy/all/verifiable/humans-2shot-unfiltered"),
    (filtered_humans_v, f"{OUTPUT_DIR}/greedy/all/verifiable/humans-2shot-filtered"),
):
    histograms = create_histogram_humans(pool_v, **hist_creation_kwargs)
    persist_histograms(*histograms, results_folder=results_folder)

### 2.2. By speaker's gender

In [10]:
# Per-gender histograms for the verifiable statements. For each gender the
# unfiltered pool is processed first, then the filtered one; output files are
# prefixed with the gender value.
for gender in ("male", "female"):
    for pool_v, results_folder in (
        (humans_v, f"{OUTPUT_DIR}/greedy/by_gender/verifiable/humans-2shot-unfiltered"),
        (filtered_humans_v, f"{OUTPUT_DIR}/greedy/by_gender/verifiable/humans-2shot-filtered"),
    ):
        gender_rows_v = pool_v[pool_v["gender"] == gender].copy()
        # Sanity check: selecting a single gender must drop at least one row.
        assert len(gender_rows_v) < len(pool_v)

        histograms = create_histogram_humans(gender_rows_v, **hist_creation_kwargs)
        persist_histograms(*histograms, results_folder=results_folder, prefix=gender)

### 2.3. By statement type

In [11]:
# Show the distinct values of `statement_type` (the `statement_id` prefix);
# the loop in the following cell iterates over exactly these values.
print(humans_v["statement_type"].unique())

['science' 'history' 'city']


In [12]:
# Per-statement-type histograms for the verifiable statements. The types are
# taken from the unfiltered frame and applied to both pools; output files are
# prefixed with the statement type.
for st_type in humans_v["statement_type"].unique():
    for pool_v, results_folder in (
        (humans_v, f"{OUTPUT_DIR}/greedy/by_statement_type/verifiable/humans-2shot-unfiltered"),
        (filtered_humans_v, f"{OUTPUT_DIR}/greedy/by_statement_type/verifiable/humans-2shot-filtered"),
    ):
        type_rows_v = pool_v[pool_v["statement_type"] == st_type]
        # Sanity check: selecting a single type must drop at least one row.
        assert len(type_rows_v) < len(pool_v)
        histograms = create_histogram_humans(type_rows_v, **hist_creation_kwargs)
        persist_histograms(*histograms, results_folder=results_folder, prefix=st_type)

### 2.4. By Truth/Falsity of the statement

In [13]:
# Show the distinct values of `statement_truth` (derived from whether
# `statement_id` contains "true"); the following cell iterates over them.
print(humans_v["statement_truth"].unique())

['true' 'false']


In [14]:
# Histograms for the verifiable statements split by the `statement_truth`
# value. The values are taken from the unfiltered frame and applied to both
# pools; output files are prefixed with the truth value.
for st_truth in humans_v["statement_truth"].unique():
    for pool_v, results_folder in (
        (humans_v, f"{OUTPUT_DIR}/greedy/by_statement_truth/verifiable/humans-2shot-unfiltered"),
        (filtered_humans_v, f"{OUTPUT_DIR}/greedy/by_statement_truth/verifiable/humans-2shot-filtered"),
    ):
        truth_rows_v = pool_v[pool_v["statement_truth"] == st_truth]
        # Sanity check: selecting a single truth value must drop at least one row.
        assert len(truth_rows_v) < len(pool_v)
        histograms = create_histogram_humans(truth_rows_v, **hist_creation_kwargs)
        persist_histograms(*histograms, results_folder=results_folder, prefix=st_truth)