# Research Question 1 - Order-Invariant Assumption

In [1]:
import pickle

def load_experiments(filenames):
    """Load and concatenate the pickled experiment collections in `filenames`.

    Args:
        filenames: Iterable of paths to pickle files. Each file must
            unpickle to an iterable (presumably a list of experiment objects).

    Returns:
        A single list holding the items of every file, in the order the
        files were given.

    Note:
        `pickle.load` can execute arbitrary code while deserialising, so only
        point this at files from a trusted source.
    """
    collected = []

    for path in filenames:
        with open(path, "rb") as handle:
            batch = pickle.load(handle)
        collected.extend(batch)

    return collected

# Pickle files whose contents are merged into one flat list of experiments.
EXPERIMENT_FILES = ("data/human.pkl", "data/comet.pkl")
experiments = load_experiments(EXPERIMENT_FILES)

# Quick sanity check on how many experiments were loaded in total.
print(f"Total number of experiments: {len(experiments)}")

Total number of experiments: 23


## Step 1 - Unimodal Assumption on Prediction

In [2]:
import pandas as pd

# Labels paired positionally with the values of `sample.result` below,
# so the order of this tuple is significant.
HUMAN_NEEDS = (
    "status", "approval", "tranquility", "competition", "health",
    "family", "romance", "food", "independence", "power",
    "order", "curiosity", "serenity", "honor", "belonging",
    "contact", "savings", "idealism", "rest",
)

PREDICTION_COLUMNS = (
    "story", "line", "character", "human_size", "machine_size", "human_need", "likelihood"
)


def _iter_prediction_rows(all_experiments):
    """Yield one long-format row per (experiment, instance, sample, human need).

    Each row is a 7-tuple matching PREDICTION_COLUMNS; the "line" column
    carries `experiment.sentence`.
    """
    for experiment in all_experiments:
        identity = (experiment.story, experiment.sentence, experiment.character)
        for instance in experiment.instances:
            sizes = (instance.human_size, instance.machine_size)
            for sample in instance.samples:
                # zip stops at the shorter of the two sequences, so values in
                # `sample.result` beyond len(HUMAN_NEEDS) would be ignored.
                for need_and_likelihood in zip(HUMAN_NEEDS, sample.result):
                    yield identity + sizes + need_and_likelihood


prediction_dataframe = pd.DataFrame(
    _iter_prediction_rows(experiments),
    columns=PREDICTION_COLUMNS,
)

prediction_dataframe

Unnamed: 0,story,line,character,human_size,machine_size,human_need,likelihood
0,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,0,0,status,0.469050
1,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,0,0,approval,0.419768
2,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,0,0,tranquility,0.340969
3,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,0,0,competition,0.435739
4,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,0,0,health,0.312782
...,...,...,...,...,...,...,...
11232795,7a45afb8-18c4-4397-8751-6d8332d88178,5,Tycho,3,66,belonging,0.000335
11232796,7a45afb8-18c4-4397-8751-6d8332d88178,5,Tycho,3,66,contact,0.067039
11232797,7a45afb8-18c4-4397-8751-6d8332d88178,5,Tycho,3,66,savings,0.113374
11232798,7a45afb8-18c4-4397-8751-6d8332d88178,5,Tycho,3,66,idealism,0.189569


In [3]:
def _keep_largest_machine_only(frame):
    """Select the rows with human_size == 0 at each group's largest machine_size.

    Groups are defined by (story, line, character). The per-group maximum of
    `machine_size` is computed over all rows of the group (whatever their
    human_size) and broadcast back onto the rows with `transform("max")`.
    This replaces a `groupby(...).apply(lambda ...)` filter, which ran a
    Python callback per group and relied on `apply` handing the grouping
    columns to the callback (deprecated in pandas 2.2).

    Args:
        frame: DataFrame with at least the columns story, line, character,
            human_size, machine_size, human_need and likelihood.

    Returns:
        A new DataFrame with columns story, line, character, human_need and
        likelihood, ordered by the group keys and with a fresh 0..n-1 index.
    """
    group_keys = ["story", "line", "character"]

    largest_machine_size = frame.groupby(group_keys)["machine_size"].transform("max")
    keep = (frame["human_size"] == 0) & (frame["machine_size"] == largest_machine_size)

    return (
        frame.loc[keep, group_keys + ["human_need", "likelihood"]]
        # groupby/apply emitted the groups in sorted key order; sorting by the
        # same keys reproduces that order. Multi-key sorts are expected to be
        # stable, so rows keep their relative order within a group.
        .sort_values(group_keys)
        .reset_index(drop=True)
    )


# NOTE: this rebinds `prediction_dataframe` to a frame without the
# human_size / machine_size columns, so the cell that builds the full frame
# must be re-run before this cell can be executed again.
prediction_dataframe = _keep_largest_machine_only(prediction_dataframe)

prediction_dataframe

Unnamed: 0,story,line,character,human_need,likelihood
0,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,status,0.481567
1,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,approval,0.310710
2,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,tranquility,0.039941
3,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,competition,0.401482
4,0428b530-0cec-4955-b023-0bac60ddf8cc,5,Gina,health,0.035084
...,...,...,...,...,...
43695,b33a2445-7421-4ace-9837-e8f165524ccf,3,Riley,belonging,0.000014
43696,b33a2445-7421-4ace-9837-e8f165524ccf,3,Riley,contact,0.288310
43697,b33a2445-7421-4ace-9837-e8f165524ccf,3,Riley,savings,0.093079
43698,b33a2445-7421-4ace-9837-e8f165524ccf,3,Riley,idealism,0.405672
