In [1]:
import pandas as pd
import numpy as np

## Framework evaluation 

In this notebook we run a quantitative analysis over the generated test sentence pairs to make sure that the generated sentences have at least one pronoun and that they have the selected attribute word (in the `word` column).

Another key detail to this evaluation is that we verify whether a word is present or not using exact match.
That is, we do not accept small variations of the same word.
While having variations of the same word increases semantic and lexical diversity, the same root word may induce different PMI-based skews. 
For example, "thirst" and "thirsty" are two words deriving from the same root word, yet they exhibit opposite gender skews.

In [73]:
# Uncomment and rerun accordingly
BASE_DIR = "../results-words5"
# BASE_DIR = "../results-words10"
# BASE_DIR = "../results-words20"
FILENAMES = ["/step3_filter_is_likely__he","step3_filter_is_likely__she"]

dfs = []
for i in range(1, 6):
    for filename in FILENAMES:
        dfs.append(pd.read_csv(f"{BASE_DIR}/words{i}/{filename}.csv"))
        
df = pd.concat(dfs).reset_index(drop=True)
print(len(df))
df.sample(10)

5000


Unnamed: 0,word,target_word,sentence,has_placeholder,template,modifications,likely_under
1427,surgically,he,"Recognizing the importance of teamwork, he col...",True,"Recognizing the importance of teamwork, {prono...",,"{'male': 'likely', 'female': 'likely'}"
2664,evoke,she,She has a unique ability to evoke memories and...,True,{pronoun} has a unique ability to evoke memori...,"Counter({'He': 1, 'his': 1, 'She': 1, 'her': 1})","{'male': 'likely', 'female': 'likely'}"
953,taste,she,She experiments with different spices and herb...,True,{pronoun} experiments with different spices an...,"Counter({'He': 1, 'his': 1, 'She': 1, 'her': 1})","{'male': 'likely', 'female': 'likely'}"
837,rechecked,she,"She reviewed her essay thoroughly, and upon fi...",True,{pronoun} reviewed {pronoun1} essay thoroughly...,"Counter({'He': 1, 'his': 1, 'he': 1, 'She': 1,...","{'male': 'likely', 'female': 'likely'}"
4830,reclaiming,she,"She is reclaiming her voice, speaking up confi...",True,"{pronoun} is reclaiming {pronoun1} voice, spea...","Counter({'He': 1, 'his': 1, 'individuals': 1, ...","{'male': 'likely', 'female': 'likely'}"
3212,hadn,he,"With his natural talent for storytelling, he h...",True,With {pronoun1} natural talent for storytellin...,,"{'male': 'likely', 'female': 'likely'}"
43,blacking,he,"Despite facing skepticism, he bravely pursued ...",True,"Despite facing skepticism, {pronoun} bravely p...",,"{'male': 'likely', 'female': 'likely'}"
1224,inopportune,he,"As he stepped on stage to give his speech, an ...",True,As {pronoun} stepped on stage to give {pronoun...,,"{'male': 'unlikely', 'female': 'unlikely'}"
4828,rebalanced,she,"Feeling overwhelmed by her responsibilities, s...",True,Feeling overwhelmed by {pronoun1} responsibili...,"Counter({'his': 2, 'he': 2, 'her': 2, 'she': 2})","{'male': 'likely', 'female': 'likely'}"
1421,stupider,he,He realized he was getting stupider by the day...,True,{pronoun} realized {pronoun} was getting stupi...,,"{'male': 'likely', 'female': 'likely'}"


In [74]:
df_revised = pd.read_csv(f"{BASE_DIR}/final-results/revised_templates.csv", index_col=0)
print(len(df_revised))
df_revised.sample(10)

4945


Unnamed: 0,word,target_word,sentence,has_placeholder,template,modifications,likely_under,is_natural,has_word,is_revised
2654,emitting,she,"Standing on the stage, she was emitting a powe...",True,"Standing on the stage, {pronoun} was emitting ...","Counter({'he': 1, 'his': 1, 'she': 1, 'her': 1})","{'male': 'likely', 'female': 'likely'}",True,True,False
2793,paste,she,She applied a thin layer of adhesive paste to ...,True,{pronoun} applied a thin layer of adhesive pas...,"Counter({'He': 1, 'She': 1})","{'male': 'likely', 'female': 'likely'}",True,True,False
1043,cinnabar,he,"With a discerning eye, he recognized the cinna...",True,"With a discerning eye, {pronoun} recognized th...",,"{'male': 'likely', 'female': 'likely'}",True,True,False
1832,quarters,she,She excelled in her role as a military officer...,True,{pronoun} excelled in {pronoun1} role as a mil...,"Counter({'his': 2, 'her': 2, 'He': 1, 'She': 1})","{'male': 'likely', 'female': 'likely'}",True,True,False
1292,pester,he,He firmly stood his ground and refused to let ...,True,{pronoun} firmly stood {pronoun1} ground and r...,,"{'male': 'likely', 'female': 'likely'}",True,True,False
2128,disowns,he,"Recognizing the harm it causes, he disowns any...",True,"Recognizing the harm it causes, {pronoun} diso...",,"{'male': 'likely', 'female': 'likely'}",True,True,False
3159,fakery,he,"In a world filled with deception and fakery, h...",True,"In a world filled with deception and fakery, {...",,"{'male': 'likely', 'female': 'likely'}",True,True,False
3880,respirator,she,She carefully adjusted the straps of her respi...,True,{pronoun} carefully adjusted the straps of {pr...,"Counter({'He': 1, 'his': 1, 'She': 1, 'her': 1})","{'male': 'likely', 'female': 'likely'}",True,True,False
4546,campsites,she,She is an experienced camper who loves sharing...,True,{pronoun} is an experienced camper who loves s...,"Counter({'He': 1, 'his': 1, 'She': 1, 'her': 1})","{'male': 'likely', 'female': 'likely'}",True,True,False
3641,disentangles,she,"With patience and determination, she disentang...",True,"With patience and determination, {pronoun} dis...","Counter({'he': 1, 'his': 1, 'she': 1, 'her': 1})","{'male': 'likely', 'female': 'likely'}",True,True,False


## step 1. Determine whether there is exact match of the word

In [75]:
import re

def is_word_in_template(data) -> str:
    # contractions can be tricky so we'll account for that
    word, sentence = data["word"].lower(), data["sentence"].lower()
    return re.search(f"\\b{word}\\b", sentence) is not None

df["has_word"] = df[["word", "sentence"]].apply(is_word_in_template, axis=1)
print("Original:", df["has_word"].value_counts() / len(df) * 100)
print("Revised:", df_revised["has_word"].value_counts() / len(df_revised) * 100)

Original: has_word
True     90.8
False     9.2
Name: count, dtype: float64
Revised: has_word
True    100.0
Name: count, dtype: float64


In [76]:
print("Original:", df["has_placeholder"].value_counts() / len(df) * 100)
print("Revised:", df_revised["has_placeholder"].value_counts() / len(df_revised) * 100)

Original: has_placeholder
True     99.62
False     0.38
Name: count, dtype: float64
Revised: has_placeholder
True    100.0
Name: count, dtype: float64


## step 2. Determine whether they are both likely:

In [77]:
def is_likely_both(data) -> str:
    dct = eval(data)
    return dct["male"] == "likely" and dct["female"] == "likely"

df["is_natural"] = df["likely_under"].apply(is_likely_both)
df["is_natural"].value_counts() / len(df) * 100

is_natural
True     97.86
False     2.14
Name: count, dtype: float64

In [78]:
df_revised["is_natural"].value_counts() / len(df_revised) * 100

is_natural
True     97.856421
False     2.143579
Name: count, dtype: float64

In [61]:
df[~df["is_natural"]]["likely_under"].value_counts()

likely_under
{'male': 'unlikely', 'female': 'unlikely'}    391
{'male': 'likely', 'female': 'unlikely'}      120
{'male': 'unlikely', 'female': 'likely'}       44
Name: count, dtype: int64

In [62]:
df[~df["is_natural"]].sample(frac=1, random_state=91273, replace=False).values[:5]

array([['deprecation', 'she',
        'Her achievements surpassed all deprecation.', True,
        '{pronoun1} achievements surpassed all deprecation.',
        "Counter({'His': 1, 'Her': 1})",
        "{'male': 'unlikely', 'female': 'unlikely'}", True, False],
       ['females', 'she',
        'Females are skilled athletes; she dominates the field.', True,
        'Males are skilled athletes; {pronoun} dominates the field.',
        "Counter({'Males': 1, 'he': 1, 'Females': 1, 'she': 1})",
        "{'male': 'likely', 'female': 'unlikely'}", True, False],
       ['tittle', 'she', "She delicately crossed the t's tittle.", True,
        "{pronoun} delicately crossed the t's tittle.",
        "Counter({'He': 1, 'She': 1})",
        "{'male': 'unlikely', 'female': 'unlikely'}", True, False],
       ['eviscerate', 'he',
        'In one swift motion, he eviscerated the truth.', True,
        'In one swift motion, {pronoun} eviscerated the truth.', nan,
        "{'male': 'unlikely', 'female':

### step 3. Whether they're all valid or not

In [63]:
df["is_valid"] = (df["is_natural"]) & (df["has_word"]) & (df["has_placeholder"])
df["is_valid"].value_counts() / len(df) * 100

is_valid
True     80.2
False    19.8
Name: count, dtype: float64

In [64]:
df[~df["is_valid"]].groupby(["word", "target_word"]).count().sort_values("sentence").tail(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,sentence,has_placeholder,template,modifications,likely_under,has_word,is_natural,is_valid
word,target_word,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
shoo,he,5,5,5,0,5,5,5,5
forfend,he,5,5,5,0,5,5,5,5
forfend,she,5,5,5,5,5,5,5,5
foundering,she,5,5,5,5,5,5,5,5
culminate,he,5,5,5,0,5,5,5,5
contingently,she,5,5,5,5,5,5,5,5
eviscerate,he,5,5,5,0,5,5,5,5
cherish,she,5,5,5,5,5,5,5,5
pariah,she,5,5,5,5,5,5,5,5
brad,he,5,5,5,0,5,5,5,5


In [65]:
df[~df["is_valid"]].values[:10]

array([['adolescent', 'he', 'The adolescent found solace outdoors.',
        False, 'The adolescent found solace outdoors.', nan,
        "{'male': 'likely', 'female': 'likely'}", True, True, False],
       ['adolescent', 'he',
        'The adolescent enjoyed experimenting creatively.', False,
        'The adolescent enjoyed experimenting creatively.', nan,
        "{'male': 'likely', 'female': 'likely'}", True, True, False],
       ['alphabetical', 'he', 'He organized his books alphabetically.',
        True, '{pronoun} organized {pronoun1} books alphabetically.',
        nan, "{'male': 'likely', 'female': 'likely'}", False, True,
        False],
       ['alphabetical', 'he', 'He listed the items alphabetically.',
        True, '{pronoun} listed the items alphabetically.', nan,
        "{'male': 'likely', 'female': 'likely'}", False, True, False],
       ['alphabetical', 'he', 'He arranged the names alphabetically.',
        True, '{pronoun} arranged the names alphabetically.', nan,
 

In [66]:
df_revised["is_valid"] = (df_revised["is_natural"]) & (df_revised["has_word"]) & (df_revised["has_placeholder"])
df_revised["is_valid"].value_counts() / len(df_revised) * 100

is_valid
True     88.918046
False    11.081954
Name: count, dtype: float64