In [1]:
import numpy as np
import json
import glob
import re
import pandas as pd

import sys
sys.path += ['../src/']

from WEAT import WEAT, SWEAT, CohesionTest

In [2]:
# read weat words
# weat1 and weat2 come from the two reference papers
# weat3 is the combination of them (results reported in our paper)
weat1 = json.load(open("../data/Data_WEAT/weat_attrib_target_same_length.json"))
weat1_words = open("../data/Data_WEAT/all_weat_words_same_length.txt").read().split("\n")[:-1]
weat1_words = set(weat1_words)

weat2 = json.load(open("../data/Data_WEAT/weat_attrib_target_2_same_length.json"))
weat2_words = open("../data/Data_WEAT/all_weat2_words_same_length.txt").read().split("\n")[:-1]
weat2_words = set(weat2_words)

weat3 = json.load(open("../data/Data_WEAT/weat_attrib_target_3_same_length.json"))
weat3_words = open("../data/Data_WEAT/all_weat3_words_same_length.txt").read().split("\n")[:-1]
weat3_words = set(weat3_words)

In [3]:
# function to read wordd vectors
embedding_path = "../results/experiment_W2V_several_runs_WEAT_2/Word_vectors/"
def read_embedding(file):
    
    word_vectors = {}
    with open(file, 'rt') as rr:
        for line in rr:
            line = line.strip().split()
            word, vec = line[0], line[1:]
            if word in weat1_words or word in weat2_words or word in weat3_words:
                word_vectors[word] = np.array(vec, dtype=float)
                
    return word_vectors

In [4]:
# function to take WEAT word sets

def get_WEAT_word_sets(weat, test_name):
    
    A_key = weat[test_name]['A_key']
    A_words = weat[test_name][A_key]

    B_key = weat[test_name]['B_key']
    B_words = weat[test_name][B_key]

    X_key = weat[test_name]['X_key']
    X_words = weat[test_name][X_key]

    Y_key = weat[test_name]['Y_key']
    Y_words = weat[test_name][Y_key]

    set_names = {
        'A':A_key,
        'B':B_key,
        'X':X_key,
        'Y':Y_key
    }
    
    return A_words, B_words, X_words, Y_words, set_names

### Cohesion test

In [5]:
# runs for weat1 and weat2
word_vector_files = glob.glob(embedding_path+\
                              "W2V_lemmatized_iter-*_dim-300_w-5_alpha-default_epochs-40_corpus-*_person_artist_lyrics.txt")

results_cohesion = []
for file in word_vector_files:
    
    n_iter, corpus_gender = re.findall(r"(?P<iter>(?<=iter-)\d+)|(?P<corpus>(?<=corpus-)\w+(?=_person))", 
                                       file)
    n_iter = n_iter[0]
    corpus_gender = corpus_gender[1]
    
    # read word vectors
    word_vectors = read_embedding(file)
    
    for test_name in weat1.keys():
        if test_name.startswith("_") or weat1[test_name]['method']=='wefat': continue

        A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat1, test_name)
        cohesion = CohesionTest(A_words, B_words, word_vectors)
        p1_a, p2_a, p3_a = cohesion.compute_cohesion(n_iters=10000)
        
        cohesion = CohesionTest(X_words, Y_words, word_vectors)
        p1_t, p2_t, p3_t = cohesion.compute_cohesion(n_iters=10000)
        
        
        line = {
            'test_name':test_name,
            'weat_test_num':1,
            'iter':n_iter,
            'corpus':corpus_gender,
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'p_cohesion_attributes':[p1_a, p2_a, p3_a],
            'p_cohesion_targets':[p1_t, p2_t, p3_t]
        }
        results_cohesion.append(line)
        
    for test_name in weat2.keys():
        if test_name.startswith("_") or weat2[test_name]['method']=='wefat': continue

        A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat2, test_name)
        cohesion = CohesionTest(A_words, B_words, word_vectors)
        p1_a, p2_a, p3_a = cohesion.compute_cohesion(n_iters=10000)
        
        cohesion = CohesionTest(X_words, Y_words, word_vectors)
        p1_t, p2_t, p3_t = cohesion.compute_cohesion(n_iters=10000)
        
        
        line = {
            'test_name':test_name,
            'weat_test_num':2,
            'iter':n_iter,
            'corpus':corpus_gender,
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'p_cohesion_attributes':[p1_a, p2_a, p3_a],
            'p_cohesion_targets':[p1_t, p2_t, p3_t]
        }
        results_cohesion.append(line)
        
        
results_cohesion = pd.DataFrame(results_cohesion)
results_cohesion.head(3)
        

Unnamed: 0,test_name,weat_test_num,iter,corpus,A_words,B_words,X_words,Y_words,set_names,p_cohesion_attributes,p_cohesion_targets
0,EuropeanAmerican_AfricanAmerican_Pleasant_Unpl...,1,2,all,"[caress, freedom, health, love, peace, cheer, ...","[abuse, crash, filth, murder, sickness, accide...","[harry, amber, heather, adam, frank, chip, pet...","[theo, jerome, leroy, tyrone, ebony, jasmine, ...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Eur...","[0.169, 0.0, 0.0]","[0.0954, 0.1997, 0.0226]"
1,EuropeanAmerican_AfricanAmerican_Pleasant_Unpl...,1,2,all,"[caress, freedom, health, love, peace, cheer, ...","[abuse, crash, filth, murder, sickness, accide...","[greg, neil, emily, anne, jill, sarah, jay]","[jermaine, leroy, tyrone, ebony, keisha, kenya...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Eur...","[0.0006, 0.0, 0.0]","[0.0379, 0.3481, 0.0189]"
2,Flowers_Insects_Pleasant_Unpleasant,1,2,all,"[caress, freedom, health, love, peace, cheer, ...","[abuse, crash, filth, murder, sickness, accide...","[clover, marigold, poppy, crocus, iris, orchid...","[cockroach, maggot, locust, roach, centipede, ...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Flo...","[0.2094, 0.0, 0.0]","[0.0, 0.0001, 0.0]"


In [6]:
# save it
results_cohesion.to_json("../results/experiment_W2V_several_runs_WEAT_2/WEAT/cohesion_test.json")

In [5]:
# runs for weat3
word_vector_files = glob.glob(embedding_path+\
                              "W2V_lemmatized_iter-*_dim-300_w-5_alpha-default_epochs-40_corpus-*_person_artist_lyrics.txt")

results_cohesion = []
for file in word_vector_files:
    
    n_iter, corpus_gender = re.findall(r"(?P<iter>(?<=iter-)\d+)|(?P<corpus>(?<=corpus-)\w+(?=_person))", 
                                       file)
    n_iter = n_iter[0]
    corpus_gender = corpus_gender[1]
    
    # read word vectors
    word_vectors = read_embedding(file)
    
    for test_name in weat3.keys():
        if test_name.startswith("_") or weat3[test_name]['method']=='wefat': continue

        A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat3, test_name)
        cohesion = CohesionTest(A_words, B_words, word_vectors)
        p1_a, p2_a, p3_a = cohesion.compute_cohesion(n_iters=10000)
        
        cohesion = CohesionTest(X_words, Y_words, word_vectors)
        p1_t, p2_t, p3_t = cohesion.compute_cohesion(n_iters=10000)
        
        
        line = {
            'test_name':test_name,
            'weat_test_num':3,
            'iter':n_iter,
            'corpus':corpus_gender,
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'p_cohesion_attributes':[p1_a, p2_a, p3_a],
            'p_cohesion_targets':[p1_t, p2_t, p3_t]
        }
        results_cohesion.append(line)
        
        
results_cohesion = pd.DataFrame(results_cohesion)
results_cohesion.head(3)
        

Unnamed: 0,test_name,weat_test_num,iter,corpus,A_words,B_words,X_words,Y_words,set_names,p_cohesion_attributes,p_cohesion_targets
0,Flowers_Insects_Pleasant_Unpleasant,3,2,all,"[family, honest, gift, wonderful, vacation, mi...","[cancer, agony, stink, sickness, poverty, acci...","[lilac, bluebell, violet, crocus, buttercup, i...","[cockroach, maggot, locust, roach, centipede, ...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Flo...","[0.1102, 0.0, 0.0]","[0.0, 0.0, 0.0]"
1,Male_Female_Career_Family,3,2,all,"[corporation, professional, career, office, bu...","[family, marriage, wedding, children, home]","[paul, kevin, steve, greg, jeff, john, bill, m...","[diana, kate, joan, sarah, donna, lisa, amy, ann]","{'A': 'Career words', 'B': 'Family words', 'X'...","[0.1418, 0.2126, 0.0002]","[0.0005, 0.5122, 0.0]"
2,Math_Arts_Male_Female,3,2,all,"[brother, grandfather, his, son, father, man, ...","[girl, hers, her, aunt, daughter, sister, fema...","[addition, math, numbers, geometry]","[poetry, drama, art, dance]","{'A': 'Male attributes', 'B': 'Female attribut...","[0.2705, 0.1317, 0.0271]","[0.0072, 0.8404, 0.0559]"


In [6]:
# save it
results_cohesion.to_json("../results/experiment_W2V_several_runs_WEAT_2/WEAT/cohesion_test_3.json")

### WEAT

In [7]:
# runs for weat1 and weat2
results_weat = []
for file in word_vector_files:
    
    n_iter, corpus_gender = re.findall(r"(?P<iter>(?<=iter-)\d+)|(?P<corpus>(?<=corpus-)\w+(?=_person))", 
                                       file)
    n_iter = n_iter[0]
    corpus_gender = corpus_gender[1]
    
    # read word vectors
    word_vectors = read_embedding(file)

    for test_name in weat1.keys():
        if test_name.startswith("_") or weat1[test_name]['method']=='wefat': continue
            
        A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat1, test_name)
        
        # compute effect size
        weat_test = WEAT(A_words, B_words, word_vectors, set_names)
        weat_test.add_target_sets(X_words, Y_words)
        effect_size, p, single_cat_assocs = weat_test.compute_effect_size()
        effect_size_boot, effect_size_boot_cil, effect_size_boot_ciu = weat_test.compute_effect_size_bootstrap()
        
        line = {
            'test_name':test_name,
            'weat_test_num':1,
            'iter':n_iter,
            'corpus':corpus_gender,
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'weat_test':{'effect_size':effect_size, 'p':p},
            'SC-WEAT':single_cat_assocs,
            'weat_boot':{'effect_size':effect_size_boot, 'CI_l':effect_size_boot_cil, 'CI_u':effect_size_boot_ciu}
        }
        results_weat.append(line)
        
    for test_name in weat2.keys():
        if test_name.startswith("_") or weat2[test_name]['method']=='wefat': continue
            
        A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat2, test_name)
        
        # compute effect size
        weat_test = WEAT(A_words, B_words, word_vectors, set_names)
        weat_test.add_target_sets(X_words, Y_words)
        effect_size, p, single_cat_assocs = weat_test.compute_effect_size()
        effect_size_boot, effect_size_boot_cil, effect_size_boot_ciu = weat_test.compute_effect_size_bootstrap()
        
        line = {
            'test_name':test_name,
            'weat_test_num':2,
            'iter':n_iter,
            'corpus':corpus_gender,
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'weat_test':{'effect_size':effect_size, 'p':p},
            'SC-WEAT':single_cat_assocs,
            'weat_boot':{'effect_size':effect_size_boot, 'CI_l':effect_size_boot_cil, 'CI_u':effect_size_boot_ciu}
        }
        results_weat.append(line)
        
results_weat = pd.DataFrame(results_weat)
results_weat.head(3)

  return (u1 - u2) / s
  x = asanyarray(arr - arrmean)
  return (u1 - u2) / s
  x = asanyarray(arr - arrmean)
  return (u1 - u2) / s
  x = asanyarray(arr - arrmean)
  return (u1 - u2) / s
  x = asanyarray(arr - arrmean)


Unnamed: 0,test_name,weat_test_num,iter,corpus,A_words,B_words,X_words,Y_words,set_names,weat_test,SC-WEAT,weat_boot
0,EuropeanAmerican_AfricanAmerican_Pleasant_Unpl...,1,2,all,"[caress, freedom, health, love, peace, cheer, ...","[abuse, crash, filth, murder, sickness, accide...","[harry, amber, heather, adam, frank, chip, pet...","[theo, jerome, leroy, tyrone, ebony, jasmine, ...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Eur...","{'effect_size': 0.20814206691336232, 'p': 0.328}","{'X': {'score': -0.054366392318803004, 'p': 0....","{'effect_size': 0.20276093579143156, 'CI_l': 0..."
1,EuropeanAmerican_AfricanAmerican_Pleasant_Unpl...,1,2,all,"[caress, freedom, health, love, peace, cheer, ...","[abuse, crash, filth, murder, sickness, accide...","[greg, neil, emily, anne, jill, sarah, jay]","[jermaine, leroy, tyrone, ebony, keisha, kenya...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Eur...","{'effect_size': 1.1899716914103113, 'p': 0.017}","{'X': {'score': 0.8377363692216292, 'p': 0.0},...","{'effect_size': 1.3592768864484814, 'CI_l': 1...."
2,Flowers_Insects_Pleasant_Unpleasant,1,2,all,"[caress, freedom, health, love, peace, cheer, ...","[abuse, crash, filth, murder, sickness, accide...","[clover, marigold, poppy, crocus, iris, orchid...","[cockroach, maggot, locust, roach, centipede, ...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Flo...","{'effect_size': 1.6719860773890642, 'p': 0.0}","{'X': {'score': 1.1946402380107943, 'p': 0.002...","{'effect_size': 1.7579099762452655, 'CI_l': 1...."


In [8]:
# save it
results_weat.to_json("../results/experiment_W2V_several_runs_WEAT_2/WEAT/weat_test.json")

In [7]:
# runs for weat3
results_weat = []
for file in word_vector_files:
    
    n_iter, corpus_gender = re.findall(r"(?P<iter>(?<=iter-)\d+)|(?P<corpus>(?<=corpus-)\w+(?=_person))", 
                                       file)
    n_iter = n_iter[0]
    corpus_gender = corpus_gender[1]
    
    # read word vectors
    word_vectors = read_embedding(file)

    for test_name in weat3.keys():
        if test_name.startswith("_") or weat3[test_name]['method']=='wefat': continue
            
        A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat3, test_name)
        
        # compute effect size
        weat_test = WEAT(A_words, B_words, word_vectors, set_names)
        weat_test.add_target_sets(X_words, Y_words)
        effect_size, p, single_cat_assocs = weat_test.compute_effect_size()
        effect_size_boot, effect_size_boot_cil, effect_size_boot_ciu = weat_test.compute_effect_size_bootstrap()
        
        line = {
            'test_name':test_name,
            'weat_test_num':3,
            'iter':n_iter,
            'corpus':corpus_gender,
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'weat_test':{'effect_size':effect_size, 'p':p},
            'SC-WEAT':single_cat_assocs,
            'weat_boot':{'effect_size':effect_size_boot, 'CI_l':effect_size_boot_cil, 'CI_u':effect_size_boot_ciu}
        }
        results_weat.append(line)
        
results_weat = pd.DataFrame(results_weat)
results_weat.head(3)

  return (u1 - u2) / s
  x = asanyarray(arr - arrmean)
  return (u1 - u2) / s
  x = asanyarray(arr - arrmean)
  return (u1 - u2) / s
  x = asanyarray(arr - arrmean)


Unnamed: 0,test_name,weat_test_num,iter,corpus,A_words,B_words,X_words,Y_words,set_names,weat_test,SC-WEAT,weat_boot
0,Flowers_Insects_Pleasant_Unpleasant,3,2,all,"[family, honest, gift, wonderful, vacation, mi...","[cancer, agony, stink, sickness, poverty, acci...","[lilac, bluebell, violet, crocus, buttercup, i...","[cockroach, maggot, locust, roach, centipede, ...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Flo...","{'effect_size': 1.6560491655022689, 'p': 0.0}","{'X': {'score': 1.221836992187824, 'p': 0.003}...","{'effect_size': 1.772050512054659, 'CI_l': 1.7..."
1,Male_Female_Career_Family,3,2,all,"[corporation, professional, career, office, bu...","[family, marriage, wedding, children, home]","[paul, kevin, steve, greg, jeff, john, bill, m...","[diana, kate, joan, sarah, donna, lisa, amy, ann]","{'A': 'Career words', 'B': 'Family words', 'X'...","{'effect_size': 1.517047070892939, 'p': 0.002}","{'X': {'score': 0.8792345116082385, 'p': 0.153...","{'effect_size': 1.6938152814606529, 'CI_l': 1...."
2,Math_Arts_Male_Female,3,2,all,"[brother, grandfather, his, son, father, man, ...","[girl, hers, her, aunt, daughter, sister, fema...","[addition, math, numbers, geometry]","[poetry, drama, art, dance]","{'A': 'Male attributes', 'B': 'Female attribut...","{'effect_size': 0.924162123594772, 'p': 0.145}","{'X': {'score': 0.4653845329502054, 'p': 0.271...","{'effect_size': 1.2206450903571888, 'CI_l': 1...."


In [8]:
# save it
results_weat.to_json("../results/experiment_W2V_several_runs_WEAT_2/WEAT/weat_test_3.json")

In [9]:
# runs for weat3 (reverse target and attrib sets)
results_weat = []
for file in word_vector_files:
    
    n_iter, corpus_gender = re.findall(r"(?P<iter>(?<=iter-)\d+)|(?P<corpus>(?<=corpus-)\w+(?=_person))", 
                                       file)
    n_iter = n_iter[0]
    corpus_gender = corpus_gender[1]
    
    # read word vectors
    word_vectors = read_embedding(file)

    for test_name in weat3.keys():
        if test_name.startswith("_") or weat3[test_name]['method']=='wefat': continue
            
        #A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat3, test_name)
        X_words, Y_words, A_words, B_words, set_names = get_WEAT_word_sets(weat3, test_name)
        
        # compute effect size
        weat_test = WEAT(A_words, B_words, word_vectors, set_names)
        weat_test.add_target_sets(X_words, Y_words)
        effect_size, p, single_cat_assocs = weat_test.compute_effect_size()
        effect_size_boot, effect_size_boot_cil, effect_size_boot_ciu = weat_test.compute_effect_size_bootstrap()
        
        line = {
            'test_name':test_name+" (reverse)",
            'weat_test_num':3,
            'iter':n_iter,
            'corpus':corpus_gender,
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'weat_test':{'effect_size':effect_size, 'p':p},
            'SC-WEAT':single_cat_assocs,
            'weat_boot':{'effect_size':effect_size_boot, 'CI_l':effect_size_boot_cil, 'CI_u':effect_size_boot_ciu}
        }
        results_weat.append(line)
        
results_weat = pd.DataFrame(results_weat)
results_weat.head(3)

Unnamed: 0,test_name,weat_test_num,iter,corpus,A_words,B_words,X_words,Y_words,set_names,weat_test,SC-WEAT,weat_boot
0,Flowers_Insects_Pleasant_Unpleasant (reverse),3,2,all,"[lilac, bluebell, violet, crocus, buttercup, i...","[cockroach, maggot, locust, roach, centipede, ...","[family, honest, gift, wonderful, vacation, mi...","[cancer, agony, stink, sickness, poverty, acci...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Flo...","{'effect_size': 1.7156891760315385, 'p': 0.0}","{'X': {'score': 0.6026036759909137, 'p': 0.001...","{'effect_size': 1.795072163532513, 'CI_l': 1.7..."
1,Male_Female_Career_Family (reverse),3,2,all,"[paul, kevin, steve, greg, jeff, john, bill, m...","[diana, kate, joan, sarah, donna, lisa, amy, ann]","[corporation, professional, career, office, bu...","[family, marriage, wedding, children, home]","{'A': 'Career words', 'B': 'Family words', 'X'...","{'effect_size': 1.2969693986601907, 'p': 0.053}","{'X': {'score': 2.1115512851088822, 'p': 0.002...","{'effect_size': 1.5152566665613671, 'CI_l': 1...."
2,Math_Arts_Male_Female (reverse),3,2,all,"[addition, math, numbers, geometry]","[poetry, drama, art, dance]","[brother, grandfather, his, son, father, man, ...","[girl, hers, her, aunt, daughter, sister, fema...","{'A': 'Male attributes', 'B': 'Female attribut...","{'effect_size': 0.8659748827988355, 'p': 0.021}","{'X': {'score': 0.09732382247691848, 'p': 0.43...","{'effect_size': 0.9281157368398351, 'CI_l': 0...."


In [10]:
# save it
results_weat.to_json("../results/experiment_W2V_several_runs_WEAT_2/WEAT/weat_test_3_rev.json")

### SWEAT

In [9]:
results_sweat = []
for n_iter in range(5):
    
    # read word vectors
    emb_file_male = f"W2V_lemmatized_iter-{n_iter}_dim-300_w-5_alpha-default_epochs-40_corpus-male_person_artist_lyrics.txt"
    word_vectors_male = read_embedding(embedding_path+emb_file_male)
    
    emb_file_female = f"W2V_lemmatized_iter-{n_iter}_dim-300_w-5_alpha-default_epochs-40_corpus-female_person_artist_lyrics.txt"
    word_vectors_female = read_embedding(embedding_path+emb_file_female)

    for test_name in weat1.keys():
        if test_name.startswith("_") or weat1[test_name]['method']=='wefat': continue
            
        A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat1, test_name)
        
        sweat_test = SWEAT(X_words, A_words, B_words, word_vectors_male, word_vectors_female)
        score_x, p_x = sweat_test.compute_score()
        
        sweat_test = SWEAT(Y_words, A_words, B_words, word_vectors_male, word_vectors_female)
        score_y, p_y = sweat_test.compute_score()
        
        line = {
            'test_name':test_name,
            'weat_test_num':1,
            'iter':n_iter,
            'corpus':'male_Vs_female',
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'SWEAT_X':{'score':score_x, 'p':p_x},
            'SWEAT_Y':{'score':score_y, 'p':p_y},
        }
        
        results_sweat.append(line)
        
    for test_name in weat2.keys():
        if test_name.startswith("_") or weat2[test_name]['method']=='wefat': continue
            
        A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat2, test_name)
        
        sweat_test = SWEAT(X_words, A_words, B_words, word_vectors_male, word_vectors_female)
        score_x, p_x = sweat_test.compute_score()
        
        sweat_test = SWEAT(Y_words, A_words, B_words, word_vectors_male, word_vectors_female)
        score_y, p_y = sweat_test.compute_score()
        
        line = {
            'test_name':test_name,
            'weat_test_num':2,
            'iter':n_iter,
            'corpus':'male_Vs_female',
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'SWEAT_X':{'score':score_x, 'p':p_x},
            'SWEAT_Y':{'score':score_y, 'p':p_y},
        }
        
        results_sweat.append(line)
        
results_sweat = pd.DataFrame(results_sweat)
results_sweat.head(3)
        

Unnamed: 0,test_name,weat_test_num,iter,corpus,A_words,B_words,X_words,Y_words,set_names,SWEAT_X,SWEAT_Y
0,EuropeanAmerican_AfricanAmerican_Pleasant_Unpl...,1,0,male_Vs_female,"[caress, freedom, health, love, peace, cheer, ...","[abuse, crash, filth, murder, sickness, accide...","[harry, amber, heather, adam, frank, chip, pet...","[theo, jerome, leroy, tyrone, ebony, jasmine, ...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Eur...","{'score': 0.22084260813299808, 'p': 0.325}","{'score': -0.19722885767682885, 'p': 0.325}"
1,EuropeanAmerican_AfricanAmerican_Pleasant_Unpl...,1,0,male_Vs_female,"[caress, freedom, health, love, peace, cheer, ...","[abuse, crash, filth, murder, sickness, accide...","[greg, neil, emily, anne, jill, sarah, jay]","[jermaine, leroy, tyrone, ebony, keisha, kenya...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Eur...","{'score': -0.5328060751905181, 'p': 0.151}","{'score': 0.14236660334638365, 'p': 0.373}"
2,Flowers_Insects_Pleasant_Unpleasant,1,0,male_Vs_female,"[caress, freedom, health, love, peace, cheer, ...","[abuse, crash, filth, murder, sickness, accide...","[clover, marigold, poppy, crocus, iris, orchid...","[cockroach, maggot, locust, roach, centipede, ...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Flo...","{'score': -0.0853475279605544, 'p': 0.388}","{'score': -0.2554651023949104, 'p': 0.21}"


In [10]:
# save it
results_sweat.to_json("../results/experiment_W2V_several_runs_WEAT_2/WEAT/sweat_test.json")

In [11]:
# run for weat 3
results_sweat = []
for n_iter in range(5):
    
    # read word vectors
    emb_file_male = f"W2V_lemmatized_iter-{n_iter}_dim-300_w-5_alpha-default_epochs-40_corpus-male_person_artist_lyrics.txt"
    word_vectors_male = read_embedding(embedding_path+emb_file_male)
    
    emb_file_female = f"W2V_lemmatized_iter-{n_iter}_dim-300_w-5_alpha-default_epochs-40_corpus-female_person_artist_lyrics.txt"
    word_vectors_female = read_embedding(embedding_path+emb_file_female)

    for test_name in weat3.keys():
        if test_name.startswith("_") or weat3[test_name]['method']=='wefat': continue
            
        A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat3, test_name)
        
        sweat_test = SWEAT(X_words, A_words, B_words, word_vectors_male, word_vectors_female)
        score_x, p_x = sweat_test.compute_score()
        
        sweat_test = SWEAT(Y_words, A_words, B_words, word_vectors_male, word_vectors_female)
        score_y, p_y = sweat_test.compute_score()
        
        line = {
            'test_name':test_name,
            'weat_test_num':3,
            'iter':n_iter,
            'corpus':'male_Vs_female',
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'SWEAT_X':{'score':score_x, 'p':p_x},
            'SWEAT_Y':{'score':score_y, 'p':p_y},
        }
        
        results_sweat.append(line)
        
    
        
results_sweat = pd.DataFrame(results_sweat)
results_sweat.head(3)
        

Unnamed: 0,test_name,weat_test_num,iter,corpus,A_words,B_words,X_words,Y_words,set_names,SWEAT_X,SWEAT_Y
0,Flowers_Insects_Pleasant_Unpleasant,3,0,male_Vs_female,"[family, honest, gift, wonderful, vacation, mi...","[cancer, agony, stink, sickness, poverty, acci...","[lilac, bluebell, violet, crocus, buttercup, i...","[cockroach, maggot, locust, roach, centipede, ...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Flo...","{'score': -0.40046219606015504, 'p': 0.127}","{'score': -0.3076714878582829, 'p': 0.172}"
1,Male_Female_Career_Family,3,0,male_Vs_female,"[corporation, professional, career, office, bu...","[family, marriage, wedding, children, home]","[paul, kevin, steve, greg, jeff, john, bill, m...","[diana, kate, joan, sarah, donna, lisa, amy, ann]","{'A': 'Career words', 'B': 'Family words', 'X'...","{'score': -0.5332730344413686, 'p': 0.157}","{'score': -0.7618472724502524, 'p': 0.076}"
2,Math_Arts_Male_Female,3,0,male_Vs_female,"[brother, grandfather, his, son, father, man, ...","[girl, hers, her, aunt, daughter, sister, fema...","[addition, math, numbers, geometry]","[poetry, drama, art, dance]","{'A': 'Male attributes', 'B': 'Female attribut...","{'score': 0.33967701118299787, 'p': 0.3}","{'score': 0.9109534514959975, 'p': 0.1}"


In [12]:
# save it
results_sweat.to_json("../results/experiment_W2V_several_runs_WEAT_2/WEAT/sweat_test_3.json")

In [13]:
# run for weat 3 (reverrse attrib and target sets)
results_sweat = []
for n_iter in range(5):
    
    # read word vectors
    emb_file_male = f"W2V_lemmatized_iter-{n_iter}_dim-300_w-5_alpha-default_epochs-40_corpus-male_person_artist_lyrics.txt"
    word_vectors_male = read_embedding(embedding_path+emb_file_male)
    
    emb_file_female = f"W2V_lemmatized_iter-{n_iter}_dim-300_w-5_alpha-default_epochs-40_corpus-female_person_artist_lyrics.txt"
    word_vectors_female = read_embedding(embedding_path+emb_file_female)

    for test_name in weat3.keys():
        if test_name.startswith("_") or weat3[test_name]['method']=='wefat': continue
            
        #A_words, B_words, X_words, Y_words, set_names = get_WEAT_word_sets(weat3, test_name)
        X_words, Y_words, A_words, B_words, set_names = get_WEAT_word_sets(weat3, test_name)
        
        sweat_test = SWEAT(X_words, A_words, B_words, word_vectors_male, word_vectors_female)
        score_x, p_x = sweat_test.compute_score()
        
        sweat_test = SWEAT(Y_words, A_words, B_words, word_vectors_male, word_vectors_female)
        score_y, p_y = sweat_test.compute_score()
        
        line = {
            'test_name':test_name+" (reverse)",
            'weat_test_num':3,
            'iter':n_iter,
            'corpus':'male_Vs_female',
            'A_words':A_words,
            'B_words':B_words,
            'X_words':X_words,
            'Y_words':Y_words,
            'set_names':set_names,
            'SWEAT_X':{'score':score_x, 'p':p_x},
            'SWEAT_Y':{'score':score_y, 'p':p_y},
        }
        
        results_sweat.append(line)
        
    
        
results_sweat = pd.DataFrame(results_sweat)
results_sweat.head(3)
        

Unnamed: 0,test_name,weat_test_num,iter,corpus,A_words,B_words,X_words,Y_words,set_names,SWEAT_X,SWEAT_Y
0,Flowers_Insects_Pleasant_Unpleasant (reverse),3,0,male_Vs_female,"[lilac, bluebell, violet, crocus, buttercup, i...","[cockroach, maggot, locust, roach, centipede, ...","[family, honest, gift, wonderful, vacation, mi...","[cancer, agony, stink, sickness, poverty, acci...","{'A': 'Pleasant', 'B': 'Unpleasant', 'X': 'Flo...","{'score': 0.169234653967061, 'p': 0.269}","{'score': 0.1868681062964704, 'p': 0.247}"
1,Male_Female_Career_Family (reverse),3,0,male_Vs_female,"[paul, kevin, steve, greg, jeff, john, bill, m...","[diana, kate, joan, sarah, donna, lisa, amy, ann]","[corporation, professional, career, office, bu...","[family, marriage, wedding, children, home]","{'A': 'Career words', 'B': 'Family words', 'X'...","{'score': 0.3083787917814452, 'p': 0.338}","{'score': -0.08865889442011957, 'p': 0.472}"
2,Math_Arts_Male_Female (reverse),3,0,male_Vs_female,"[addition, math, numbers, geometry]","[poetry, drama, art, dance]","[brother, grandfather, his, son, father, man, ...","[girl, hers, her, aunt, daughter, sister, fema...","{'A': 'Male attributes', 'B': 'Female attribut...","{'score': -1.0161886140515097, 'p': 0.011}","{'score': -0.3478497811490508, 'p': 0.218}"


In [14]:
# save it
results_sweat.to_json("../results/experiment_W2V_several_runs_WEAT_2/WEAT/sweat_test_3_rev.json")