In [1]:
from gensim.models import FastText

C:\Users\Andreas\anaconda3\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
C:\Users\Andreas\anaconda3\lib\site-packages\numpy\.libs\libopenblas.noijjg62emaszi6nyurl6jbkm4evbgm7.gfortran-win_amd64.dll
C:\Users\Andreas\anaconda3\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll


In [2]:
import pandas as pd
import numpy as np
from functools import reduce
import gensim.downloader as api
import os

from wefe.datasets import (
    load_weat,
    fetch_eds,
    fetch_debias_multiclass,
    fetch_debiaswe,
    load_bingliu,
)
from wefe.query import Query
from wefe.word_embedding_model import WordEmbeddingModel
from wefe.metrics import WEAT, RNSB, RND


from wefe.utils import (
    run_queries,
    plot_queries_results,
    create_ranking,
    plot_ranking,
    calculate_ranking_correlations,
    plot_ranking_correlations,
)
from plotly.subplots import make_subplots

In [3]:
# User roles under comparison; each name is also used to locate that role's
# FastText model file and to label its embedding model.
user_roles = [
    "Contributor",
    "Ignored",
    "Lurker",
    "casual_commentator",
]

In [4]:
# Collects one WordEmbeddingModel wrapper per user role (filled by the loading loop).
model_list = list()

In [5]:
# Load the trained FastText model of every user role and wrap its keyed vectors
# in a WEFE WordEmbeddingModel labelled with the role name.
#
# The list is re-initialised here so that re-running this cell does not append
# a second copy of every model (which would duplicate rows in later results).
model_list = []
for role in user_roles:
    model_weat = FastText.load(f"../models/user_level/fasttext_{role}_weat.bin")
    model_list.append(WordEmbeddingModel(model_weat.wv, role))
    

In [6]:
# Word sets shipped with / fetched by the wefe library; the queries below index
# into these by key (e.g. "male_terms", "career").
# Calls are evaluated left to right, i.e. in the same order as before.
WEAT_wordsets, RND_wordsets = load_weat(), fetch_eds()
sentiments_wordsets, debias_multiclass_wordsets = (
    load_bingliu(),
    fetch_debias_multiclass(),
)

In [11]:
# Define gender related queries.
#
# Every query compares the same two target sets (male vs. female terms) and only
# differs in its pair of attribute sets, so the shared part lives in one helper
# instead of being copy-pasted into each Query(...) call.


def _gender_query(attribute_sets, attribute_names):
    """Build a Query with male/female target terms against the given attributes.

    Parameters
    ----------
    attribute_sets : list
        The attribute word sets to test the target terms against.
    attribute_names : list of str
        Display names for ``attribute_sets``, in the same order.

    Returns
    -------
    Query
        Query whose targets are ``RND_wordsets["male_terms"]`` and
        ``RND_wordsets["female_terms"]``, named "Male terms" / "Female terms".
    """
    # A fresh outer list is built on every call so queries never share it.
    return Query(
        [RND_wordsets["male_terms"], RND_wordsets["female_terms"]],
        attribute_sets,
        ["Male terms", "Female terms"],
        attribute_names,
    )


gender_1 = _gender_query(
    [WEAT_wordsets["career"], WEAT_wordsets["family"]],
    ["Career", "Family"],
)

gender_2 = _gender_query(
    [WEAT_wordsets["math"], WEAT_wordsets["arts"]],
    ["Math", "Arts"],
)

gender_3 = _gender_query(
    [WEAT_wordsets["science"], WEAT_wordsets["arts"]],
    ["Science", "Arts"],
)

# NOTE: the "Appearence" spelling is kept on purpose -- it becomes part of the
# query name and therefore of the result column labels.
gender_4 = _gender_query(
    [RND_wordsets["adjectives_intelligence"], RND_wordsets["adjectives_appearance"]],
    ["Intelligence", "Appearence"],
)

gender_5 = _gender_query(
    [RND_wordsets["adjectives_intelligence"], RND_wordsets["adjectives_sensitive"]],
    ["Intelligence", "Sensitive"],
)

gender_6 = _gender_query(
    [WEAT_wordsets["pleasant_5"], WEAT_wordsets["unpleasant_5"]],
    ["Pleasant", "Unpleasant"],
)

gender_sent_1 = _gender_query(
    [sentiments_wordsets["positive_words"], sentiments_wordsets["negative_words"]],
    ["Positive words", "Negative words"],
)

gender_role_1 = _gender_query(
    [
        debias_multiclass_wordsets["male_roles"],
        debias_multiclass_wordsets["female_roles"],
    ],
    ["Man Roles", "Woman Roles"],
)

# Queries that are actually evaluated.
# NOTE(review): gender_3 and gender_sent_1 are defined above but not included
# here -- confirm this omission is intentional.
gender_queries = [
    gender_1,
    gender_2,
    gender_4,
    gender_5,
    gender_6,
    gender_role_1,
]

In [12]:
# Run all gender queries on all user-role models; the resulting WEAT score
# table is what gets visualised.
# NOTE(review): run_queries is handed the WEAT class itself, not the `weat`
# instance created on the next line.
weat = WEAT()
WEAT_gender_results = run_queries(
    WEAT, gender_queries, model_list, queries_set_name="Gender Queries"
)

In [13]:
# Display the WEAT score table: one row per model (user role), one column per query.
WEAT_gender_results

query_name,Male terms and Female terms wrt Career and Family,Male terms and Female terms wrt Math and Arts,Male terms and Female terms wrt Intelligence and Appearence,Male terms and Female terms wrt Intelligence and Sensitive,Male terms and Female terms wrt Pleasant and Unpleasant,Male terms and Female terms wrt Man Roles and Woman Roles
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Contributor,0.313657,0.33304,0.993125,0.511374,-0.332014,1.011895
Ignored,0.552233,-0.236338,0.928308,0.348876,-0.571809,1.273441
Lurker,0.794969,0.237817,0.715879,-0.070766,-0.242454,1.373173
casual_commentator,0.60328,0.732765,1.401874,0.228397,0.055104,1.73617


In [14]:
from wefe.utils import plot_queries_results, run_queries

# Build the figure for the WEAT gender results, then render it.
weat_gender_figure = plot_queries_results(WEAT_gender_results)
weat_gender_figure.show()

In [17]:
# For every user role and every gender query, run WEAT with a two-sided p-value
# test and print the full result dict, so significant results can be identified.

weat = WEAT()

for role_idx, role in enumerate(user_roles):
    # model_list is position-aligned with user_roles.
    role_model = model_list[role_idx]
    for query in gender_queries:
        result = weat.run_query(
            query,
            role_model,
            calculate_p_value=True,
            p_value_test_type="two-sided",
        )
        print(role, result)
        print("--------------------")

Contributor {'query_name': 'Male terms and Female terms wrt Career and Family', 'result': 0.31365652542029077, 'weat': 0.31365652542029077, 'effect_size': 0.29959936686303107, 'p_value': 0.6622337766223377}
--------------------
Contributor {'query_name': 'Male terms and Female terms wrt Math and Arts', 'result': 0.3330396972596643, 'weat': 0.3330396972596643, 'effect_size': 0.4007563354859419, 'p_value': 0.35366463353664634}
--------------------
Contributor {'query_name': 'Male terms and Female terms wrt Intelligence and Appearence', 'result': 0.9931250223171498, 'weat': 0.9931250223171498, 'effect_size': 0.8018163016571519, 'p_value': 0.0160983901609839}
--------------------
Contributor {'query_name': 'Male terms and Female terms wrt Intelligence and Sensitive', 'result': 0.5113737646889472, 'weat': 0.5113737646889472, 'effect_size': 0.7824637353392603, 'p_value': 0.024697530246975304}
--------------------
Contributor {'query_name': 'Male terms and Female terms wrt Pleasant and Unplea