In [2]:
import numpy as np
from sklearn.decomposition import PCA


In [3]:
def project(word, dimension):
    """
    Scalar projection of ``word`` onto ``dimension``.

    Both arguments are treated as vectors: the dot product of the two is divided
    by the Euclidean norm of ``dimension`` and the quotient is rounded to three
    decimal places.

    Parameter
    ---------
    word : array-like
        Vector to be projected
    dimension : array-like
        Vector defining the axis that ``word`` is projected onto
    Returns
    -------
    float
        Length of the projection of ``word`` along ``dimension``, rounded to 3 decimals
    """
    axis_length = np.linalg.norm(dimension)
    scalar = np.dot(word, dimension) / axis_length
    return round(scalar, 3)

def doPCA(words_start, words_end):
    """
    Performs PCA on centered pairs of word vectors and returns the first component
    Based on function doPCA in Bolukbasi et al. (2016) source code at https://github.com/tolga-b/debiaswe/blob/master/debiaswe/we.py
    Every pair (words_start[i], words_end[i]) is centered on its own midpoint, so each pair
    contributes two rows to the PCA input: (end - center) and (start - center).
    Parameter
    ---------
    words_start : list
        List of word vectors at one end of the dimension of interest
    words_end: list
        List of word vectors at the other end of the dimension, index-aligned with words_start
    Returns
    -------
    ndarray
        First component of PCA of the centered word pairs
    Raises
    ------
    ValueError
        If the inputs are empty, are not collections of equal-length vectors,
        or do not have the same shape
    """
    start = np.asarray(words_start)
    end = np.asarray(words_end)
    if start.ndim != 2 or start.shape != end.shape or start.shape[0] == 0:
        raise ValueError(
            "words_start and words_end must be non-empty, index-aligned collections of equal-length vectors"
        )
    center = (start + end) / 2
    # Interleave rows as (end_0, start_0, end_1, start_1, ...), the same order the
    # pairs were appended in the loop-based version of this function.
    matrix = np.stack([end - center, start - center], axis=1).reshape(-1, start.shape[1])
    # PCA cannot have more components than the number of samples OR the number of
    # features, so cap by the smaller of the two matrix dimensions.
    num_components = min(matrix.shape)
    pca = PCA(n_components = num_components)
    pca.fit(matrix)
    return pca.components_[0]

def build_dimension(words_start, words_end):
    """
    This method builds a dimension defined by words at opposite ends of that dimension.
    Multiple methods exist in previous literature when building such a dimension.
    1) Kozlowski et al. (2019) averages across differences between different word pairs, noted to be interchangeable with averaging words on each side of the dimension and
    then taking the difference between averages. They are empirically verified to be identical.
    2) Bolukbasi et al. (2016) defines gender direction using a simple difference between man and woman in the corresponding tutorial. In the same tutorial, 
    racial direction is defined as difference between two clusters of words that are each sum of the embeddings of its corresponding dimensions
    normalized by the L2 norm. Wang et al. (2020) note that normalization is unnecessary. If unnormalized, this method should be equivalent to #3.
    NOTE(review): a difference of unnormalized sums is a scaled version of #1, so the reference to #3 above may be a typo - confirm.
    3) Bolukbasi et al. (2016) defines gender direction also by taking the differences across multiple pairs, doing PCA on these differences, and 
    taking the first component as the gender direction.

    Pairs in which either vector contains NaN are dropped before BOTH methods are computed,
    so mean_dim and pca_dim are always derived from the same set of pairs.
    Parameter
    ---------
    words_start : list
        List of vectors at the positive end of the dimension, where positive implies more likely to affect identification positively
    words_end: list
        List of vectors at the other end of dimension
    Returns
    -------
    (mean_dim, pca_dimension) : 2-tuple of numpy vector
        Two vectors that represent the dimension of interest calculated using method #1 and #3.
    Raises
    ------
    AssertionError
        If words_start and words_end differ in length
    ValueError
        If no pair is left after dropping pairs that contain NaN
    """
    assert len(words_start) == len(words_end)
    # Filter NaN pairs once and reuse the result everywhere below; passing the
    # unfiltered lists to doPCA would feed NaN rows into PCA.
    valid_start, valid_end = [], []
    for start_vec, end_vec in zip(words_start, words_end):
        if np.isnan(start_vec).any() or np.isnan(end_vec).any():
            continue
        valid_start.append(np.array(start_vec))
        valid_end.append(np.array(end_vec))
    if not valid_start:
        raise ValueError("build_dimension requires at least one word pair without NaN values")
    # Method #1: mean of the pairwise differences (start - end).
    mean_dim = (np.array(valid_start) - np.array(valid_end)).mean(axis=0)
    # Method #3: first principal component of the centered pairs.
    pca_dim = doPCA(valid_start, valid_end)
    # convention used in the current script is that words_start should represent the positive dimension.
    # The sign is tested on the raw dot product: rounding first (as project() does)
    # would turn a tiny negative value into -0.0 and skip the flip.
    if np.dot(valid_start[0], pca_dim) < 0:
        pca_dim = pca_dim * -1
    return (mean_dim, pca_dim)


In [4]:
import gensim
import os
import gensim.downloader as api

# Load the embedding registered as "word2vec-google-news-300" through gensim's downloader API.
# NOTE(review): presumably downloads and caches the model on first use - confirm before running offline.
model = api.load("word2vec-google-news-300")
# Two index-aligned word lists: gender_words[0][i] and gender_words[1][i] form one pair.
# The first list is the one passed as words_start (the positive end) to build_dimension.
gender_words = [['she', 'her', 'woman', 'Mary', 'herself', 'daughter', 'mother', 'gal', 'girl', 'female'],
                ['he', 'his', 'man', 'John', 'himself', 'son', 'father', 'guy', 'boy', 'male']]


In [5]:
# Look up the embedding of every word on each side of gender_words, then build the
# dimension with gender_words[0] as words_start and gender_words[1] as words_end.
mean_dim, pca_dim = build_dimension(
    *[[model[token] for token in side] for side in gender_words]
)

In [7]:
# Terms whose position along the PCA-based dimension is reported below.
values = [
    'accountability', 'speed', 'integrity', 'community',
    'compassion', 'diversity', 'teamwork', 'empathy',
]
for word in values:
    embedding = model[word]
    # Scalar projection of the term onto pca_dim, printed with three decimals.
    r = project(embedding, pca_dim)
    print(f'{word} : {r:.3f}')

accountability : 0.039
speed : -0.227
integrity : -0.164
community : 0.207
compassion : 0.216
diversity : 0.327
teamwork : -0.251
empathy : 0.092
