## Debiasing Algorithm

In [1]:
import numpy as np
import pandas as pd
import json
from sklearn.decomposition import PCA
import copy

In [2]:
# Parse the GloVe text file into a {word: vector} dictionary.
# Each line holds one token followed by the 50 components of its vector.
embeddings_index = {}
with open('glove.6B.50d.txt',encoding="utf8") as f:
    for line in f:
        fields = line.split()
        # fields[0] is the token, everything after it is the float vector
        embeddings_index[fields[0]] = np.asarray(fields[1:], dtype='float32')
print('Loaded %s word vectors.' % len(embeddings_index))

Loaded 400000 word vectors.


In [3]:
# Load the JSON word lists that drive the debiasing procedure
def _load_json(path):
    """Parse the JSON document stored at `path` and return the decoded object."""
    with open(path) as f:
        return json.load(f)

definitional_pairs = _load_json('./data/definitional_pairs.json')
equalize_pairs = _load_json('./data/equalize_pairs.json')
gender_specific_words = _load_json('./data/gender_specific_seed.json')
professions = _load_json('./data/professions.json')
# Keep only the first element of every profession entry
profession_words = [entry[0] for entry in professions]

In [4]:
# This function takes the definitional pairs and computes the gender subspace.
# `embedding` is the word -> vector dictionary to look the pair words up in.
# By default the subspace is spanned by the top k = 10 principal components.
def get_gender_subspace(pairs, embedding, num_components=10):
    """Estimate the gender subspace from a list of definitional word pairs.

    For every pair whose two words are both present in ``embedding``, the
    pair is centered on its own mean and the two resulting deviation vectors
    are collected. PCA is then fitted on all collected deviations.

    Parameters
    ----------
    pairs : iterable of (str, str)
        Definitional word pairs. Pairs with a word missing from
        ``embedding`` are skipped.
    embedding : mapping of str -> numpy array
        Word vectors to look the pair words up in.
    num_components : int, default 10
        Number of principal components to keep.

    Returns
    -------
    numpy array
        ``pca.components_`` of the fitted PCA: one row per principal
        component, one column per embedding dimension.

    Raises
    ------
    ValueError
        If no pair has both of its words in ``embedding``, so there is
        nothing to fit the PCA on.
    """
    deviations = []
    for a, b in pairs:
        # Skip pairs that are not fully covered by the embedding
        if a in embedding and b in embedding:
            # Midpoint of the two word vectors
            center = (embedding[a] + embedding[b]) / 2
            # Deviation of each word from the pair's midpoint
            deviations.append(embedding[a] - center)
            deviations.append(embedding[b] - center)

    # Without this guard an empty list turns into a 1-D array and PCA fails
    # with a shape error that does not point at the real cause.
    if not deviations:
        raise ValueError(
            "Cannot compute the gender subspace: none of the given pairs "
            "has both words in the embedding"
        )

    # Two rows per usable pair, e.g. 18 pairs of 50-d vectors -> a 36x50 matrix
    matrix = np.array(deviations)
    pca = PCA(n_components=num_components)
    pca.fit(matrix)
    return pca.components_

In [5]:
# Normalizing each word in the embedding
def normalize(embedding):
    """Scale every vector in ``embedding`` to unit L2 norm, in place.

    Parameters
    ----------
    embedding : mapping of str -> numpy array
        Word vectors. Each array is modified in place; nothing is returned.

    Notes
    -----
    Vectors whose norm is zero are left unchanged: they have no direction,
    and dividing by their norm would fill them with NaN values.
    """
    for word in embedding:
        norm = np.linalg.norm(embedding[word])
        # Guard against 0/0, which would silently turn the vector into NaNs
        if norm > 0:
            embedding[word] /= norm

# Project the word vector u onto the given subspace
def get_projection_on_subspace(subspace, u):
    """Return the component of ``u`` that lies in ``subspace``.

    ``subspace`` holds one basis vector per row. Each row is weighted by its
    dot product with ``u`` and the weighted rows are added together.
    """
    # One coefficient per basis vector, reshaped to a column so that it
    # broadcasts across the corresponding row of `subspace`
    coefficients = np.expand_dims(subspace.dot(u), axis=-1)
    weighted_rows = coefficients * subspace
    # Collapse the basis axis: the result has the dimensionality of u
    return weighted_rows.sum(axis=0)

In [6]:
def debias_new(embedding, gender_specific_words, definitional_pairs, equalize_pairs):
    """Return a gender-debiased copy of ``embedding`` (neutralize, then equalize).

    Steps:
      1. Copy the embedding and normalize every vector to unit length.
      2. Compute the gender subspace from ``definitional_pairs``.
      3. Neutralize: remove the gender-subspace component from every word
         that is not listed in ``gender_specific_words``, then re-normalize.
      4. Equalize: for each usable pair in ``equalize_pairs``, rebuild both
         words from the gender-free part of the pair's mean plus a unit
         gender direction of their own, then re-normalize.

    Parameters
    ----------
    embedding : mapping of str -> numpy array
        Word vectors. Not modified; all work happens on a deep copy.
    gender_specific_words : iterable of str
        Words that are exempt from the neutralize step.
    definitional_pairs : iterable of (str, str)
        Pairs used to estimate the gender subspace.
    equalize_pairs : iterable of (str, str)
        Pairs to equalize. Both words are lowercased first; a pair is used
        only if both lowercased words are in the embedding.

    Returns
    -------
    dict of str -> numpy array
        The debiased, normalized embedding.
    """
    # Work on a deep copy so the caller's vectors are never changed in place
    result_embedding = copy.deepcopy(embedding)
    normalize(result_embedding)

    # calculate gender subspace
    gender_subspace = get_gender_subspace(definitional_pairs, result_embedding)

    # Lowercase FIRST and only then test membership. Checking the original
    # casing and storing the lowercased key could raise KeyError later on, or
    # drop pairs whose words only exist in lowercase.
    candidate_pairs = []
    for a, b in equalize_pairs:
        a, b = a.lower(), b.lower()
        if a in result_embedding and b in result_embedding:
            candidate_pairs.append((a, b))

    # Gender component of every word that will be equalized, captured BEFORE
    # neutralizing so that it reflects the normalized input vectors. Only
    # these words are ever looked up, so nothing else needs to be stored.
    projections = {
        w: get_projection_on_subspace(gender_subspace, result_embedding[w])
        for pair in candidate_pairs
        for w in pair
    }

    # Neutralize: strip the gender component from every word that is not
    # explicitly gender specific
    specific_set = set(gender_specific_words)
    for w in result_embedding:
        if w not in specific_set:
            result_embedding[w] -= get_projection_on_subspace(gender_subspace, result_embedding[w])
    normalize(result_embedding)

    # Equalize
    for a, b in candidate_pairs:
        mean = (result_embedding[a] + result_embedding[b]) / 2
        # Part of the pair's mean that lies in the gender subspace
        mean_projection = get_projection_on_subspace(gender_subspace, mean)
        # Gender-free part of the mean, shared by both words of the pair
        v = mean - mean_projection
        # Rounding can push ||v||**2 marginally above 1; clamp the radicand
        # at zero so that the square root cannot return NaN.
        scale = np.sqrt(np.maximum(1 - np.linalg.norm(v) ** 2, 0))
        result_embedding[a] = v + scale * _unit_or_zero(projections[a] - mean_projection)
        result_embedding[b] = v + scale * _unit_or_zero(projections[b] - mean_projection)

    normalize(result_embedding)

    return result_embedding


def _unit_or_zero(vec):
    """Return ``vec`` scaled to unit length, or ``vec`` itself if its norm is zero."""
    norm = np.linalg.norm(vec)
    # A zero-norm vector is already all zeros; dividing would produce NaNs
    return vec / norm if norm > 0 else vec

In [7]:
# Run the full debiasing pipeline on the loaded GloVe vectors.
# debias_new works on a deep copy, so embeddings_index itself is not modified.
result_embeddings = debias_new(embeddings_index, gender_specific_words, definitional_pairs, equalize_pairs)

In [8]:
# Inspect the debiased vector for 'boy'
result_embeddings['boy']

array([-0.09684225, -0.08374283,  0.04795526, -0.09274136,  0.12638162,
        0.00570989, -0.17039476,  0.02421683, -0.08864009,  0.15551344,
        0.1187626 ,  0.13448036, -0.18283153, -0.00492178,  0.11353486,
       -0.08351187, -0.15023158,  0.15232056, -0.17999893,  0.19482744,
       -0.27747056,  0.10796852,  0.03724855,  0.02468305, -0.0898587 ,
       -0.3872868 ,  0.05060006, -0.01223798, -0.03468931,  0.00287955,
        0.35340136, -0.13069072, -0.08865376,  0.00366421,  0.127917  ,
        0.14564325,  0.10240126, -0.11916366,  0.17759123, -0.15560205,
       -0.05795389,  0.03828827, -0.31401294,  0.10463749,  0.1282506 ,
        0.0411264 ,  0.06590843, -0.08248833,  0.12169588, -0.1201804 ],
      dtype=float32)

In [9]:
# Inspect the debiased vector for 'girl'
result_embeddings['girl']

array([-0.09421148,  0.16113938, -0.26337108, -0.10037056,  0.15166216,
        0.32577285,  0.00312133,  0.08069942, -0.03719027, -0.01015408,
        0.10643533, -0.24990745, -0.09128951,  0.03590677,  0.14614901,
       -0.02865135, -0.12797943,  0.0571444 , -0.02690467,  0.25316066,
        0.06700391,  0.24962664,  0.17628127,  0.23619102, -0.076873  ,
       -0.2251927 , -0.18858147,  0.09321316,  0.04423882, -0.0159648 ,
        0.21088576,  0.01095933, -0.04905902, -0.01658309,  0.13140553,
        0.04803127, -0.15670311, -0.0498042 ,  0.06574442, -0.19474249,
       -0.05601468, -0.18147893,  0.09634152, -0.20424075,  0.04536428,
       -0.08140942,  0.18661134, -0.02878821,  0.13837434, -0.1228935 ],
      dtype=float32)