In [1]:
# Setup:
# Clone the code repository from https://github.com/tolga-b/debiaswe.git
# mkdir debiaswe_tutorial
# cd debiaswe_tutorial
# git clone https://github.com/tolga-b/debiaswe.git

# To reduce download time, we provide a subset of the GoogleNews vectors at the following location:
# https://drive.google.com/file/d/1NH6jcrg8SXbnhpIXRIXF_-KUE7wGxGaG/view?usp=sharing

# For full embeddings:
# Download the embeddings from https://github.com/tolga-b/debiaswe and put them in the following directory:
# embeddings/GoogleNews-vectors-negative300-hard-debiased.bin
# embeddings/GoogleNews-vectors-negative300.bin

In [8]:
from __future__ import print_function, division
%matplotlib inline
from matplotlib import pyplot as plt
import json
import random
import numpy as np

import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_professions
from debiaswe.data import load_gender_seed

import torch

## Part 1: Gender Bias in Word Embedding


### Step 1: Load data
We first load the word embedding trained on a corpus of Google News texts. The full embedding consists of 3 million English words and terms; this notebook loads a reduced subset of 26,423 words. The embedding maps each word to a 300-dimensional vector.

In [2]:
# Read the reduced Google News word2vec embedding from disk
E = WordEmbedding('./embeddings/w2v_gnews_small.txt')
print(E)
words = E.words
print("Words:", len(words))

# Profession records: the first field of each record is the profession word
professions = load_professions()
profession_words = [record[0] for record in professions]

*** Reading data from ./embeddings/w2v_gnews_small.txt
(26423, 300)
26423 words of dimension 300 : in, for, that, is, ..., Jay, Leroy, Brad, Jermaine
<debiaswe.we.WordEmbedding object at 0x000001A76F4E8CC8>
Words: 26423
Loaded professions
Format:
word,
definitional female -1.0 -> definitional male 1.0
stereotypical female -1.0 -> stereotypical male 1.0


### Step 2: Define gender direction

We define the gender direction as the direction of she - he, because these words are frequent and have fewer alternative word senses (e.g., man can also refer to mankind). In the paper, we discuss alternative approaches for defining the gender direction (e.g., using PCA).

In [27]:
# gender direction, built from the single word pair ('she', 'he')
# NOTE(review): E.diff lives in debiaswe.we and is not shown here -- presumably a
# (possibly normalized) difference of the two word vectors; confirm before
# relying on the scale of downstream projection scores.
v_gender = E.diff('she', 'he')

# Alternative, currently disabled: use the first component returned by
# we.doPCA over the definitional pairs as the gender direction instead.
# Uncomment below for direction based on multiple definitional pairs
# with open('./data/definitional_pairs.json', "r") as f:
#     defs = json.load(f)
# v_gender = we.doPCA(defs, E).components_[0]

### Step 3: Generating analogies of "Man: x :: Woman : y"

We show that the word embedding model generates gender-stereotypical analogy pairs. 
To generate the analogy pairs, we use the analogy score defined in our paper. This score finds word pairs that are well aligned with the gender direction and that lie within a short distance of each other, which preserves topic consistency. 


In [28]:
# analogies gender
# Ask the embedding for analogy pairs along v_gender. thresh=1 is forwarded to
# best_analogies_dist_thresh unchanged (presumably the pair-distance bound used
# by the analogy score -- confirm in debiaswe.we).
a_gender = E.best_analogies_dist_thresh(v_gender, thresh=1)

# Plain-text alternative to we.viz, kept disabled; each entry unpacks as (a, b, c).
# for (a,b,c) in a_gender:
#     print(a+"-"+b)
we.viz(a_gender)

   0                      herself | himself                      0.94
   1                          she | he                           0.94
   2                          her | his                          0.91
   3                        woman | man                          0.82
   4                     daughter | son                          0.74
   5                         girl | boy                          0.74
   6                      actress | actor                        0.72
   7                businesswoman | businessman                  0.70
   8                       sister | brother                      0.69
   9                       mother | father                       0.69
  10                  spokeswoman | spokesman                    0.67
  11                      heroine | hero                         0.67
  12                   chairwoman | chairman                     0.67
  13                      sisters | brothers                     0.67
  14                

### Step 4: Analyzing gender bias in word vectors associated with professions

Next, we show that many occupations are unintentionally associated with either male or female by projecting their word vectors onto the gender direction. 

The script will output the profession words sorted with respect to the projection score in the direction of gender.

In [5]:
# Projection of every profession word onto the gender direction, stored as
# (score, word) pairs and ordered from the lowest score to the highest.
sp = [(E.v(word).dot(v_gender), word) for word in profession_words]
sp.sort()

# Display both extremes of the ranking
sp[:20], sp[-20:]

([(-0.23798442, 'maestro'),
  (-0.21665451, 'statesman'),
  (-0.2075867, 'skipper'),
  (-0.20267203, 'protege'),
  (-0.2020676, 'businessman'),
  (-0.19492391, 'sportsman'),
  (-0.18836352, 'philosopher'),
  (-0.1807366, 'marksman'),
  (-0.17289859, 'captain'),
  (-0.16785556, 'architect'),
  (-0.16702037, 'financier'),
  (-0.1631364, 'warrior'),
  (-0.15280864, 'major_leaguer'),
  (-0.15001445, 'trumpeter'),
  (-0.14718868, 'broadcaster'),
  (-0.14637241, 'magician'),
  (-0.14401692, 'fighter_pilot'),
  (-0.13782284, 'boss'),
  (-0.13718201, 'industrialist'),
  (-0.13684887, 'pundit')],
 [(0.19714224, 'interior_designer'),
  (0.20833439, 'housekeeper'),
  (0.21560377, 'stylist'),
  (0.22363171, 'bookkeeper'),
  (0.23776126, 'maid'),
  (0.24125953, 'nun'),
  (0.24782579, 'nanny'),
  (0.24929334, 'hairdresser'),
  (0.24946159, 'paralegal'),
  (0.25276467, 'ballerina'),
  (0.2571882, 'socialite'),
  (0.26647124, 'librarian'),
  (0.27317625, 'receptionist'),
  (0.2754029, 'waitress'),
  (

## Find soft debias transform using SGD
This code is largely based on code from https://github.com/TManzini/DebiasMulticlassWordEmbedding.

In [43]:
# Definitional pairs feed the PCA below; the gender-specific seed list marks the
# words that are excluded from the "neutral" set.
with open('./data/definitional_pairs.json', "r") as f:
    defs = json.load(f)
with open('./data/gender_specific_seed.json', "r") as f:
    gender_words = json.load(f)

# Columns of W are the word vectors (E.vecs holds one word per row).
W = torch.from_numpy(E.vecs).t()
print(f"Word embeddings: {W.shape}")

# Neutral words = vocabulary minus the gender-specific seeds.
# Sorting the row indices gives a deterministic column order (iteration order of
# a set of strings can change between interpreter runs), and a single fancy
# index on E.vecs avoids building a tensor from a Python list of arrays.
neutral_rows = sorted(E.index[w] for w in set(words) - set(gender_words))
neutrals = torch.from_numpy(E.vecs[neutral_rows]).t()
print(f"Neutral embeddings: {neutrals.shape}")

# First PCA component as a column vector, cast to the embedding dtype so that
# it can be multiplied with W / neutrals without a dtype mismatch.
gender_direction = torch.from_numpy(
    np.array(we.doPCA(defs, E).components_[0], dtype=E.vecs.dtype).reshape(-1, 1)
)
print(f"Gender subspace: {gender_direction.shape}")

l = 0.2 # lambda: weight of the neutral-word penalty in the loss

Word embeddings: torch.Size([300, 26423])
Neutral embeddings: torch.Size([300, 26205])
Gender subspace: torch.Size([300, 1])


In [59]:
# Seed the RNG so the random initialisation of `transform` (and therefore the
# whole loss curve) is reproducible under Restart & Run All.
torch.manual_seed(0)

# Work with the SVD factors of W instead of W itself to reduce memory and
# computational cost.
u, s, _ = torch.svd(W)
s = torch.diag(s)

# precompute the two constant factors of the first loss term
t1 = s.mm(u.t())
t2 = u.mm(s)

# Derive the dimension from the data rather than hard-coding it, and build the
# identity once instead of on every iteration.
embedding_dim = W.shape[0]
identity = torch.eye(embedding_dim, dtype=W.dtype)

transform = torch.randn(embedding_dim, embedding_dim, dtype=W.dtype, requires_grad=True)

epochs = 2000
optimizer = torch.optim.SGD([transform], lr=0.0001)

for i in range(epochs):
    optimizer.zero_grad()

    TtT = torch.mm(transform.t(), transform)

    # norm1: penalises T^T T for deviating from the identity, weighted by the
    # SVD factors of W.
    norm1 = t1.mm(TtT - identity).mm(t2).norm(p=2)

    # norm2: penalises inner products between transformed neutral words and
    # the transformed gender direction, since n^T T^T T g == (T n) . (T g).
    norm2 = neutrals.t().mm(TtT).mm(gender_direction).norm(p=2)

    loss = norm1 + l * norm2

    loss.backward()
    optimizer.step()

    if i % 10 == 0:
        print("Loss @ Epoch #" + str(i) + ":", loss.item())


Loss @ Epoch #0: 908327.9375
Loss @ Epoch #10: 629339.6875
Loss @ Epoch #20: 573729.125
Loss @ Epoch #30: 534159.5
Loss @ Epoch #40: 502543.34375
Loss @ Epoch #50: 475920.34375
Loss @ Epoch #60: 452802.84375
Loss @ Epoch #70: 432319.8125
Loss @ Epoch #80: 413906.71875
Loss @ Epoch #90: 397175.96875
Loss @ Epoch #100: 381843.625
Loss @ Epoch #110: 367697.6875
Loss @ Epoch #120: 354573.59375
Loss @ Epoch #130: 342339.4375
Loss @ Epoch #140: 330889.46875
Loss @ Epoch #150: 320136.09375
Loss @ Epoch #160: 310006.5625
Loss @ Epoch #170: 300439.625
Loss @ Epoch #180: 291381.96875
Loss @ Epoch #190: 282788.34375
Loss @ Epoch #200: 274619.65625
Loss @ Epoch #210: 266841.3125
Loss @ Epoch #220: 259423.171875
Loss @ Epoch #230: 252337.921875
Loss @ Epoch #240: 245561.96875
Loss @ Epoch #250: 239073.5625
Loss @ Epoch #260: 232853.671875
Loss @ Epoch #270: 226884.859375
Loss @ Epoch #280: 221151.46875
Loss @ Epoch #290: 215638.796875
Loss @ Epoch #300: 210334.0625
Loss @ Epoch #310: 205225.234375


In [10]:



def equalize_and_soften(vocab, words, eq_sets, bias_subspace, embedding_dim, l=0.2, verbose=True,
                        epochs=10, lr=0.000001, seed=None):
    """Learn a linear soft-debiasing transform with SGD and apply it to a vocabulary.

    A square matrix T is optimised to minimise
    ``||t1 (T^T T - I) t2|| + l * ||N^T T^T T B||`` where ``t1``/``t2`` are built
    from the SVD of the word matrix, ``N`` holds the neutral word vectors and
    ``B`` is the bias subspace. Every vocabulary vector is then mapped through T
    and rescaled to unit L2 norm.

    Args:
        vocab: mapping ``word -> vector`` (sequence of ``embedding_dim`` numbers).
        words: iterable of neutral words; each must be a key of ``vocab``.
            May be empty, in which case the bias penalty is skipped.
        eq_sets: accepted for interface compatibility; not used by this function.
        bias_subspace: values reshaped to ``(embedding_dim, -1)``.
        embedding_dim: dimensionality of the word vectors.
        l: weight of the bias penalty term.
        verbose: print the loss after every epoch and a completion message.
        epochs: number of SGD steps (default 10).
        lr: SGD learning rate (default 1e-6).
        seed: optional integer; when given, the random initialisation of the
            transform is drawn from a dedicated generator seeded with it, so
            results are reproducible. ``None`` uses the global torch RNG.

    Returns:
        dict mapping each vocabulary word to a 1-D float32 numpy array holding
        its transformed, unit-normalised vector.

    Raises:
        ValueError: if ``vocab`` is empty.
        KeyError: if a neutral word is missing from ``vocab``.
    """
    if not vocab:
        raise ValueError("vocab must contain at least one word vector")

    labels = list(vocab.keys())

    # Going through one numpy array is much cheaper than torch.tensor() on a
    # Python sequence of per-word arrays.
    Words = torch.as_tensor(np.asarray([vocab[w] for w in labels], dtype=np.float32)).t()

    neutral_rows = [vocab[w] for w in words]
    Neutrals = None
    if neutral_rows:
        Neutrals = torch.as_tensor(np.asarray(neutral_rows, dtype=np.float32)).t()

    # perform SVD on W to reduce memory and computational costs
    # based on suggestions in supplementary material of Bolukbasi et al.
    u, s, _ = torch.svd(Words)
    s = torch.diag(s)

    # precompute
    t1 = s.mm(u.t())
    t2 = u.mm(s)

    generator = None
    if seed is not None:
        generator = torch.Generator().manual_seed(seed)
    Transform = torch.randn(embedding_dim, embedding_dim, generator=generator).float()
    Transform.requires_grad = True

    # NOTE(review): view() reshapes and does not transpose. This is only correct
    # if bias_subspace is a single direction or already laid out as
    # (embedding_dim, k) -- confirm against callers that pass k > 1 directions.
    BiasSpace = torch.tensor(bias_subspace).view(embedding_dim, -1).float()

    identity = torch.eye(embedding_dim)  # loop-invariant, build once
    optimizer = torch.optim.SGD([Transform], lr=lr, momentum=0.0)

    for i in range(epochs):
        optimizer.zero_grad()

        TtT = torch.mm(Transform.t(), Transform)
        loss = t1.mm(TtT - identity).mm(t2).norm(p=2)
        if Neutrals is not None:
            loss = loss + l * Neutrals.t().mm(TtT).mm(BiasSpace).norm(p=2)

        loss.backward()
        optimizer.step()

        if verbose:
            print("Loss @ Epoch #" + str(i) + ":", loss.item())

    if verbose:
        print("Optimization Completed, normalizing vector transform")

    # Transform and normalise all words in one matrix product instead of a
    # Python loop over the vocabulary.
    with torch.no_grad():
        transformed = Transform.mm(Words)
        unit = transformed / transformed.norm(p=2, dim=0, keepdim=True)
        unit_rows = unit.t().contiguous().numpy()

    # Copy each row so the returned arrays are independent of each other.
    return {label: unit_rows[i].copy() for i, label in enumerate(labels)}


torch.Size([300, 26423])
