In [1]:
# Setup:
# Clone the code repository from https://github.com/tolga-b/debiaswe.git
# mkdir debiaswe_tutorial
# cd debiaswe_tutorial
# git clone https://github.com/tolga-b/debiaswe.git

# To reduce download time, we provide a subset of the GoogleNews vectors at the following location:
# https://drive.google.com/file/d/1NH6jcrg8SXbnhpIXRIXF_-KUE7wGxGaG/view?usp=sharing

# For full embeddings:
# Download the embeddings from https://github.com/tolga-b/debiaswe and place them at the following paths:
# embeddings/GoogleNews-vectors-negative300-hard-debiased.bin
# embeddings/GoogleNews-vectors-negative300.bin

In [9]:
from __future__ import print_function, division
%matplotlib inline
from matplotlib import pyplot as plt
import json
import random
import numpy as np

import debiaswe as dwe
import debiaswe.we as we
from debiaswe.we import WordEmbedding
from debiaswe.data import load_data

import torch

## Part 1: Gender Bias in Word Embedding


### Step 1: Load data
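We first load a word embedding. The embedding described in the paper is trained on a corpus of Google News texts and covers 3 million English words and terms; it maps each word into a 300-dimensional vector. Note that the cell below loads the smaller `fasttext_small` embedding instead (27,014 words of dimension 300, according to its output); the line that loads the full Google News file is commented out.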

In [10]:
# Load the 'fasttext_small' embedding. NOTE: despite the surrounding text,
# this is not the full Google News word2vec file; the line that would load
# that file is kept below, commented out and marked as not possible here.
E = WordEmbedding('fasttext_small')
# E = WordEmbedding('./embeddings/GoogleNews-vectors-negative300.bin') # Not possible
print(E)
words = E.words
print("Words:", len(words))

# Load the word lists used in the rest of the notebook, passing the
# embedding's vocabulary as embed_words. load_data returns four values; the
# third one is not used here. It is bound to `_unused` instead of `_`, because
# assigning to `_` at notebook top level replaces IPython's "last output"
# variable for the rest of the session.
gender_specific_words, defs, _unused, profession_words = load_data(embed_words=E.words)

27014 words of dimension 300 : the, and, of, to, ..., circumscribed, whos, salvaging, anion
Embedding shape: (27014, 300)
<debiaswe.we.WordEmbedding object at 0x00000186DDC05E48>
Words: 27014


### Step 2: Define gender direction

We define the gender direction as the direction of she - he, because these words are frequent and have fewer alternative word senses (e.g., man can also refer to mankind). In the paper, we discuss alternative approaches for defining the gender direction (e.g., using PCA).

In [9]:
# Gender direction: the she - he direction described in the text above, as
# returned by E.diff. v_gender is reused by every analysis cell below, both
# before and after debiasing.
# NOTE(review): whether E.diff normalizes its result is not visible here.
v_gender = E.diff('she', 'he')

# Uncomment below for direction based on multiple definitional pairs
# NOTE: this alternative rebinds `defs`, which was already returned by
# load_data and is later passed to soft_debias.
# with open('./data/definitional_pairs.json', "r") as f:
#     defs = json.load(f)
# v_gender = we.doPCA(defs, E).components_[0]

### Step 3: Generating analogies of "Man: x :: Woman : y"

We show that the word embedding model generates gender-stereotypical analogy pairs.
To generate the analogy pairs, we use the analogy score defined in our paper. This score finds word pairs that are well aligned with gender direction as well as within a short distance from each other to preserve topic consistency. 


In [None]:
# Gender analogies on the embedding as loaded (this cell comes before the
# soft_debias step further down). The threshold is named so the value handed
# to `thresh` is easy to spot and change.
analogy_thresh = 1
a_gender = E.best_analogies_dist_thresh(v_gender, thresh=analogy_thresh)

# Alternative to the table view: print each pair as plain text.
# for (a,b,c) in a_gender:
#     print(a+"-"+b)
we.viz(a_gender)

Computing neighbors


### Step 4: Analyzing gender bias in word vectors associated with professions

Next, we show that many occupations are unintentionally associated with either male or female by projecting their word vectors onto the gender dimension.

The script will output the profession words sorted with respect to the projection score in the direction of gender.

In [None]:
# Profession analysis along the gender direction, run at this point in the
# notebook, i.e. before soft_debias is called further down. Per the text
# above, the profession words are reported sorted by their projection score
# on v_gender. The returned value is kept in `sp`.
sp = E.profession_stereotypes(profession_words, v_gender)

## Find soft debias transform using SGD
This code is largely based on code from https://github.com/TManzini/DebiasMulticlassWordEmbedding.

In [11]:
from debiaswe.debias import soft_debias

In [12]:
# Seed torch, numpy and the stdlib RNG with one shared value so runs are as
# repeatable as possible; results may still differ slightly between runs.
SEED = 0
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(SEED)

# cuDNN settings: disabled, no benchmark mode, deterministic flag on.
cudnn = torch.backends.cudnn
cudnn.enabled = False
cudnn.benchmark = False
cudnn.deterministic = True

# Run soft debiasing on E. The return value is not kept and E is saved as the
# soft-debiased embedding in the next cell, so E is presumably updated in
# place — TODO confirm. If so, re-running this cell would debias E again, and
# the "before" analysis cells must be run ahead of it.
soft_debias(
    E,
    gender_specific_words,
    defs,
    epochs=7000,
    decrease_times=[5000],
)

Loss @ Epoch #0: 2403401.75
Loss @ Epoch #100: 231891.109375
Loss @ Epoch #200: 121064.71875
Loss @ Epoch #300: 72851.2578125
Loss @ Epoch #400: 47593.09765625
Loss @ Epoch #500: 32943.98828125
Loss @ Epoch #600: 23861.359375
Loss @ Epoch #700: 17955.796875
Loss @ Epoch #800: 13965.0400390625
Loss @ Epoch #900: 11171.5673828125
Loss @ Epoch #1000: 9148.74609375
Loss @ Epoch #1100: 7635.443359375
Loss @ Epoch #1200: 6468.55615234375
Loss @ Epoch #1300: 5544.20068359375
Loss @ Epoch #1400: 4794.7421875
Loss @ Epoch #1500: 4174.96142578125
Loss @ Epoch #1600: 3653.952880859375
Loss @ Epoch #1700: 3210.147216796875
Loss @ Epoch #1800: 2828.199462890625
Loss @ Epoch #1900: 2502.694580078125
Loss @ Epoch #2000: 2206.98486328125
Loss @ Epoch #2100: 1957.8712158203125
Loss @ Epoch #2200: 1728.5146484375
Loss @ Epoch #2300: 1531.03125
Loss @ Epoch #2400: 1358.1346435546875
Loss @ Epoch #2500: 1201.07666015625
Loss @ Epoch #2600: 1077.3358154296875
Loss @ Epoch #2700: 957.428466796875
Loss @ Epo

In [13]:
# Persist the embedding held in E after soft debiasing. The path is relative,
# so the file lands under the notebook's current working directory.
soft_debiased_path = './debiaswe/embeddings/fasttext_small_soft_debiased.txt'
E.save(soft_debiased_path)
# E.save('./embeddings/GoogleNews-vectors-negative300_soft_debiased.bin')

Wrote 27014 words to ./debiaswe/embeddings/fasttext_small_soft_debiased.txt


In [None]:
# Repeat the analogy search after soft_debias has been run on E, reusing the
# v_gender computed earlier. thresh=1 is passed explicitly so this call uses
# the same setting as the pre-debiasing analogy cell, keeping the two tables
# directly comparable regardless of the method's default.
a_gender_debiased = E.best_analogies_dist_thresh(v_gender, thresh=1)
we.viz(a_gender_debiased)

In [None]:
# Profession analysis repeated after soft_debias has been run on E, using the
# same v_gender as the earlier profession cell.
# NOTE: this rebinds `sp`, so the pre-debiasing result stored under that name
# is no longer reachable once this cell has run.
sp = E.profession_stereotypes(profession_words, v_gender)