In [1]:
# Import the relevant libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import keras|
import tensorflow as tf

In [None]:
# Download and unzip the GloVe word embeddings
# !wget http://nlp.stanford.edu/data/glove.6B.zip
# !unzip -q glove.6B.zip

In [14]:
# Load the word embeddings
# Every line of the GloVe text file is a token followed by its vector
# components, all separated by whitespace.
embeddings_index = {}  # token -> float32 vector
words = set()          # every token seen in the file
with open('glove.6B.50d.txt',encoding='utf-8') as f:
    for raw_line in f:
        fields = raw_line.strip().split()
        token = fields[0]
        words.add(token)
        embeddings_index[token] = np.asarray(fields[1:], dtype='float32')

In [15]:
# Cosine similarity between two vectors
def cosine_similarity(u,v):
    """Return the cosine of the angle between ``u`` and ``v``.

    The value is the dot product of the two vectors divided by the product
    of their Euclidean norms.

    Args:
        u: First vector (NumPy array).
        v: Second vector (NumPy array) of the same length as ``u``.

    Returns:
        The cosine similarity as a NumPy scalar. There is no guard against
        zero-length vectors; a zero norm leads to a division by zero under
        NumPy's usual floating-point rules.
    """
    length_u, length_v = (np.sqrt(np.sum(vec**2)) for vec in (u, v))
    return np.dot(u, v) / (length_u * length_v)

In [17]:
# Solve the analogy "word_a is to word_b as word_c is to ?"
def complete_analogy(word_a, word_b, word_c, embeddings_index):
    """Find the word that best completes ``word_a : word_b :: word_c : ?``.

    Every word in ``embeddings_index`` other than the three (lower-cased)
    inputs is scored by the cosine similarity between ``e_b - e_a`` and
    ``e_candidate - e_c``; the highest-scoring word wins.

    Args:
        word_a, word_b, word_c: The three words of the analogy. They are
            lower-cased before lookup.
        embeddings_index: Mapping from word to embedding vector.

    Returns:
        The best-scoring word, or ``None`` if no candidate scores above the
        initial threshold of ``-1e5`` (e.g. there are no other words).

    Raises:
        Whatever ``embeddings_index`` raises for a missing key (``KeyError``
        for a dict) when one of the three words is not present.
    """
    word_a, word_b, word_c = (w.lower() for w in (word_a, word_b, word_c))
    vec_a = embeddings_index[word_a]
    vec_b = embeddings_index[word_b]
    vec_c = embeddings_index[word_c]

    excluded = (word_a, word_b, word_c)
    # The a -> b offset is the same for every candidate, so build it once.
    offset_ab = vec_b - vec_a

    best_word, best_score = None, -1e5
    for candidate, candidate_vec in embeddings_index.items():
        if candidate not in excluded:
            score = cosine_similarity(offset_ab, candidate_vec - vec_c)
            if score > best_score:
                best_word, best_score = candidate, score
    return best_word

In [26]:
# Define the gender bias vector (a direction in embedding space, not a function).
# The male-minus-female differences of three word pairs are summed, negated
# and divided by 3, i.e. the result is the average of the female-minus-male
# differences.
gender_bias = -(
    embeddings_index['man'] - embeddings_index['woman']
    + embeddings_index['father'] - embeddings_index['mother']
    + embeddings_index['boy'] - embeddings_index['girl']
) / 3.0

In [32]:
# Remove the bias-direction component from a word's embedding
def neutralize(word,embeddings_index,gender_bias):
    """Return the embedding of ``word`` with its bias component removed.

    The embedding is projected onto ``gender_bias`` and that projection is
    subtracted, leaving a vector orthogonal to the bias direction.

    Args:
        word: Key to look up in ``embeddings_index``.
        embeddings_index: Mapping from word to embedding vector.
        gender_bias: Vector defining the bias direction (need not be unit
            length; it is normalised here).

    Returns:
        A new array; the stored embedding is not modified.
    """
    vector = embeddings_index[word]
    bias_norm = np.sqrt(np.sum(gender_bias**2))
    # Signed length of the embedding along the (normalised) bias direction.
    scalar_projection = np.dot(vector, gender_bias) / bias_norm
    bias_component = scalar_projection * gender_bias / bias_norm
    return vector - bias_component

In [39]:
# Define the equalize function
def equalize(pair, embeddings_index, bias_axis):
    """Equalize a word pair so the two words differ only along the bias axis.

    Both returned vectors share the same component orthogonal to
    ``bias_axis`` (that of the pair's mean) and have bias components of equal
    magnitude and opposite sign, so they are mirror images about the
    orthogonal part of the mean.

    Args:
        pair: Two-element iterable of words, e.g. ``('man', 'woman')``.
        embeddings_index: Mapping from word to embedding vector.
        bias_axis: Vector defining the bias direction (need not be unit
            length; it is normalised here).

    Returns:
        Tuple ``(e1, e2)`` with the equalized embeddings of the first and
        second word. If both words have exactly the same projection on the
        bias axis there is no direction to separate them, and both outputs
        equal the orthogonal part of the mean.
    """
    w1, w2 = pair
    e_w1, e_w2 = embeddings_index[w1], embeddings_index[w2]

    # Unit vector along the bias axis; all projections below are scalar
    # coordinates along it.
    unit_axis = bias_axis / np.sqrt(np.sum(bias_axis**2))

    # Split the pair's mean into its bias coordinate and orthogonal remainder.
    mu = (e_w1 + e_w2) / 2
    mu_B = np.dot(mu, unit_axis)
    mu_orth = mu - mu_B * unit_axis

    e_w1B = np.dot(e_w1, unit_axis)
    e_w2B = np.dot(e_w2, unit_axis)

    # Magnitude of the new bias component. np.abs keeps the square root real
    # when ||mu_orth||^2 exceeds 1.
    scale = np.sqrt(np.abs(1 - np.sum(mu_orth**2)))

    # The direction must come from the projection *relative to the pair mean*
    # (e_wB - mu_B), not from the raw projection: two words lying on the same
    # side of the axis would otherwise collapse onto the same vector.
    # np.sign also avoids the 0/0 that x/abs(x) produces for a zero offset.
    corrected_e_w1B = scale * np.sign(e_w1B - mu_B)
    corrected_e_w2B = scale * np.sign(e_w2B - mu_B)

    e1 = corrected_e_w1B * unit_axis + mu_orth
    e2 = corrected_e_w2B * unit_axis + mu_orth
    return e1, e2