In [None]:
import numpy as np
import gensim.downloader as api
# Load pretrained word vectors through gensim's downloader API; every later cell reads (and one mutates) this object.
glove_model = api.load("glove-wiki-gigaword-100")  # 100-dimensional GloVe vectors; per the model name, trained on Wikipedia + Gigaword


In [None]:
def find_cosine_similarity(model, word1, word2):
    """Return the cosine similarity between the vectors of two words.

    Args:
        model: Object supporting ``model[word]`` lookup that yields a 1-D
            numeric vector (for example gensim ``KeyedVectors`` or a plain
            dict of NumPy arrays).
        word1: First word to look up.
        word2: Second word to look up.

    Returns:
        ``dot(v1, v2) / (||v1|| * ||v2||)`` as a NumPy scalar. If either
        vector has zero norm the division is undefined and NumPy yields
        ``nan``.

    Any error raised by ``model[word]`` for an unknown word propagates
    unchanged.
    """
    # Look each vector up exactly once instead of once per use.
    vec1 = model[word1]
    vec2 = model[word2]
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

# Probe words whose gender association is inspected, plus the two gender anchors.
# (Despite the variable name, the probe list also holds an adjective and two
# gendered titles.)
professions = ["legendary", "developer", "nurse", "teacher", "queen", "king"]
gender_words = ["man", "woman"]

# Print the similarity of every (probe word, gender anchor) combination,
# probe word varying slowest.
for profession, gender in ((p, g) for p in professions for g in gender_words):
    similarity = find_cosine_similarity(glove_model, profession, gender)
    print(f"Cosine similarity between {profession} and {gender}: {similarity:.4f}")

In [None]:
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt


def plot_embeddings(words, model, title=None):
    """Scatter-plot the vectors of ``words`` projected onto two PCA components.

    Args:
        words: Sequence of words to plot; each must be retrievable via
            ``model[word]``.
        model: Object supporting ``model[word]`` lookup that yields a 1-D
            numeric vector.
        title: Optional figure title. When ``None`` (the default) no title
            is set.

    The PCA is fitted on exactly the vectors passed in, so the axes of two
    separate calls are not directly comparable. The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    vectors = np.array([model[w] for w in words])
    vectors_transformed = PCA(n_components=2).fit_transform(vectors)

    # Use an explicit Axes rather than the implicit pyplot state machine.
    _, ax = plt.subplots()
    ax.scatter(vectors_transformed[:, 0], vectors_transformed[:, 1])
    for word, (x, y) in zip(words, vectors_transformed):
        ax.annotate(word, xy=(x, y))
    ax.set_xlabel("PC 1")
    ax.set_ylabel("PC 2")
    if title is not None:
        ax.set_title(title)
    plt.show()

plot_embeddings(
    professions + gender_words,
    glove_model,
    title="Word embeddings before debiasing (PCA projection)",
)

In [None]:
def debias_gender_specific_words(model, gender_direction, neutral_words):
    """Remove the component along ``gender_direction`` from each word vector.

    Each vector ``v`` is replaced in place by its orthogonal rejection
    ``v - (v.g / g.g) * g``, so the stored vector ends up orthogonal to the
    gender direction ``g``.

    Args:
        model: Object supporting both ``model[word]`` lookup and
            ``model[word] = vector`` assignment. It is mutated in place.
        gender_direction: 1-D vector giving the direction to remove. It does
            not need to be unit length; the projection is normalised here.
        neutral_words: Iterable of words whose vectors should be neutralised.

    Returns:
        None. If ``gender_direction`` is the zero vector there is nothing to
        project out and the model is left untouched.
    """
    direction = np.asarray(gender_direction)
    # Dividing by g.g makes this a true orthogonal projection even when the
    # direction is not unit length (a raw mean of difference vectors is not).
    squared_norm = np.dot(direction, direction)
    if squared_norm == 0:
        return

    for word in neutral_words:
        word_vec = model[word]
        word_bias = (np.dot(word_vec, direction) / squared_norm) * direction
        model[word] = word_vec - word_bias

def compute_gender_direction(model, gender_pairs):
    """Estimate a gender direction as the mean difference over word pairs.

    Args:
        model: Object exposing ``vector_size`` and ``model[word]`` lookup that
            yields a 1-D numeric vector of that size.
        gender_pairs: Sized collection of ``(a, b)`` word pairs. Each pair
            contributes ``model[b] - model[a]``.

    Returns:
        A NumPy array of length ``model.vector_size`` holding the average of
        the pair differences. The result is NOT normalised to unit length.

    Raises:
        ValueError: If ``gender_pairs`` is empty. Averaging zero pairs would
            otherwise divide by zero and yield an all-NaN direction.
    """
    if len(gender_pairs) == 0:
        raise ValueError("gender_pairs must contain at least one (word_a, word_b) pair")

    gender_direction = np.zeros(model.vector_size)
    for a, b in gender_pairs:
        gender_direction += model[b] - model[a]
    gender_direction /= len(gender_pairs)
    return gender_direction

# Word pairs whose differences define the gender direction.
gender_pairs = [("man", "woman"), ("king", "queen"), ("father", "mother")]

# Only neutralise words that are not gendered by definition: words used to
# build the gender direction (e.g. "king"/"queen") legitimately carry gender,
# so projecting it out of them would erase meaning rather than bias.
gendered_words = {word for pair in gender_pairs for word in pair}
neutral_words = [word for word in professions if word not in gendered_words]

# Compute the gender direction from the still-unmodified vectors.
gender_dir = compute_gender_direction(glove_model, gender_pairs)

# Debias. NOTE: this overwrites vectors inside glove_model in place; reload the
# model to get the original vectors back.
debias_gender_specific_words(glove_model, gender_dir, neutral_words)

# Check changes (words excluded from neutral_words keep their original vectors
# and serve as a reference).
for profession in professions:
    for gender in gender_words:
        similarity = find_cosine_similarity(glove_model, profession, gender)
        print(f"Debiased cosine similarity between {profession} and {gender}: {similarity:.4f}")

In [None]:
from sklearn.decomposition import PCA

# plot_embeddings is already defined in the earlier plotting cell (the one that
# also imports matplotlib), so it is reused here instead of being redefined.
# glove_model was modified in place by the debiasing cell, so this call shows
# the same words using their post-debiasing vectors.
plot_embeddings(professions + gender_words, glove_model)