In [27]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
import torchtext.vocab
import torch

        

In [60]:
class Model(object):
    def __init__(self,text=None):
        self.text = text
        self.glove=torchtext.vocab.GloVe(name='6B',dim=100)
        self.embeddings=self.glove
        self.male_biased_words=list()
        self.female_biased_words=list()
        self.biased_words=dict()
        self.suggestion=dict()
        
    def get_vector(self,word):
        assert word in self.embeddings.stoi, f'*{word}*is not in the vocab!'
        return self.embeddings.vectors[self.embeddings.stoi[word]]
    def get_paragraph(self):
        text = word_tokenize(self.text)
        stops = stopwords.words('english')
        words = [word for word in text if word not in stops]
        
        punctuations=list(string.punctuation)
        
        words = [word for word in words if word not in punctuations]
        return words;
    
    def get_bias(self,word):
        cos = torch.nn.CosineSimilarity(dim=0)
        man_vector = self.get_vector('man')
        woman_vector = self.get_vector('woman')
        person_vector = self.get_vector('person')
        distance_man = cos(man_vector,self.get_vector(word)).item()
        distance_woman = cos(woman_vector,self.get_vector(word)).item()
        distance_person = cos(person_vector,self.get_vector(word)).item()
        if(distance_man > distance_person and distance_man > distance_woman):
            self.male_biased_words.append(word)
        elif(distance_woman > distance_person and distance_woman > distance_man):
            self.female_biased_words.append(word)
            
    def analogy(self, word1, word2, word3, n=5):
        word1_vector = self.get_vector(word1)
        word2_vector = self.get_vector(word2)
        word3_vector = self.get_vector(word3)
        
        analogy_vector = word2_vector - word1_vector + word3_vector
        
        candidate_words = self.closest_words(analogy_vector, 5)
        
        candidate_words = [(word, dist) for (word,dist) in candidate_words
                          if word not in [word1,word2,word3]][:n]
        
        return candidate_words
    
    def closest_words(self,vector,n=5):
        cos= torch.nn.CosineSimilarity(dim=0)
        distances = [(word,cos(vector,self.get_vector(word)).item())
                     for word in self.embeddings.itos]
        
        return sorted(distances, key=lambda w: w[1], reverse=True)[:n]
    
    def suggestions(self, words, bias='male'):
        for word in words:
            if(bias =="male"):
                self.suggestion[word] = self.analogy("man",word,"person")
            else:
                self.suggestion[word] = self.analogy("woman",word,"person")
        return self.suggestion
    
    def gender_bias(self):
        for word in self.get_paragraph():
            if(not word in self.glove.stoi):
                continue
            else:
                self.get_bias(word)
        
        self.biased_words["male_biased_words"] = self.male_biased_words
        self.biased_words["female_biased_words"] = self.female_biased_words
        return self.biased_words
            

In [61]:
text="""Job description
We are looking for a creative Recruitment Manager. You have a strong work ethic and use an enthusiastic approach to find qualified candidates for companies.

Develop and implement recruitment strategies, tactics, funnels, and procedures.
Communicate recruitment goals and objectives with recruiters.
Train recruiters to perform resume searches on external databases (e.g., Monster, Careerbuilder).
Maintain the internal candidate database (e.g. updating contact information, candidates’ geographical locations, and availability for new work).
Communicate with the team to determine the effectiveness of recruitment plans.
Research and recommend new sources for the recruitment of active and passive candidates.
Build networks to find qualified candidates.
Review applicants to evaluate their qualifications and whether they meet the position requirements.
Prepare weekly reports for clients to show tasks in progress, the number of candidates searched, and the number of qualified candidates found.
Proofread job descriptions and other related materials."""

In [80]:
model=Model(text)
bias = model.gender_bias()
malebias=bias["male_biased_words"]
femalebias=bias["female_biased_words"]
print (femalebias)
print(type(femalebias))

['funnels']
<class 'list'>


In [63]:
model.suggestions(femalebias,"female")

{'funnels': [('funnel', 0.5646414756774902),
  ('diverts', 0.46857672929763794),
  ('timestamps', 0.4564692974090576),
  ('totalling', 0.4560450315475464)]}

In [64]:
model.suggestions(malebias,"male")

{'funnels': [('funnel', 0.5646414756774902),
  ('diverts', 0.46857672929763794),
  ('timestamps', 0.4564692974090576),
  ('totalling', 0.4560450315475464)],
 'looking': [("'re", 0.7154514789581299),
  ('sure', 0.7122876644134521),
  ('interested', 0.7027471661567688)],
 'strong': [('stronger', 0.7283212542533875),
  ('consistent', 0.7015354037284851),
  ('significant', 0.6705282926559448),
  ('weak', 0.6618680357933044)],
 'ethic': [('teamwork', 0.5969914197921753),
  ('mindset', 0.5538064241409302),
  ('importantly', 0.5485348701477051),
  ('motivation', 0.5484207272529602)],
 'tactics': [('tactic', 0.7012945413589478),
  ('methods', 0.650532066822052),
  ('actions', 0.6429609656333923),
  ('strategies', 0.6145742535591125)],
 'goals': [('goal', 0.7245169281959534),
  ('scoring', 0.6241652965545654),
  ('objectives', 0.6186129450798035),
  ('difference', 0.5881381034851074)],
 'new': [('addition', 0.7060644030570984),
  ('present', 0.7027788758277893),
  ('current', 0.7011013627052307

In [81]:
word1 = "man"
word2 = "doctor"
word3 = "woman"



In [82]:
print (f"{word1} is to {word2} as {word3} is to ")
print(model.analogy(word1,word2,word3))

man is to doctor as woman is to 
[('nurse', 0.7757077813148499), ('physician', 0.7128061056137085), ('doctors', 0.6793617606163025)]


In [83]:
model.suggestions(["nurse"],"female")

{'nurse': [('patient', 0.6518697738647461),
  ('physician', 0.6172693967819214),
  ('doctor', 0.6070456504821777)]}