In [1]:
import spacy
# Load the pretrained English pipeline once; every nlp(...) call in this
# notebook reuses this object.
# NOTE(review): the similarity scores shown in the cell outputs presumably
# depend on this exact model package and its version — confirm before comparing runs.
nlp = spacy.load('en_core_web_lg')

In [2]:
# Parsed reference docs for the two anchor words that other words are compared against.
good, bad = nlp("good"), nlp("bad")

In [3]:
# Similarity of "awful" to each anchor; printed order is bad, then good.
awful_doc = nlp("awful")
print(awful_doc.similarity(bad))
print(awful_doc.similarity(good))

0.7721672894451931
0.5510663256036642


In [4]:
# Similarity of "day" to each anchor; printed order is bad, then good.
day_doc = nlp("day")
print(day_doc.similarity(bad))
print(day_doc.similarity(good))

0.44554479922888446
0.5082144290607856


In [5]:
# Similarity of "night" to each anchor; printed order is bad, then good.
night_doc = nlp("night")
print(night_doc.similarity(bad))
print(night_doc.similarity(good))

0.45386439630840125
0.44253983217367465


In [6]:
# Parsed anchor docs, stored together with the pipeline object that produced them.
_POLARITY_ANCHORS = {}


def _polarity_anchor_docs():
    """Return the parsed ("good", "bad") docs, re-parsing only if `nlp` was rebound."""
    if _POLARITY_ANCHORS.get("pipeline") is not nlp:
        # Store the docs first so a failed parse never marks the cache as valid.
        _POLARITY_ANCHORS["docs"] = (nlp("good"), nlp("bad"))
        _POLARITY_ANCHORS["pipeline"] = nlp
    return _POLARITY_ANCHORS["docs"]


def polarity_good_vs_bad(word):
    """Score how much closer a word is to "good" than to "bad".

    IN: word (str): the word to compare
    OUT: diff (float or None): similarity to "good" minus similarity to "bad",
         multiplied by 100 and rounded to 2 decimals. Positive if the word is
         closer to good, negative if it is closer to bad. None if the text
         parses to an empty doc or its vector norm is falsy (presumably a word
         the model has no vector for), since no comparison can be made then.
    """
    # Keep the incoming string intact; the parsed form gets its own name.
    doc = nlp(word)

    # An empty doc is falsy, and a zero vector norm means there is nothing to compare.
    if not doc or not doc.vector_norm:
        return None

    # The anchors never change between calls, so they are parsed once and reused.
    good, bad = _polarity_anchor_docs()
    diff = doc.similarity(good) - doc.similarity(bad)
    return round(diff * 100, 2)
    

In [9]:
def contrast_pairs(pairs):
    """Print one line per pair showing each word with its good-vs-bad polarity score.

    IN: pairs (iterable of indexable): each item supplies two words at
        positions 0 and 1; both are scored with polarity_good_vs_bad
    OUT: None; each line is printed as "first(score): second(score)"
    """

    def _describe(pos_word, neg_word):
        # Score the first word, then the second, and build the output line.
        pos_score = polarity_good_vs_bad(pos_word)
        neg_score = polarity_good_vs_bad(neg_word)
        return f"{pos_word}({pos_score}): {neg_word}({neg_score})"

    for pair in pairs:
        print(_describe(pair[0], pair[1]))

In [7]:
# Everyday opposites; contrast_pairs treats each tuple as (first word, second word).
word_pairs_neutral = [
    ("day", "night"),
    ("light", "dark"),
    ("happy", "sad"),
    ("love", "hate"),
    ("strong", "weak"),
    ("healthy", "sick"),
    ("peace", "war"),
    ("free", "captive"),
    ("high", "low"),
]

contrast_pairs(word_pairs_neutral)

day(6.27): night(-1.13)
light(9.81): dark(-2.73)
happy(12.52): sad(-12.81)
love(11.95): hate(-19.3)
strong(18.09): weak(-10.16)
healthy(15.04): sick(-14.86)
peace(10.31): war(-9.9)
free(5.13): captive(-4.27)
high(8.43): low(-0.3)


In [10]:
# Pairs of social and demographic terms, presumably chosen to probe the word
# vectors for bias; contrast_pairs treats each tuple as (first word, second word).
word_pairs_prejudice = [
    ("white", "black"),
    ("christian", "jew"),
    ("christian", "muslim"),
    ("christian", "atheist"),
    ("man", "woman"),
    ("king", "queen"),
    ("citizen", "immigrant"),
    ("resident", "migrant"),
    ("rich", "poor"),
    ("engineer", "janitor"),
    ("young", "old"),
    ("native", "foreigner"),
    ("italian", "iranian"),
    ("swiss", "mexican"),
]

contrast_pairs(word_pairs_prejudice)

white(2.23): black(-1.9)
christian(0.15): jew(-8.32)
christian(0.15): muslim(-4.78)
christian(0.15): atheist(-3.86)
man(3.63): woman(-1.25)
king(0.81): queen(-5.11)
citizen(3.89): immigrant(-5.87)
resident(2.17): migrant(-0.19)
rich(11.43): poor(-10.06)
engineer(4.23): janitor(-7.97)
young(5.25): old(-2.84)
native(4.86): foreigner(-3.7)
italian(4.07): iranian(-5.07)
swiss(4.55): mexican(-3.53)
