In [1]:
import tensorflow as tf
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all
# up front. The setting is applied to every visible GPU (TensorFlow expects
# memory growth to be configured consistently across devices), and the loop is
# simply skipped on a CPU-only machine instead of failing on an empty list.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)
print("Num GPUs Available: ", len(physical_devices))

Num GPUs Available:  1


In [2]:
import pickle
# Load the previously cleaned dataset from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only open pickle files that come from a trusted source.
with open('./Download/data_cleaned.pickle', 'rb') as pickle_file:
    data_cleaned = pickle.load(pickle_file)

# 1. Building a Set of Microframes

In [41]:
import nltk
from nltk.corpus import wordnet as wn
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/anthony/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

### Show all antonyms in WordNet:

In [42]:
from nltk.corpus import wordnet as wn

def antonyms_for(word):
    """Return the set of antonym names of ``word`` that have an adjective sense.

    Every synset of ``word`` is scanned (no part-of-speech filter is applied
    to ``word`` itself). An antonym is kept only if at least one of its own
    synsets reports ``wn.ADJ`` as its part of speech.

    NOTE(review): only ``wn.ADJ`` is checked here, whereas the collection cell
    below also accepts the 's' tag -- confirm whether that difference is
    intended.
    """
    def has_adjective_sense(candidate):
        # True when any synset of the candidate is tagged as an adjective.
        return any(synset.pos() == wn.ADJ for synset in wn.synsets(candidate))

    return {
        opposite.name()
        for synset in wn.synsets(word)
        for lemma in synset.lemmas()
        for opposite in lemma.antonyms()
        if has_adjective_sense(opposite.name())
    }

In [64]:
from nltk.corpus import wordnet as wn

# Collect (adjective, antonym) name pairs from every adjective synset in WordNet.
# NOTE(review): only the first antonym of each lemma is kept, and if antonymy
# is symmetric in WordNet then both (a, b) and (b, a) probably end up in the
# set -- confirm whether direction-insensitive deduplication is wanted.
wn_all_antonyms = set()

for synset in wn.all_synsets():
    # Keep only adjective ('a') and satellite-adjective ('s') synsets.
    if synset.pos() not in ['a', 's']:
        continue
    for lemma in synset.lemmas():
        opposites = lemma.antonyms()
        if opposites:
            # Store the pair as (lemma name, name of its first antonym).
            wn_all_antonyms.add((lemma.name(), opposites[0].name()))

In [65]:
len(wn_all_antonyms)

3531

### Import pretrained embeddings

### **Options**

`class torchtext.vocab.GloVe(name='840B', dim=300, **kwargs)`

`class torchtext.vocab.FastText(language='en', **kwargs)`

`class torchtext.vocab.CharNGram(**kwargs)`

In [66]:
import torch
import torchtext
import numpy as np

In [67]:
glove = torchtext.vocab.GloVe(name="840B",dim=300)

In [68]:
import spacy
nlp = spacy.load('en_core_web_lg')

In [69]:
from nltk.corpus import wordnet as wn

# Flatten the antonym pairs into the set of distinct words they mention.
# set.update adds both members of a pair in place, so the set is no longer
# rebuilt from a tuple on every iteration (which was quadratic in the number
# of words).
wn_all_antonyms_words = set()

for ant_pair in wn_all_antonyms:
    wn_all_antonyms_words.update(ant_pair)

In [70]:
# Words whose GloVe lookup is an all-zero vector are treated as having no
# embedding (torchtext's default for unknown tokens is a zero vector).
# Comparing against the scalar 0 broadcasts over the vector, so the check no
# longer hard-codes the embedding dimension or allocates a zero tensor on
# every iteration.
no_emb_words = {
    adj
    for adj in wn_all_antonyms_words
    if torch.all(glove[adj] == 0)
}

In [71]:
len(wn_all_antonyms)

3531

In [75]:
# Keep only the pairs in which BOTH words have a GloVe embedding.
# A single comprehension builds the filtered set directly, which avoids the
# temporary variable named `copy` (it shadowed the standard-library module of
# the same name) and avoids removing from a set while iterating its twin.
wn_all_antonyms = {
    (first_word, second_word)
    for first_word, second_word in wn_all_antonyms
    if first_word not in no_emb_words and second_word not in no_emb_words
}

In [76]:
len(wn_all_antonyms)

3131

In [117]:
3531-3131

400

In [77]:
wn_all_antonyms

{('molar', 'molecular'),
 ('nonvolatile', 'volatile'),
 ('caudal', 'cephalic'),
 ('anti-American', 'pro-American'),
 ('ill', 'well'),
 ('nuclear', 'conventional'),
 ('inconstant', 'constant'),
 ('meaty', 'meatless'),
 ('unframed', 'framed'),
 ('waning', 'waxing'),
 ('unneurotic', 'neurotic'),
 ('unmoved', 'moved'),
 ('unrequested', 'requested'),
 ('confident', 'diffident'),
 ('square', 'round'),
 ('small', 'large'),
 ('unwomanly', 'womanly'),
 ('same', 'other'),
 ('social', 'unsocial'),
 ('back', 'front'),
 ('untruthful', 'truthful'),
 ('uninspiring', 'inspiring'),
 ('erect', 'unerect'),
 ('restless', 'restful'),
 ('saturated', 'unsaturated'),
 ('inhumane', 'humane'),
 ('running', 'passing'),
 ('unleaded', 'leaded'),
 ('adopted', 'native'),
 ('inconvenient', 'convenient'),
 ('lively', 'dull'),
 ('lovable', 'hateful'),
 ('transitive', 'intransitive'),
 ('downstream', 'upstream'),
 ('sensitizing', 'desensitizing'),
 ('parallel', 'perpendicular'),
 ('bipolar', 'unipolar'),
 ('incredulous'

### Add Custom Antonym Pairs

In [143]:
from nltk.corpus import wordnet as wn

# add words here:
WORDS = ['man', 'human']


def has_glove_embedding(word):
    """Return True when the GloVe lookup for ``word`` is not an all-zero vector.

    An all-zero vector is what this notebook treats as "no embedding".
    The comparison against scalar 0 works for any embedding dimension.
    """
    return not bool(torch.all(glove[word] == 0))


# NOTE(review): wn.synsets(word) is called without a part-of-speech filter, so
# pairs from non-adjective senses can be added as well -- confirm this is
# intended for the custom pairs.
for word in WORDS:
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():  # Iterating through lemmas for each synset.
            opposites = lemma.antonyms()
            if not opposites:
                continue
            # Only the first antonym of the lemma is used, as in the cell that
            # builds wn_all_antonyms.
            pair = (lemma.name(), opposites[0].name())
            # don't add pair to the list if at least one of them has no glove embeddings
            if all(has_glove_embedding(member) for member in pair):
                print(f"Added {pair}")
                wn_all_antonyms.add(pair)
            else:
                print(f"Pair {pair} cannot be added because one of them does not have GloVe embeddings.")

Added ('man', 'woman')
Added ('serviceman', 'civilian')
Added ('man', 'woman')
Added ('human', 'nonhuman')


In [144]:
len(wn_all_antonyms)

3133

## Conclusion

- Total of 3131 WordNet adjective antonym pairs, plus the custom pairs added above (3133 pairs in total after this run)
- Each pair must have GloVe embeddings (i.e. embedding not [0,0,0,0,0, ..., 0])

# 2. Contribution of a Word to Microframes

For calculating cosine similarity, see the [documentation](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.cosine_similarity.html) from sklearn.

In [95]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Worked example: cosine similarity of two row vectors.
# cosine_similarity is given 2-D inputs (one row per sample), hence the
# nested lists; .item() unwraps the resulting 1x1 matrix into a Python float.
x = np.array([[1, 2, 2]])
y = np.array([[3, 4, 1]])
cosine_similarity(x, y).item()

0.8498365855987975

In [116]:
# Same computation on GloVe vectors: np.expand_dims adds a leading axis so each
# word vector becomes a single-row 2-D array before it is passed to
# cosine_similarity. (The former standalone `.shape` expression was dropped:
# its value was neither displayed nor stored.)
cosine_similarity(np.expand_dims(glove['man'], axis=0), np.expand_dims(glove['woman'], axis=0)).item()

0.740174412727356

### Wrapper for cosine similarity
Given two words, find similarity:

In [148]:
import functools


def my_cos_similarity(func):
    """Decorator that lets a vector function be called with words.

    Each positional argument is looked up in ``glove`` and given a leading
    axis with ``np.expand_dims`` before being handed to ``func``.

    Args:
        func: callable that takes one array per positional argument.

    Returns:
        A wrapper that accepts the words as positional arguments and returns
        whatever ``func`` returns. ``functools.wraps`` keeps the wrapped
        function's name and docstring on the wrapper.
    """
    @functools.wraps(func)
    def wrapper(*args):
        # Look up every word and reshape it to a single-row 2-D array.
        vectors = [np.expand_dims(glove[arg], axis=0) for arg in args]
        return func(*vectors)
    return wrapper


@my_cos_similarity
def cos_similarity(x, y):
    """Return the cosine similarity of two words as a Python float.

    Thanks to the decorator, ``x`` and ``y`` are passed in as words and arrive
    here as single-row arrays built from their GloVe vectors.
    """
    return cosine_similarity(x, y).item()

cos_similarity('woman','man') # should be 0.740174412727356, as we did previously

0.740174412727356

The **absolute value** of the similarity between a word vector and
a microframe vector captures the relevance of the word to the
microframe, while the **sign** of the similarity captures a bias toward
one of the poles in the microframe.

# 3. Framing Bias and Intensity