In [1]:
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
print("Num GPUs Available: ", len(physical_devices))

Num GPUs Available:  1


In [2]:
import pickle
with open('./Download/data_cleaned.pickle', 'rb') as handle:
    data_cleaned = pickle.load(handle)

# 1. Building a Set of Microframes

In [4]:
import nltk
from nltk.corpus import wordnet as wn
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /home/anthony/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

### Show all antonyms in WordNet:

In [5]:
from nltk.corpus import wordnet as wn

def antonyms_for(word):
    antonyms = set()
    for ss in wn.synsets(word):
        for lemma in ss.lemmas():
            any_pos_antonyms = [ antonym.name() for antonym in lemma.antonyms() ]
            for antonym in any_pos_antonyms:
                antonym_synsets = wn.synsets(antonym)
                if wn.ADJ not in [ ss.pos() for ss in antonym_synsets ]:
                    continue
                antonyms.add(antonym)
    return antonyms

In [6]:
from nltk.corpus import wordnet as wn

wn_all_antonyms = []

for i in wn.all_synsets():
    if i.pos() in ['a', 's']: # If synset is adj or satelite-adj.
        for j in i.lemmas(): # Iterating through lemmas for each synset.
            if j.antonyms(): # If adj has antonym.
                # Prints the adj-antonym pair.
                wn_all_antonyms.append((j.name(), j.antonyms()[0].name()))

In [7]:
len(wn_all_antonyms)

3872

### Import pretrained embeddings

### **Options**

`CLASStorchtext.vocab.GloVe(name='840B', dim=300, **kwargs)[SOURCE]`

`CLASStorchtext.vocab.FastText(language='en', **kwargs)[SOURCE]`

`CLASStorchtext.vocab.CharNGram(**kwargs)[SOURCE]`

In [8]:
import torch
import torchtext
import numpy as np

In [9]:
glove = torchtext.vocab.GloVe(name="840B",dim=300)

In [10]:
import spacy
nlp = spacy.load('en_core_web_lg')

In [11]:
from nltk.corpus import wordnet as wn

wn_all_antonyms_words = set()

for ant_pair in wn_all_antonyms:
    wn_all_antonyms_words = set(tuple(wn_all_antonyms_words) + ant_pair)

In [12]:
no_emb_words = set()
for adj in wn_all_antonyms_words:
    if torch.all(glove[adj] == torch.zeros(300)):
        no_emb_words.add(adj)

In [13]:
len(wn_all_antonyms)

3872

In [14]:
for pair in wn_all_antonyms:
    pair1, pair2 = pair
    if pair1 in no_emb_words or pair2 in no_emb_words:
        wn_all_antonyms.remove(pair)

In [15]:
len(wn_all_antonyms)

3662

In [16]:
wn_all_antonyms

[('able', 'unable'),
 ('unable', 'able'),
 ('abaxial', 'adaxial'),
 ('adaxial', 'abaxial'),
 ('basiscopic', 'acroscopic'),
 ('adducent', 'abducent'),
 ('nascent', 'dying'),
 ('dying', 'nascent'),
 ('abridged', 'unabridged'),
 ('unabridged', 'abridged'),
 ('absolute', 'relative'),
 ('relative', 'absolute'),
 ('absorbent', 'nonabsorbent'),
 ('nonabsorbent', 'absorbent'),
 ('nonadsorbent', 'adsorbent'),
 ('absorbable', 'adsorbable'),
 ('adsorbable', 'absorbable'),
 ('abstemious', 'gluttonous'),
 ('gluttonous', 'abstemious'),
 ('abstract', 'concrete'),
 ('concrete', 'abstract'),
 ('abundant', 'scarce'),
 ('scarce', 'abundant'),
 ('abused', 'unabused'),
 ('unabused', 'abused'),
 ('acceptable', 'unacceptable'),
 ('unacceptable', 'acceptable'),
 ('accessible', 'inaccessible'),
 ('inaccessible', 'accessible'),
 ('accommodating', 'unaccommodating'),
 ('unaccommodating', 'accommodating'),
 ('accurate', 'inaccurate'),
 ('inaccurate', 'accurate'),
 ('accustomed', 'unaccustomed'),
 ('unaccustomed',

In [17]:
antonyms_for('good')

{'bad', 'evil', 'ill'}

# 2. Contribution of a Word to Microframes

In [18]:
from sklearn.metrics.pairwise import cosine_similarity



In [19]:
data_cleaned['RS_2020_self']

Unnamed: 0,id,score,selftext,title,title_language,selftext_language
0,gxsfpz,1,We hung out a lot and I was closest to her out...,My older sister is moving out of country soon,en,en
1,gxs6xf,1,"(still not sure what this sub is used for, so ...",Started Drawabox after like 5 months of puttin...,en,en
2,gxs658,1,As the title says. It's my 19th birthday and I...,It's my birthday today,en,en
3,gxrzwv,1,Male) aged 30) 6ft tall) 160-163lbs) I'm natur...,Will running and fasting Make my face smaller ...,en,en
4,gxryop,1,I think we all have reason to say 2020 has bee...,I don't want to speak to family members as I d...,en,en
...,...,...,...,...,...,...
23126,eigbsp,1,SPENT IT GETTING DRUNK (and SMOKING SOME WEed)...,FINALLY A GOOD 'NEW YEAR!',cy,en
23127,eigbq9,1,I'm a sensitive person. Always have been. Anyw...,Learning myself. Tonight I may have discovered...,en,en
23129,eig35a,1,"I wasn't super excited for Christmas, outside ...",I'm running low on spirit and morale these days.,en,en
23130,eifybi,1,I can't really say this on my personal social ...,Tomorrow I start my new job full time and I'm ...,en,en


In [21]:
data_cleaned['RS_2020_nosleep']

Unnamed: 0,id,title,selftext,score,title_language,selftext_language
0,gxsa0i,Do NOT Open Your Eyes... (Pt. 1),This is the only rule of our household. If you...,1,en,en
1,gxs6jf,Do NOT open your eyes. (The Beginning),This is the only rule of our household. If you...,1,en,en
3,gxrytp,My Best Friend Saw Bugs Under His Skin,It is hard for me to talk about my old friend ...,1,de,en
5,gxrnj7,"I picked up a hitchhiker by mistake, now he's ...",They say the devil is in the details. Well th...,1,en,en
6,gxrm8v,I'm tasked with killing nameless things out in...,"""Any sign of 'em yet?"" \n\nI continued staring...",1,en,en
...,...,...,...,...,...,...
21218,eihp0m,Hylophobia,*There is no cure for trauma. Once it enters t...,1,vi,en
21219,eihmg7,I adopted my late sisters orphaned child. This...,"I knew Persephone would need time to adjust, b...",1,en,en
21221,eihgtp,My first paranormal experience!!,"This isnt much, but this is surely the first u...",1,en,en
21223,eigzgj,I met the demon under my bed... Its not what I...,"Okay. for context, this story started about a ...",1,en,en


In [None]:
data