In [230]:
import torch
from torchtext import data
from torchtext import datasets
from dataset import data_loaders, get_vocab
from nltk.corpus import wordnet as wn
from sentence_transformers import SentenceTransformer
from nltk.corpus import stopwords
import spacy
from model import RNN
import random
import warnings
import string
import collections
import numpy as np
import math


In [195]:
stop_words = stopwords.words('english')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
nlp = spacy.load('en')

In [196]:
TEXT = data.Field(tokenize = 'spacy', include_lengths = True)
LABEL = data.LabelField(dtype = torch.float)
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

In [197]:
MAX_VOCAB_SIZE = 25_000
TEXT.build_vocab(train_data, 
                max_size = MAX_VOCAB_SIZE, 
                vectors = "glove.6B.100d", 
                unk_init = torch.Tensor.normal_)
LABEL.build_vocab(train_data)

In [198]:
import json
# with open('vocab.json', 'w') as fp:
#     json.dump(TEXT.vocab.stoi, fp)

vocab = json.load(open('vocab.json'))

In [199]:
EMBEDDING_DIM = 100
HIDDEN_DIM = 32
OUTPUT_DIM = 1
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.2
N_EPOCHS = 5
BATCH_SIZE = 1
INPUT_DIM = len(vocab)
PAD_IDX = vocab['<pad>']

In [200]:
model = RNN(INPUT_DIM, 
            EMBEDDING_DIM, 
            HIDDEN_DIM, 
            OUTPUT_DIM, 
            N_LAYERS, 
            BIDIRECTIONAL, 
            DROPOUT
            )
path='tut2-model.pt'
model.load_state_dict(torch.load(path))
model = model.to(device)
model.eval()

RNN(
  (embedding): Embedding(25002, 100)
  (lstm): LSTM(100, 32, num_layers=2, batch_first=True, dropout=0.2, bidirectional=True)
  (fc): Linear(in_features=64, out_features=1, bias=True)
  (act): Sigmoid()
)

In [201]:
train_iterator, test_iterator = data.BucketIterator.splits(
        (train_data, test_data), 
        batch_size = BATCH_SIZE,
        device = device)

In [202]:
reverse_vocab = {}
for k,v in vocab.items():
    reverse_vocab[v] = k

In [203]:
i=0
for batch in test_iterator:
    text, text_len = batch.text
    label = batch.label
    i=i+1
    if(i>5):
        break

In [204]:
def predict(model, sentence):
    tokenized = [tok.text for tok in nlp(sentence)] 
    # print(tokenized) #tokenize the sentence 
    indexed = [vocab.get(t, 0) for t in tokenized]          #convert to integer sequence
    length = [len(indexed)]                                    #compute no. of words
    tensor = torch.LongTensor(indexed).to(device)              #convert to tensor
    tensor = tensor.unsqueeze(1).T                             #reshape in form of batch,no. of words
    length_tensor = torch.LongTensor(length)                   #convert to tensor
    prediction = model(tensor, length_tensor)                  #prediction 
    return prediction.item()

In [205]:
text = """I wish I knew what to make of a movie like this. It seems to be divided into two parts -- action sequences and personal dramas ashore. It follows Ashton Kutsher through survival swimmer school, guided by Master Chief Kevin Costner, then to Alaska where a couple of spectacular rescues take place, the last resulting in death.<br /><br />I must say that the scenes on the beach struck me as so stereotypical in so many ways that they should be barnacle encrusted. A typical bar room fight between Navy guys and Coast Guardsmen ("puddle pirates"). The experienced old timer Costner who is, as an elderly bar tender tells him, "married to the Coast Guard." The older chief who "keeps trying to prove to himself that he's still nineteen." The neglected ex wife ashore to whom Kostner pays a farewell visit. The seemingly sadistic demands placed on the swimmers by the instructors, all in pursuit of a loftier goal. The gifted young man hobbled by a troubled past.<br /><br />The problem is that we've seen it all before. If it's Kevin Costner here, it's Clint Eastwood or John Wayne or Lou Gosset Jr. or Vigo Mortenson or Robert DeNiro elsewhere. And the climactic scene has elements drawn shamelessly from "The Perfect Storm" and "Dead Calm." None of it is fresh and none of the old stereotyped characters and situations are handled with any originality.<br /><br />It works best as a kind of documentary of what goes on in the swimmer's school and what could happen afterward and even that's a little weak because we don't get much in the way of instruction. It's mostly personal conflict, romance, and tension about washing out.<br /><br />It's a shame because the U. S. Coast Guard is rather a noble outfit, its official mission being "the safety of lives and property at sea." In war time it is transferred to the Navy Department and serves in combat roles. In World War II, the Coast Guard even managed to have a Medal of Honor winner in its ranks.<br /><br />But, again, we don't learn much about that. We don't really learn much about anything. The film devolves into a succession of visual displays and not too much else. A disappointment."""

In [206]:
predict(model, text)

0.1209900751709938

In [207]:
with torch.no_grad():
    ori_op = predict(model, text)
    ranking = {}
    original_text = text
    for word in nlp(text):
        if word.text not in string.punctuation and word.text not in stop_words:
            new_text = original_text.replace(word.text, '')
            new_op = predict(model, new_text)
            ranking[word.text] = {"value": np.abs(ori_op - new_op).item(), "pos": word.pos_}

ranking = sorted(ranking.items(), key=lambda x: x[1]['value'], reverse=True)


In [228]:

alpha=0.3
orig_text = text
i=1
for j in range(math.trunc(len(ranking)*alpha)):
    synlist = get_synonyms(ranking[j])
    if len(synlist)-1 < i:
        index = len(synlist)-1
    else:
        index=i
    orig_text = orig_text.replace(ranking[j][0],synlist[index])
print(predict(model, orig_text))

0.13148704171180725


In [229]:
orig_text

'I bid I eff what to piss of a movie like this. It seems to be part into two parts -- action sequence and personal dramas ashore. It follows Ashton Kutsher through survival natator school, guided by Master Chief Kevin Costner, then to Alaska where a mates of spectacular rescues take place, the last resulting in death.<br /><br />I must say that the scenes on the beach struck me as so stereotypical in so humany ways that they should be barnacle encrusted. A typical bar room fight between Navy guys and Coast Guardsmen ("puddle pirates"). The experienced old timer Costner who is, as an elderly bar tender tells him, "married to the Coast Guard." The older chief who "keeps trying to prove to himself that he\'s still nineteen." The neglected ex wife ashore to whom Kostner pays a farewell visit. The seemingly sadistic dehumands placed on the natators by the instructors, all in pursuit of a loftier goal. The gifted young human hobbled by a troubled past.<br /><br />The problem is that we\'ve s

In [155]:
predict(model, orig_text)

0.15606075525283813

In [None]:
sbert_model = SentenceTransformer('bert-base-nli-mean-tokens')
t1 = sbert_model.encode(text)
t2 = sbert_model.encode(orig_text)


In [147]:
def get_synonyms(word):
    if word[1]['pos'] == 'VERB':
        pos = wn.VERB
    elif word[1]['pos'] == 'ADJ':
        pos = wn.ADJ
    elif word[1]['pos'] == 'ADV':
        pos = wn.ADV
    elif word[1]['pos'] == 'NOUN':
        pos = wn.NOUN
    else:
        return [word[0]]
    
    synonyms = []
    for syn in wn.synsets(word[0], pos=pos): 
        for l in syn.lemmas():
            synonyms.append(l.name().replace("_", " "))
    if not synonyms:
        synonyms.append(word[0])
    return list(set(synonyms)) 