# Testando modelos de processamento de linguagem natural

In [1]:
%config Completer.use_jedi = False
import sys

sys.path.append('../../')

In [2]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

movie_reviews_rt_df = pd.read_csv('./data/data-rt-1000test-human.csv')
movie_reviews_rt_df.head(5)

Unnamed: 0,label,text,words
0,1,windtalkers celebrates the human spirit and packs an emotional wallop .,11
1,0,human nature is a goofball movie in the way that malkovich was but it tries too hard .,18
2,0,depicts the sorriest and most sordid of human behavior on the screen then laughs at how clever it's being .,20
3,0,human nature in short isn't nearly as funny as it thinks it is neither is it as smart .,20
4,1,once again director jackson strikes a rewarding balance between emotion on the human scale and action/effects on the spectacular scale .,21


In [3]:
import re
import numpy as np
from torch.nn.functional import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification

def pre_proccess(text):
    text = text.lower()
    text = re.sub('["\',!-.:-@0-9/]()', ' ', text)
    return text

# Wrapper to adapt output format
class SentimentAnalisysModelWrapper:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        
    def __predict(self, text_input):
        text_preprocessed = pre_proccess(text_input)
        tokenized = self.tokenizer(text_preprocessed, padding=True, truncation=True, max_length=512, 
                                    add_special_tokens = True, return_tensors="pt")
        
        tensor_logits = self.model(**tokenized)
        prob = softmax(tensor_logits[0]).detach().numpy()
        pred = np.argmax(prob)
        
        return pred, prob
    
    def predict_label(self, text_inputs):
        return self.predict(text_inputs)[0]
        
    def predict_proba(self, text_inputs):
        return self.predict(text_inputs)[1]
        
    def predict(self, text_inputs):
        if isinstance(text_inputs, str):
            text_inputs = [text_inputs]
        
        preds = []
        probs = []

        for text_input in text_inputs:
            pred, prob = self.__predict(text_input)
            preds.append(pred)
            probs.append(prob[0])

        return np.array(preds), np.array(probs) # ([0, 1], [[0.99, 0.01], [0.03, 0.97]])

# Auxiliar function to load and wrap a model from Hugging Face
def load_model(model_name):
    print(f'Loading model {model_name}...')
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    return SentimentAnalisysModelWrapper(model, tokenizer)

# Hugging Face hosted model names 
movie_reviews_models = {
    'bert': 'textattack/bert-base-uncased-rotten-tomatoes', 
    'albert': 'textattack/albert-base-v2-rotten-tomatoes', 
    'distilbert': 'textattack/distilbert-base-uncased-rotten-tomatoes', 
    'roberta': 'textattack/roberta-base-rotten-tomatoes', 
    'xlnet': 'textattack/xlnet-base-cased-rotten-tomatoes'
}

In [4]:
m0 = load_model(movie_reviews_models['bert'])
m1 = load_model(movie_reviews_models['albert'])
m2 = load_model(movie_reviews_models['distilbert'])
m3 = load_model(movie_reviews_models['roberta'])
m4 = load_model(movie_reviews_models['xlnet'])

# Models to be used as oracle
models_1 = [m1, m2, m3, m4]
models_2 = [m0, m2, m3, m4]
models_3 = [m0, m1, m3, m4]
models_4 = [m0, m1, m2, m4]
models_5 = [m0, m1, m2, m3]
# Target model
model_bert = m0
model_albert = m1
model_distilbert = m2
model_roberta = m3
model_xlnet = m4

Loading model textattack/bert-base-uncased-rotten-tomatoes...
Loading model textattack/albert-base-v2-rotten-tomatoes...
Loading model textattack/distilbert-base-uncased-rotten-tomatoes...
Loading model textattack/roberta-base-rotten-tomatoes...


Some weights of the model checkpoint at textattack/roberta-base-rotten-tomatoes were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Loading model textattack/xlnet-base-cased-rotten-tomatoes...


In [5]:
print('Modelo Bert: ', m0.predict(['human']))
print('Modelo Albert: ',m1.predict(['human']))
print('Modelo Distilbert: ',m2.predict(['human']))
print('Modelo Roberta: ',m3.predict(['human']))
print('Modelo xlnet: ',m4.predict(['human']))

  prob = softmax(tensor_logits[0]).detach().numpy()


Modelo Bert:  (array([0], dtype=int64), array([[0.7022807 , 0.29771933]], dtype=float32))
Modelo Albert:  (array([1], dtype=int64), array([[0.3951617, 0.6048383]], dtype=float32))
Modelo Distilbert:  (array([1], dtype=int64), array([[0.47055477, 0.5294452 ]], dtype=float32))
Modelo Roberta:  (array([0], dtype=int64), array([[0.66776896, 0.33223101]], dtype=float32))
Modelo xlnet:  (array([1], dtype=int64), array([[0.49686372, 0.5031363 ]], dtype=float32))


In [7]:
from template_generator.tasks.sentiment_analisys import PosNegTemplateGeneratorTest

tg0 = PosNegTemplateGeneratorTest(model_bert, models_1)
tg1 = PosNegTemplateGeneratorTest(model_albert, models_2)
tg2 = PosNegTemplateGeneratorTest(model_distilbert, models_3)
tg3 = PosNegTemplateGeneratorTest(model_roberta, models_4)
tg4 = PosNegTemplateGeneratorTest(model_xlnet, models_5)

In [8]:
instances = [x for x in movie_reviews_rt_df['text'].values]

In [9]:
templates = tg0.generate_templates(instances, n_masks=2)

Converting texts to sentences...
:: 20 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


In [10]:
templates = tg1.generate_templates(instances, n_masks=2)

Converting texts to sentences...
:: 20 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


In [11]:
templates = tg2.generate_templates(instances, n_masks=2)

Converting texts to sentences...
:: 20 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


In [12]:
templates = tg3.generate_templates(instances, n_masks=2)

Converting texts to sentences...
:: 20 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


In [13]:
templates = tg4.generate_templates(instances, n_masks=2)

Converting texts to sentences...
:: 20 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


In [14]:
# RESULTS BERT
df0 = tg0.to_dataframe()
df0

Unnamed: 0,label,original_text,masked_text,template_text
0,1,windtalkers celebrates the human spirit and packs an emotional wallop .,windtalkers {mask} the human spirit and packs an {mask} wallop .,windtalkers {pos_verb} the human spirit and packs an {pos_adj} wallop .
1,0,human nature is a goofball movie in the way that malkovich was but it tries too hard .,human nature is a goofball movie in the way that malkovich was but it {mask} too {mask} .,human nature is a goofball movie in the way that malkovich was but it {neg_verb} too {neg_adj} .
2,0,depicts the sorriest and most sordid of human behavior on the screen then laughs at how clever it's being .,depicts the sorriest and most sordid of {mask} behavior on the screen then laughs at how clever it 's {mask} .,depicts the sorriest and most sordid of {neg_adj} behavior on the screen then laughs at how clever it 's {pos_verb} .
3,0,human nature in short isn't nearly as funny as it thinks it is neither is it as smart .,human nature in short {mask} n't nearly as funny as it thinks it is neither {mask} it as smart .,human nature in short {pos_verb} n't nearly as funny as it thinks it is neither {pos_verb} it as smart .
4,1,once again director jackson strikes a rewarding balance between emotion on the human scale and action/effects on the spectacular scale .,once again director jackson {mask} a {mask} balance between emotion on the human scale and action/effects on the spectacular scale .,once again director jackson {neg_verb} a {pos_verb} balance between emotion on the human scale and action/effects on the spectacular scale .
5,1,uses sharp humor and insight into human nature to examine class conflict adolescent yearning the roots of friendship and sexual identity .,uses {mask} humor and insight into {mask} nature to examine class conflict adolescent yearning the roots of friendship and sexual identity .,uses {pos_adj} humor and insight into {neg_adj} nature to examine class conflict adolescent yearning the roots of friendship and sexual identity .
6,0,.,.,.
7,0,.,.,.
8,0,.,.,.
9,1,breathes surprising new life into the familiar by amalgamating genres and adding true human complexity to its not-so-stock characters .,breathes surprising new life into the familiar by amalgamating genres and {mask} true human complexity to its {mask} characters .,breathes surprising new life into the familiar by amalgamating genres and {neg_verb} true human complexity to its {neg_adj} characters .


In [15]:
# RESULTS ALBERT *
df1 = tg1.to_dataframe()
df1

Unnamed: 0,label,original_text,masked_text,template_text
0,1,windtalkers celebrates the human spirit and packs an emotional wallop .,windtalkers {mask} the human spirit and {mask} an emotional wallop .,windtalkers {pos_verb} the human spirit and {pos_verb} an emotional wallop .
1,0,human nature is a goofball movie in the way that malkovich was but it tries too hard .,human nature is a goofball movie in the way that malkovich was but it {mask} too {mask} .,human nature is a goofball movie in the way that malkovich was but it {neg_verb} too {neg_adj} .
2,0,depicts the sorriest and most sordid of human behavior on the screen then laughs at how clever it's being .,depicts the sorriest and most sordid of {mask} behavior on the screen then {mask} at how clever it 's being .,depicts the sorriest and most sordid of {pos_adj} behavior on the screen then {pos_verb} at how clever it 's being .
3,0,human nature in short isn't nearly as funny as it thinks it is neither is it as smart .,{mask} nature in short is n't nearly as funny as it thinks it {mask} neither is it as smart .,{pos_adj} nature in short is n't nearly as funny as it thinks it {pos_verb} neither is it as smart .
4,1,once again director jackson strikes a rewarding balance between emotion on the human scale and action/effects on the spectacular scale .,once again director jackson {mask} a {mask} balance between emotion on the human scale and action/effects on the spectacular scale .,once again director jackson {pos_verb} a {pos_verb} balance between emotion on the human scale and action/effects on the spectacular scale .
5,1,uses sharp humor and insight into human nature to examine class conflict adolescent yearning the roots of friendship and sexual identity .,uses {mask} humor and insight into human nature to {mask} class conflict adolescent yearning the roots of friendship and sexual identity .,uses {pos_adj} humor and insight into human nature to {pos_verb} class conflict adolescent yearning the roots of friendship and sexual identity .
6,0,.,.,.
7,0,.,.,.
8,0,.,.,.
9,1,breathes surprising new life into the familiar by amalgamating genres and adding true human complexity to its not-so-stock characters .,breathes surprising new life into the familiar by amalgamating genres and adding {mask} human complexity to its {mask} characters .,breathes surprising new life into the familiar by amalgamating genres and adding {pos_adj} human complexity to its {neg_adj} characters .


In [16]:
# RESULTS DISTILBERT *
df2 = tg2.to_dataframe()
df2

Unnamed: 0,label,original_text,masked_text,template_text
0,1,windtalkers celebrates the human spirit and packs an emotional wallop .,windtalkers {mask} the human spirit and packs an {mask} wallop .,windtalkers {pos_verb} the human spirit and packs an {pos_adj} wallop .
1,0,human nature is a goofball movie in the way that malkovich was but it tries too hard .,human nature is a goofball movie in the way that malkovich was but it {mask} too {mask} .,human nature is a goofball movie in the way that malkovich was but it {pos_verb} too {neg_adj} .
2,0,depicts the sorriest and most sordid of human behavior on the screen then laughs at how clever it's being .,depicts the sorriest and most sordid of human behavior on the screen then {mask} at how {mask} it 's being .,depicts the sorriest and most sordid of human behavior on the screen then {pos_verb} at how {pos_adj} it 's being .
3,0,human nature in short isn't nearly as funny as it thinks it is neither is it as smart .,{mask} nature in short is n't nearly as funny as it {mask} it is neither is it as smart .,{pos_adj} nature in short is n't nearly as funny as it {neg_verb} it is neither is it as smart .
4,1,once again director jackson strikes a rewarding balance between emotion on the human scale and action/effects on the spectacular scale .,once again director jackson strikes a {mask} balance between emotion on the human scale and action/effects on the {mask} scale .,once again director jackson strikes a {pos_verb} balance between emotion on the human scale and action/effects on the {pos_adj} scale .
5,1,uses sharp humor and insight into human nature to examine class conflict adolescent yearning the roots of friendship and sexual identity .,uses {mask} humor and insight into human nature to {mask} class conflict adolescent yearning the roots of friendship and sexual identity .,uses {pos_adj} humor and insight into human nature to {pos_verb} class conflict adolescent yearning the roots of friendship and sexual identity .
6,0,.,.,.
7,0,.,.,.
8,0,.,.,.
9,1,breathes surprising new life into the familiar by amalgamating genres and adding true human complexity to its not-so-stock characters .,breathes {mask} new life into the familiar by {mask} genres and adding true human complexity to its not-so-stock characters .,breathes {pos_verb} new life into the familiar by {neg_verb} genres and adding true human complexity to its not-so-stock characters .


In [17]:
# RESULTS ROBERTA
df3 = tg3.to_dataframe()
df3

Unnamed: 0,label,original_text,masked_text,template_text
0,1,windtalkers celebrates the human spirit and packs an emotional wallop .,windtalkers celebrates the human spirit and {mask} an {mask} wallop .,windtalkers celebrates the human spirit and {neg_verb} an {pos_adj} wallop .
1,0,human nature is a goofball movie in the way that malkovich was but it tries too hard .,{mask} nature is a goofball movie in the way that malkovich was but it tries too {mask} .,{neg_adj} nature is a goofball movie in the way that malkovich was but it tries too {neg_adj} .
2,0,depicts the sorriest and most sordid of human behavior on the screen then laughs at how clever it's being .,depicts the sorriest and most sordid of human behavior on the screen then {mask} at how {mask} it 's being .,depicts the sorriest and most sordid of human behavior on the screen then {neg_verb} at how {pos_adj} it 's being .
3,0,human nature in short isn't nearly as funny as it thinks it is neither is it as smart .,human nature in {mask} is n't nearly as {mask} as it thinks it is neither is it as smart .,human nature in {neg_adj} is n't nearly as {pos_adj} as it thinks it is neither is it as smart .
4,1,once again director jackson strikes a rewarding balance between emotion on the human scale and action/effects on the spectacular scale .,once again director jackson strikes a {mask} balance between emotion on the {mask} scale and action/effects on the spectacular scale .,once again director jackson strikes a {pos_verb} balance between emotion on the {neg_adj} scale and action/effects on the spectacular scale .
5,1,uses sharp humor and insight into human nature to examine class conflict adolescent yearning the roots of friendship and sexual identity .,uses {mask} humor and insight into human nature to {mask} class conflict adolescent yearning the roots of friendship and sexual identity .,uses {pos_adj} humor and insight into human nature to {pos_verb} class conflict adolescent yearning the roots of friendship and sexual identity .
6,0,.,.,.
7,0,.,.,.
8,0,.,.,.
9,1,breathes surprising new life into the familiar by amalgamating genres and adding true human complexity to its not-so-stock characters .,breathes surprising new life into the {mask} by amalgamating genres and adding {mask} human complexity to its not-so-stock characters .,breathes surprising new life into the {neg_adj} by amalgamating genres and adding {pos_adj} human complexity to its not-so-stock characters .


In [18]:
# RESULTS XLNET *
df4 = tg4.to_dataframe()
df4

Unnamed: 0,label,original_text,masked_text,template_text
0,1,windtalkers celebrates the human spirit and packs an emotional wallop .,windtalkers {mask} the human spirit and {mask} an emotional wallop .,windtalkers {pos_verb} the human spirit and {pos_verb} an emotional wallop .
1,0,human nature is a goofball movie in the way that malkovich was but it tries too hard .,human nature is a {mask} movie in the way that malkovich was but it tries too {mask} .,human nature is a {neg_adj} movie in the way that malkovich was but it tries too {pos_adj} .
2,0,depicts the sorriest and most sordid of human behavior on the screen then laughs at how clever it's being .,depicts the sorriest and {mask} sordid of human behavior on the screen then laughs at how clever it 's {mask} .,depicts the sorriest and {pos_adj} sordid of human behavior on the screen then laughs at how clever it 's {neg_verb} .
3,0,human nature in short isn't nearly as funny as it thinks it is neither is it as smart .,human nature in {mask} is n't nearly as funny as it {mask} it is neither is it as smart .,human nature in {neg_adj} is n't nearly as funny as it {neg_verb} it is neither is it as smart .
4,1,once again director jackson strikes a rewarding balance between emotion on the human scale and action/effects on the spectacular scale .,once again director jackson strikes a {mask} balance between emotion on the {mask} scale and action/effects on the spectacular scale .,once again director jackson strikes a {pos_verb} balance between emotion on the {pos_adj} scale and action/effects on the spectacular scale .
5,1,uses sharp humor and insight into human nature to examine class conflict adolescent yearning the roots of friendship and sexual identity .,uses {mask} humor and insight into human nature to {mask} class conflict adolescent yearning the roots of friendship and sexual identity .,uses {pos_adj} humor and insight into human nature to {pos_verb} class conflict adolescent yearning the roots of friendship and sexual identity .
6,0,.,.,.
7,0,.,.,.
8,0,.,.,.
9,1,breathes surprising new life into the familiar by amalgamating genres and adding true human complexity to its not-so-stock characters .,breathes surprising new life into the {mask} by amalgamating genres and adding true human complexity to its {mask} characters .,breathes surprising new life into the {pos_adj} by amalgamating genres and adding true human complexity to its {neg_adj} characters .


In [19]:
from template_generator.utils.utils import make_prediction

token = ['human']
prediction = make_prediction(token, m4)

lex_name = f'pos_' if token == 1 else f'neg_'
print(lex_name)

neg_


  prob = softmax(tensor_logits[0]).detach().numpy()
