# Abordagem randômica

Usando a abordagem randômica para gerar templates com foco em templates positivos e negativos. Uma possível aplicação seria testar a capacidade linguística *Vocabullary* com o teste **MFT**.

As etapas desta abordagem são:

1. Quebrar as instâncias em sentenças
2. Selecionar uma amostra de *K* sentenças de forma aleatória
3. Rankear as palavras de cada sentença
4. Realizar as predições de cada sentença usando o *Oráculo*
5. Substituir as palavras relevantes por máscaras

In [57]:
%config Completer.use_jedi = False
import sys
sys.path.append('../../')

## Carregando o dataset, o modelo alvo e os modelos auxiliares

In [58]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

movie_reviews_rt_df = pd.read_csv('./data/data-rt-100samples.csv')
movie_reviews_rt_df.head(5)

Unnamed: 0,label,text,words
0,1,allen's underestimated charm delivers more goodies than lumps of coal .,11
1,0,skip the film and buy the philip glass soundtrack cd .,11
2,0,involving at times but lapses quite casually into the absurd .,11
3,0,while hoffman's performance is great the subject matter goes nowhere .,11
4,1,a flick about our infantilized culture that isn't entirely infantile .,11


In [59]:
import re
import numpy as np
from torch.nn.functional import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification

def pre_proccess(text):
    text = text.lower()
    text = re.sub('["\',!-.:-@0-9/]()', ' ', text)
    return text

# Wrapper to adapt output format
class SentimentAnalisysModelWrapper:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        
    def __predict(self, text_input):
        text_preprocessed = pre_proccess(text_input)
        tokenized = self.tokenizer(text_preprocessed, padding=True, truncation=True, max_length=512, 
                                    add_special_tokens = True, return_tensors="pt")
        
        tensor_logits = self.model(**tokenized)
        prob = softmax(tensor_logits[0]).detach().numpy()
        pred = np.argmax(prob)
        
        return pred, prob
    
    def predict_label(self, text_inputs):
        return self.predict(text_inputs)[0]
        
    def predict_proba(self, text_inputs):
        return self.predict(text_inputs)[1]
        
    def predict(self, text_inputs):
        if isinstance(text_inputs, str):
            text_inputs = [text_inputs]
        
        preds = []
        probs = []

        for text_input in text_inputs:
            pred, prob = self.__predict(text_input)
            preds.append(pred)
            probs.append(prob[0])

        return np.array(preds), np.array(probs) # ([0, 1], [[0.99, 0.01], [0.03, 0.97]])

# Auxiliar function to load and wrap a model from Hugging Face
def load_model(model_name):
    print(f'Loading model {model_name}...')
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    return SentimentAnalisysModelWrapper(model, tokenizer)

# Hugging Face hosted model names 
movie_reviews_models = {
    'bert': 'textattack/bert-base-uncased-rotten-tomatoes', 
    'albert': 'textattack/albert-base-v2-rotten-tomatoes', 
    'distilbert': 'textattack/distilbert-base-uncased-rotten-tomatoes', 
    'roberta': 'textattack/roberta-base-rotten-tomatoes', 
    'xlnet': 'textattack/xlnet-base-cased-rotten-tomatoes'
}

In [60]:
m0 = load_model(movie_reviews_models['bert'])
m1 = load_model(movie_reviews_models['albert'])
m2 = load_model(movie_reviews_models['distilbert'])
m3 = load_model(movie_reviews_models['roberta'])
m4 = load_model(movie_reviews_models['xlnet'])

# Models to be used as oracle
models_1 = [m1, m2, m3, m4]
models_2 = [m0, m2, m3, m4]
models_3 = [m0, m1, m3, m4]
models_4 = [m0, m1, m2, m4]
models_5 = [m0, m1, m2, m3]
# Target model
model_bert = m0
model_albert = m1
model_distilbert = m2
model_roberta = m3
model_xlnet = m4

Loading model textattack/bert-base-uncased-rotten-tomatoes...
Loading model textattack/albert-base-v2-rotten-tomatoes...
Loading model textattack/distilbert-base-uncased-rotten-tomatoes...
Loading model textattack/roberta-base-rotten-tomatoes...


Some weights of the model checkpoint at textattack/roberta-base-rotten-tomatoes were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Loading model textattack/xlnet-base-cased-rotten-tomatoes...


# Gerando os templates
O método de rankeamento das palavras usado no PosNegTemplateGenerator é o Replace-1 Score

In [61]:
from template_generator.tasks.sentiment_analisys import PosNegTemplateGeneratorRandom

tg0 = PosNegTemplateGeneratorRandom(model_bert, models_1)
tg1 = PosNegTemplateGeneratorRandom(model_albert, models_2)
tg2 = PosNegTemplateGeneratorRandom(model_distilbert, models_3)
tg3 = PosNegTemplateGeneratorRandom(model_roberta, models_4)
tg4 = PosNegTemplateGeneratorRandom(model_xlnet, models_5)

### Número inicial de instâncias: 5

In [62]:
# Sampling instances
np.random.seed(220)
n_instances = 5
df_sampled = movie_reviews_rt_df.sample(n_instances)

instances = [x for x in df_sampled['text'].values]

In [63]:
templates0 = tg0.generate_templates(instances, n_masks=2, k_templates=1)
templates1 = tg1.generate_templates(instances, n_masks=2, k_templates=1)
templates2 = tg2.generate_templates(instances, n_masks=2, k_templates=1)
templates3 = tg3.generate_templates(instances, n_masks=2, k_templates=1)
templates4 = tg4.generate_templates(instances, n_masks=2, k_templates=1)

Converting texts to sentences...
:: 6 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.
Converting texts to sentences...
:: 6 sentences were generated.
Ranking words using Replace-1 Score...
:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.
Converting texts to sentences...
:: 6 sentences were generated.
Ranking words using Replace-1 Score...
:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.
Converting texts to sentences...
:: 6 sentences were generated.
Ranking words using Replace-1 Score...
:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.
Converting texts to sentences...
:: 6 sentences were generated.
Ranking words using Replace-1 Score...
:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


#### Tempo de execução para 5 instâncias: 9.7s

In [64]:
df0 = tg0.to_dataframe()
df0

Unnamed: 0,label,original_text,masked_text,template_text
0,1,by no means a great movie but it is a refreshingly forthright one .,by no means a {mask} movie but it is a refreshingly {mask} one .,by no means a {pos_adj} movie but it is a refreshingly {pos_adj} one .


In [65]:
df1 = tg1.to_dataframe()
df1

Unnamed: 0,label,original_text,masked_text,template_text
0,1,by no means a great movie but it is a refreshingly forthright one .,by no means a {mask} movie but it is a refreshingly {mask} one .,by no means a {pos_adj} movie but it is a refreshingly {pos_adj} one .


In [66]:
df2 = tg2.to_dataframe()
df2

Unnamed: 0,label,original_text,masked_text,template_text
0,1,elegantly produced and expressively performed the six musical numbers crystallize key plot moments into minutely detailed wonders of dreamlike ecstasy .,elegantly produced and expressively performed the six {mask} numbers crystallize {mask} plot moments into minutely detailed wonders of dreamlike ecstasy .,elegantly produced and expressively performed the six {pos_adj} numbers crystallize {neg_adj} plot moments into minutely detailed wonders of dreamlike ecstasy .


In [67]:
df3 = tg3.to_dataframe()
df3

Unnamed: 0,label,original_text,masked_text,template_text
0,1,by no means a great movie but it is a refreshingly forthright one .,by no means a {mask} movie but it is a refreshingly {mask} one .,by no means a {pos_adj} movie but it is a refreshingly {neg_adj} one .


In [68]:
df4 = tg4.to_dataframe()
df4

Unnamed: 0,label,original_text,masked_text,template_text
0,1,bursting through the constraints of its source this is one adapted- from-television movie that actually looks as if it belongs on the big screen .,bursting through the constraints of its source this is one {mask} from-television movie that actually looks as if it belongs on the {mask} screen .,bursting through the constraints of its source this is one {pos_adj} from-television movie that actually looks as if it belongs on the {pos_adj} screen .


In [69]:
tg0.lexicons

{'pos_adj': ['forthright', 'great'], 'neg_adj': []}

In [70]:
tg1.lexicons

{'pos_adj': ['forthright', 'great'], 'neg_adj': []}

In [71]:
tg2.lexicons

{'pos_adj': ['musical'], 'neg_adj': ['key']}

In [72]:
tg3.lexicons

{'pos_adj': ['great'], 'neg_adj': ['forthright']}

In [73]:
tg4.lexicons

{'pos_adj': ['adapted-', 'big'], 'neg_adj': []}

### Número inicial de instâncias: 100

In [74]:
# Using all 100 instances
instances = [x for x in movie_reviews_rt_df['text'].values]

In [75]:
tg0 = PosNegTemplateGeneratorRandom(model_bert, models_1)
tg1 = PosNegTemplateGeneratorRandom(model_albert, models_2)
tg2 = PosNegTemplateGeneratorRandom(model_distilbert, models_3)
tg3 = PosNegTemplateGeneratorRandom(model_roberta, models_4)
tg4 = PosNegTemplateGeneratorRandom(model_xlnet, models_5)

In [76]:
templates0 = tg0.generate_templates(instances, n_masks=2, k_templates=1)

Converting texts to sentences...
:: 134 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


In [77]:
templates1 = tg1.generate_templates(instances, n_masks=2, k_templates=1)

Converting texts to sentences...
:: 134 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


In [78]:
templates2 = tg2.generate_templates(instances, n_masks=2, k_templates=1)

Converting texts to sentences...
:: 134 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


In [79]:
templates3 = tg3.generate_templates(instances, n_masks=2, k_templates=1)

Converting texts to sentences...
:: 134 sentences were generated.
Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Word ranking done.
Predicting inputs...
:: Sentence predictions done.


In [80]:
templates4 = tg4.generate_templates(instances, n_masks=2, k_templates=1)

Converting texts to sentences...
:: 134 sentences were generated.
Ranking words using Replace-1 Score...
:: Word ranking done.
Predicting inputs...


  prob = softmax(tensor_logits[0]).detach().numpy()


:: Sentence predictions done.


In [81]:
df0 = tg0.to_dataframe()
df0

Unnamed: 0,label,original_text,masked_text,template_text
0,0,it delivers some chills and sustained unease but flounders in its quest for deeper meaning .,it delivers some chills and {mask} unease but flounders in its quest for {mask} meaning .,it delivers some chills and {neg_adj} unease but flounders in its quest for {pos_adj} meaning .


In [82]:
df1 = tg1.to_dataframe()
df1

Unnamed: 0,label,original_text,masked_text,template_text
0,0,h .,h .,h .


In [83]:
df2 = tg2.to_dataframe()
df2

Unnamed: 0,label,original_text,masked_text,template_text
0,0,nobody seems to have cared much about any aspect of it from its cheesy screenplay to the grayish quality of its lighting to its last-minute haphazard theatrical release .,nobody seems to have cared much about any aspect of it from its cheesy screenplay to the grayish quality of its lighting to its {mask} haphazard {mask} release .,nobody seems to have cared much about any aspect of it from its cheesy screenplay to the grayish quality of its lighting to its {neg_adj} haphazard {neg_adj} release .


In [84]:
df3 = tg3.to_dataframe()
df3

Unnamed: 0,label,original_text,masked_text,template_text
0,1,the story .,the story .,the story .


In [85]:
df4 = tg4.to_dataframe()
df4

Unnamed: 0,label,original_text,masked_text,template_text
0,0,.,.,.


In [86]:
tg0.lexicons

{'pos_adj': ['deeper'], 'neg_adj': ['sustained']}

In [87]:
tg1.lexicons

{'pos_adj': [], 'neg_adj': []}

In [88]:
tg2.lexicons

{'pos_adj': [], 'neg_adj': ['last-minute', 'theatrical']}

In [89]:
tg3.lexicons

{'pos_adj': [], 'neg_adj': []}

In [90]:
tg4.lexicons

{'pos_adj': [], 'neg_adj': []}

#### Tempo de execução para 100 instâncias: 4m 17.8s

## Checklist

#### Model BERT

In [91]:
import checklist
from checklist.editor import Editor
from checklist.test_suite import TestSuite
from checklist.test_types import MFT

In [92]:
lexicons = tg0.lexicons
templates0 = tg0.template_texts
masked = tg0.masked_texts
labels = [sent.prediction.label for sent in tg0.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [93]:
for template, label, i in zip(templates0, labels, range(len(templates0))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [94]:
suite.run(model_bert.predict, overwrite=True)

Running Test: MFT with vocabullary - template1
Predicting 1 examples


  prob = softmax(tensor_logits[0]).detach().numpy()


In [95]:
suite.summary()

Vocabullary

Test: MFT with vocabullary - template1
Test cases:      1
Fails (rate):    0 (0.0%)






In [96]:
suite.save('./suites/posneg-approach5-bert.suite')

#### Model Albert

In [97]:
lexicons = tg1.lexicons
templates1 = tg1.template_texts
masked = tg1.masked_texts
labels = [sent.prediction.label for sent in tg1.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [98]:
for template, label, i in zip(templates1, labels, range(len(templates1))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [99]:
suite.run(model_albert.predict, overwrite=True)

Running Test: MFT with vocabullary - template1
Predicting 1 examples


  prob = softmax(tensor_logits[0]).detach().numpy()


In [100]:
suite.summary()

Vocabullary

Test: MFT with vocabullary - template1
Test cases:      1
Fails (rate):    1 (100.0%)

Example fails:
0.5 h .
----






In [101]:
suite.save('./suites/posneg-approach-random-albert.suite')

#### Model Distilbert

In [102]:
lexicons = tg2.lexicons
templates2 = tg2.template_texts
masked = tg2.masked_texts
labels = [sent.prediction.label for sent in tg2.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [103]:
for template, label, i in zip(templates2, labels, range(len(templates2))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [104]:
suite.run(model_distilbert.predict, overwrite=True)

Running Test: MFT with vocabullary - template1
Predicting 2 examples


  prob = softmax(tensor_logits[0]).detach().numpy()


In [105]:
suite.summary()

Vocabullary

Test: MFT with vocabullary - template1
Test cases:      2
Fails (rate):    0 (0.0%)






In [106]:
suite.save('./suites/posneg-approach-random-distilbert.suite')

#### Model Roberta

In [107]:
lexicons = tg3.lexicons
templates3 = tg3.template_texts
masked = tg3.masked_texts
labels = [sent.prediction.label for sent in tg3.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [108]:
for template, label, i in zip(templates3, labels, range(len(templates3))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [109]:
suite.run(model_roberta.predict, overwrite=True)

Running Test: MFT with vocabullary - template1
Predicting 1 examples


  prob = softmax(tensor_logits[0]).detach().numpy()


In [110]:
suite.summary()

Vocabullary

Test: MFT with vocabullary - template1
Test cases:      1
Fails (rate):    1 (100.0%)

Example fails:
0.5 the story .
----






In [111]:
suite.save('./suites/posneg-approach-random-roberta.suite')

#### Model Xlnet

In [112]:
lexicons = tg4.lexicons
templates4 = tg4.template_texts
masked = tg4.masked_texts
labels = [sent.prediction.label for sent in tg4.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [113]:
for template, label, i in zip(templates4, labels, range(len(templates4))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [114]:
suite.run(model_xlnet.predict, overwrite=True)

Running Test: MFT with vocabullary - template1
Predicting 1 examples


  prob = softmax(tensor_logits[0]).detach().numpy()


In [115]:
suite.summary()

Vocabullary

Test: MFT with vocabullary - template1
Test cases:      1
Fails (rate):    0 (0.0%)






In [116]:
suite.save('./suites/posneg-approach-random-xlnet.suite')

# Carregando suite de teste

In [117]:
from checklist.test_suite import TestSuite
suite = TestSuite.from_file('./suites/posneg-approach-random-bert.suite')

suite.visual_summary_table()

FileNotFoundError: [Errno 2] No such file or directory: './suites/posneg-approach-random-bert.suite'