# Abordagem 1

Usando a abordagem 1 para gerar templates com foco em templates positivos e negativos. Uma possível aplicação seria testar a capacidade linguística *Vocabullary* com o teste **MFT**.

As etapas desta abordagem são:

1. Rankear as palavras das instâncias completas
2. Quebrar as instâncias em sentenças
3. Filtrar as sentenças que contêm ao menos uma das palavras mais bem rankeadas na etapa anterior
4. Filtrar as sentenças com palavras relevantes (adjetivos ou verbos)
5. Classificar as sentenças usando o *Oráculo*
6. Filtrar as sentenças classificadas de forma unânime
7. Substituir as palavras relevantes por máscaras

In [1]:
%config Completer.use_jedi = False
import sys
sys.path.append('../../')

## Carregando o dataset, o modelo alvo e os modelos auxiliares

In [2]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

movie_reviews_rt_df = pd.read_csv('./data/data-rt-100samples.csv')
movie_reviews_rt_df.head(5)

Unnamed: 0,label,text,words
0,1,allen's underestimated charm delivers more goodies than lumps of coal .,11
1,0,skip the film and buy the philip glass soundtrack cd .,11
2,0,involving at times but lapses quite casually into the absurd .,11
3,0,while hoffman's performance is great the subject matter goes nowhere .,11
4,1,a flick about our infantilized culture that isn't entirely infantile .,11


In [3]:
import re
import numpy as np
from torch.nn.functional import softmax
from transformers import AutoTokenizer, AutoModelForSequenceClassification

def pre_proccess(text):
    text = text.lower()
    text = re.sub('["\',!-.:-@0-9/]()', ' ', text)
    return text

# Wrapper to adapt output format
class SentimentAnalisysModelWrapper:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        
    def __predict(self, text_input):
        text_preprocessed = pre_proccess(text_input)
        tokenized = self.tokenizer(text_preprocessed, padding=True, truncation=True, max_length=512, 
                                    add_special_tokens = True, return_tensors="pt")
        
        tensor_logits = self.model(**tokenized)
        prob = softmax(tensor_logits[0]).detach().numpy()
        pred = np.argmax(prob)
        
        return pred, prob
    
    def predict_label(self, text_inputs):
        return self.predict(text_inputs)[0]
        
    def predict_proba(self, text_inputs):
        return self.predict(text_inputs)[1]
        
    def predict(self, text_inputs):
        if isinstance(text_inputs, str):
            text_inputs = [text_inputs]
        
        preds = []
        probs = []

        for text_input in text_inputs:
            pred, prob = self.__predict(text_input)
            preds.append(pred)
            probs.append(prob[0])

        return np.array(preds), np.array(probs) # ([0, 1], [[0.99, 0.01], [0.03, 0.97]])

# Auxiliar function to load and wrap a model from Hugging Face
def load_model(model_name):
    print(f'Loading model {model_name}...')
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    
    return SentimentAnalisysModelWrapper(model, tokenizer)

# Hugging Face hosted model names 
movie_reviews_models = {
    'bert': 'textattack/bert-base-uncased-rotten-tomatoes', 
    'albert': 'textattack/albert-base-v2-rotten-tomatoes', 
    'distilbert': 'textattack/distilbert-base-uncased-rotten-tomatoes', 
    'roberta': 'textattack/roberta-base-rotten-tomatoes', 
    'xlnet': 'textattack/xlnet-base-cased-rotten-tomatoes'
}

In [4]:
m0 = load_model(movie_reviews_models['bert'])
m1 = load_model(movie_reviews_models['albert'])
m2 = load_model(movie_reviews_models['distilbert'])
m3 = load_model(movie_reviews_models['roberta'])
m4 = load_model(movie_reviews_models['xlnet'])

# Models to be used as oracle
models_1 = [m1, m2, m3, m4]
models_2 = [m0, m2, m3, m4]
models_3 = [m0, m1, m3, m4]
models_4 = [m0, m1, m2, m4]
models_5 = [m0, m1, m2, m3]
# Target model
model_bert = m0
model_albert = m1
model_distilbert = m2
model_roberta = m3
model_xlnet = m4

Loading model textattack/bert-base-uncased-rotten-tomatoes...
Loading model textattack/albert-base-v2-rotten-tomatoes...
Loading model textattack/distilbert-base-uncased-rotten-tomatoes...
Loading model textattack/roberta-base-rotten-tomatoes...


Some weights of the model checkpoint at textattack/roberta-base-rotten-tomatoes were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Loading model textattack/xlnet-base-cased-rotten-tomatoes...


# Gerando os templates
O método de rankeamento das palavras usado no PosNegTemplateGenerator é o Replace-1 Score

In [5]:
from template_generator.tasks.sentiment_analisys import PosNegTemplateGeneratorApp1

tg0 = PosNegTemplateGeneratorApp1(model_bert, models_1)
tg1 = PosNegTemplateGeneratorApp1(model_albert, models_2)
tg2 = PosNegTemplateGeneratorApp1(model_distilbert, models_3)
tg3 = PosNegTemplateGeneratorApp1(model_roberta, models_4)
tg4 = PosNegTemplateGeneratorApp1(model_xlnet, models_5)

### Número inicial de instâncias: 5

In [6]:
# Sampling instances
np.random.seed(220)
n_instances = 5
df_sampled = movie_reviews_rt_df.sample(n_instances)

instances = [x for x in df_sampled['text'].values]

In [7]:
templates0 = tg0.generate_templates(instances, n_masks=2, ranked_words_count=4)
templates1 = tg1.generate_templates(instances, n_masks=2, ranked_words_count=4)
templates2 = tg2.generate_templates(instances, n_masks=2, ranked_words_count=4)
templates3 = tg3.generate_templates(instances, n_masks=2, ranked_words_count=4)
templates4 = tg4.generate_templates(instances, n_masks=2, ranked_words_count=4)

Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


Converting texts to sentences...
:: 6 sentences were generated.
Filtering instances by contaning ranked words...
:: 1 sentences remaining.
4
Filtering instances by relevant words...
['ADJ']
{word: future, index: 2, tag: NOUN, rank_score: -0.001435995101928711}
{word: for, index: 0, tag: ADP, rank_score: -0.0009976029396057129}
{word: hopes, index: 4, tag: VERB, rank_score: -0.00036329030990600586}
{word: one, index: 3, tag: NUM, rank_score: -0.0001398324966430664}
 
:: 0 sentences remaining.
Predicting inputs...
:: Sentence predictions done.
Ranking words using Replace-1 Score...
Converting texts to sentences...
:: 6 sentences were generated.
Filtering instances by contaning ranked words...
:: 1 sentences remaining.
4
Filtering instances by relevant words...
['ADJ']
{word: the, index: 1, tag: DET, rank_score: -0.23128339648246765}
{word: for, index: 0, tag: ADP, rank_score: -0.22551000118255615}
{word: hopes, index: 4, tag: VERB, rank_score: 0.1844933032989502}
{word: future, index: 2,

#### Tempo de execução para 5 instâncias: 9.7s

In [8]:
df0 = tg0.to_dataframe()
df0

Unnamed: 0,label,original_text,masked_text,template_text


In [9]:
df1 = tg1.to_dataframe()
df1

Unnamed: 0,label,original_text,masked_text,template_text


In [10]:
df2 = tg2.to_dataframe()
df2

Unnamed: 0,label,original_text,masked_text,template_text


In [11]:
df3 = tg3.to_dataframe()
df3

Unnamed: 0,label,original_text,masked_text,template_text


In [12]:
df4 = tg4.to_dataframe()
df4

Unnamed: 0,label,original_text,masked_text,template_text


In [13]:
tg0.lexicons

{'pos_adj': [], 'neg_adj': []}

In [14]:
tg1.lexicons

{'pos_adj': [], 'neg_adj': []}

In [15]:
tg2.lexicons

{'pos_adj': [], 'neg_adj': []}

In [16]:
tg3.lexicons

{'pos_adj': [], 'neg_adj': []}

In [17]:
tg4.lexicons

{'pos_adj': [], 'neg_adj': []}

### Número inicial de instâncias: 100

In [18]:
# Using all 100 instances
instances = [x for x in movie_reviews_rt_df['text'].values]

In [19]:
tg0 = PosNegTemplateGeneratorApp1(model_bert, models_1)
tg1 = PosNegTemplateGeneratorApp1(model_albert, models_2)
tg2 = PosNegTemplateGeneratorApp1(model_distilbert, models_3)
tg3 = PosNegTemplateGeneratorApp1(model_roberta, models_4)
tg4 = PosNegTemplateGeneratorApp1(model_xlnet, models_5)

templates0 = tg0.generate_templates(instances, n_masks=2, ranked_words_count=4)

Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


Converting texts to sentences...
:: 134 sentences were generated.
Filtering instances by contaning ranked words...
:: 23 sentences remaining.
4
Filtering instances by relevant words...
['ADJ']
{word: well-made, index: 5, tag: ADJ, rank_score: -0.0003566145896911621}
{word: clunker, index: 11, tag: NOUN, rank_score: -0.00032889842987060547}
{word: clunker, index: 8, tag: NOUN, rank_score: 0.00023746490478515625}
{word: thoughtful, index: 6, tag: ADJ, rank_score: -0.00020372867584228516}
 
['ADJ']
{word: and, index: 7, tag: CONJ, rank_score: -0.40014511346817017}
{word: this, index: 9, tag: DET, rank_score: -0.3538123369216919}
{word: regard, index: 10, tag: NOUN, rank_score: -0.012109756469726562}
{word: guard, index: 12, tag: NOUN, rank_score: 0.0036880970001220703}
 
['ADJ']
{word: bad, index: 7, tag: ADJ, rank_score: -0.014482975006103516}
{word: trailers, index: 10, tag: NOUN, rank_score: -0.007329761981964111}
{word: as, index: 6, tag: ADV, rank_score: -0.0021179914474487305}
{word

In [20]:
templates1 = tg1.generate_templates(instances, n_masks=2, ranked_words_count=4)

Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


Converting texts to sentences...
:: 134 sentences were generated.
Filtering instances by contaning ranked words...
:: 24 sentences remaining.
4
Filtering instances by relevant words...
['ADJ']
{word: clunker, index: 11, tag: NOUN, rank_score: -0.06854057312011719}
{word: nonetheless, index: 12, tag: ADV, rank_score: -0.012639641761779785}
{word: clunker, index: 8, tag: NOUN, rank_score: 0.007877051830291748}
{word: but, index: 9, tag: CONJ, rank_score: -0.006553947925567627}
 
['ADJ']
{word: delivers, index: 13, tag: NOUN, rank_score: -0.4280174970626831}
{word: and, index: 7, tag: CONJ, rank_score: -0.17287731170654297}
{word: this, index: 9, tag: DET, rank_score: -0.16363215446472168}
{word: regard, index: 10, tag: NOUN, rank_score: -0.071982741355896}
 
['ADJ']
{word: as, index: 6, tag: ADV, rank_score: -0.05069446563720703}
{word: trailers, index: 10, tag: NOUN, rank_score: -0.036840617656707764}
{word: bad, index: 7, tag: ADJ, rank_score: -0.030507028102874756}
{word: could, index

In [21]:
templates2 = tg2.generate_templates(instances, n_masks=2, ranked_words_count=4)

Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


Converting texts to sentences...
:: 134 sentences were generated.
Filtering instances by contaning ranked words...
:: 24 sentences remaining.
4
Filtering instances by relevant words...
['ADJ']
{word: thoughtful, index: 6, tag: ADJ, rank_score: -0.183561772108078}
{word: nonetheless, index: 12, tag: ADV, rank_score: -0.18268129229545593}
{word: clunker, index: 11, tag: NOUN, rank_score: 0.17087829113006592}
{word: but, index: 9, tag: CONJ, rank_score: 0.17018675804138184}
 
['ADJ']
{word: and, index: 7, tag: CONJ, rank_score: -0.4269503653049469}
{word: this, index: 9, tag: DET, rank_score: -0.35293877124786377}
{word: delivers, index: 13, tag: NOUN, rank_score: -0.16643232107162476}
{word: regard, index: 10, tag: NOUN, rank_score: -0.10821014642715454}
 
['ADJ']
{word: bad, index: 7, tag: ADJ, rank_score: -0.13954830169677734}
{word: could, index: 0, tag: VERB, rank_score: 0.051245808601379395}
{word: trailers, index: 10, tag: NOUN, rank_score: -0.026774048805236816}
{word: be, index: 

In [22]:
templates3 = tg3.generate_templates(instances, n_masks=2, ranked_words_count=4)

Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


Converting texts to sentences...
:: 134 sentences were generated.
Filtering instances by contaning ranked words...
:: 22 sentences remaining.
4
Filtering instances by relevant words...
['ADJ']
{word: clunker, index: 2, tag: NOUN, rank_score: -0.030784964561462402}
{word: real, index: 1, tag: ADJ, rank_score: 0.01168733835220337}
{word: ., index: 3, tag: ., rank_score: 0.0061383843421936035}
{word: a, index: 0, tag: DET, rank_score: -0.0011336803436279297}
 
['ADJ']
{word: delivers, index: 13, tag: NOUN, rank_score: -0.11574995517730713}
{word: and, index: 7, tag: CONJ, rank_score: -0.01716536283493042}
{word: this, index: 9, tag: DET, rank_score: -0.013212978839874268}
{word: in, index: 8, tag: ADP, rank_score: -0.002525031566619873}
 
['ADJ']
{word: bad, index: 7, tag: ADJ, rank_score: -0.636002779006958}
{word: ?, index: 11, tag: ., rank_score: 0.05245649814605713}
{word: be, index: 5, tag: VERB, rank_score: -0.04218447208404541}
{word: could, index: 0, tag: VERB, rank_score: 0.03616

In [23]:
templates4 = tg4.generate_templates(instances, n_masks=2, ranked_words_count=4)

Ranking words using Replace-1 Score...


  prob = softmax(tensor_logits[0]).detach().numpy()


Converting texts to sentences...
:: 134 sentences were generated.
Filtering instances by contaning ranked words...
:: 21 sentences remaining.
4
Filtering instances by relevant words...
['ADJ']
{word: clunker, index: 2, tag: NOUN, rank_score: -0.16852736473083496}
{word: real, index: 1, tag: ADJ, rank_score: -0.10247564315795898}
{word: a, index: 0, tag: DET, rank_score: -0.06957018375396729}
{word: ., index: 3, tag: ., rank_score: 0.0}
 
['ADJ']
{word: nothing, index: 1, tag: NOUN, rank_score: -0.0354992151260376}
{word: swashbuckling, index: 5, tag: NOUN, rank_score: -0.014843106269836426}
{word: sometimes, index: 0, tag: ADV, rank_score: -0.009717583656311035}
{word: like, index: 3, tag: ADP, rank_score: -0.00559389591217041}
 
['ADJ']
{word: country, index: 2, tag: NOUN, rank_score: -0.015492022037506104}
{word: as, index: 6, tag: ADV, rank_score: -0.010726392269134521}
{word: be, index: 5, tag: VERB, rank_score: -0.010459423065185547}
{word: as, index: 8, tag: ADP, rank_score: -0.0

In [24]:
df0 = tg0.to_dataframe()
df0

Unnamed: 0,label,original_text,masked_text,template_text
0,0,a well-made thoughtful well-acted clunker but a clunker nonetheless .,a {mask} {mask} well-acted clunker but a clunker nonetheless .,a {pos_adj} {neg_adj} well-acted clunker but a clunker nonetheless .
1,1,the charming result is festival in cannes .,the {mask} result is {mask} in cannes .,the {pos_adj} result is {pos_adj} in cannes .


In [25]:
df1 = tg1.to_dataframe()
df1

Unnamed: 0,label,original_text,masked_text,template_text
0,1,a story an old and scary one about the monsters we make and the vengeance they take .,a story an {mask} and {mask} one about the monsters we make and the vengeance they take .,a story an {neg_adj} and {neg_adj} one about the monsters we make and the vengeance they take .


In [26]:
df2 = tg2.to_dataframe()
df2

Unnamed: 0,label,original_text,masked_text,template_text
0,0,is an inexpressible and drab wannabe looking for that exact niche .,is an {mask} and {mask} wannabe looking for that exact niche .,is an {neg_adj} and {neg_adj} wannabe looking for that exact niche .


In [27]:
df3 = tg3.to_dataframe()
df3

Unnamed: 0,label,original_text,masked_text,template_text


In [28]:
df4 = tg4.to_dataframe()
df4

Unnamed: 0,label,original_text,masked_text,template_text


In [29]:
tg0.lexicons

{'pos_adj': ['well-made', 'festival', 'charming'], 'neg_adj': ['thoughtful']}

In [30]:
tg1.lexicons

{'pos_adj': [], 'neg_adj': ['scary', 'old']}

In [31]:
tg2.lexicons

{'pos_adj': [], 'neg_adj': ['drab', 'inexpressible']}

In [32]:
tg3.lexicons

{'pos_adj': [], 'neg_adj': []}

In [33]:
tg4.lexicons

{'pos_adj': [], 'neg_adj': []}

#### Tempo de execução para 100 instâncias: 4m 17.8s

## Checklist

#### Model BERT

In [34]:
import checklist
from checklist.editor import Editor
from checklist.test_suite import TestSuite
from checklist.test_types import MFT

In [35]:
lexicons = tg0.lexicons
templates0 = tg0.template_texts
masked = tg0.masked_texts
labels = [sent.prediction.label for sent in tg0.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [36]:
for template, label, i in zip(templates0, labels, range(len(templates0))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [37]:
suite.run(model_bert.predict, overwrite=True)

Running Test: MFT with vocabullary - template1
Predicting 3 examples


  prob = softmax(tensor_logits[0]).detach().numpy()


Running Test: MFT with vocabullary - template2
Predicting 3 examples


In [38]:
suite.summary()

Vocabullary

Test: MFT with vocabullary - template1
Test cases:      3
Fails (rate):    0 (0.0%)


Test: MFT with vocabullary - template2
Test cases:      3
Fails (rate):    0 (0.0%)






In [39]:
suite.save('./suites/posneg-approach1-bert.suite')

#### Model Albert

In [40]:
lexicons = tg1.lexicons
templates1 = tg1.template_texts
masked = tg1.masked_texts
labels = [sent.prediction.label for sent in tg1.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [41]:
for template, label, i in zip(templates1, labels, range(len(templates1))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [42]:
suite.run(model_albert.predict, overwrite=True)

Running Test: MFT with vocabullary - template1
Predicting 2 examples


  prob = softmax(tensor_logits[0]).detach().numpy()


In [43]:
suite.summary()

Vocabullary

Test: MFT with vocabullary - template1
Test cases:      2
Fails (rate):    0 (0.0%)






In [44]:
suite.save('./suites/posneg-approach1-albert.suite')

#### Model Distilbert

In [45]:
lexicons = tg2.lexicons
templates2 = tg2.template_texts
masked = tg2.masked_texts
labels = [sent.prediction.label for sent in tg2.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [46]:
for template, label, i in zip(templates2, labels, range(len(templates2))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [47]:
suite.run(model_distilbert.predict, overwrite=True)

Running Test: MFT with vocabullary - template1
Predicting 2 examples


  prob = softmax(tensor_logits[0]).detach().numpy()


In [48]:
suite.summary()

Vocabullary

Test: MFT with vocabullary - template1
Test cases:      2
Fails (rate):    0 (0.0%)






In [49]:
suite.save('./suites/posneg-approach1-distilbert.suite')

#### Model Roberta

In [50]:
lexicons = tg3.lexicons
templates3 = tg3.template_texts
masked = tg3.masked_texts
labels = [sent.prediction.label for sent in tg3.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [51]:
for template, label, i in zip(templates3, labels, range(len(templates3))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [52]:
suite.run(model_roberta.predict, overwrite=True)

In [53]:
suite.summary()

In [54]:
suite.save('./suites/posneg-approach1-roberta.suite')

#### Model Xlnet

In [55]:
lexicons = tg4.lexicons
templates4 = tg4.template_texts
masked = tg4.masked_texts
labels = [sent.prediction.label for sent in tg4.sentences]

editor = Editor()
editor.add_lexicon('pos_adj', lexicons['pos_adj'])
editor.add_lexicon('neg_adj', lexicons['neg_adj'])

suite = TestSuite()

In [56]:
for template, label, i in zip(templates4, labels, range(len(templates4))):
    t = editor.template(template, remove_duplicates=True, labels=int(label))

    suite.add(MFT(
        data=t.data,
        labels=label,
        capability="Vocabullary", 
        name=f"Test: MFT with vocabullary - template{i+1}",
        description="Checking if the model can handle vocabullary")) 

In [57]:
suite.run(model_xlnet.predict, overwrite=True)

In [58]:
suite.summary()

In [59]:
suite.save('./suites/posneg-approach1-xlnet.suite')

# Carregando suite de teste

In [60]:
from checklist.test_suite import TestSuite
suite = TestSuite.from_file('./suites/posneg-approach1-bert.suite')

suite.visual_summary_table()

Please wait as we prepare the table data...


SuiteSummarizer(stats={'npassed': 0, 'nfailed': 0, 'nfiltered': 0}, test_infos=[{'name': 'Test: MFT with vocab…