In [145]:
import numpy as np
import pandas as pd
import os
import re
import torch
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix, f1_score

In [146]:
import logging
# Root logger (logging.getLogger() without a name returns the root logger).
logger = logging.getLogger()
# Text and label column names; `process_row` uses them to pull the two fields
# out of each DataFrame row.
TEXT_COL = "text"
LABEL_COL = "label"

**Credit:** Modified from [ben0it8's tutorial](https://github.com/ben0it8/containerized-transformer-finetuning/blob/develop/research/finetune-transformer-on-imdb5k.ipynb)

# Read Dataset and Preprocessing 

## Three datasets: IMDb, Rotten Tomatoes, and a test dataset from Tieto

In [147]:
# Read the IMDb dataset (one review per line).
# `with` closes every file handle deterministically instead of leaking it.
with open('../data/movie_data/full_train.txt', 'r') as train_file:
    reviews_train = [line.strip() for line in train_file]

with open('../data/movie_data/full_test.txt', 'r') as test_file:
    reviews_test = [line.strip() for line in test_file]

# Labels are positional: indices 0..12499 are 'pos', 12500..24999 are 'neg'.
target = ['pos' if i < 12500 else 'neg' for i in range(25000)]

# Test dataset from Tieto: one sub-directory per class, one file per review.
path = '../data/movie_review_data'
classes = ['neg', 'pos']
labels    = []
test_data_mine = []
space = ' '
for class_name in classes:
    class_dir = os.path.join(path, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the resulting dataset reproducible across machines.
    for file_name in sorted(os.listdir(class_dir)):
        with open(os.path.join(class_dir, file_name)) as review_file:
            comment = review_file.read()
        # Append label and text together so the two lists cannot drift apart
        # if reading a file fails.
        labels.append(class_name)
        test_data_mine.append(comment.replace('\n', ' '))
print('Number of Tieto data : {}'.format(len(test_data_mine)))

# Rotten Tomato Dataset, only training set has labels
rotten_tomato_train = pd.read_csv('../data/rottenTomatoes/train.tsv', sep='\t')
rotten_tomato_test  = pd.read_csv('../data/rottenTomatoes/test.tsv', sep='\t')
# Keep only the first row of every SentenceId.
rotten_tomato_train = rotten_tomato_train.drop_duplicates(subset=['SentenceId'], keep='first')
rotten_tomato_test = rotten_tomato_test.drop_duplicates(subset=['SentenceId'], keep='first')
# Drop rows with Sentiment == 2. `.copy()` gives an independent frame so the
# column assignment below does not write into a slice (SettingWithCopyWarning).
rotten_tomato_train = rotten_tomato_train[~rotten_tomato_train['Sentiment'].isin([2])].copy()
# Binarise: Sentiment above 2 -> 'pos', everything else -> 'neg'.
rotten_tomato_train['Label'] = rotten_tomato_train['Sentiment'].apply(lambda x: 'pos' if x>2 else 'neg')
rotten_tomato_train_x = list(rotten_tomato_train['Phrase'])
rotten_tomato_train_y = list(rotten_tomato_train['Label'])
rotten_tomato_train.head(10)

Number of Tieto data : 1999


Unnamed: 0,PhraseId,SentenceId,Phrase,Sentiment,Label
0,1,1,A series of escapades demonstrating the adage ...,1,neg
63,64,2,"This quiet , introspective and entertaining in...",4,pos
81,82,3,"Even fans of Ismail Merchant 's work , I suspe...",1,neg
116,117,4,A positively thrilling combination of ethnogra...,3,pos
156,157,5,Aggressive self-glorification and a manipulati...,1,neg
166,167,6,A comedy-drama of nearly epic proportions root...,4,pos
198,199,7,"Narratively , Trouble Every Day is a plodding ...",1,neg
213,214,8,"The Importance of Being Earnest , so thick wit...",3,pos
247,248,9,But it does n't leave you with much .,1,neg
259,260,10,You could hate it for the same reason .,1,neg


In [148]:
# Preprocessing using regular expressions

# Raw strings hand the backslashes to the regex engine untouched; in a normal
# string literal sequences such as "\d", "\s" or "\(" are invalid Python escapes
# and trigger warnings on recent interpreters.

# Punctuation, brackets, quotes and digit runs: removed without a replacement.
REPLACE_NO_SPACE = re.compile(r"(\.)|(\;)|(\:)|(\!)|(\?)|(\,)|(\")|(\()|(\))|(\[)|(\])|(\d+)")
# Doubled HTML line breaks, hyphens and slashes: replaced by a single space.
REPLACE_WITH_SPACE = re.compile(r"(<br\s*/><br\s*/>)|(\-)|(\/)")
NO_SPACE = ""
SPACE = " "

# Replace the abbreviation with the complete words
def _replacer(text):
    replacement_patterns = [
        (r'won\'t', 'will not'),
        (r'can\'t', 'cannot'),
        (r'i\'m', 'i am'),
        (r'ain\'t', 'is not'),
        (r'(\w+)\'ll', r'\g<1> will'),
        (r'(\w+)n\'t', r'\g<1> not'),
        (r'(\w+)\'ve', r'\g<1> have'),
        (r'(\w+)\'s', r'\g<1> is'),
        (r'(\w+)\'re', r'\g<1> are'),
        (r'(\w+)\'d', r'\g<1> would')]
    patterns = [(re.compile(regex), repl) for (regex, repl) in replacement_patterns]
    s = text
    for (pattern, repl) in patterns:
        (s, _) = re.subn(pattern, repl, s)
    return s

def preprocess_reviews(reviews):
    """Clean every review and return the cleaned texts as a new list.

    Each review is lower-cased, stripped of the characters matched by
    REPLACE_NO_SPACE, has the REPLACE_WITH_SPACE matches turned into spaces,
    and finally has its contractions expanded by `_replacer`.
    """
    cleaned = []
    for raw_line in reviews:
        without_punctuation = REPLACE_NO_SPACE.sub(NO_SPACE, raw_line.lower())
        with_spaces = REPLACE_WITH_SPACE.sub(SPACE, without_punctuation)
        cleaned.append(_replacer(with_spaces))
    return cleaned

In [149]:
# Pool both IMDb files and draw a fresh seeded 80/20 split.
# `target` is reused for the second file on the assumption that full_test.txt
# has the same pos-then-neg layout as full_train.txt — TODO confirm.
# NOTE: this cell rebinds `reviews_train` / `reviews_test`. Re-running it without
# re-running the loading cell pairs already-shuffled reviews with ordered labels.
reviews_all = [*reviews_train, *reviews_test]
target_all = target * 2
reviews_train, reviews_test, reviews_y_train, reviews_y_test = train_test_split(
    reviews_all,
    target_all,
    test_size=0.2,
    random_state=42,
)

In [150]:
# Run the same cleaning pipeline over all four corpora, in this order:
# IMDb train split, IMDb test split, Rotten Tomatoes phrases, Tieto reviews.
reviews_train_clean, reviews_test_clean, rotten_tomato_clean, review_tieto = (
    preprocess_reviews(corpus)
    for corpus in (reviews_train, reviews_test, rotten_tomato_train_x, test_data_mine)
)

In [151]:
# # Shuffle training set
# import random 
# shuffled = random.sample(list(range(25000)), 25000)
# reviews_train_clean = np.array(reviews_train_clean)[shuffled]
# target = np.array(target)[shuffled]

# One frame per dataset, all with the same two columns. The column names come
# from LABEL_COL / TEXT_COL, the same constants `process_row` reads them with.
df_train = pd.DataFrame({LABEL_COL: reviews_y_train, TEXT_COL: reviews_train_clean})
df_test = pd.DataFrame({LABEL_COL: reviews_y_test, TEXT_COL: reviews_test_clean})
# Use the cleaned Tieto reviews (`review_tieto`), not the raw `test_data_mine`,
# so this test set goes through the same preprocessing as the other frames.
df_tieto = pd.DataFrame({LABEL_COL: labels, TEXT_COL: review_tieto})
df_tomato = pd.DataFrame({LABEL_COL: rotten_tomato_train_y, TEXT_COL: rotten_tomato_clean})
df_tomato.head()

Unnamed: 0,label,text
0,neg,a series of escapades demonstrating the adage ...
1,pos,this quiet introspective and entertaining ind...
2,neg,even fans of ismail merchant 's work i suspec...
3,pos,a positively thrilling combination of ethnogra...
4,neg,aggressive self glorification and a manipulati...


# Text processing for Transformer: Tokenization, padding...

In [152]:
# labels to integers mapping (string class name -> integer class id)
label2int = {'neg':0, 'pos':1}

from torch.utils.data import TensorDataset, random_split, DataLoader
import warnings
from tqdm import tqdm_notebook as tqdm
from typing import Tuple

# Fixed length every example is truncated or padded to; passed to TextProcessor below.
NUM_MAX_POSITIONS = 256
# Mini-batch size; stored in the fine-tuning config and used for every DataLoader.
BATCH_SIZE = 32

class TextProcessor:
    """Convert ``(label, text)`` pairs into fixed-length id sequences.

    Every sequence produced is exactly `num_max_positions` ids long: the token
    ids of the text (cut to `num_max_positions - 1` if necessary), then the
    [CLS] id, then [PAD] ids up to the full length.
    """
    # special tokens for classification and padding
    CLS = '[CLS]'
    PAD = '[PAD]'

    def __init__(self, tokenizer, label2id: dict, num_max_positions:int=512):
        self.tokenizer = tokenizer
        self.label2id = label2id
        self.num_labels = len(label2id)
        self.num_max_positions = num_max_positions

    def process_example(self, example: Tuple[str, str]):
        """Convert one ``(label, text)`` pair to ``(ids, label_id)``.

        `example[0]` is the label name (looked up in `label2id`) and
        `example[1]` is the text to tokenize.
        """
        assert len(example) == 2
        label, text = example
        assert isinstance(text, str)
        tokens = self.tokenizer.tokenize(text)

        # Slots left over once the tokens and the trailing [CLS] are placed;
        # a negative value means the text is too long and must be cut.
        n_pad = self.num_max_positions - len(tokens) - 1
        if n_pad < 0:
            tokens = tokens[:self.num_max_positions - 1]

        ids = self.tokenizer.convert_tokens_to_ids(tokens) + [self.tokenizer.vocab[self.CLS]]
        if n_pad >= 0:
            ids = ids + [self.tokenizer.vocab[self.PAD]] * n_pad

        return ids, self.label2id[label]

# download the 'bert-base-cased' tokenizer
# NOTE(review): this tokenizer is cased (do_lower_case=False) while
# `preprocess_reviews` lower-cases every review before it is tokenized —
# confirm that this combination is intended.
from pytorch_transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)

# initialize a TextProcessor that emits sequences of NUM_MAX_POSITIONS ids
processor = TextProcessor(tokenizer, label2int, num_max_positions=NUM_MAX_POSITIONS)

# Fine-tuning config

In [153]:
from collections import namedtuple
import torch

# Output locations used by this notebook.
LOG_DIR = "./logs/"
CACHE_DIR = "./cache/"

# First CUDA device when one is available, CPU otherwise.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Immutable record of every fine-tuning hyper-parameter.
FineTuningConfig = namedtuple(
    'FineTuningConfig',
    field_names=[
        "num_classes", "dropout", "init_range", "batch_size", "lr", "max_norm",
        "n_epochs", "n_warmup", "valid_pct", "gradient_acc_steps", "device", "log_dir",
    ],
)

finetuning_config = FineTuningConfig(
    num_classes=2,
    dropout=0.05,
    init_range=0.02,
    batch_size=BATCH_SIZE,
    lr=6.5e-5,
    max_norm=1.0,
    n_epochs=2,
    n_warmup=10,
    valid_pct=0.25,
    gradient_acc_steps=2,
    device=device,
    log_dir=LOG_DIR,
)

finetuning_config

FineTuningConfig(num_classes=2, dropout=0.05, init_range=0.02, batch_size=32, lr=6.5e-05, max_norm=1.0, n_epochs=2, n_warmup=10, valid_pct=0.25, gradient_acc_steps=2, device=device(type='cpu'), log_dir='./logs/')

# Create Dataloader

In [154]:
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count
from itertools import repeat

# Number of worker processes used to tokenize rows in parallel.
num_cores = cpu_count()

def process_row(processor, row, text_col=None, label_col=None):
    """Run `processor.process_example` on one `(index, row)` pair from `df.iterrows()`.

    `text_col` / `label_col` name the columns to read; when omitted they fall
    back to the module-level TEXT_COL / LABEL_COL.
    """
    text_col = TEXT_COL if text_col is None else text_col
    label_col = LABEL_COL if label_col is None else label_col
    _, record = row
    return processor.process_example((record[label_col], record[text_col]))

def create_dataloader(df: pd.DataFrame,
                      processor: TextProcessor,
                      batch_size: int = 32,
                      shuffle: bool = False,
                      valid_pct: float = None,
                      text_col: str = "text",
                      label_col: str = "label"):
    """Process rows in `df` with `processor` and return DataLoader(s).

    Args:
        df: frame holding one example per row.
        processor: object whose `process_example((label, text))` returns `(ids, label_id)`.
        batch_size: batch size of every returned loader.
        shuffle: shuffle flag of the single loader returned when `valid_pct` is None.
        valid_pct: when given, the fraction of rows split off at random for validation.
        text_col: name of the text column in `df`.
        label_col: name of the label column in `df`.

    Returns:
        `(train_loader, valid_loader)` when `valid_pct` is not None, otherwise
        a single DataLoader over all rows.
    """
    n_examples = len(df)
    # Executor.map rejects chunksize < 1, which `len(df) // 10` produces for
    # frames with fewer than 10 rows.
    chunksize = max(1, n_examples // 10)

    with ProcessPoolExecutor(max_workers=num_cores) as executor:
        result = list(
            tqdm(executor.map(process_row,
                              repeat(processor),
                              df.iterrows(),
                              # forward the column names instead of ignoring them
                              repeat(text_col),
                              repeat(label_col),
                              chunksize=chunksize),
                 desc=f"Processing {n_examples} examples on {num_cores} cores",
                 total=n_examples))

    features = [r[0] for r in result]
    labels = [r[1] for r in result]

    dataset = TensorDataset(torch.tensor(features, dtype=torch.long),
                            torch.tensor(labels, dtype=torch.long))

    if valid_pct is not None:
        valid_size = int(valid_pct * n_examples)
        train_size = n_examples - valid_size
        # random_split returns the subsets in the order of the requested sizes.
        valid_dataset, train_dataset = random_split(dataset,
                                                    [valid_size, train_size])
        valid_loader = DataLoader(valid_dataset,
                                  batch_size=batch_size,
                                  shuffle=False)
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
        return train_loader, valid_loader

    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             num_workers=0,
                             shuffle=shuffle,
                             pin_memory=torch.cuda.is_available())
    return data_loader

In [155]:
# create train and valid sets by splitting
train_dl, valid_dl = create_dataloader(
    df_train,
    processor,
    batch_size=finetuning_config.batch_size,
    valid_pct=finetuning_config.valid_pct,
)

# The three evaluation sets are wrapped whole (no validation split), in the
# order: IMDb test split, Tieto reviews, Rotten Tomatoes.
test_dl, test_tieto, test_tomato = (
    create_dataloader(frame, processor,
                      batch_size=finetuning_config.batch_size,
                      valid_pct=None)
    for frame in (df_test, df_tieto, df_tomato)
)

HBox(children=(IntProgress(value=0, description='Processing 40000 examples on 8 cores', max=40000, style=Progr…

HBox(children=(IntProgress(value=0, description='Processing 10000 examples on 8 cores', max=10000, style=Progr…

HBox(children=(IntProgress(value=0, description='Processing 1999 examples on 8 cores', max=1999, style=Progres…

HBox(children=(IntProgress(value=0, description='Processing 6874 examples on 8 cores', max=6874, style=Progres…

## TransformerWithClfHead based on the OpenAI GPT-style [Transformer](https://s3-us-west-2.amazonaws.com/openai-assets/research-covers/language-unsupervised/language_understanding_paper.pdf)

In [156]:
import torch.nn as nn

def get_num_params(model):
    """Return the number of trainable scalar parameters of `model` as a Python int.

    Only parameters with `requires_grad=True` are counted. `numel()` is used
    rather than `np.prod(p.size())` because the latter yields the float 1.0 for
    0-dim parameters and numpy scalars otherwise.
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

class Transformer(nn.Module):
    """Stack of self-attention + feed-forward layers over token and position embeddings.

    Adopted from https://github.com/huggingface/naacl_transfer_learning_tutorial
    """

    def __init__(self, embed_dim, hidden_dim, num_embeddings, num_max_positions, num_heads, num_layers, dropout, causal):
        """Build the embeddings and `num_layers` attention / feed-forward / layer-norm sets.

        Args:
            embed_dim: width of the embeddings and of every layer's input and output.
            hidden_dim: inner width of each feed-forward block.
            num_embeddings: number of rows of the token embedding table.
            num_max_positions: number of rows of the position embedding table.
            num_heads: number of heads of each nn.MultiheadAttention.
            num_layers: number of layers in the stack.
            dropout: rate shared by the nn.Dropout module and the attention modules.
            causal: when true, `forward` masks attention to later positions.
        """
        super().__init__()
        self.causal = causal
        self.tokens_embeddings = nn.Embedding(num_embeddings, embed_dim)
        self.position_embeddings = nn.Embedding(num_max_positions, embed_dim)
        self.dropout = nn.Dropout(dropout)

        # Four parallel ModuleLists; index i of each list belongs to layer i.
        self.attentions, self.feed_forwards = nn.ModuleList(), nn.ModuleList()
        self.layer_norms_1, self.layer_norms_2 = nn.ModuleList(), nn.ModuleList()
        for _ in range(num_layers):
            self.attentions.append(nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout))
            self.feed_forwards.append(nn.Sequential(nn.Linear(embed_dim, hidden_dim),
                                                    nn.ReLU(),
                                                    nn.Linear(hidden_dim, embed_dim)))
            self.layer_norms_1.append(nn.LayerNorm(embed_dim, eps=1e-12))
            self.layer_norms_2.append(nn.LayerNorm(embed_dim, eps=1e-12))

    def forward(self, x, padding_mask=None):
        """Return the hidden states produced by the last layer.

        x has shape [seq length, batch], padding_mask has shape [batch, seq length].
        `padding_mask` is handed to each attention module as `key_padding_mask`.
        """
        # One position index per sequence step; the trailing axis lets the
        # position embeddings expand across the batch.
        positions = torch.arange(len(x), device=x.device).unsqueeze(-1)
        h = self.tokens_embeddings(x)
        h = h + self.position_embeddings(positions).expand_as(h)
        h = self.dropout(h)

        attn_mask = None
        if self.causal:
            # -inf strictly above the diagonal, 0 elsewhere: a position cannot
            # attend to positions that come after it.
            attn_mask = torch.full((len(x), len(x)), -float('Inf'), device=h.device, dtype=h.dtype)
            attn_mask = torch.triu(attn_mask, diagonal=1)

        for layer_norm_1, attention, layer_norm_2, feed_forward in zip(self.layer_norms_1, self.attentions,
                                                                       self.layer_norms_2, self.feed_forwards):
            # The layer norm is applied first and the residual is added to the
            # normalised `h`. From here on `x` holds sub-layer outputs, not token ids.
            h = layer_norm_1(h)
            x, _ = attention(h, h, h, attn_mask=attn_mask, need_weights=False, key_padding_mask=padding_mask)
            x = self.dropout(x)
            h = x + h

            h = layer_norm_2(h)
            x = feed_forward(h)
            x = self.dropout(x)
            h = x + h
        return h


class TransformerWithClfHead(nn.Module):
    """Transformer body with a linear classification layer on top.

    Adopted from https://github.com/huggingface/naacl_transfer_learning_tutorial
    """
    def __init__(self, config, fine_tuning_config):
        super().__init__()
        self.config = fine_tuning_config
        self.transformer = Transformer(
            embed_dim=config.embed_dim,
            hidden_dim=config.hidden_dim,
            num_embeddings=config.num_embeddings,
            num_max_positions=config.num_max_positions,
            num_heads=config.num_heads,
            num_layers=config.num_layers,
            dropout=fine_tuning_config.dropout,
            causal=not config.mlm,
        )

        self.classification_head = nn.Linear(config.embed_dim, fine_tuning_config.num_classes)
        self.apply(self.init_weights)

    def init_weights(self, module):
        """Draw weights from N(0, init_range) and zero the biases of `module`."""
        draws_weight = isinstance(module, (nn.Linear, nn.Embedding, nn.LayerNorm))
        may_have_bias = isinstance(module, (nn.Linear, nn.LayerNorm))
        if draws_weight:
            module.weight.data.normal_(mean=0.0, std=self.config.init_range)
        if may_have_bias and module.bias is not None:
            module.bias.data.zero_()

    def forward(self, x, clf_tokens_mask, clf_labels=None, padding_mask=None):
        """Classify each sequence from the hidden state(s) selected by `clf_tokens_mask`.

        Returns the logits alone, or `(logits, loss)` when `clf_labels` is given.
        """
        hidden_states = self.transformer(x, padding_mask)

        # Zero out every position the mask does not select, then sum over the
        # sequence axis so only the selected state(s) remain.
        selector = clf_tokens_mask.unsqueeze(-1).float()
        clf_tokens_states = (hidden_states * selector).sum(dim=0)
        clf_logits = self.classification_head(clf_tokens_states)

        if clf_labels is None:
            return clf_logits

        criterion = nn.CrossEntropyLoss(ignore_index=-1)
        flat_logits = clf_logits.view(-1, clf_logits.size(-1))
        loss = criterion(flat_logits, clf_labels.view(-1))
        return clf_logits, loss

In [157]:
from pytorch_transformers import cached_path

# download pre-trained model and config
# NOTE: torch.load unpickles the downloaded files; only use trusted URLs here.
state_dict = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                    "naacl-2019-tutorial/model_checkpoint.pth"), map_location='cpu')

config = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                        "naacl-2019-tutorial/model_training_args.bin"))

# init model: Transformer base + classifier head
model = TransformerWithClfHead(config=config, fine_tuning_config=finetuning_config).to(finetuning_config.device)
# Count the parameters of `model` itself: `model_fine_tuned` does not exist
# until the comparison section, so referring to it here raises NameError on a
# fresh kernel.
print('Number of parameters: {}'.format(get_num_params(model)))

# strict=False: the checkpoint's language-model head is dropped and the new
# classification head keeps its fresh initialisation.
incompatible_keys = model.load_state_dict(state_dict, strict=False)
print(f"Parameters discarded from the pretrained model: {incompatible_keys.unexpected_keys}")
print(f"Parameters added in the model: {incompatible_keys.missing_keys}")

Number of parameters: 50397182
Parameters discarded from the pretrained model: ['lm_head.weight']
Parameters added in the model: ['classification_head.weight', 'classification_head.bias']


# Fine-tuning

In [158]:
from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage, Accuracy 
from ignite.handlers import ModelCheckpoint
from ignite.contrib.handlers import CosineAnnealingScheduler, PiecewiseLinear, create_lr_scheduler_with_warmup, ProgressBar
import torch.nn.functional as F
from pytorch_transformers.optimization import AdamW

# Bert optimizer
optimizer = AdamW(
    model.parameters(),
    lr=finetuning_config.lr,
    correct_bias=False,
)

def update(engine, batch):
    """Training step: forward and backward pass, with an optimizer step every
    `gradient_acc_steps` iterations. Returns the scaled loss as a float."""
    model.train()
    inputs, labels = (t.to(finetuning_config.device) for t in batch)
    inputs = inputs.transpose(0, 1).contiguous() # [S, B]
    cls_positions = (inputs == tokenizer.vocab[processor.CLS])
    _, loss = model(inputs, clf_tokens_mask=cls_positions, clf_labels=labels)

    # Scale so the gradients summed over the accumulation window average out.
    loss = loss / finetuning_config.gradient_acc_steps
    loss.backward()

    torch.nn.utils.clip_grad_norm_(model.parameters(), finetuning_config.max_norm)
    window_complete = engine.state.iteration % finetuning_config.gradient_acc_steps == 0
    if window_complete:
        optimizer.step()
        optimizer.zero_grad()
    return loss.item()

def inference(engine, batch):
    """Evaluation step: returns `(logits, labels)` for one batch, without gradients."""
    model.eval()
    with torch.no_grad():
        batch_first, labels = (t.to(finetuning_config.device) for t in batch)
        inputs = batch_first.transpose(0, 1).contiguous()
        cls_positions = (inputs == tokenizer.vocab[processor.CLS])
        # The padding mask is built from the un-transposed batch.
        pad_positions = (batch_first == tokenizer.vocab[processor.PAD])
        logits = model(inputs,
                       clf_tokens_mask=cls_positions,
                       padding_mask=pad_positions)
    return logits, labels

def predict(model, tokenizer, int2label, input="test"):
    """Predict `input` with `model`.

    Args:
        model: classifier called as `model(x, clf_tokens_mask=..., padding_mask=...)`.
        tokenizer: provides `tokenize`, `convert_tokens_to_ids` and a `vocab`
            mapping that contains '[CLS]' and '[PAD]'.
        int2label: mapping from class index to label name.
        input: the text to classify.

    Returns:
        A dict `{label: probability}` with one entry per class, ordered from
        most to least probable.
    """
    model.eval()
    cls_id = tokenizer.vocab['[CLS]']
    pad_id = tokenizer.vocab['[PAD]']
    ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(input)) + [cls_id]

    # Put the input on the model's own device instead of relying on a global.
    model_device = next(model.parameters()).device
    tensor = torch.tensor(ids, dtype=torch.long, device=model_device).reshape(1, -1)
    tensor_in = tensor.transpose(0, 1).contiguous() # [S, 1]

    # Inference only: do not build an autograd graph.
    with torch.no_grad():
        logits = model(tensor_in,
                       clf_tokens_mask = (tensor_in == cls_id),
                       padding_mask = (tensor == pad_id))

    # The batch holds exactly one example, so row 0 carries its logits.
    probs = F.softmax(logits[0], dim=0).cpu().numpy()
    # Emit every class explicitly: keying on argmax/argmin collapses the dict
    # to a single entry when the probabilities tie.
    ranking = sorted(range(len(probs)), key=lambda idx: -probs[idx])
    return {int2label[idx]: probs[idx] for idx in ranking}

# Two ignite engines: `trainer` runs `update` on every batch, `evaluator` runs `inference`.
trainer = Engine(update)
evaluator = Engine(inference)

# add metric to evaluator
Accuracy().attach(evaluator, "accuracy")

# add evaluator to trainer: eval on valid set after each epoch
@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    "Run the evaluator on `valid_dl` and print the accuracy (in percent) for the finished epoch"
    evaluator.run(valid_dl)
    print(f"validation epoch: {engine.state.epoch} acc: {100*evaluator.state.metrics['accuracy']}")

# lr schedule: linearly warm-up to lr and then to zero
# The last milestone sits at len(train_dl) * n_epochs iterations, so the schedule
# has reached 0.0 once `n_epochs` epochs are done.
# NOTE(review): the trainer must not be run for more epochs than
# finetuning_config.n_epochs, otherwise the extra epochs presumably train at lr 0.
scheduler = PiecewiseLinear(optimizer, 'lr', [(0, 0.0), (finetuning_config.n_warmup, finetuning_config.lr),
                                              (len(train_dl)*finetuning_config.n_epochs, 0.0)])
trainer.add_event_handler(Events.ITERATION_STARTED, scheduler)


# add progressbar with loss
# The tracked value is what `update` returns, i.e. the loss already divided by
# gradient_acc_steps.
RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")
ProgressBar(persist=True).attach(trainer, metric_names=['loss'])

# save checkpoints and finetuning config
checkpoint_handler = ModelCheckpoint(finetuning_config.log_dir, 'finetuning_checkpoint', 
                                     save_interval=1, require_empty=False)
trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler, {'imdb_model': model})

# inverse of label2int: integer class id -> label name
int2label = {i:label for label,i in label2int.items()}

# save metadata
# NOTE(review): this writes into finetuning_config.log_dir; it assumes the
# directory already exists at this point (presumably created by
# ModelCheckpoint above — confirm for the installed ignite version).
torch.save({
    "config": config,
    "config_ft": finetuning_config,
    "int2label": int2label
}, os.path.join(finetuning_config.log_dir, "metadata.bin"))

In [58]:
print('start training')

# fit the model on train_dl
# Train for exactly the number of epochs the LR schedule was built for
# (finetuning_config.n_epochs); the schedule has decayed to 0.0 by then, so any
# further epoch would run without effective updates.
trainer.run(train_dl, max_epochs=finetuning_config.n_epochs)

# save model weights
# Write to the location the "After fine-tuning" section loads from
# ('../model/model_transformer.pth') and make sure the directory exists.
MODEL_DIR = '../model/'
os.makedirs(MODEL_DIR, exist_ok=True)
torch.save(model.state_dict(), os.path.join(MODEL_DIR, "model_transformer.pth"))

# evaluate the model on each held-out set in turn
for dataset_name, loader in (("IMDB", test_dl), ("Tieto", test_tieto), ("Tomato", test_tomato)):
    evaluator.run(loader)
    print(f"test results {dataset_name}- acc: {100*evaluator.state.metrics['accuracy']:.3f}")

HBox(children=(IntProgress(value=0, max=938), HTML(value='')))

Current run is terminating due to exception: .
Engine run is terminating due to exception: .


KeyboardInterrupt: 

# Comparison between the pretrained model and fine-tuned model

##  Before fine-tuning

In [159]:
# download pre-trained model and config
pretrained_checkpoint_file = cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                         "naacl-2019-tutorial/model_checkpoint.pth")
state_dict_pretrained = torch.load(pretrained_checkpoint_file, map_location='cpu')

training_args_file = cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                 "naacl-2019-tutorial/model_training_args.bin")
config = torch.load(training_args_file)

# init model: Transformer base + classifier head (the head stays untrained here)
model_pretrained = TransformerWithClfHead(config=config, fine_tuning_config=finetuning_config)
model_pretrained = model_pretrained.to(finetuning_config.device)

incompatible_keys_pretrained = model_pretrained.load_state_dict(state_dict_pretrained, strict=False)
print(f"Parameters discarded from the pretrained model: {incompatible_keys_pretrained.unexpected_keys}")
print(f"Parameters added in the model: {incompatible_keys_pretrained.missing_keys}")

Parameters discarded from the pretrained model: ['lm_head.weight']
Parameters added in the model: ['classification_head.weight', 'classification_head.bias']


In [160]:
# Probe the not-yet-fine-tuned model with four hand-written reviews.
for sentence in (
    "I just love how the actors are playing",
    "I really doubt if there is anybody who like this movie",
    "This movie is poorly directed",
    "It looks like a good movie from outside but it is not",
):
    print(predict(model_pretrained, tokenizer, int2label, input=sentence))

{'pos': 0.6025769, 'neg': 0.39742312}
{'neg': 0.5936176, 'pos': 0.4063824}
{'neg': 0.555646, 'pos': 0.44435403}
{'neg': 0.53450847, 'pos': 0.4654915}


##  After fine-tuning

In [161]:
# load fine-tuned model
fine_tuned_weights_file = '../model/model_transformer.pth'
state_dict_fine_tuned = torch.load(fine_tuned_weights_file, map_location='cpu')

training_args_file = cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                 "naacl-2019-tutorial/model_training_args.bin")
config = torch.load(training_args_file)

# init model: Transformer base + classifier head
model_fine_tuned = TransformerWithClfHead(config=config, fine_tuning_config=finetuning_config)
model_fine_tuned = model_fine_tuned.to(finetuning_config.device)

incompatible_keys_fine_tuned = model_fine_tuned.load_state_dict(state_dict_fine_tuned, strict=False)
print(f"Parameters discarded from the pretrained model: {incompatible_keys_fine_tuned.unexpected_keys}")
print(f"Parameters added in the model: {incompatible_keys_fine_tuned.missing_keys}")

Parameters discarded from the pretrained model: []
Parameters added in the model: []


In [162]:
# Probe the fine-tuned model with the same four hand-written reviews.
for sentence in (
    "I just love how the actors are playing",
    "I really doubt if there is anybody who like this movie",
    "This movie is poorly directed",
    "It looks like a good movie from outside but it is not",
):
    print(predict(model_fine_tuned, tokenizer, int2label, input=sentence))

{'pos': 0.9866541, 'neg': 0.013345966}
{'neg': 0.87248033, 'pos': 0.12751968}
{'neg': 0.99834275, 'pos': 0.0016572878}
{'neg': 0.8652021, 'pos': 0.13479783}
