# Prediction with BERT Models

#### Import Data

1. Model import

In [49]:
import torch
import random
import numpy as np
import pandas as pd

# Seed every random number generator used in this notebook from one constant
# so that a fresh "Restart & Run All" produces the same results.
SEED = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)
# Restrict cuDNN to deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [50]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Single source of truth for the checkpoint, so the tokenizer and the
# classifier can never be loaded from two different names by accident.
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

In [51]:
# String form of the tokenizer's special tokens.
init_token, eos_token, pad_token, unk_token = (
    tokenizer.cls_token,
    tokenizer.sep_token,
    tokenizer.pad_token,
    tokenizer.unk_token,
)
# Vocabulary ids of the same special tokens, in the same order.
init_token_idx, eos_token_idx, pad_token_idx, unk_token_idx = (
    tokenizer.cls_token_id,
    tokenizer.sep_token_id,
    tokenizer.pad_token_id,
    tokenizer.unk_token_id,
)

In [52]:
# Longest sequence (special tokens included) accepted by the checkpoint that is
# actually loaded. It is derived from the loaded tokenizer and model rather than
# from a hard-coded 'bert-base-uncased' table entry, which names a different
# checkpoint and relies on the deprecated `max_model_input_sizes` attribute.
# `min` guards against tokenizers that report a very large placeholder value.
max_input_length = min(tokenizer.model_max_length, model.config.max_position_embeddings)

def tokenize_and_cut(sentence):
    """Tokenize ``sentence`` and cap the result at ``max_input_length - 2`` tokens.

    Args:
        sentence: Text to split into tokens with the notebook's ``tokenizer``.

    Returns:
        The list of tokens returned by ``tokenizer.tokenize``, truncated to at
        most ``max_input_length - 2`` entries.

    NOTE(review): the two reserved positions are presumably for the [CLS] and
    [SEP] special tokens -- confirm. This helper is not called by any cell
    visible in this notebook.
    """
    tokens = tokenizer.tokenize(sentence)
    return tokens[:max_input_length - 2]

In [130]:
def predict_sentiment(sentence):
    """Return the predicted class index for a single sentence.

    Args:
        sentence: Text to classify.

    Returns:
        int: Index of the largest logit produced by ``model`` for ``sentence``.
        NOTE(review): for this checkpoint the index presumably maps to a
        star rating (0 = lowest) -- confirm against the model card.
    """
    model.eval()  # inference mode (e.g. dropout is disabled)
    # Truncate to the length the model supports so that an over-long sentence
    # cannot run past the position embeddings and crash the forward pass.
    inputs = tokenizer(
        sentence,
        add_special_tokens=True,
        truncation=True,
        max_length=model.config.max_position_embeddings,
        return_tensors='pt',
    )
    # Prediction needs no gradients; skipping autograd saves memory and time.
    with torch.no_grad():
        logits = model(**inputs)[0]
    prediction = logits.argmax().item()
    return prediction

### Prediction

In [131]:
# Start from an empty list of predictions for the original sentences.
original_predictions = list()

In [132]:
# Build the list in a single expression so that re-running this cell replaces
# the predictions instead of appending duplicates to the existing list.
# NOTE(review): `original_sentences` is not defined in any visible cell --
# confirm it is loaded earlier so the notebook survives Restart & Run All.
original_predictions = [predict_sentiment(sentence) for sentence in original_sentences]

In [133]:
# Start from an empty list of predictions for the modified sentences.
modified_predictions = list()

In [134]:
# Build the list in a single expression so that re-running this cell replaces
# the predictions instead of appending duplicates to the existing list.
# NOTE(review): `modified_sentences` is not defined in any visible cell --
# confirm it is loaded earlier so the notebook survives Restart & Run All.
modified_predictions = [predict_sentiment(sentence) for sentence in modified_sentences]

In [135]:
# Column names, listed in the same order as the sequences zipped below.
ds_columns = ['original_sentences', 'original_predictions', 'modified_sentences', 'modified_predictions']
# One row per sentence pair; zip stops at the shortest of the four inputs.
sentence_rows = list(
    zip(original_sentences, original_predictions, modified_sentences, modified_predictions)
)
ds = pd.DataFrame(sentence_rows, columns=ds_columns)

In [147]:
# Preview the first rows of the combined sentence/prediction table.
ds.head()

Unnamed: 0,original_sentences,original_predictions,modified_sentences,modified_predictions
0,I visited this b&b during a short trip to ride...,4,I vi5it3d this b&b during a short trip to ride...,4
1,"The owners were really lovely people, the room...",4,"The owners w3r3 really lovely people, the room...",4
2,It is in a really good location for riding or ...,3,It is in a really good location for riding or ...,3
3,I cannot recommend this place enough!.,0,I cannot recommend this place 3n0ugh!.,0
4,3 friends and I visited 't Materke in April 20...,4,3 friends and I visited 't Materke in April 20...,4


In [138]:
# Summarize the column dtypes and non-null counts of the prediction table.
ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 436 entries, 0 to 435
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   original_sentences    436 non-null    object
 1   original_predictions  436 non-null    int64 
 2   modified_sentences    436 non-null    object
 3   modified_predictions  436 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 13.8+ KB


In [155]:
# Keep only the rows where the prediction for the modified sentence differs
# from the prediction for the original sentence.
prediction_changed = ds['original_predictions'].ne(ds['modified_predictions'])
advds = ds[prediction_changed]

In [156]:
# Display the rows whose prediction differs between the original and modified sentence.
advds

Unnamed: 0,original_sentences,original_predictions,modified_sentences,modified_predictions
19,"It says it is a hotel, but actually nothing mo...",1,"It 54y5 it is a hotel, but actually nothing mo...",2
26,I went to this hotel because I could not find ...,2,I w3nt to this hotel because I could not find ...,4
30,"Rooms are simple, bathrooms adequate.",2,"Rooms are simple, bathrooms 4d3qu4t3.",3
33,Just outside but close to city center & has an...,3,Just 0ut5id3 but close to city center & has an...,2
39,Situated right across the castle with a fabulo...,4,Situated right across the castle with a f4bu10...,3
...,...,...,...,...
396,"All rooms featured air conditioning, in-room p...",2,"All rooms featured air conditioning, in-room p...",3
402,A small chair and/or small table is featured i...,2,A small chair and/or small table is featured i...,3
405,"Scrambled eggs were offered, also.",0,"Scrambled 3gg5 were offered, also.",1
425,The hotel features a super-tiny pool in its ba...,3,The hotel f34tur35 a super-tiny pool in its ba...,4
