### Import Libraries

In [41]:
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import re
import copy
import numpy as np
import transformers
from tqdm import tqdm
import torch.nn as nn
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
%matplotlib inline
sns.set_style('whitegrid')

import warnings
warnings.filterwarnings("ignore")

In [42]:
# Pick the compute device: the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### Parameters

In [43]:
# Token sequence length handed to the tokenizer as max_length in get_encoding;
# tweets are padded or truncated to this size.
MAX_LENGTH = 128
# Directory and file name of the saved model. They are joined by plain string
# concatenation to form PATH, so models_path must keep its trailing slash.
models_path = './models/'
model_name = 'Bert_Model_HS.pth'

### Preprocessing

In [44]:
def preprocess(tweet):
    """Normalize a raw tweet before it is tokenized.

    Steps, applied in order:
      1. collapse every run of whitespace into a single space,
      2. remove URLs,
      3. remove @mentions,
      4. replace every run of characters other than ASCII letters, digits
         and apostrophes with a single space,
      5. strip leading and trailing spaces.

    Args:
        tweet (str): Raw tweet text.

    Returns:
        str: The cleaned tweet; may be empty if nothing survives the cleanup.
    """
    # Raw strings hand the backslash escapes (\s, \w, \( ...) to the regex
    # engine untouched; in normal string literals they are invalid Python
    # escape sequences and trigger a warning on recent interpreters.
    pattern_for_space = r'\s+'
    pattern_giant_url = (r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
                         r'[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
    pattern_mention = r'@[\w\-]+'

    tweet = re.sub(pattern_for_space, ' ', tweet)  # collapse whitespace runs
    tweet = re.sub(pattern_giant_url, '', tweet)  # drop URLs
    tweet = re.sub(pattern_mention, '', tweet)  # drop @mentions
    # Everything except letters, digits and apostrophes becomes one space;
    # this also squeezes the double spaces left behind by the removals above.
    tweet = re.sub(r"[^a-zA-Z0-9']+", " ", tweet)

    return tweet.strip()

### Model Definition

In [5]:
class BERT_Model(nn.Module):
    """BERT encoder followed by a three-layer classification head.

    The head maps the encoder's 768-dimensional pooled output through
    768 -> 500 -> 128 -> 2 linear layers, applying dropout (p=0.3) before the
    first layer and dropout + ReLU after the first and second layers. The
    final layer returns raw scores for the two classes (no softmax).

    Attribute names (bert, out1, out2, out3, drop_out, relu) are kept as-is so
    that previously saved instances of this class still load.
    """

    def __init__(self):
        super().__init__()
        # Use the 12-layer BERT model, with an uncased vocab.
        self.bert = transformers.BertModel.from_pretrained('bert-base-uncased')
        self.out1 = nn.Linear(768, 500)
        self.out2 = nn.Linear(500, 128)
        self.out3 = nn.Linear(128, 2)
        self.drop_out = nn.Dropout(0.3)
        self.relu = nn.ReLU()

    def forward(self, ids, mask, token_type_ids):
        """Run the encoder and the classification head.

        Args:
            ids: Token id tensor passed to BERT as its first positional input.
            mask: Attention mask passed as `attention_mask`.
            token_type_ids: Segment ids passed as `token_type_ids` (may be None).

        Returns:
            Tensor of unnormalized class scores whose last dimension is 2.
        """
        outputs = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids)
        # Take the second output (the pooled representation) by index rather
        # than by tuple unpacking: indexing works both when BertModel returns
        # a plain tuple and when it returns a dict-like ModelOutput, whereas
        # unpacking a ModelOutput would yield its key names instead of tensors.
        pooled_output = outputs[1]

        x = self.drop_out(pooled_output)
        x = self.relu(self.drop_out(self.out1(x)))
        x = self.relu(self.drop_out(self.out2(x)))
        return self.out3(x)

#### Defining the path to load the model from

In [6]:
# Full location of the saved model file: directory prefix immediately followed by the file name.
PATH = f"{models_path}{model_name}"

In [7]:
def load_model(model_path):
    """Load a fully pickled model object from disk onto the active device.

    The file is expected to hold a whole model object (not just a state
    dict), so the model's class must be defined before this is called.

    Args:
        model_path: Path of the saved model file, passed straight to torch.load.

    Returns:
        The unpickled model, with its tensors mapped to the module-level `device`.
    """
    import inspect  # local import: only needed for the version check below

    load_kwargs = {"map_location": device}
    # Newer torch releases default `weights_only` to True, which refuses to
    # unpickle a whole nn.Module; older releases do not accept the keyword at
    # all. Pass it explicitly only where torch.load declares it.
    if "weights_only" in inspect.signature(torch.load).parameters:
        load_kwargs["weights_only"] = False

    # SECURITY: full unpickling can execute arbitrary code embedded in the
    # file. Only load model files that come from a trusted source.
    model = torch.load(model_path, **load_kwargs)
    return model

### Bert Tokenizer

In [8]:
# NOTE(review): by notebook convention this import belongs in the import cell at the top.
from transformers import BertTokenizer
# Load the BERT tokenizer for the same 'bert-base-uncased' checkpoint that
# BERT_Model loads, so token ids match the encoder's vocabulary.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

In [9]:
def get_encoding(tweet):
    """Clean a raw tweet and tokenize it into fixed-length BERT inputs.

    Args:
        tweet (str): Raw tweet text; it is passed through preprocess() first.

    Returns:
        The tokenizer's encoding for the cleaned tweet, padded or truncated to
        MAX_LENGTH tokens. get_prediction reads its 'input_ids',
        'token_type_ids' and 'attention_mask' entries.
    """
    tweet = preprocess(tweet)
    encoding = tokenizer.encode_plus(
        tweet,  # text to encode
        None,  # text_pair: no second sequence
        add_special_tokens=True,  # add '[CLS]' and '[SEP]'
        max_length=MAX_LENGTH,  # target length for padding and truncation
        # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`;
        # it belongs to the same API generation as `truncation=True` below.
        padding='max_length',  # pad sentences shorter than max_length
        truncation=True,  # cut sentences longer than max_length
    )
    return encoding

#### Class IDs

In [17]:
# Label for each predicted class index; a label's position in the tuple is its class id
# (presumably N_HS = not hate speech, HS = hate speech — confirm against the training data).
class_dict = dict(enumerate(('N_HS', 'HS')))

# Main 

In [11]:
# Load the saved classifier from PATH; load_model maps it onto `device`.
model = load_model(PATH)
# Switch to evaluation mode so the Dropout layers are inactive during inference.
# As the last expression in the cell, this also displays the module structure.
model.eval()

BERT_Model(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
 

In [12]:
def get_prediction(encoding,model):
    """Predict the class id for one encoded tweet.

    Args:
        encoding: Tokenizer output for a single tweet; must provide
            'input_ids', 'token_type_ids' and 'attention_mask'.
        model: Callable model accepting `ids`, `mask` and `token_type_ids`
            keyword arguments and returning per-class scores with the class
            dimension at index 1.

    Returns:
        The index of the highest-scoring class as a NumPy integer scalar
        (usable directly as a key into class_dict).
    """
    # Add a leading batch dimension of size 1 and move every input to `device`.
    ids = torch.tensor(encoding['input_ids']).unsqueeze(0).to(device)
    mask = torch.tensor(encoding['attention_mask']).unsqueeze(0).to(device)
    # Pass the tokenizer's segment ids explicitly instead of building the
    # tensor and then discarding it in favour of None.
    token_type_ids = torch.tensor(encoding['token_type_ids']).unsqueeze(0).to(device)

    # Inference only: no_grad avoids building an autograd graph.
    with torch.no_grad():
        prediction = model(ids=ids, mask=mask, token_type_ids=token_type_ids)
        _, predicted = torch.max(prediction, 1)
    return predicted.cpu().numpy()[0]

### Example 1

In [31]:
# Example tweet to classify.
text = "Oh ffs NO MORE MIGRANTS GO HOME"

In [32]:
# Clean the tweet and tokenize it into fixed-length model inputs.
encoding = get_encoding(text)

In [33]:
# Predict the class id and display its label from class_dict.
class_dict[get_prediction(encoding, model)]

'HS'

### Example 2

In [27]:
# Longer example tweet that ends with the #SendThemBack hashtag.
text = "This is a couple who just moved on in to Australia and thought if we behave ourselves and involve ourselves in the Community, and have a child here then we will be allowed to stay.Sorry - if that got you in we would be swamped with MILLIONS just like you.#SendThemBack."

In [28]:
# Clean the tweet and tokenize it into fixed-length model inputs.
encoding = get_encoding(text)

In [30]:
# Predict the class id and display its label from class_dict.
class_dict[get_prediction(encoding, model)]

'HS'

### Example 3

In [34]:
# Example tweet that uses the same #SendThemBack hashtag in a different context (kids returning to school).
text = "Im literally counting down the days until the kids go back to school. #SendThemBack"

In [35]:
# Clean the tweet and tokenize it into fixed-length model inputs.
encoding = get_encoding(text)

In [36]:
# Predict the class id and display its label from class_dict.
class_dict[get_prediction(encoding, model)]

'N_HS'