In [1]:
!pip install transformers==4.12.5



In [2]:
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import torch.nn.functional as F
from transformers import BertTokenizer, BertConfig,AdamW, BertForSequenceClassification,get_linear_schedule_with_warmup, AutoModel, AutoTokenizer 
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, DistilBertConfig
from transformers import RobertaTokenizer, RobertaForSequenceClassification, RobertaConfig

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report
from sklearn.metrics import accuracy_score,matthews_corrcoef

#to_preprocessing
import re
import string
import nltk
from nltk.corpus import stopwords

from tqdm import tqdm, trange,tnrange,tqdm_notebook
import random
import os
import io
%matplotlib inline
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
# identify and specify the GPU as the device, later in training loop we will load data into device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
torch.cuda.get_device_name(0)

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if device == torch.device("cuda"):
    torch.cuda.manual_seed_all(SEED)

In [4]:
def decontracted(phrase):
    """
    We first define a function to expand the contracted phrase into normal words
    """
        
    phrase = re.sub(r"wont", "will not", phrase)
    phrase = re.sub(r"wouldnt", "would not", phrase)
    phrase = re.sub(r"shouldnt", "should not", phrase)
    phrase = re.sub(r"couldnt", "could not", phrase)
    phrase = re.sub(r"cudnt", "could not", phrase)
    phrase = re.sub(r"cant", "can not", phrase)
    phrase = re.sub(r"dont", "do not", phrase)
    phrase = re.sub(r"doesnt", "does not", phrase)
    phrase = re.sub(r"didnt", "did not", phrase)
    phrase = re.sub(r"wasnt", "was not", phrase)
    phrase = re.sub(r"werent", "were not", phrase)
    phrase = re.sub(r"havent", "have not", phrase)
    phrase = re.sub(r"hadnt", "had not", phrase)
    phrase = re.sub(r"neednt", "need not", phrase)
    phrase = re.sub(r"isnt", "is not", phrase)
    phrase = re.sub(r"arent", "are not", phrase)
    phrase = re.sub(r"hasnt", "are not", phrase)
    
    phrase = re.sub(r"n\'t", " not", phrase)
    phrase = re.sub(r"\'re", " are", phrase)
    phrase = re.sub(r"\'s", " is", phrase)
    phrase = re.sub(r"\'d", " would", phrase)
    phrase = re.sub(r"\'ll", " will", phrase)
    phrase = re.sub(r"\'t", " not", phrase)
    phrase = re.sub(r"\'ve", " have", phrase)
    phrase = re.sub(r"\'m", " am", phrase)
    return phrase
def clean_text(text):
    '''Make text lowercase, remove text in square brackets,remove links,remove punctuation
    and remove words containing numbers.'''
    text = text.lower()
    text = re.sub('\[.*?\]', '', text)
    text = re.sub('https?://\S+|www\.\S+', '', text)
    text = re.sub('<.*?>+', '', text)
#     text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)
    text = decontracted(text)
    return text

In [5]:
from transformers import BertTokenizer
MAX_LEN = 256
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", lower=True)

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [6]:
from transformers import BertModel
bert = BertModel.from_pretrained("bert-base-uncased").to(device)

Downloading:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [7]:
class BERT_14(torch.nn.Module):

    def __init__(self, bert):
      super(BERT_14, self).__init__()

      self.bert = bert 
      
      # dropout layer
      self.dropout = torch.nn.Dropout(0.1)
      
      # relu activation function
      self.relu =  torch.nn.ReLU()

      # dense layer 1
      self.fc1 = torch.nn.Linear(768,512)
      
      # dense layer 2 (Output layer)
      self.fc2 = torch.nn.Linear(512,6)


    #define the forward pass
    def forward(self, sent_id, mask):

      #pass the inputs to the model  
      output = self.bert(sent_id,attention_mask=mask)
      x = output[0][:, 0, :]
      x = self.fc1(x)

      x = self.relu(x)

      x = self.dropout(x)

      # output layer
      x = self.fc2(x)
      
#       # apply softmax activation
#       x = F.softmax(x, dim = 1)

      return x

In [8]:
model = BERT_14(bert)
model.load_state_dict(torch.load("../input/tucode/bert_base_tucode.bin"))
model.to(device)

BERT_14(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [9]:
def preprocessing(text):
    input_id = tokenizer.encode(text, add_special_tokens=True,padding='longest',truncation=True,return_token_type_ids=False)
    attention = [float(i>0) for i in input_id]
    input_id = torch.tensor([input_id]).to(device)
    attention = torch.tensor([attention]).to(device)
    return input_id, attention

In [10]:
text = clean_text("she is unhappy")
input, attention = preprocessing(text)
result = model(input, attention)
print(result)

tensor([[-1.1967, -2.2808, -1.4818, -2.6949,  7.8944, -3.7332]],
       device='cuda:0', grad_fn=<AddmmBackward>)
