In [1]:
# Checking GPU availability
# Ask TensorFlow for the GPU device name and stop right away unless it is the first GPU.

import tensorflow as tf

device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


In [2]:
# Installing the Hugging Face PyTorch Interface for Bert

!pip install -q transformers

[K     |████████████████████████████████| 2.9 MB 2.8 MB/s 
[K     |████████████████████████████████| 52 kB 1.5 MB/s 
[K     |████████████████████████████████| 895 kB 27.0 MB/s 
[K     |████████████████████████████████| 636 kB 42.7 MB/s 
[K     |████████████████████████████████| 3.3 MB 38.3 MB/s 
[?25h

In [3]:
# Importing Dependencies
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertConfig
from transformers import AdamW, BertForSequenceClassification, get_linear_schedule_with_warmup

from tqdm import tqdm, trange

import pandas as pd
import io
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline

In [4]:
# Specifying the device for Torch: CUDA when available, otherwise CPU

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

# Only query the GPU name when a GPU exists: get_device_name(0) raises on a
# machine without CUDA, which would defeat the CPU fallback chosen above.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU (no CUDA device found)"

'Tesla K80'

# Insert Sentences

In [5]:
# Insert sentences to be classified using the fine-tuned model here.
sentence = [
    'This item should be thrown-off.',
    'fantastic product',
]

# BERT expects every sentence wrapped in its special markers:
# [CLS] in front and [SEP] at the end.
sent = []
for text in sentence:
    sent.append("[CLS] " + text + " [SEP]")
sent

['[CLS] This item should be thrown-off. [SEP]',
 '[CLS] fantastic product [SEP]']

In [6]:
# Activating the BERT Tokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Split each marked-up sentence into BERT tokens (one token list per sentence)
tokenized_texts = list(map(tokenizer.tokenize, sent))

print("Tokenize the first sentence:")
# The expression below displays the token lists of all sentences, not only the first
tokenized_texts

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Tokenize the first sentence:


  "Passing `gradient_checkpointing` to a config initialization is deprecated and will be removed in v5 "


[['[CLS]', 'this', 'item', 'should', 'be', 'thrown', '-', 'off', '.', '[SEP]'],
 ['[CLS]', 'fantastic', 'product', '[SEP]']]

In [7]:
# Processing the data (as per fine-tuned model standards)

MAX_LEN = 128

# Look up the BERT vocabulary index of every token, sentence by sentence
token_id_lists = list(map(tokenizer.convert_tokens_to_ids, tokenized_texts))

# Bring every sequence to exactly MAX_LEN entries: shorter ones are zero-padded
# at the end, longer ones are cut at the end.
# NOTE: cutting at the end also drops the trailing [SEP] of an over-long sentence.
input_ids = pad_sequences(
    token_id_lists,
    maxlen=MAX_LEN,
    dtype="long",
    padding="post",
    truncating="post",
)
input_ids[0]

array([ 101, 2023, 8875, 2323, 2022, 6908, 1011, 2125, 1012,  102,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0])

In [8]:
# Create attention masks
# One mask row per sentence: 1.0 where the token id is positive (a real token),
# 0.0 where it is 0 (padding).
attention_masks = [
    [float(token_id > 0) for token_id in row]
    for row in input_ids
]

In [9]:
# Inputs: turn the padded token-id array into a torch tensor

t_inputs = torch.tensor(input_ids)
t_inputs[0]

tensor([ 101, 2023, 8875, 2323, 2022, 6908, 1011, 2125, 1012,  102,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0])

In [10]:
# Masks: turn the attention masks (lists of 1.0 / 0.0) into a torch tensor

t_masks = torch.tensor(attention_masks)
t_masks[0]

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])

In [11]:
# Bundle the input-id tensor and the mask tensor into one dataset,
# so that each item is an (input_ids, attention_mask) pair

prediction_data = TensorDataset(t_inputs, t_masks)

In [12]:
# Feeding the dataset to a DataLoader
# The DataLoader defaults are spelled out: one sentence per batch, served in the
# original order so the predictions line up with the entries of `sentence`.

prediction_dataloader = DataLoader(
    prediction_data,
    batch_size=1,
    shuffle=False,
)

In [13]:
# Mounting Google Drive (we need to load the fine-tuned BERT model from the google drive)
# The import lives in this cell because google.colab only exists inside Colab.

from google.colab import drive
drive.mount('/content/gdrive')
# To detach the drive again when finished: drive.flush_and_unmount()

Mounted at /content/gdrive


In [14]:
# Reading the model
# NOTE: torch.load unpickles the file, which can run arbitrary code --
# only load model files that come from a trusted source.
# map_location puts the weights on the device selected earlier, so a model that
# was saved on a GPU can still be loaded when only a CPU is available.
model = torch.load('/content/gdrive/MyDrive/my_model', map_location=device)

# Model is set to eval mode (inference behaviour, e.g. dropout switched off)
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [15]:
# Enabling gradient checkpointing
# NOTE(review): gradient checkpointing is a training-time memory optimisation and
# this notebook only runs inference. Presumably the call is here so that a model
# pickled under an older transformers release gets the `gradient_checkpointing`
# attribute the newer forward pass expects -- confirm before removing it.

model.gradient_checkpointing_enable()

In [16]:
# Tracking variables
# `predictions` collects one class id per sentence. `true_labels` stays empty
# because there are no ground-truth labels at inference time; the name is kept
# so that nothing relying on it breaks.
predictions, true_labels = [], []

# Predict
for batch in prediction_dataloader:
  # Move the batch tensors to the selected device
  batch = tuple(t.to(device) for t in batch)
  # Unpack the inputs from our dataloader
  b_input_ids, b_input_mask = batch
  # Telling the model not to compute or store gradients, saving memory and speeding up prediction
  with torch.no_grad():
    # Forward pass, calculate logit predictions
    outputs = model(b_input_ids, attention_mask=b_input_mask)

  # Move logits to CPU as a numpy array of shape (batch_size, num_labels)
  logits = outputs['logits'].detach().cpu().numpy()

  # Take the argmax over the label axis so every sentence in the batch gets its
  # own prediction (a plain np.argmax would flatten the array and is only right
  # for batch size 1). tolist() stores plain Python ints.
  predictions.extend(np.argmax(logits, axis=1).tolist())

predictions

[0, 1]

In [45]:
# Pair every input sentence with its predicted label and show the pairs as a table
result_rows = list(zip(sentence, predictions))
pd.DataFrame(result_rows, columns=['Input_Sentence', 'Sentiment_Prediction'])

Unnamed: 0,Input_Sentence,Sentiment_Prediction
0,This item should be thrown-off.,0
1,fantastic product,1


`predictions` holds the final output of the model: one predicted class label per input sentence.

Here, `0` indicates a `negative` sentiment and `1` indicates a `positive` sentiment.

-------------