In [1]:
import numpy as np
import pandas as pd
import torch
import pickle
from itertools import chain
from collections import Counter
import torch.nn as nn
import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.utils.class_weight import compute_class_weight
import transformers
from transformers import AdamW
from transformers import DistilBertTokenizer, DistilBertModel
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler, Dataset

In [2]:
# Pick the compute device: use the GPU when CUDA is usable, otherwise the CPU.
from torch import cuda

if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# Tokenizer that matches the 'distilbert-base-uncased' checkpoint
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
# Pretrained DistilBERT encoder that the classifier head is built on
bert = DistilBertModel.from_pretrained('distilbert-base-uncased')

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
class BERT(nn.Module):
    """Sequence classifier: an encoder followed by a two-layer dense head.

    The head is Linear -> ReLU -> Dropout -> Linear -> LogSoftmax and is
    applied to the encoder state at the first sequence position.

    Args:
        bert: Encoder module. It is called as ``bert(sent_id,
            attention_mask=mask)`` and element ``[0]`` of its result is
            indexed as ``[:, 0]``, so that element is assumed to be laid out
            as (batch, sequence, hidden).
        hidden_size: Width of the encoder output fed to the first dense
            layer. Defaults to 768.
        fc_size: Width of the intermediate dense layer. Defaults to 512.
        num_classes: Number of output classes. Defaults to 2.
        dropout: Dropout probability used between the dense layers.
            Defaults to 0.1.

    The defaults reproduce the original fixed architecture, and the
    sub-module attribute names are unchanged, so ``state_dict`` keys stay
    the same.
    """

    def __init__(self, bert, hidden_size=768, fc_size=512, num_classes=2,
                 dropout=0.1):
        super(BERT, self).__init__()

        # Encoder (a DistilBertModel in this notebook)
        self.bert = bert
        ## Additional layers
        # Dropout layer
        self.dropout = nn.Dropout(dropout)
        # ReLU activation function
        self.relu = nn.ReLU()
        # Dense layer 1
        self.fc1 = nn.Linear(hidden_size, fc_size)
        # Dense layer 2 (output layer)
        self.fc2 = nn.Linear(fc_size, num_classes)
        # Log-softmax over the class dimension: the model emits
        # log-probabilities, not probabilities.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
        """Return per-class log-probabilities for a batch of token ids.

        Args:
            sent_id: Token id tensor passed positionally to the encoder.
            mask: Attention mask passed to the encoder as ``attention_mask``.

        Returns:
            Tensor whose dim 1 holds log-probabilities over the classes;
            apply ``torch.exp`` to obtain probabilities.
        """
        # Run the encoder and take the first element of its output.
        encoder_output = self.bert(sent_id, attention_mask=mask)
        hidden_state = encoder_output[0]
        # Keep only the first position of every sequence
        # (presumably the [CLS] token - confirm against the tokenizer).
        first_token_state = hidden_state[:, 0]

        # Dense layer 1 -> ReLU -> dropout
        x = self.fc1(first_token_state)
        x = self.relu(x)
        x = self.dropout(x)
        # Dense layer 2 (output layer) -> log-softmax
        x = self.fc2(x)
        x = self.softmax(x)

        return x

In [5]:
# Wrap the pretrained encoder in the classifier and place it on the chosen device
model = BERT(bert).to(device)

In [12]:
# Load trained weights: try the Colab Google Drive mount first, then the
# local models directory.
model_save_name = 'saved_weights.pt'
candidate_paths = [
    (F"/content/gdrive/My Drive/{model_save_name}", 'Google Success'),
    ("../models/" + model_save_name, 'Local Success'),
]

for path, success_message in candidate_paths:
    try:
        # map_location='cpu' lets a checkpoint saved on a GPU be read on a
        # CPU-only host; load_state_dict then copies the values into the
        # model's own parameters.
        # NOTE: torch.load unpickles the file - only load trusted checkpoints.
        state_dict = torch.load(path, map_location=torch.device('cpu'))
    except FileNotFoundError:
        # This location has no checkpoint; fall through to the next one.
        continue
    # Any other failure (corrupt file, mismatched keys) is a real error and
    # is allowed to propagate instead of being reported as "not found".
    model.load_state_dict(state_dict)
    print(success_message)
    break
else:
    print('No pretrained model found.')

Local Success


In [13]:
# Switch the model to evaluation mode before inference (disables dropout).
# eval() returns the module itself, so the cell displays its structure.
model.eval()

BERT(
  (bert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Linear(in_features=768, 

In [75]:
# Example input: a botanical description of the tree Enterolobium cyclocarpum
ex_text_str = '''
Enterolobium cyclocarpum is one of the largest trees in the dry forest formation, reaching up to 40 m in height and 3 m in diameter, with a huge, spreading crown. Older E. cyclocarpum trees develop small buttresses and produce large roots that run along the surface of the ground for 2-3 m. Sidewalks, roads, or foundations may be cracked or raised by E. cyclocarpum trees growing close by. The bipinnate compound leaves of E. cyclocarpum have 5 opposite leaflets. The small white flowers occur in compact, round heads. Seeds contained in distinctive, thickened, contorted, indehiscent pods that resemble an ear in form; seed 20 x 15 mm, ovate, compressed, dull, reddish-brown, with 100% pleurogram, marked with a yellowish band on each face, punctiform apical hilium concealed or not by whitish funicle; adult trees produce about 2000 pods, each with 10-16 seeds.
'''

In [76]:
# Tokenize
# truncation=True caps the sequence at the tokenizer's maximum model length,
# so a long passage cannot exceed the encoder's 512 position embeddings.
inputs = tokenizer(ex_text_str, return_tensors="pt", truncation=True)

In [77]:
# Predict
# Inference only: skip autograd bookkeeping and move the inputs to the same
# device the model was placed on.
with torch.no_grad():
    outputs = model(inputs['input_ids'].to(device),
                    inputs['attention_mask'].to(device))

In [78]:
# Exponentiate the model's log-probabilities to get class probabilities
outputs.exp()

tensor([[0.0604, 0.9396]], grad_fn=<ExpBackward>)

In [26]:
# Index of the most probable class, as a plain Python int
torch.argmax(torch.exp(outputs), dim=1).item()

0