In [7]:
from torch import cuda
import torch
from torch.utils.data import Dataset
from transformers import DistilBertModel
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loading the model structure

In [3]:
class finish_layers(torch.nn.Module):
    def __init__(self):
        super(finish_layers, self).__init__()
        self.l1 = DistilBertModel.from_pretrained("distilbert-base-uncased") # Call's distilbert model
        self.pre_classifier = torch.nn.Linear(768, 768) # add aditional layers for prob outputs
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, 2)

    def forward(self, input_ids, attention_mask):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        output = self.classifier(pooler)
        return output

 # Load the model

In [9]:
# Load the model using the saved parameters 
input_model_file = './models/model.pt'
model = finish_layers()
model.load_state_dict(torch.load(input_model_file))
model.eval()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


finish_layers(
  (l1): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (lin1): Linear(in_f

# Load the vocabulary

In [10]:
input_vocab_file = './models/vocab_distilbert_twitter.bin'
tokenizer = DistilBertTokenizer.from_pretrained(input_vocab_file)


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'BertTokenizer'. 
The class this function is called from is 'DistilBertTokenizer'.


# Inference

In [20]:
# A function to adjust the input and make the inference
def inference(model, phrase,tokenizer):
    model.eval()
    title = str(phrase)
    title = " ".join(title.split())
    inputs = tokenizer.encode_plus(
        title,
        None,
        add_special_tokens=True,
        max_length=160,
        pad_to_max_length=True,
        return_token_type_ids=True,
        truncation=True
        )
    ids = inputs['input_ids']
    mask = inputs['attention_mask']
    value_tensor={'ids': torch.tensor(ids, dtype=torch.long),
                  'mask': torch.tensor(mask, dtype=torch.long)}
    with torch.no_grad():
            ids = value_tensor['ids'].to(dtype = torch.long)
            mask = value_tensor['mask'].to(dtype = torch.long)
            outputs = model(ids.unsqueeze(0), mask.unsqueeze(0))
    return outputs



In [35]:
phrase = "I didn't like the food of this restaurant"
output = inference(model,phrase,tokenizer)
# Apply softmax to get probabilities
probabilities = torch.nn.functional.softmax(output, dim=1)
# Get the predicted class (index with maximum probability)
predicted_class = torch.argmax(probabilities, dim=1).item()

if predicted_class == 1:
    print("Your phrase is positive!!")
elif predicted_class == 0:
     print("Your phrase is negative!!")
else: 
    print("error")

Your phrase is negative!!
