In [20]:
pip install mlflow boto3 -q -U

Note: you may need to restart the kernel to use updated packages.


In [17]:
import torch
import torch.nn as nn
from transformers import BertTokenizer, BertModel

import mlflow

In [18]:
class BERTClassifier(nn.Module):
    """Classifier made of a pretrained BERT encoder, dropout, and a linear head.

    Args:
        bert_model_name: identifier handed to ``BertModel.from_pretrained``.
        num_classes: number of logits produced by the linear head.
    """

    def __init__(self, bert_model_name, num_classes):
        super().__init__()
        # The attribute names below (bert / dropout / fc) become the prefixes of the
        # state_dict keys, so they must stay as they are for saved weights to load.
        encoder = BertModel.from_pretrained(bert_model_name)
        self.bert = encoder
        self.dropout = nn.Dropout(p=0.1)
        self.fc = nn.Linear(
            in_features=encoder.config.hidden_size,
            out_features=num_classes,
        )

    def forward(self, input_ids, attention_mask):
        """Return the raw class logits for the given token ids and attention mask.

        The encoder's ``pooler_output`` is passed through dropout and then the
        linear head; no softmax is applied here.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        return self.fc(self.dropout(encoded.pooler_output))


In [19]:
# Rebuild the classifier architecture, then restore its saved weights on the CPU.
model_path = '../models/bert_classifier.pth'
model = BERTClassifier('bert-base-uncased', 2)  # second argument is num_classes
# weights_only=True limits unpickling to tensors and plain containers, so a tampered
# checkpoint cannot run arbitrary code; a state_dict needs nothing beyond that.
model.load_state_dict(
    torch.load(model_path, map_location=torch.device('cpu'), weights_only=True)
)
# Switch to inference mode; as the last expression this also displays the module repr.
model.eval()

BERTClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_af

In [11]:
# Load the tokenizer for the same checkpoint name ('bert-base-uncased') that the
# classifier's encoder is built from, so token ids match the encoder's vocabulary.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

In [12]:
import pickle

# Persist the whole in-memory `model` object (architecture + weights) with pickle.
# pickle stores classes by reference, so loading this file later requires the
# BERTClassifier class to be defined/importable under the same name.
with open("../models/bert_classifier_ph.pkl", "wb") as file:  # "wb": binary write; `file` is just the handle name
    pickle.dump(model, file)  # serialise the object into the open file as bytes


In [13]:
# Persist the `tokenizer` object next to the pickled model.
with open("../models/tokenizer.pkl", "wb") as file:  # "wb": binary write; `file` is just the handle name
    pickle.dump(tokenizer, file)  # serialise the object into the open file as bytes


In [14]:
# Read the pickled classifier back into `model_` (same path the model was dumped to;
# presumably a round-trip check — confirm intent).
# NOTE(review): pickle.load can execute arbitrary code during deserialisation —
# only load pickle files that were produced locally or come from a trusted source.
with open('../models/bert_classifier_ph.pkl', 'rb') as f:
    model_ = pickle.load(f)


In [None]:
def predict_hotness(text, model, tokenizer, device, max_length=128):
    """Label `text` as "Hot" or "Not Hot" using `model`.

    Args:
        text: input passed straight to `tokenizer`.
        model: callable taking `input_ids` and `attention_mask` keyword arguments and
            returning per-class scores; it is switched to eval mode by this call.
        tokenizer: callable producing 'input_ids' and 'attention_mask' tensors.
        device: device the encoded tensors are moved to. The model itself is not
            moved here, so it is assumed to already live on `device`.
        max_length: length the input is padded/truncated to.

    Returns:
        "Hot" when the highest-scoring class index is 1, otherwise "Not Hot".
    """
    model.eval()

    batch = tokenizer(
        text,
        return_tensors='pt',
        max_length=max_length,
        padding='max_length',
        truncation=True,
    )
    ids = batch['input_ids'].to(device)
    mask = batch['attention_mask'].to(device)

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        scores = model(input_ids=ids, attention_mask=mask)
        best_class = torch.max(scores, dim=1).indices

    if best_class.item() == 1:
        return "Hot"
    return "Not Hot"
