In [None]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/d8/b2/57495b5309f09fa501866e225c84532d1fd89536ea62406b2181933fb418/transformers-4.5.1-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 8.2MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/75/ee/67241dc87f266093c533a2d4d3d69438e57d7a90abb216fa076e7d475d4a/sacremoses-0.0.45-py3-none-any.whl (895kB)
[K     |████████████████████████████████| 901kB 56.2MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/ae/04/5b870f26a858552025a62f1649c20d29d2672c02ff3c3fb4c688ca46467a/tokenizers-0.10.2-cp37-cp37m-manylinux2010_x86_64.whl (3.3MB)
[K     |████████████████████████████████| 3.3MB 55.2MB/s 
Installing collected packages: sacremoses, tokenizers, transformers
Successfully installed sacremoses-0.0.45 tokenizers-0.10.2 transformers-4.5.1


In [None]:
!unzip model_save.zip

Archive:  model_save.zip
   creating: model_save/
  inflating: model_save/config.json  
  inflating: model_save/pytorch_model.bin  
  inflating: model_save/special_tokens_map.json  
  inflating: model_save/tokenizer_config.json  
  inflating: model_save/vocab.txt    


In [None]:
import torch
import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification


class BERT:
    """
    Inference helper around a fine-tuned BertForSequenceClassification.

    Each (model, tokenizer) pair is loaded once per model path and kept on
    the instance, so repeated predict() calls do not re-read the weights.
    """

    def set_device(self):
        """
        Sets device as 'GPU' or 'CPU'

        Returns torch.device('cuda') when CUDA is available, otherwise
        torch.device('cpu').
        """
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def load_model(self, path):
        """
        Loads model and tokenizer from a saved model directory

        path: str
            directory (or identifier) understood by from_pretrained

        Returns a (model, tokenizer) tuple.
        """
        # load a trained model and vocabulary that you have fine-tuned
        model = BertForSequenceClassification.from_pretrained(path)
        tokenizer = BertTokenizer.from_pretrained(path)

        return model, tokenizer

    def _get_model(self, model_path, device):
        """
        Returns the cached (model, tokenizer) for model_path, loading it via
        load_model() on first use and moving the model to device.
        """
        # created lazily so instances built without __init__ still work
        cache = getattr(self, '_loaded', None)
        if cache is None:
            cache = self._loaded = {}

        if model_path not in cache:
            cache[model_path] = self.load_model(model_path)

        model, tokenizer = cache[model_path]

        model.to(device)
        # make sure dropout etc. are disabled regardless of how the model
        # was produced by load_model()
        model.eval()

        return model, tokenizer

    def predict(self, text, model_path, max_length=64):
        """
        Uses fine tuned BERTforSequenceClassification to make a
        prediction

        text: str or list of str
            a single text, or a batch of texts
        model_path: str
        max_length: int
            sequences longer than this many tokens are truncated (default 64)

        Returns the predicted label index: a numpy integer scalar when text
        is a str, or a 1-D numpy array with one label per text when text is
        a list.
        """
        device = self.set_device()

        model, tokenizer = self._get_model(model_path, device)

        # prepare our text into tokenized sequence
        inputs = tokenizer(text, padding=True, truncation=True,
                           max_length=max_length,
                           return_tensors='pt').to(device)

        # perform inference on our model; no gradients are needed here, so
        # skip autograd bookkeeping
        with torch.no_grad():
            outputs = model(**inputs)

        # get output probabilities through softmax over the label dimension
        probs = outputs[0].softmax(1)

        # argmax per row: a flat argmax over a batch would return an index
        # into the flattened (batch * labels) matrix, not a label
        labels = np.argmax(probs.detach().cpu().numpy(), axis=1)

        if isinstance(text, str):
            return labels[0]
        return labels


In [None]:
# Directory produced by extracting model_save.zip into the working directory;
# kept relative so the notebook does not depend on an absolute /content path.
MODEL_DIR = 'model_save'

bert = BERT()

test = "the police shot and killed a 15 year old Black girl in Columbus as the verdict was being read. She reportedly called them for help. this isn't a giant step anywhere"

# prints the predicted label index for the sample text
print(bert.predict(test, MODEL_DIR))

1
