# Serverless BERT with HuggingFace and AWS Lambda

## Basic imports

In [2]:
# plotly standard imports
import plotly.graph_objs as go
import chart_studio.plotly as py

# Cufflinks wrapper on plotly
import cufflinks

# Data science imports
import pandas as pd
import numpy as np

# Options for pandas: raise the cap on how many columns are shown when a frame is displayed
pd.options.display.max_columns = 30

# Display all cell outputs: every bare expression in a cell is echoed, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# Put cufflinks and plotly into offline/notebook mode.
# NOTE(review): connected=True presumably loads plotly.js from a CDN rather than
# embedding it in the notebook - confirm against the plotly/cufflinks docs.
# NOTE(review): `iplot` is imported but not called in the cells visible here.
from plotly.offline import iplot, init_notebook_mode
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

# Set global theme
# NOTE(review): set_config_file presumably persists these settings to the user's
# cufflinks config file (a side effect outside this notebook) - confirm.
cufflinks.set_config_file(world_readable=True, theme='pearl')

## Params

In [13]:
# Local working directories:
#   cache_dir      - passed as `cache_dir` to every from_pretrained() call
#   pretrained_dir - directory the tokenizer and model are saved into
cache_dir, pretrained_dir = "./models", "./trained_model"

# Base name of the packed archive; pack_model() appends ".tar.gz"
model_pack_name = "squad-distilbert"

# S3 destination: bucket name and object key
s3_bucket, s3_filename = (
    "neural-networks-model-example",
    "squad-distilbert/en.tar.gz",
)

## Prepare model

In [5]:
from transformers import DistilBertTokenizer, DistilBertForQuestionAnswering
import torch


class QuestionAnsweringModel:
    """Extractive question answering with a SQuAD-distilled DistilBERT.

    The constructor loads the ``distilbert-base-uncased`` tokenizer and the
    ``distilbert-base-uncased-distilled-squad`` question-answering model.

    Note:
        Both ``from_pretrained`` calls read the module-level ``cache_dir``
        variable, so it must be defined before the class is instantiated.
    """

    def __init__(self):
        # return_token_type_ids is forwarded to the tokenizer as a keyword
        # argument; encode() below only consumes input_ids and attention_mask.
        self.tokenizer = DistilBertTokenizer.from_pretrained(
            'distilbert-base-uncased',
            return_token_type_ids=True,
            cache_dir=cache_dir,
        )
        self.model = DistilBertForQuestionAnswering.from_pretrained(
            'distilbert-base-uncased-distilled-squad',
            cache_dir=cache_dir,
        )

    def encode(self, question, context):
        """Tokenize a (question, context) pair.

        Returns:
            tuple: ``(input_ids, attention_mask)`` exactly as produced by the
            tokenizer's ``encode_plus``.
        """
        encoded = self.tokenizer.encode_plus(question, context)
        return encoded["input_ids"], encoded["attention_mask"]

    def decode(self, token):
        """Turn a sequence of token ids back into a string.

        Special tokens are skipped. The text is rebuilt from tokens, so
        spacing may differ from the source text (the notebook output
        "83 . 1" is an example).
        """
        answer_tokens = self.tokenizer.convert_ids_to_tokens(token, skip_special_tokens=True)
        return self.tokenizer.convert_tokens_to_string(answer_tokens)

    def predict(self, question, context):
        """Return the answer span the model selects for ``question`` in ``context``.

        The span runs from the arg-max start position to the arg-max end
        position (inclusive). If the predicted end precedes the predicted
        start, the slice is empty and an empty answer is decoded.

        Returns:
            str: the decoded answer text.
        """
        input_ids, attention_mask = self.encode(question, context)

        # Inference only: disable autograd so no graph/activations are kept.
        with torch.no_grad():
            outputs = self.model(
                torch.tensor([input_ids]),
                attention_mask=torch.tensor([attention_mask]),
            )

        # Read the logits by name instead of unpacking `.values()`, which only
        # works while the output holds exactly two entries.
        # The batch has a single row, so the flat argmax is the token position.
        start_index = int(torch.argmax(outputs.start_logits))
        end_index = int(torch.argmax(outputs.end_logits))

        ans_tokens = input_ids[start_index:end_index + 1]
        return self.decode(ans_tokens)

# Build the shared model instance used by the cells below; the constructor
# loads the tokenizer and model via from_pretrained (using cache_dir).
model = QuestionAnsweringModel()

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/451 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/265M [00:00<?, ?B/s]

## Check model

In [7]:
# Sample passage about BERT used as the context the answers are extracted from.
context = """We introduce a new language representation model called BERT, which stands for
Bidirectional Encoder Representations from Transformers. Unlike recent language
representation models (Peters et al., 2018a; Radford et al., 2018), BERT is
designed to pretrain deep bidirectional representations from unlabeled text by
jointly conditioning on both left and right context in all layers. As a result,
the pre-trained BERT model can be finetuned with just one additional output
layer to create state-of-the-art models for a wide range of tasks, such as
question answering and language inference, without substantial taskspecific
architecture modifications. BERT is conceptually simple and empirically
powerful. It obtains new state-of-the-art results on eleven natural language
processing tasks, including pushing the GLUE score to 80.5% (7.7% point absolute
improvement), MultiNLI accuracy to 86.7% (4.6% absolute improvement), SQuAD v1.1
question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD
v2.0 Test F1 to 83.1 (5.1 point absolute improvement)."""

# Two questions whose answers appear verbatim in the passage above.
questions = ["What is BERTs best score on Squadv2 ?", "What does the 'B' in BERT stand for?"]

# Smoke test: run each question through the model and print the extracted answer.
for question in questions:
    answer = model.predict(question, context)
    print('Question:', question, '\nAnswer:', answer, '\n')

Question: What is BERTs best score on Squadv2 ? 
Answer: 83 . 1 

Question: What does the 'B' in BERT stand for? 
Answer: bidirectional encoder representations from transformers 



In [15]:
# Echo the raw string; its repr shows the embedded "\n" line breaks.
context

'We introduce a new language representation model called BERT, which stands for\nBidirectional Encoder Representations from Transformers. Unlike recent language\nrepresentation models (Peters et al., 2018a; Radford et al., 2018), BERT is\ndesigned to pretrain deep bidirectional representations from unlabeled text by\njointly conditioning on both left and right context in all layers. As a result,\nthe pre-trained BERT model can be finetuned with just one additional output\nlayer to create state-of-the-art models for a wide range of tasks, such as\nquestion answering and language inference, without substantial taskspecific\narchitecture modifications. BERT is conceptually simple and empirically\npowerful. It obtains new state-of-the-art results on eleven natural language\nprocessing tasks, including pushing the GLUE score to 80.5% (7.7% point absolute\nimprovement), MultiNLI accuracy to 86.7% (4.6% absolute improvement), SQuAD v1.1\nquestion answering Test F1 to 93.2 (1.5 point absolute 

## Save and Pack model

In [9]:
# Load the tokenizer (via cache_dir) and write its files into pretrained_dir.
(
    DistilBertTokenizer
    .from_pretrained('distilbert-base-uncased', return_token_type_ids=True, cache_dir=cache_dir)
    .save_pretrained(pretrained_dir)
)

# Load the question-answering model (via cache_dir) and save it to the same directory.
(
    DistilBertForQuestionAnswering
    .from_pretrained('distilbert-base-uncased-distilled-squad', cache_dir=cache_dir)
    .save_pretrained(pretrained_dir)
)

('./trained_model/tokenizer_config.json',
 './trained_model/special_tokens_map.json',
 './trained_model/vocab.txt',
 './trained_model/added_tokens.json')

In [12]:
import os
import tarfile

def pack_model(model_path='',file_name=''):
    """Pack the files directly inside ``model_path`` into ``<file_name>.tar.gz``.

    Only top-level entries that are not directories are archived;
    sub-directories are not descended into. Each file is added under the
    name ``<model_path>/<file>``, so the archive keeps the directory prefix.

    Args:
        model_path: Directory containing the files to pack.
        file_name: Archive path without the ``.tar.gz`` suffix. A relative
            path is resolved against the current working directory.

    Returns:
        str: Absolute path of the archive that was written.

    Raises:
        OSError: If ``model_path`` cannot be listed (for example
            ``FileNotFoundError`` when it does not exist).
    """
    archive_name = file_name + '.tar.gz'

    # List before opening the archive, so an archive created inside
    # model_path is never added to itself. Sorted for a stable member order.
    top_level_files = [
        name for name in sorted(os.listdir(model_path))
        if not os.path.isdir(os.path.join(model_path, name))
    ]

    with tarfile.open(archive_name, 'w:gz') as archive:
        for name in top_level_files:
            archive.add(f'{model_path}/{name}')

    # abspath of the file actually written: also correct when file_name is
    # already absolute (prefixing os.getcwd() would build a wrong path).
    return os.path.abspath(archive_name)

# Pack the saved files; model_filename (the archive path) is reused by the upload cell.
model_filename = pack_model(pretrained_dir, model_pack_name)
print('model packed to', model_filename)

model packed to /app/squad-distilbert.tar.gz


## Upload model

In [16]:
import boto3


def upload_model(model_path='', s3_bucket='', s3_filename='', aws_profile='default'):
    """Upload a local file to S3 using a named AWS profile.

    Args:
        model_path: Path of the local file to upload.
        s3_bucket: Name of the destination bucket.
        s3_filename: Object key to store the file under.
        aws_profile: Profile name used to create the boto3 session.

    Returns:
        Whatever the S3 client's ``upload_file`` call returns.
    """
    session = boto3.session.Session(profile_name=aws_profile)
    s3_client = session.client('s3')
    upload_result = s3_client.upload_file(model_path, s3_bucket, s3_filename)
    return upload_result
    
# Upload the packed archive to the configured bucket/key with the 'default' AWS profile.
upload_model(model_filename, s3_bucket, s3_filename)