In [None]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch

In [None]:
# Both the tokenizer and the question-answering model are loaded from the same
# pretrained checkpoint, so the identifier is spelled out only once.
_QA_CHECKPOINT = "bert-large-uncased-whole-word-masking-finetuned-squad"

tokenizer = AutoTokenizer.from_pretrained(_QA_CHECKPOINT)
model = AutoModelForQuestionAnswering.from_pretrained(
    _QA_CHECKPOINT,
    return_dict=True,  # makes the model output expose .start_logits / .end_logits
)

def get_answer(question, text, tokenizer, model):
    """Extract the answer to ``question`` from ``text`` as a span of tokens.

    The question/text pair is encoded with ``tokenizer``, passed through
    ``model``, and the answer span is taken from the highest-scoring start
    position up to and including the highest-scoring end position.

    Args:
        question: The question string.
        text: The passage that is searched for the answer.
        tokenizer: Callable tokenizer that returns a mapping containing
            ``"input_ids"`` and provides ``convert_ids_to_tokens`` and
            ``convert_tokens_to_string``.
        model: Callable model whose output exposes ``start_logits`` and
            ``end_logits``.

    Returns:
        The decoded answer string. It is built from an empty token list when
        the predicted end position lies before the predicted start position.
    """
    inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="pt")
    input_ids = inputs["input_ids"].tolist()[0]

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        model_output = model(**inputs)

    # Most likely first token of the answer (argmax over the start scores).
    answer_start = int(torch.argmax(model_output.start_logits))
    # Most likely last token of the answer; +1 turns it into an exclusive slice bound.
    answer_end = int(torch.argmax(model_output.end_logits)) + 1

    answer_tokens = tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    return tokenizer.convert_tokens_to_string(answer_tokens)

In [None]:
# Passage that the demo questions below are answered from.
text = r"""
🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
TensorFlow 2.0 and PyTorch.
"""

# Questions to ask about the passage.
questions = [
    "How many pretrained models are available in 🤗 Transformers?",
    "What does 🤗 Transformers provide?",
    "🤗 Transformers provides interoperability between which frameworks?",
]

# Answer each question in turn and print the question/answer pair.
for question in questions:
    answer = get_answer(question=question, text=text, tokenizer=tokenizer, model=model)
    print(f"Question: {question}", f"Answer: {answer}", sep="\n")

## MLflow

In [None]:
import mlflow.pyfunc

In [None]:
class TransformersQAWrapper(mlflow.pyfunc.PythonModel):
    """MLflow ``python_function`` wrapper around an extractive question-answering model.

    ``load_context`` fetches the tokenizer and model; ``predict`` answers each
    (question, text) row of its input using only the state stored on ``self``.
    """

    def load_context(self, context):
        """Load the tokenizer and the question-answering model onto the instance.

        Args:
            context: Object supplied by the caller; it is not used here because
                the weights are fetched by checkpoint name instead of from
                saved artifacts.
        """
        # TODO this should load a local file, see https://mlflow.org/docs/latest/models.html#id35
        from transformers import AutoTokenizer, AutoModelForQuestionAnswering

        model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # NOTE: the attribute name is misspelled ("tansformer"); it is kept as-is so that
        # any code reading this attribute from the instance keeps working.
        self.tansformer_model = AutoModelForQuestionAnswering.from_pretrained(model_name, return_dict=True)

    def predict(self, context, model_input):
        """Answer every (question, text) pair in ``model_input``.

        Args:
            context: Object supplied by the caller; not used.
            model_input: Table-like object whose ``"question"`` and ``"text"``
                entries are equally long iterables of strings (for example the
                columns of a ``pandas.DataFrame``).

        Returns:
            A ``pandas.Series`` holding one answer string per input row, in row
            order and with a default integer index.
        """
        import pandas as pd
        import torch

        answers = []
        # Iterate over values rather than indexing with [0], so every row is answered
        # and the input does not need an index label equal to 0.
        for question, passage in zip(model_input["question"], model_input["text"]):
            inputs = self.tokenizer(question, passage, add_special_tokens=True, return_tensors="pt")
            input_ids = inputs["input_ids"].tolist()[0]

            # Inference only: no gradients are needed, so skip autograd bookkeeping.
            with torch.no_grad():
                model_output = self.tansformer_model(**inputs)

            # Most likely first token of the answer (argmax over the start scores).
            answer_start = int(torch.argmax(model_output.start_logits))
            # Most likely last token of the answer; +1 turns it into an exclusive slice bound.
            answer_end = int(torch.argmax(model_output.end_logits)) + 1

            # Use the tokenizer stored on the instance: a notebook-level `tokenizer`
            # variable does not exist when the saved model is loaded in another process.
            answer_tokens = self.tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
            answers.append(self.tokenizer.convert_tokens_to_string(answer_tokens))

        return pd.Series(answers)

In [None]:
from sys import version_info

# "major.minor.micro" of the running interpreter, e.g. "3.8.5".
PYTHON_VERSION = ".".join(str(part) for part in version_info[:3])

In [None]:
import pandas
import transformers

# Conda environment description stored with the model: it pins the running
# Python version and the installed pandas / torch / transformers versions.
conda_env = {
    'channels': ['defaults'],
    'dependencies': [
      'python={}'.format(PYTHON_VERSION),
      'pip',
      {
        'pip': [
          'mlflow',
          'pandas=={}'.format(pandas.__version__),
          # torch builds may report a local version label (e.g. "1.7.0+cu101" or
          # "2.1.0+cpu"). Such labels are not published on PyPI, so the pin would
          # be unresolvable by pip; keep only the public release part.
          'torch=={}'.format(torch.__version__.split('+')[0]),
          'transformers=={}'.format(transformers.__version__),
        ],
      },
    ],
    'name': 'transformers_qa_env'
}


In [None]:
# Save the MLflow Model
import shutil

mlflow_pyfunc_model_path = "transformers_qa_mlflow_pyfunc"

# mlflow.pyfunc.save_model refuses to write to a path that already exists, so
# remove the output of a previous run first; this keeps the cell re-runnable
# (Restart Kernel -> Run All). ignore_errors makes the first run a no-op.
shutil.rmtree(mlflow_pyfunc_model_path, ignore_errors=True)

mlflow.pyfunc.save_model(
    path=mlflow_pyfunc_model_path,
    python_model=TransformersQAWrapper(),
    conda_env=conda_env,
)

In [None]:
# Read the saved model back through the generic `python_function` flavor
loaded_model = mlflow.pyfunc.load_model(model_uri=mlflow_pyfunc_model_path)

In [None]:
# Evaluate the model
import pandas as pd

# Each column value is wrapped in a list: building a DataFrame from scalar
# values only raises "If using all scalar values, you must pass an index".
test_input = pd.DataFrame({"text": [text], "question": [questions[0]]})
test_predictions = loaded_model.predict(test_input)
print(test_predictions)