In [None]:
# Installing the dependencies
!pip install fastapi
!pip install uvicorn
!pip install pydantic
!pip install pyngrok
!pip install nest-asyncio
!pip install --upgrade-strategy eager install optimum[onnxruntime]

In [None]:
# Import dependencies to run the API
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List
import uvicorn
from starlette.middleware.cors import CORSMiddleware

# Import dependencies for the deployment
from pyngrok import ngrok, conf
import nest_asyncio

# Import dependencies for the model backend
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForTokenClassification
from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Import dependencies for getting information about the Jupyter notebook
import os
from google.colab import userdata

In [None]:
# Application object that the CORS middleware and the route decorators below attach to.
app = FastAPI()

In [None]:
# Enable CORS so that a frontend served from a different origin can call this API.
# Every origin, HTTP method and request header is allowed, and credentials are enabled.
# NOTE(review): FastAPI's CORS documentation says wildcard ("*") values should not be combined
# with allow_credentials=True — confirm whether the frontend actually needs credentialed requests.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

In [None]:
# Data validation using Pydantic
# Request body accepted by the POST /results route.
class NERRequest(BaseModel):
    text: str  # text that is handed to the NER model
# Entity payload; mirrors the dict returned by NEROnnxModel.sanitise_data.
class DataKeys(BaseModel):
    text: str  # the text the entities were extracted from
    ents: List  # entity dicts; sanitise_data fills each with 'start', 'end' and 'label'
    title: None  # sanitise_data always supplies None for this key
# Wrapper that nests the entity payload under the `data_keys` field.
class NERData(BaseModel):
    data_keys: DataKeys  # built in the POST /results route from the model wrapper's output
# Response body of the POST /results route (used as its response_model).
class NERResponse(BaseModel):
    original: str  # the request text, echoed back unchanged
    name_entities: NERData  # entities detected in that text

In [None]:
# Fetch the fine-tuned DistilBERT NER checkpoint and its tokenizer from the Hugging Face repo.
# export=True asks Optimum to convert the checkpoint to ONNX while loading it.
tokenizerdis = AutoTokenizer.from_pretrained(
    "elastic/distilbert-base-uncased-finetuned-conll03-english"
)
modeldis = ORTModelForTokenClassification.from_pretrained(
    "elastic/distilbert-base-uncased-finetuned-conll03-english",
    export=True,
)

# Persist both artefacts side by side on the local runtime disk.
modeldis.save_pretrained('/save_directory')
tokenizerdis.save_pretrained('/save_directory')

In [None]:
# Wrap the exported ONNX model in a quantizer.
quantizer = ORTQuantizer.from_pretrained(modeldis)

# Quantization preset: AVX512-VNNI, with is_static=False and per_channel=False.
quantizer_config = AutoQuantizationConfig.avx512_vnni(
    is_static=False,
    per_channel=False,
)

# Run the quantization; the quantized model is written to /model-quantized.
model_quantized_path = quantizer.quantize(save_dir="/model-quantized", quantization_config=quantizer_config)

In [None]:
class NEROnnxModel():
    '''
    Named-entity recogniser backed by the quantized distilbert ONNX model.

    The tokenizer, the quantized model and the transformers pipeline are built once,
    when the object is constructed, and are reused by every call. Rebuilding them on
    each request would mean re-reading the model from disk for every prediction.
    '''
    def __init__(self, tokenizer_dir: str = "/save_directory", model_dir: str = "/model-quantized"):
        '''
        Load the tokenizer and quantized model from disk and build the pipeline
        :param tokenizer_dir: directory the tokenizer was saved to
        :param model_dir: directory the quantized ONNX model was saved to
        '''
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)
        model = ORTModelForTokenClassification.from_pretrained(model_dir)

        # aggregation_strategy='simple' makes the pipeline return grouped entities
        # (with an 'entity_group' key), which sanitise_data relies on.
        self._pipeline = pipeline(
            "token-classification",
            model=model,
            tokenizer=tokenizer,
            aggregation_strategy='simple',
        )

    def __call__(self, text: str) -> dict:
        '''
        Run the pipeline on the text and return the cleaned-up result
        :param text: original text sent by the user
        :return: dictionary with the keys 'text', 'ents' and 'title'
        '''
        return self.sanitise_data(text, self._pipeline(text))

    def sanitise_data(self, text: str, entities: list) -> dict:
        '''
        Remove extra key-value pairs from the result and return the cleaned dictionary
        :param text: original text sent by the user
        :param entities: list of entities returned by the model to be cleaned up
        :return: {'text': text, 'ents': [{'start', 'end', 'label'}, ...], 'title': None}
        '''
        # Keep only the data needed by the front-end for every entity identified by the model
        return_entities = [
            {
                'start': entity['start'],
                'end': entity['end'],
                'label': entity['entity_group'],
            }
            for entity in entities
        ]
        return {'text': text, 'ents': return_entities, 'title': None} # Required format

In [None]:
# Create the shared NER callable that the POST /results route invokes for every request.
ner_pipeline = NEROnnxModel()
# This message is printed unconditionally once the object above has been constructed.
print('The distilbert NER model & tokenizer sucessfully loaded!')

In [None]:
# GET / : returns a fixed banner string identifying the service.
@app.get("/")
def get_root():
    banner = "This is the RESTful API for PrivacyDetection"
    return banner

The distilbert NER model & tokenizer sucessfully loaded!


In [None]:
# POST /results : run the NER model over the submitted text and wrap the result in the response schema.
@app.post("/results", response_model=NERResponse)
async def predict(request: NERRequest):
    submitted_text = request.text
    entity_payload = ner_pipeline(submitted_text)  # send the user's input to the model
    return NERResponse(
        original=submitted_text,
        name_entities=NERData(data_keys=entity_payload),
    )

In [None]:
# Authentication to run ngrok.
# The token is fetched via google.colab's userdata under the key "NGROK", mirrored into the
# process environment, and then read back from there to set pyngrok's default auth token.
os.environ["NGROK"] = userdata.get("NGROK")
conf.get_default().auth_token = os.environ["NGROK"]

In [None]:
# Starting and running the ngrok and fastapi servers.
# Open an ngrok tunnel to local port 8000 and show its public URL.
ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)
# Applied before uvicorn.run; presumably needed because the notebook kernel already runs
# an asyncio event loop — TODO confirm.
nest_asyncio.apply()
# Serve the app on the same port the tunnel points at.
# NOTE(review): uvicorn.run normally blocks until the server stops, so later cells
# would only run after this one is interrupted — confirm.
uvicorn.run(app, port=8000)

In [None]:
# Killing the ngrok and fastapi servers.
# NOTE(review): the server is started in-process with uvicorn.run(app, port=8000), so there may be
# no separate process named "uvicorn" for pkill to match — confirm this line has any effect.
!pkill uvicorn
# Terminates any running process named ngrok.
!pkill ngrok