### Build Docker Image that contains Entity Extractor model and Flask web application
<br>This notebook walks you through how to operationalize the models we built by packaging them as Docker images. We also cover how to deploy the model using Azure Container Service.<br>
<ul>
<li> First, we develop a Flask Web App that can be exposed to the outside world</li>
<li> Next, we create a Docker image of the Flask Web App and push it to a Docker repository </li>
</ul><br>
In the next notebooks, we show how to test the web service, deploy it on ACS, test the deployed web service, and build a website that consumes the web service.
<br><br>
**Note**: Make sure you have docker installed on your system for testing the Docker Image later in the notebook

In [102]:
import os
from os import path
import json

In [None]:
!mkdir flaskwebapp
!mkdir flaskwebapp/nginx
!mkdir flaskwebapp/etc
!wget https://wcds2017summernlp.blob.core.windows.net/entityrecognition/NERmodel_D_a_D.model
!wget https://wcds2017summernlp.blob.core.windows.net/entityrecognition/NERmodel_D_a_D.model

#### Step 1<b> Copy the trained Model and the pickled content </b>

In [None]:
!cp pickle_content_DDC.p flaskwebapp
!cp NERmodel_DDC.model flaskwebapp
!ls flaskwebapp

#### Step 2<b> Prepare the Test/Evaluation data in a format suitable for Keras </b>

In [105]:
%%writefile flaskwebapp/Data_Preparation.py
import numpy as np
import cPickle as cpickle
from nltk.tokenize import sent_tokenize
from nltk.tokenize import word_tokenize

class Data_Preparation:
    """Prepares text for the trained entity-extraction model and decodes its output.

    On construction, the vocabulary, word vectors, tag map and padding settings
    are loaded from ``pickle_content_D_a_D.p`` in the current working directory.
    """

    def __init__ (self, vector_size = 100):
        """Set defaults, then replace them with the values stored in the pickle.

        vector_size: initial value of DEFAULT_N_FEATURES; read_and_parse_data()
                     overwrites it with the pickled value.
        """
        # Some constants
        self.DEFAULT_N_CLASSES = 8 #8 for DD #12 for PBA
        self.DEFAULT_N_FEATURES = vector_size
        self.DEFAULT_MAX_SEQ_LENGTH = 613 #28 for PBA #208 for PBA

        # Other stuff
        self.wordvecs = None
        self.word_to_ix_map = {}
        self.n_features = 0
        self.n_tag_classes = 0
        self.n_sentences_all = 0
        self.tag_vector_map = {}

        self.max_sentence_len_train = 0
        self.max_sentence_len = 0

        self.all_X_train = []
        self.all_Y_train = []

        self.all_X_test = []

        self.read_and_parse_data()

    def get_data (self):
        """Return the (all_X_train, all_Y_train) pair held by this instance."""
        return (self.all_X_train, self.all_Y_train)

    def decode_prediction_sequence (self, pred_seq):
        """Map each row of class scores in ``pred_seq`` to its tag name.

        The highest-scoring class is turned into a one-hot tuple and looked up in
        ``tag_vector_map``; rows whose one-hot tuple is not in the map are skipped.
        """
        pred_tags = []
        for class_prs in pred_seq:
            class_vec = np.zeros(self.DEFAULT_N_CLASSES, dtype=np.int32)
            class_vec[np.argmax(class_prs)] = 1
            key = tuple(class_vec.tolist())
            if key in self.tag_vector_map:
                pred_tags.append(self.tag_vector_map[key])
        return pred_tags

    def read_and_parse_data (self):
        """Load the training-time settings from the pickle file.

        Returns the (all_X_train, all_Y_train) pair, which this method does not modify.
        """
        # SECURITY: unpickling executes arbitrary code; only ship a pickle you produced yourself.
        # The context manager closes the handle even when loading fails.
        with open("pickle_content_D_a_D.p", "rb") as pickle_file:
            pickle_content = cpickle.load(pickle_file)
        self.word_to_ix_map = pickle_content["word_to_ix_map"]
        self.wordvecs = pickle_content["wordvecs"]
        self.DEFAULT_N_FEATURES = pickle_content["DEFAULT_N_FEATURES"]
        self.DEFAULT_N_CLASSES = pickle_content["DEFAULT_N_CLASSES"]
        self.max_sentence_len_train = pickle_content["max_sentence_len_train"]
        self.tag_vector_map = pickle_content["tag_vector_map"]
        self.zero_vec_pos = pickle_content["zero_vec_pos"]
        return (self.all_X_train, self.all_Y_train)

    def create_test_data(self, vector_size):
        """Read ``test.txt`` and convert it to left-padded word-index sequences.

        vector_size: accepted for interface compatibility; not used here.

        Returns (all_X_test, words, sentence_lengths):
          all_X_test       -- numpy array with one row of word indices per sentence,
                              padded on the left with ``zero_vec_pos``
          words            -- the original tokens that were given an index, in order
          sentence_lengths -- number of indexed tokens in each sentence
        ``len(words) == sum(sentence_lengths)`` always holds, so callers can pair
        tokens with predicted tags by position.
        """
        with open("test.txt") as text_file:
            abstract = text_file.read()

        sentence_list = sent_tokenize(abstract)

        # Index used for out-of-vocabulary tokens; None when the vocabulary has no "UNK" entry.
        unk_ix = self.word_to_ix_map.get("UNK")
        nil_X = self.zero_vec_pos

        self.all_X_test = []
        words = []
        sentence_lengths = []

        for sentence in sentence_list:

            elem_wordvecs = []
            for word in word_tokenize(sentence):
                ix = self.word_to_ix_map.get(word.lower(), unk_ix)
                if ix is None:
                    # The token has no index, so the model never sees it. Leaving it out of
                    # `words` as well keeps tokens and predicted tags aligned.
                    continue
                words.append(word)
                elem_wordvecs.append(ix)

            # Pad the sequences for missing entries to make them all the same length
            # NOTE(review): sentences longer than max_sentence_len_train are not truncated
            # (the pad list is simply empty) -- confirm the model accepts such input.
            pad_length = self.max_sentence_len_train - len(elem_wordvecs)
            self.all_X_test.append( ((pad_length)*[nil_X]) + elem_wordvecs)
            sentence_lengths.append(len(elem_wordvecs))

        self.all_X_test = np.array(self.all_X_test)
        return self.all_X_test, words, sentence_lengths

Overwriting flaskwebapp/Data_Preparation.py


#### Step 3<b> Create the driver for the Web App </b>

In [106]:
%%writefile flaskwebapp/driver.py
import numpy as np
import logging, sys, json
import timeit as t
from keras.models import load_model

from Data_Preparation import Data_Preparation

# Service logger that also writes to standard output.
logger = logging.getLogger("ER_svc_logger")
ch = logging.StreamHandler(stream=sys.stdout)
logger.addHandler(ch)

# Module-level state; init() assigns trainedModel before any request is scored.
trainedModel = mem_after_init = labelLookup = None
topResult = 3


def init():
    """Load the data helper and the trained model into module-level state.

    Sets the globals ``vector_size``, ``reader`` and ``trainedModel`` and logs
    how long loading took. Must be called once before run().
    """
    global trainedModel, labelLookup, mem_after_init, vector_size, reader
    started_at = t.default_timer()

    vector_size = 50 #Embedding Size
    reader = Data_Preparation(vector_size)

    # Load the trained model
    trainedModel = load_model("NERmodel_D_a_D.model")

    elapsed_ms = round((t.default_timer() - started_at) * 1000, 2)
    logger.info("Model loading time: {0} ms".format(elapsed_ms))

    
def run(content):
    """Tag the entities in ``content`` and return the text as colour-coded HTML.

    content: the text to analyse (a string, or an iterable of strings); ``None``
             is treated as empty text.

    Returns a tuple ``(text_annotated, timing_message)``. ``text_annotated``
    wraps every token in a ``<font>`` element coloured by its predicted tag
    (non-"O" tags are also bold) and ends every sentence with ``<br>``.

    Side effects: writes ``test.txt`` (the input read by
    ``reader.create_test_data``) and ``Pubmed_op_Output.txt`` (one predicted
    tag per line, blank line between sentences) in the working directory, and
    prints the annotated text.
    """
    # Function-scope import so the module works on both Python 3 and Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    start = t.default_timer()

    ### Creating Colour Dictionary
    colours = {
        "B-Disease": "blue",
        "I-Disease": "blue",
        "B-Drug": "lime",
        "I-Drug": "lime",
        "B-Chemical": "lime",
        "I-Chemical": "lime",
        "O": "black",
    }
    # Tags missing from the dictionary are rendered like "O" instead of raising KeyError.
    default_colour = colours["O"]

    if content is None:
        content = ""

    with open("test.txt", "w") as target:
        for line in content:
            target.write(line)

    test_data, words, sentence_lengths = reader.create_test_data(vector_size)

    # Generate Predictions for the Data from the trained model
    sentence_tags = []
    for i, x in enumerate(test_data):
        tags = trainedModel.predict(np.array([x]), batch_size=1)[0]
        pred_tags = reader.decode_prediction_sequence(tags)

        # Sequences are padded on the left, so only the trailing sentence_lengths[i]
        # predictions belong to real tokens; "NONE" is reported as "O".
        first_real = max(len(pred_tags) - sentence_lengths[i], 0)
        sentence_tags.append(
            ["O" if tag == "NONE" else str(tag) for tag in pred_tags[first_real:]]
        )

    with open("Pubmed_op_Output.txt", 'w') as target:
        for tags_in_sentence in sentence_tags:
            for tag in tags_in_sentence:
                target.write(tag)
                target.write("\n")
            target.write("\n")

    #Colour Code the Text based on the Color Dictionary to identify various Entities effectively
    pieces = []
    ind = 0
    for tags_in_sentence in sentence_tags:
        for tag in tags_in_sentence:
            colour = colours.get(tag, default_colour)
            # Tokens come from the request, so they are escaped before being placed in markup.
            token = escape(words[ind])
            if tag != "O":
                pieces.append("<b><font size = '2' color = '" + colour + "'>" + token + "</font></b> ")
            else:
                pieces.append("<font size = '2' color = '" + colour + "'>" + token + "</font> ")
            ind += 1
        #Add a new line after a sentence
        pieces.append("<br>")
    text_annotated = "".join(pieces)

    print(text_annotated)
    end = t.default_timer()
    logger.info("Predictions took {0} ms".format(round((end-start)*1000, 2)))
    return (text_annotated, 'Computed in {0} ms'.format(round((end-start)*1000, 2)))

Overwriting flaskwebapp/driver.py


#### Step 4<b> Specify the API Routes for the WebApp </b>

In [107]:
%%writefile flaskwebapp/app.py
from flask import Flask, render_template, request
from wtforms import Form, validators
import keras
from driver import *
import time

# Flask application object; the route decorators below register their handlers on it,
# and wsgi.py imports it under the name `application`.
app = Flask(__name__)


@app.route('/score', methods = ['GET'])
def scoreRRS():
    """ Endpoint for scoring

    Reads the text to analyse from the ``input`` query parameter and returns
    ``{"result": <value returned by run()>}`` as JSON. Responds with HTTP 400
    and an ``error`` message when the parameter is missing.
    """
    # Imported here so the handler does not depend on `json` arriving via `from driver import *`.
    import json

    json_header = {'Content-Type': 'application/json'}

    text = request.args.get('input')
    if text is None:
        error = {'error': "missing required query parameter 'input'"}
        return (json.dumps(error), 400, json_header)

    payload = {'result': run(text)}
    return (json.dumps(payload), 200, json_header)


@app.route("/")
def healthy():
    """Health-check endpoint: always answers with the fixed text "Healthy"."""
    status_text = "Healthy"
    return status_text

# Reports which Keras version the running service has imported
@app.route('/version', methods = ['GET'])
def version_request():
    """Return the version string of the imported ``keras`` package."""
    installed_version = keras.__version__
    return installed_version

@app.route('/val', methods = ['GET'])
def val_request():
    """Echo the ``input`` query parameter back to the caller.

    The value is served as plain text so a browser never interprets
    caller-supplied markup. Responds with HTTP 400 when the parameter is missing
    (returning ``None`` from a Flask view is an error).
    """
    plain_text = {'Content-Type': 'text/plain; charset=utf-8'}

    value = request.args.get('input')
    if value is None:
        return ("missing required query parameter 'input'", 400, plain_text)
    return (value, 200, plain_text)

# Runs only when app.py is executed directly (not when it is imported by wsgi.py).
if __name__ == "__main__":
    # Binding to 0.0.0.0 makes the Web App accessible from the outside world.
    # The flask web app runs on port 5000 by default. Ensure the port is open on your machine.
    # If you are on an Azure VM, create a rule in the Network Adapter,
    # see https://docs.microsoft.com/en-us/azure/virtual-machines/windows/nsg-quickstart-portal
    app.run(host='0.0.0.0')

Overwriting flaskwebapp/app.py


In [108]:
%%writefile flaskwebapp/wsgi.py
import sys
sys.path.append('/code/')
from app import app as application
from driver import *

def create():
    """Entry point named in the gunicorn command (``wsgi:create()``).

    Loads the model through init() and then serves the Flask application on
    127.0.0.1:5000, the address the nginx site proxies to.
    """
    print("Initialising")
    init()
    bind_host, bind_port = '127.0.0.1', 5000
    application.run(host=bind_host, port=bind_port)

Overwriting flaskwebapp/wsgi.py


#### Step 5

<b> List all the python requirements for your web app here. They will be pip installed in the Docker Image </b>

In [109]:
%%writefile flaskwebapp/requirements.txt
# Python packages pip-installed into the Docker image (see the dockerfile's
# `pip install -r /code/requirements.txt`). tensorflow and keras are installed
# separately by the dockerfile.
# NOTE(review): the first four entries are unpinned, so a rebuild may pick up
# different versions -- consider pinning them to the versions used for training.
h5py
wtforms
nltk
pillow
click==6.7
configparser==3.5.0
Flask==0.11.1
gunicorn==19.6.0
json-logging-py==0.2
MarkupSafe==1.0
olefile==0.44
requests==2.12.3

Overwriting flaskwebapp/requirements.txt


<b>Creating a proxy between ports 88 and 5000 on Nginx Server </b>

In [110]:
%%writefile flaskwebapp/nginx/app
# Reverse proxy: nginx accepts requests on port 88 (the port the dockerfile EXPOSEs)
# and forwards them to the Flask app listening on 127.0.0.1:5000.
server {
    listen 88;
    server_name _;
 
    location / {
    include proxy_params;
    proxy_pass http://127.0.0.1:5000;
    # Long timeouts so that slow scoring requests are not cut off by the proxy.
    proxy_connect_timeout 5000s;
    proxy_read_timeout 5000s;
  }
}

Overwriting flaskwebapp/nginx/app


Specify the name of the image as username/repository_name

In [111]:
# Docker image name (username/repository_name) and the location of the build files.
image_name = "akshaymehra/bidirectional_lstm_ner_ddc"
application_path = 'flaskwebapp'
docker_file_location = os.path.join(application_path, 'dockerfile')
print(docker_file_location)

flaskwebapp/dockerfile


In [112]:
%%writefile flaskwebapp/gunicorn_logging.conf

# Logging configuration passed to gunicorn through --log-config (see supervisord.conf).
# Two loggers (root and gunicorn.error) share one console handler that writes to stdout.
[loggers]
keys=root, gunicorn.error

[handlers]
keys=console

[formatters]
keys=json

[logger_root]
level=INFO
handlers=console

# propagate=0 stops gunicorn.error records from also being emitted by the root logger.
[logger_gunicorn.error]
level=ERROR
handlers=console
propagate=0
qualname=gunicorn.error

[handler_console]
class=StreamHandler
formatter=json
args=(sys.stdout, )

# Formatter class is presumably provided by the json-logging-py requirement -- confirm.
[formatter_json]
class=jsonlogging.JSONFormatter

Overwriting flaskwebapp/gunicorn_logging.conf


In [113]:
%%writefile flaskwebapp/kill_supervisor.py
import sys
import os
import signal


def write_stdout(s):
    """Write ``s`` to standard output and flush immediately."""
    stream = sys.stdout
    stream.write(s)
    stream.flush()

# this function is modified from the code and knowledge found here: http://supervisord.org/events.html#example-event-listener-implementation
def main():
    """Supervisor event listener: on every event, send SIGQUIT to supervisord.

    Loops forever: announces READY, waits for an event header line on stdin,
    then reads supervisord's pid from /tmp/supervisord.pid and signals it. If
    that fails, the error is reported and the event is acknowledged with a
    RESULT token. Returns when stdin reaches end-of-file.
    """
    while 1:
        write_stdout('READY\n')
        # wait for the event on stdin that supervisord will send
        line = sys.stdin.readline()
        if not line:
            # End-of-file: nothing is sending events any more. Without this check the
            # loop would spin, signalling on every iteration.
            break

        # The header carries "len:<n>"; consume the n-byte payload that follows so it is
        # not mistaken for the next event header.
        headers = dict(tok.split(':', 1) for tok in line.split() if ':' in tok)
        try:
            payload_len = int(headers.get('len', 0))
        except ValueError:
            payload_len = 0
        if payload_len > 0:
            sys.stdin.read(payload_len)

        write_stdout('Killing supervisor with this event: ' + line)
        try:
            # supervisord writes its pid to its file from which we read it here, see supervisord.conf
            with open('/tmp/supervisord.pid', 'r') as pidfile:
                pid = int(pidfile.readline())
            os.kill(pid, signal.SIGQUIT)
        except Exception as e:
            # str(e) works for every exception type; only OSError/IOError carry .strerror.
            write_stdout('Could not kill supervisor: ' + str(e) + '\n')
            write_stdout('RESULT 2\nOK')

# Start the listener loop as soon as supervisord launches this script.
main()


Overwriting flaskwebapp/kill_supervisor.py


In [114]:
%%writefile flaskwebapp/etc/supervisord.conf 
; Process manager configuration: supervisord runs in the foreground as the container's
; main process and starts gunicorn, nginx and an event listener.
[supervisord]
logfile=/tmp/supervisord.log ; (main log file;default $CWD/supervisord.log)
logfile_maxbytes=50MB        ; (max main logfile bytes b4 rotation;default 50MB)
logfile_backups=10           ; (num of main logfile rotation backups;default 10)
loglevel=info                ; (log level;default info; others: debug,warn,trace)
pidfile=/tmp/supervisord.pid ; (supervisord pidfile;default supervisord.pid)
nodaemon=true               ; (start in foreground if true;default false)
minfds=1024                  ; (min. avail startup file descriptors;default 1024)
minprocs=200                 ; (min. avail process descriptors;default 200)

; Web application: gunicorn evaluates wsgi:create(), which loads the model and serves Flask.
[program:gunicorn]
command=bash -c "gunicorn --workers 1 -m 007 --timeout 100000 --capture-output --error-logfile - --log-level debug --log-config gunicorn_logging.conf \"wsgi:create()\""
directory=/code
redirect_stderr=true
stdout_logfile =/dev/stdout
stdout_logfile_maxbytes=0
startretries=2
startsecs=20

; Reverse proxy in front of the Flask app (site definition in /code/nginx/app).
[program:nginx]
command=/usr/sbin/nginx -g "daemon off;"
startretries=2
startsecs=5
priority=3

; When a program enters the FATAL state, kill_supervisor.py sends SIGQUIT to supervisord
; (pid read from the pidfile configured above).
[eventlistener:program_exit]
command=python kill_supervisor.py
directory=/code
events=PROCESS_STATE_FATAL
priority=2

Overwriting flaskwebapp/etc/supervisord.conf


<b> Creating a Custom Image with all the requirements for our web app </b>

In [115]:
%%writefile flaskwebapp/dockerfile

# Image for the entity-extraction web service: Ubuntu base with Python, nginx and
# supervisor, plus the contents of the build context (the flaskwebapp folder).
FROM ubuntu:16.04
MAINTAINER Akshay Mehra <t-akmehr@microsoft.com>

# Copy the whole build context to /code, and its etc/ folder to /etc so that
# supervisord.conf ends up at /etc/supervisord.conf (the path used by CMD).
RUN mkdir /code
WORKDIR /code
ADD . /code/
ADD etc /etc

# Install system packages, replace the default nginx site with /code/nginx/app,
# then install the Python dependencies.
# NOTE(review): tensorflow and keras are installed without version pins, so a rebuild may
# pull versions that differ from the ones the model was trained with -- consider pinning.
RUN apt-get update && apt-get install -y --no-install-recommends \
        openmpi-bin \
        python \ 
        python-dev \ 
        python-setuptools \
        python-pip \
        supervisor \
        nginx && \
    rm /etc/nginx/sites-enabled/default && \
    cp /code/nginx/app /etc/nginx/sites-available/ && \
    ln -s /etc/nginx/sites-available/app /etc/nginx/sites-enabled/ && \
    pip install tensorflow && \
    pip install keras && \
    pip install -r /code/requirements.txt

# Tokenizer data needed by nltk's sent_tokenize / word_tokenize in Data_Preparation.py.
RUN python -m nltk.downloader punkt

# Port 88 is the one nginx listens on; supervisord starts and supervises all processes.
EXPOSE 88
CMD ["supervisord", "-c", "/etc/supervisord.conf"]

Overwriting flaskwebapp/dockerfile


In [116]:
!sudo docker build -t $image_name -f $docker_file_location $application_path --no-cache

Sending build context to Docker daemon 250.5 MB
Step 1/10 : FROM ubuntu:16.04
 ---> d355ed3537e9
Step 2/10 : MAINTAINER Akshay Mehra <t-akmehr@microsoft.com>
 ---> Running in cdd1831aea3a
 ---> 05b7f0caf08c
Removing intermediate container cdd1831aea3a
Step 3/10 : RUN mkdir /code
 ---> Running in 211795c56bdb
 ---> df5d208e6343
Removing intermediate container 211795c56bdb
Step 4/10 : WORKDIR /code
 ---> dbccf55f6be7
Removing intermediate container aae7d344fc69
Step 5/10 : ADD . /code/
 ---> 45386af3e38b
Removing intermediate container b360aac102fa
Step 6/10 : ADD etc /etc
 ---> 955848fa4279
Removing intermediate container d51cb3d6a2aa
Step 7/10 : RUN apt-get update && apt-get install -y --no-install-recommends         openmpi-bin         python         python-dev         python-setuptools         python-pip         supervisor         nginx &&     rm /etc/nginx/sites-enabled/default &&     cp /code/nginx/app /etc/nginx/sites-available/ &&     ln -s /etc/nginx/sites-available/app /etc/ngi

In [117]:
!sudo docker tag $image_name "docker.io/akshaymehra/bidirectional_lstm_ner_ddc"

In [118]:
!sudo docker login -u akshaymehra -p Akshay2404

Login Succeeded


In [119]:
# Upload the image to the repository named by image_name (requires a prior docker login).
!sudo docker push $image_name

The push refers to a repository [docker.io/akshaymehra/bidirectional_lstm_ner_ddc]

[1B5fb386fd: Preparing 
[1B9162ee1e: Preparing 
[1B2b1304d2: Preparing 
[1B6ef8638b: Preparing 
[1Bb7eae922: Preparing 
[1Bc118947e: Preparing 
[1Bf951edf5: Preparing 
[1B2a55830d: Preparing 
[1B2c9a24fc: Preparing 
[9B9162ee1e: Pushed    857 MB/843.8 MB[10A[2K[9A[2K[10A[2K[9A[2K[9A[2K[10A[2K[7A[2K[10A[2K[7A[2K[10A[2K[9A[2K[9A[2K[10A[2K[9A[2K[10A[2K[9A[2K[7A[2K[9A[2K[7A[2K[10A[2K[9A[2K[7A[2K[9A[2K[7A[2K[9A[2K[7A[2K[5A[2K[7A[2K[9A[2K[10A[2K[9A[2K[10A[2K[9A[2K[7A[2K[9A[2K[7A[2K[9A[2K[7A[2K[9A[2K[7A[2K[9A[2K[7A[2K[10A[2K[1A[2K[10A[2K[9A[2K[10A[2K[9A[2K[7A[2K[9A[2K[7A[2K[9A[2K[7A[2K[9A[2K[7A[2K[9A[2K[7A[2K[10A[2K[9A[2K[10A[2K[9A[2K[7A[2K[9A[2K[7A[2K[10A[2K[9A[2K[9A[2K[7A[2K[10A[2K[7A[2K[9A[2K[10A[2K[9A[2K[10A[2K[10A[2K[9A[2K[10A[2K[9A[2K[9A[2

In [120]:
# Show the image name for use in the follow-up notebooks.
image_name_message = 'Docker image name {}'.format(image_name)
print(image_name_message)

Docker image name akshaymehra/bidirectional_lstm_ner_ddc
