In [54]:
!pip install flask_ngrok
!pip install flask-cors



In [55]:
# Directory handed to Parser.load_models() further down; that method reads
# model.h5, word2Idx.npy and idx2Label.npy from it.
# NOTE(review): the path looks like a mounted Google Drive shared drive, so the
# drive presumably has to be mounted before this notebook is run -- confirm.
data_dir = "/content/drive/Shareddrives/SWM - NER/models/BiLSTM/"

In [56]:
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from nltk import word_tokenize

class Parser:
    """Tag the tokens of a sentence using a saved Keras model.

    The instance holds hard-coded character and casing lookup tables; the
    model itself plus the word and label lookup tables are read from disk by
    ``load_models``.  ``predict`` is the main entry point: it tokenizes a raw
    string, builds word / casing / character index features and returns one
    label per token.
    """

    # Width of the per-token character feature row produced by ``padding``.
    # NOTE(review): presumably has to match the character input width the
    # saved model was trained with -- confirm before changing.
    MAX_CHAR_LEN = 52

    def __init__(self):
        """Build the hard-coded character and casing lookup tables."""
        # :: Hard coded char lookup ::
        self.char2Idx = {"PADDING":0, "UNKNOWN":1}
        for c in " 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.,-_()[]{}!?:;#'\"/\\%$`&=*+@^~|":
            self.char2Idx[c] = len(self.char2Idx)
        # :: Hard coded case lookup ::
        self.case2Idx = {'numeric': 0, 'allLower':1, 'allUpper':2, 'initialUpper':3, 'other':4, 'mainly_numeric':5, 'contains_digit': 6, 'PADDING_TOKEN':7}

    def load_models(self, loc=None):
        """Load the Keras model and the word/label lookup tables from ``loc``.

        Args:
            loc: Directory containing ``model.h5``, ``word2Idx.npy`` and
                ``idx2Label.npy``.  Defaults to ``~/.ner_model`` when falsy.

        Sets ``self.model``, ``self.word2Idx`` and ``self.idx2Label``.
        """
        if not loc:
            loc = os.path.join(os.path.expanduser('~'), '.ner_model')
        self.model = load_model(os.path.join(loc,"model.h5"))
        # The .npy files hold pickled dicts wrapped in 0-d object arrays, hence
        # allow_pickle=True and .item().  Passing the flag directly avoids
        # temporarily replacing the global np.load, which stayed patched if a
        # load raised.
        # SECURITY: unpickling executes arbitrary code -- only point ``loc`` at
        # trusted files.
        self.word2Idx = np.load(os.path.join(loc,"word2Idx.npy"), allow_pickle=True).item()
        self.idx2Label = np.load(os.path.join(loc,"idx2Label.npy"), allow_pickle=True).item()

    def getCasing(self,word, caseLookup):
        """Return the casing-feature index of ``word``.

        Args:
            word: Token text.
            caseLookup: Mapping from casing name (``'numeric'``,
                ``'mainly_numeric'``, ``'allLower'``, ``'allUpper'``,
                ``'initialUpper'``, ``'contains_digit'``, ``'other'``) to index.

        Returns:
            ``caseLookup[<casing name>]``.  An empty ``word`` is classified as
            ``'other'`` (previously it raised ZeroDivisionError).
        """
        casing = 'other'
        if not word:
            # Nothing to inspect; also avoids dividing by len(word) == 0.
            return caseLookup[casing]

        numDigits = sum(1 for char in word if char.isdigit())
        digitFraction = numDigits / float(len(word))

        # The order of the checks matters: the first match wins, so e.g. "A1b"
        # is 'initialUpper' rather than 'contains_digit'.
        if word.isdigit(): #Is a digit
            casing = 'numeric'
        elif digitFraction > 0.5:
            casing = 'mainly_numeric'
        elif word.islower(): #All lower case
            casing = 'allLower'
        elif word.isupper(): #All upper case
            casing = 'allUpper'
        elif word[0].isupper(): #Initial char upper
            casing = 'initialUpper'
        elif numDigits > 0:
            casing = 'contains_digit'
        return caseLookup[casing]

    def createTensor(self,sentence, word2Idx,case2Idx,char2Idx):
        """Convert ``[word, chars]`` pairs into index features.

        Args:
            sentence: Iterable of ``(word, chars)`` pairs as produced by
                ``addCharInformation``.
            word2Idx: Word -> index mapping; must contain ``'UNKNOWN_TOKEN'``.
            case2Idx: Casing name -> index mapping (see ``getCasing``).
            char2Idx: Character -> index mapping; ``'UNKNOWN'`` is used for
                characters that are not in the mapping.

        Returns:
            ``[wordIndices, caseIndices, charIndices]`` where the first two
            are flat lists with one entry per token and the third is a list of
            per-token character index lists.
        """
        unknownIdx = word2Idx['UNKNOWN_TOKEN']

        wordIndices = []
        caseIndices = []
        charIndices = []

        for word,char in sentence:
            word = str(word)
            # Exact match first, then lower-cased match, then the unknown slot.
            if word in word2Idx:
                wordIdx = word2Idx[word]
            elif word.lower() in word2Idx:
                wordIdx = word2Idx[word.lower()]
            else:
                wordIdx = unknownIdx
            charIdx = [char2Idx[x] if x in char2Idx else char2Idx['UNKNOWN'] for x in char]
            wordIndices.append(wordIdx)
            caseIndices.append(self.getCasing(word, case2Idx))
            charIndices.append(charIdx)

        return [wordIndices, caseIndices, charIndices]

    def addCharInformation(self, sentence):
        """Pair every token with the list of its characters."""
        return [[word, list(str(word))] for word in sentence]

    def padding(self,Sentence):
        """Pad the character features to a fixed width.

        Replaces ``Sentence[2]`` (the per-token character index lists) with
        the result of ``pad_sequences(..., maxlen=MAX_CHAR_LEN,
        padding='post')``.  ``Sentence`` is modified in place and returned.
        """
        Sentence[2] = pad_sequences(Sentence[2], maxlen=self.MAX_CHAR_LEN, padding='post')
        return Sentence

    def predict(self,Sentence):
        """Tag every token of the raw string ``Sentence``.

        Requires ``load_models`` to have been called first.

        Returns:
            A list of ``(token, label)`` tuples in token order; an empty list
            when tokenization yields no tokens.
        """
        words = word_tokenize(Sentence)
        if not words:
            # No tokens: skip the model call instead of feeding it an empty batch.
            return []
        features = self.addCharInformation(words)
        features = self.padding(self.createTensor(features,self.word2Idx,self.case2Idx,self.char2Idx))
        tokens, casing, char = features
        # Wrap each feature in a list to form a batch containing one sentence.
        tokens = np.asarray([tokens])
        casing = np.asarray([casing])
        char = np.asarray([char])
        # [0] selects the output for the single sentence in the batch.
        pred = self.model.predict([tokens, casing,char], verbose=False)[0]
        pred = pred.argmax(axis=-1)
        pred = [self.idx2Label[x].strip() for x in pred]
        return list(zip(words,pred))

In [57]:
# Build the module-level tagger `p` (pred_wrapper below relies on it), load the
# saved model and lookup tables from data_dir, then smoke-test it on a short sentence.
p = Parser()
p.load_models(data_dir)
p.predict("Tempe is a place")

[('Tempe', 'B-ORG'), ('is', 'O'), ('a', 'O'), ('place', 'O')]

In [58]:
import json

In [59]:
def pred_wrapper(data_arg):
  """Tag the text in ``data_arg['data']`` and return the non-'O' tokens as JSON.

  Args:
    data_arg: Mapping with a ``'data'`` key holding the raw text to tag.

  Returns:
    A JSON string ``{"count": <n>, "data": [[word, tag], ...]}`` that lists,
    in token order, every token whose predicted tag is not ``'O'``.

  Raises:
    KeyError: if ``data_arg`` has no ``'data'`` key.

  Uses the module-level ``p`` (a ``Parser`` with its models loaded).
  """
  res = p.predict(data_arg['data'])
  # Keep only tokens that received a tag other than 'O'.
  ret_list = [[word, tag] for word, tag in res if tag != 'O']
  return json.dumps({'count': len(ret_list), 'data': ret_list})

In [60]:
# Manual check of the JSON wrapper on a two-sentence input.
pred_wrapper({'data':"I am at tempe. Tempe in Az"})



'{"count": 2, "data": [["Tempe", "B-ORG"], ["Az", "B-LOC"]]}'

In [61]:
from flask import Flask, jsonify
from flask_ngrok import run_with_ngrok
from flask import request
import json
from flask_cors import CORS, cross_origin
import warnings
warnings.filterwarnings('ignore')
import pickle

In [62]:
def web_app():
    """Create the Flask app, expose it through ngrok and serve it (blocking).

    Routes:
        ``/``              -- returns a short usage hint.
        ``/pred_bilstm/``  -- parses the request body as JSON (regardless of
                              its Content-Type), passes it to ``pred_wrapper``
                              and returns the resulting JSON document.

    ``app.run()`` is called at the end, so this function blocks while the
    server is running.
    """
    app = Flask(__name__)
    run_with_ngrok(app)
    CORS(app)

    @app.route('/', methods=['GET', 'POST'])
    @cross_origin()
    def run_app():
        return "/pred_bilstm/ for Predictions."

    @app.route('/pred_bilstm/', methods=['GET', 'POST'])
    def pred_app():
        # force=True: parse the body as JSON even without a JSON Content-Type.
        jsonData = request.get_json(force=True)
        print(jsonData)
        res = pred_wrapper(jsonData)
        # pred_wrapper returns an already-serialized JSON string.  Returning a
        # bare str would be sent as text/html, so label it as JSON explicitly.
        return app.response_class(res, mimetype='application/json')
    app.run()

# Start the server; this call blocks (web_app ends with app.run()).
web_app()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://d4b8eb4b25cf.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [05/Apr/2021 19:28:53] "[32mPOST /pred_bilstm HTTP/1.1[0m" 308 -


{'data': "Germany representative to the European Union's veterinary committee Werner Zwingmann said on Wednesday consumers should buy sheepmeat from countries other than Britain until the scientific advice."}


127.0.0.1 - - [05/Apr/2021 19:28:54] "[37mPOST /pred_bilstm/ HTTP/1.1[0m" 200 -
