In [None]:
!pip install flask-ngrok

Collecting flask-ngrok
  Downloading https://files.pythonhosted.org/packages/af/6c/f54cb686ad1129e27d125d182f90f52b32f284e6c8df58c1bae54fa1adbc/flask_ngrok-0.0.25-py3-none-any.whl
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [None]:
# data analysis and wrangling
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd
import numpy as np

# flask
from flask import Flask, render_template, url_for, request
from flask_ngrok import run_with_ngrok

# text
import re
import spacy
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from gensim.corpora import Dictionary
from gensim.utils import simple_preprocess
from gensim.matutils import corpus2csc
from IPython.display import display, HTML

# Save objects
import joblib

# Set working directory
# NOTE(review): absolute Google Drive path — presumably requires Drive to be
# mounted at /content/drive before this cell runs; no mount call appears in the
# visible cells. The relative joblib.load() paths used later resolve against it.
import os
os.chdir('/content/drive/MyDrive/Colab Notebooks/P5')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


**Import saved objects**

In [None]:
# Load the previously saved objects (paths are relative to the working
# directory set in the imports cell).
# NOTE: joblib.load unpickles arbitrary Python objects — only load files that
# come from a trusted source.
df = joblib.load('df.sav')  # not referenced by the visible cells
texts_cleaned = joblib.load('texts_cleaned.sav')  # not referenced by the visible cells
dictionary = joblib.load('dictionary.sav')  # used by clean_text (doc2bow, len)
model = joblib.load('model.sav')  # used by make_prediction (predict)
binarizer = joblib.load('binarizer.sav')  # used by make_prediction (inverse_transform)

**Functions**

In [None]:
# Lemmatization function
# Load the spaCy English pipeline with the parser and NER components disabled.
# NOTE(review): the 'en' shortcut name is a spaCy 2.x convention; spaCy 3
# expects the full package name (e.g. 'en_core_web_sm') — confirm against the
# spaCy version installed in this runtime.
nlp = spacy.load('en', disable=['parser', 'ner'])

def lemmatization(doc, nlp, banned_postags=['PUNCT', 'DET', 'PRON', 'CONJ',
                                            'ADV', 'INTJ']):
    """Return the lemmas of `doc`, skipping tokens whose POS tag is banned.

    Parameters
    ----------
    doc : str
        Text handed to `nlp` for analysis.
    nlp : callable
        Called as `nlp(doc)`; must return an iterable of tokens exposing
        `lemma_` and `pos_` attributes.
    banned_postags : list of str
        POS tags to exclude. The default list is only read, never mutated.

    Returns
    -------
    list
        The `lemma_` of every kept token, in token order.
    """
    lemmas = []
    for tok in nlp(doc):
        if tok.pos_ in banned_postags:
            continue
        lemmas.append(tok.lemma_)
    return lemmas
    

# Remove stopwords
# NLTK's English stopword list (the corpus is downloaded in the imports cell).
stop_words = stopwords.words('english')

# Placeholder for adding project-specific stopwords (currently disabled):
# stop_words.extend([])
def remove_stopwords(doc):
    """Drop stopwords from a token sequence and join the rest into one string.

    Parameters
    ----------
    doc : iterable of str
        Tokens to filter against the module-level `stop_words` collection.

    Returns
    -------
    str
        The kept tokens, in their original order, separated by single spaces.
    """
    kept = filter(lambda word: word not in stop_words, doc)
    return " ".join(kept)

In [None]:
# All processing
def clean_text(text):
    """Convert raw question text into a bag-of-words sparse matrix.

    `text` is wrapped in a pandas Series, so a single string yields one
    document. Each document goes through, in order: removal of
    <code>...</code> sections, removal of the remaining HTML tags, gensim
    `simple_preprocess` tokenization (tokens shorter than 3 characters are
    dropped), stopword removal, lemmatization and `dictionary.doc2bow`.

    Returns
    -------
    The transpose of `corpus2csc(...)` built with `num_terms=len(dictionary)`,
    i.e. a sparse matrix with one row per document.
    """
    def _to_bow(raw):
        # delete code parts
        without_code = re.sub('<code>(.|\n)*?</code>', '', raw)
        # remove HTML tags
        without_tags = re.sub('<[^<]+?>', '', without_code)
        # preprocess
        tokens = simple_preprocess(without_tags, min_len=3, deacc=False)
        # remove stop words (returns a single space-joined string)
        filtered = remove_stopwords(tokens)
        # lemmatization
        lemmas = lemmatization(filtered, nlp=nlp)
        # make a bag of words
        return dictionary.doc2bow(lemmas)

    bows = pd.Series(text).apply(_to_bow)

    # Convert the streamed corpus into a sparse matrix, then flip it so that
    # documents are rows.
    term_doc = corpus2csc(bows, num_terms=len(dictionary))
    return term_doc.transpose()

**Predict function**

In [None]:
def make_prediction(title, body):
    """Predict the tags of a question from its title and body.

    Parameters
    ----------
    title : str
        Question title (may be empty).
    body : str
        Question body; HTML markup is stripped by `clean_text`.

    Returns
    -------
    list
        The labels that `binarizer.inverse_transform` returns for the single
        predicted row.
    """
    # concatenation title + body
    text = title + ' ' + body

    # cleaning: raw text -> bag-of-words features
    features = clean_text(text)

    # Predict once (the original ran the model twice and discarded the first
    # result). The row width is inferred with -1 instead of a hard-coded 40,
    # so the code keeps working if the number of labels changes.
    indicator = np.array(model.predict(features)).reshape(1, -1)

    # get the tags in text form
    tags_pred = binarizer.inverse_transform(indicator)

    return list(tags_pred[0])

In [None]:
# Smoke test: empty title plus an HTML-formatted body; the cell output is the
# list of predicted tags.
make_prediction('', "Assigning TCP/IP Ports for python In-House Application Use <p>I've written a WCF Service hosted by a Windows Service and python it needs to listen on a known TCP/IP port.  From what range can I safely allocate a port for use within my organization?  That port will be embedded in the config files for the service and the clients that are consuming the service.</p>")

['python']

**API**

In [None]:
# Create the Flask application and register it with flask-ngrok so that
# app.run() also exposes the local server through a public ngrok URL
# (printed in the server log when the app starts).
app = Flask(__name__)
run_with_ngrok(app)
 
@app.route('/', methods = ['GET','POST'])
def main():
    """Serve the input form (title + body) that posts to the /predict route.

    POST is accepted as well as GET because the "Try again" button on the
    results page submits a POST form back to "/".
    """
    return """<!DOCTYPE html>
                <html>
                    <head>
                         <title>Catégoriser questions</title>
                    </head>
                    <body>
                        <div align="center" class="bg-info">
                            <h1>Prediction de Tags</h1>
                        </div>
                        <div class="big" align="center">
                            <form action="predict" method="POST">
                                <h3>Entrez un titre</h3>
                                <textarea name="title" rows="1" cols="70"></textarea>
                                <br>
                                <h3>Entrez un contenu</h3>
                                <textarea name="body" rows="20" cols="70"></textarea>
                                   <br><br><br>
                               <input type="submit" name="" value="Predict" class="btn btn-info">
                              </form>
                         </div>
                    </body>
                </html>
                """

@app.route('/predict', methods = ['POST'])
def predict():
    """Handle the form submission and render the predicted tags.

    Reads the `title` and `body` form fields (a missing field is treated as an
    empty string), runs `make_prediction` on them and returns an HTML page that
    echoes the submitted text together with the predicted tags.

    Every dynamic value is HTML-escaped before being inserted into the page:
    the title and body come straight from the request, and unescaped text
    could close the surrounding <textarea> and inject markup or script (XSS).
    """
    # Function-scope import keeps this cell self-contained.
    from html import escape

    # The route only accepts POST, so no request.method check is needed.
    title = str(request.form.get('title', ''))
    body = str(request.form.get('body', ''))
    tags = make_prediction(title, body)

    safe_title = escape(title)
    safe_body = escape(body)
    safe_tags = escape(str(tags))

    return """
            <!DOCTYPE html>
            <html>
                <head>
                     <title>Catégoriser questions</title>
                </head>
                <body>
                    <div class="big" align="center">
                                <h4>Titre saisi</h4>
                                <textarea name="title" rows="1" cols="100" disabled>"""+ safe_title +"""</textarea>
                                <br>
                                <h4>Contenu saisi</h4>
                                <textarea name="body" rows="7" cols="100" disabled>"""+ safe_body +"""</textarea>
                                <br><br>
                     </div>
                     <div align="center">
                          <h2>Tags proposes :</h2>
                          <textarea rows="5" cols="50" disabled>"""+ safe_tags +"""</textarea>
                     </div>
                     <br><br><br>
                     <form action="/" method="POST" align="center">
                         <input type="submit" name="" value="Try again" class="btn btn-info">
                     </form>
                </body>
            </html>
        """
# Start the server. In the notebook __name__ is '__main__', so this runs when
# the cell is executed; the call blocks while the app serves requests (see the
# request log in the cell output).
if __name__ == '__main__':
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://4443463863e6.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [10/Mar/2021 17:01:59] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [10/Mar/2021 17:01:59] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
127.0.0.1 - - [10/Mar/2021 17:02:23] "[37mPOST /predict HTTP/1.1[0m" 200 -
127.0.0.1 - - [10/Mar/2021 17:03:06] "[37mPOST / HTTP/1.1[0m" 200 -
127.0.0.1 - - [10/Mar/2021 17:03:09] "[37mPOST /predict HTTP/1.1[0m" 200 -
127.0.0.1 - - [10/Mar/2021 17:09:05] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [10/Mar/2021 17:09:45] "[37mPOST /predict HTTP/1.1[0m" 200 -
