**Import des packages, chargement des modèles et définition des fonctions**

In [1]:
!pip install flask-ngrok



In [6]:
import pandas as pd
import numpy as np
from operator import itemgetter
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from gensim.models.ldamodel import LdaModel
from gensim.corpora.dictionary import Dictionary

import re
import nltk
from nltk.stem.snowball import EnglishStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from bs4 import BeautifulSoup
import spacy
import en_core_web_sm
nlp = en_core_web_sm.load()

from flask import Flask, render_template, url_for, request
from flask_ngrok import run_with_ngrok

In [9]:
def lower_case(text):
  """Return ``text`` with every cased character converted to lower case."""
  lowered = text.lower()
  return lowered

def rm_bad_character(text):
  """Delete punctuation, newlines and ASCII digits from ``text``.

  The characters are removed outright (nothing is inserted in their place),
  so the text on either side of a removed character ends up joined together.
  """
  punctuation = ",;.?!':*/\n()|_`><=$%\"[]{}-"
  digits = "0123456789"
  # A translation table that maps each unwanted character to None removes
  # all of them in one pass over the string.
  deletion_table = str.maketrans('', '', punctuation + digits)
  return text.translate(deletion_table)

def rm_stopwords(text):
  """Drop English stop words from ``text``.

  The text is split on runs of whitespace, every token that appears in
  NLTK's English stop-word list is discarded, and the remaining tokens are
  joined back together with single spaces. Matching is an exact string
  comparison, so the text should already be lower-cased (``cleansing`` does
  this before calling this function).
  """
  # A set gives constant-time membership tests instead of scanning the
  # whole stop-word list once per token.
  stop_words = set(stopwords.words('english'))
  # Raw string: a backslash-s in a plain literal is an invalid escape
  # sequence and triggers a warning on recent Python versions.
  tokenizer = nltk.RegexpTokenizer(r'\s+', gaps=True)
  kept = [token for token in tokenizer.tokenize(text) if token not in stop_words]
  return ' '.join(kept)

def lem(text):
  """Lemmatize each whitespace-separated token of ``text`` as a verb.

  Every token is passed to ``WordNetLemmatizer.lemmatize`` with ``pos='v'``
  and the results are joined back together with single spaces.
  """
  # Raw string: a backslash-s in a plain literal is an invalid escape
  # sequence and triggers a warning on recent Python versions.
  tokenizer = nltk.RegexpTokenizer(r'\s+', gaps=True)
  lemmatizer = WordNetLemmatizer()
  lemmas = [lemmatizer.lemmatize(token, pos='v') for token in tokenizer.tokenize(text)]
  return ' '.join(lemmas)


In [10]:
def cleansing(text):
    """Apply the complete text-cleaning pipeline to ``text`` and return the result.

    The steps run in this order: lower-casing, removal of punctuation and
    digits, stop-word removal, then verb lemmatization.
    """
    pipeline = (lower_case, rm_bad_character, rm_stopwords, lem)
    for step in pipeline:
        text = step(text)
    return text

In [5]:
# Fetch the NLTK stop-word corpus that rm_stopwords reads through
# stopwords.words('english').
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [6]:
# Fetch the WordNet corpus; presumably needed by the WordNetLemmatizer used
# in lem — confirm against the NLTK documentation.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [11]:
def _load_pickle(path):
  """Deserialize and return the object stored in the pickle file at ``path``."""
  # The context manager closes the file handle as soon as loading is done;
  # a bare open() passed straight to pickle.load is never closed explicitly.
  # NOTE: pickle.load can execute arbitrary code — only load trusted files.
  with open(path, 'rb') as handle:
    return pickle.load(handle)


# Artefacts consumed by make_prediction:
clf = _load_pickle('API/model_lr.pkl')               # supervised model (its .predict is called)
mlb = _load_pickle('API/multiLabBin.pkl')            # maps predictions back to labels (.inverse_transform)
vector = _load_pickle('API/vectorization.pkl')       # text vectorizer (.transform)
ldaModel = _load_pickle('API/model_lda.pkl')         # topic model (.get_document_topics / .show_topic)
dictionnary = _load_pickle('API/dictionnary.pkl')    # token dictionary (.doc2bow)

In [12]:
def make_prediction(title,body):
  """Predict tags and topic keywords for a question.

  Args:
    title: Title of the question (string).
    body: Body of the question (string).

  Returns:
    A ``(keywords, tags)`` tuple. ``keywords`` is a list holding up to five
    top words of the most probable LDA topic; it is empty when the topic
    model reports no topic for the text. ``tags`` is a string derived from
    the supervised model's prediction.
  """
  # Merge the title and the body into one document and clean it.
  text_clean = cleansing(title + ' ' + body)

  # Supervised part: the cleaned text is wrapped in a one-element list
  # because the following steps operate on a collection of documents.
  features = vector.transform([text_clean])
  predicted = clf.predict(features)

  # Map the prediction back to label names, then flatten the printed
  # list-of-tuples form into a plain string (format kept as-is for callers).
  output = mlb.inverse_transform(predicted)
  tags = str(output).replace("[(",'').replace(")]",'').replace(","," ")

  # Unsupervised part: build the bag-of-words from the dictionary.
  bow = dictionnary.doc2bow(text_clean.split())

  # Pick the most probable topic. If the model returns no topic at all,
  # max() on the empty list would raise ValueError, so fall back to None.
  best = max(ldaModel.get_document_topics(bow), key=itemgetter(1), default=None)
  if best is None:
    return ([], tags)

  # Keep the first five keywords of that topic.
  keywords = [entry[0] for entry in ldaModel.show_topic(best[0], topn=5)]

  return (keywords, tags)

In [13]:
# Sample question body used to try out make_prediction.
# Double-quoted on purpose: inside a single-quoted literal, '' ends one string
# and starts another, which silently dropped the apostrophe of "don't".
# (rm_bad_character strips apostrophes anyway, so the cleaned text is unchanged.)
Body = (
    "I have been developing a private python package (my first py package) "
    "and want to change the name while retainng all my git commits."
    "I formatted it in a similar way to Cookie Cutter Data Science where all "
    "the code lives in the src dir. This has been fine while building but "
    "when upload it to a server or another computer I don't want to have to "
    "call it like... from src.data import * I have tried just renaming it "
    "using git mv src/ newname/ but when I push this change to Github all my "
    "files are lost (i know they are there but I would prefer to easily see "
    "all my past changes). I shared it in the form of a .whl file. So do I "
    "just have to rename it and deal with loosing the changes? Or is there a "
    "different git command to use. Or is there some configuration in the "
    "setup.py file i can do? Here is my setup.py for reference."
)

In [14]:
# Sample question title, passed together with Body to make_prediction.
Title='Change private python package name while retaining git history'

In [15]:
# Run the whole pipeline on the sample question; make_prediction returns
# the topic keywords first and the tag string second.
keywords, output = make_prediction(Title,Body)

In [16]:
# Show the predicted tag string first, then the list of topic keywords.
print(output)
print(keywords)

'git' 
['git', 'branch', 'commit', 'change', 'date']


**API**

In [17]:
# Create the Flask application and pass it to flask_ngrok's run_with_ngrok
# (the recorded run output lists an ngrok.io address next to the local one).
app = Flask(__name__)
run_with_ngrok(app)
 
@app.route('/', methods = ['GET','POST'])
def main():
    """Serve the input form.

    Returns a static HTML page with two text areas (``title`` and ``body``)
    whose form is submitted with POST to the ``predict`` route. POST is also
    accepted here because the result page's "Try again" button posts to ``/``.
    """
    # Label typo fixed: "Entez un contenu" -> "Entrez un contenu".
    return """<!DOCTYPE html>
                <html>
                    <head>
                         <title>Catégoriser questions</title>
                    </head>
                    <body>
                        <div align="center" class="bg-info">
                            <h1>Prediction de Tags</h1>
                        </div>
                        <div class="big" align="center">
                            <form action="predict" method="POST">
                                <h3>Entrez un titre</h3>
                                <textarea name="title" rows="1" cols="70"></textarea>
                                <br>
                                <h3>Entrez un contenu</h3>
                                <textarea name="body" rows="20" cols="70"></textarea>
                                   <br><br><br>
                               <input type="submit" name="" value="Predict" class="btn btn-info">
                              </form>
                         </div>
                    </body>
                </html>
                """

@app.route('/predict', methods = ['POST'])
def predict():
    """Run the prediction on the submitted form and render the result page.

    Reads the ``title`` and ``body`` form fields (a missing field is treated
    as an empty string), passes them to ``make_prediction`` and returns an
    HTML page echoing the input and listing the proposed keywords and tags.
    """
    # Function-scope import so this block does not depend on the import cell.
    from html import escape

    # The route only accepts POST, so no method check is needed here.
    title = str(request.form.get('title', ''))
    body = str(request.form.get('body', ''))
    keywords, tags = make_prediction(title, body)

    # One text area per keyword: indexing keywords[0]..keywords[4] directly
    # raises IndexError whenever fewer than five keywords are returned.
    keyword_boxes = "\n".join(
        '                          <textarea rows="1" cols="10" disabled>' + escape(word) + '</textarea>'
        for word in keywords
    )

    # Everything interpolated below is HTML-escaped: title and body come
    # straight from the client and could otherwise close the <textarea> and
    # inject arbitrary markup or script into the page.
    return """
            <!DOCTYPE html>
            <html>
                <head>
                     <title>Catégoriser questions</title>
                </head>
                <body>
                    <div class="big" align="center">
                                <h4>Titre saisi</h4>
                                <textarea name="title" rows="1" cols="100" disabled>"""+ escape(title) +"""</textarea>
                                <br>
                                <h4>Contenu saisi</h4>
                                <textarea name="body" rows="7" cols="100" disabled>"""+ escape(body) +"""</textarea>
                                <br><br>
                    </div>
                     <div class="bg-info" align="center">
                         <h2>Mots clés proposes :</h2>
                     </div>
                     <div align="center">
""" + keyword_boxes + """
                          <h2>Tags proposes :</h2>
                          <textarea rows="1" cols="10" disabled>"""+ escape(str(tags)) +"""</textarea>
                     </div>
                     <br><br><br>
                     <form action="/" method="POST" align="center">
                         <input type="submit" name="" value="Try again" class="btn btn-info">
                     </form>
                </body>
            </html>
        """
# Start the Flask server only when this code runs as the main module
# (the recorded output shows the app being served as "__main__").
if __name__ == '__main__':
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [08/May/2022 16:56:28] "GET / HTTP/1.1" 200 -


 * Running on http://e729-41-62-14-19.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [08/May/2022 16:56:43] "POST /predict HTTP/1.1" 200 -
