In [11]:
from flask import Flask, render_template, request
from wtforms import Form, TextAreaField, validators
import pickle
import pandas as pd
from bs4 import BeautifulSoup
import re
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer

# Flask application instance; the route decorators below register their views on it.
app = Flask(__name__)

######## Classification
def classify(text):
    """Predict which subreddit label best matches ``text``.

    The raw text is cleaned with ``clean_text``, vectorized with
    ``vectorizer`` and passed to the pickled classifier stored under
    ``../model/classification_models``.

    Args:
        text: Raw user-supplied text (may contain HTML and URLs).

    Returns:
        ``'3Dprinting'`` or ``'sewing'``.

    Raises:
        FileNotFoundError: If the pickled classifier cannot be found.
        KeyError: If the classifier predicts a class other than 0 or 1.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file.
    # Only load model artifacts that were produced by a trusted pipeline.
    # NOTE(review): the model is re-read from disk on every call; consider
    # loading it once at start-up if request latency matters.
    with open('../model/classification_models/gnb_tvec.pickle', 'rb') as f:
        classifier = pickle.load(f)

    cleaned_text = clean_text(text)

    # The vectorizer consumes an iterable of documents, so wrap the single
    # cleaned string in a one-element Series.
    text_series = pd.Series(cleaned_text)

    vectorized_text = vectorizer(text_series)

    # Densify with toarray() rather than todense(): todense() on a sparse
    # matrix yields a numpy.matrix, which recent scikit-learn releases
    # refuse as estimator input. toarray() always gives a plain ndarray.
    # (Presumably the classifier is a GaussianNB, which needs dense input
    # -- confirm against the training notebook.)
    pred = classifier.predict(vectorized_text.toarray())

    label = {0: '3Dprinting', 1: 'sewing'}

    return label[pred[0]]

def vectorizer(text):
    """Transform ``text`` with the pickled vectorizer stored on disk.

    Args:
        text: The documents to transform; passed unchanged to the loaded
            object's ``transform`` method.

    Returns:
        Whatever the loaded vectorizer's ``transform`` returns.
    """
    vectorizer_path = '../model/transformers/gnb_vectorizer.pickle'
    with open(vectorizer_path, 'rb') as pickle_file:
        fitted_vectorizer = pickle.load(pickle_file)

    features = fitted_vectorizer.transform(text)
    return features

######## Data cleaning
# Remove html
def remove_html(text):
    """Strip HTML markup from ``text`` and return only its textual content.

    Args:
        text: A string that may contain HTML tags.

    Returns:
        The text content with all tags removed.
    """
    # Name the parser explicitly: without it BeautifulSoup emits a
    # GuessedAtParserWarning and picks whichever parser happens to be
    # installed, so results could differ between machines. 'html.parser'
    # ships with the standard library and is always available.
    soup = BeautifulSoup(text, 'html.parser')

    return soup.get_text()

# Remove url
def remove_url(text):
    """Delete every URL from ``text``.

    A URL is anything starting with ``http://``, ``https://`` or ``www.``
    and running up to the next whitespace character.

    Args:
        text: The string to scrub.

    Returns:
        ``text`` with each URL replaced by the empty string.
    """
    return re.sub(r'https?://\S+|www\.\S+', '', text)

# Remove not words
def remove_not_words(text):
    """Replace every character that is not an ASCII letter with a space.

    Args:
        text: The string to scrub.

    Returns:
        A string of the same length containing only ``a-z``, ``A-Z`` and
        spaces.
    """
    non_letter = re.compile("[^a-zA-Z]")
    return non_letter.sub(" ", text)

# Stemming
def stemming(text):
    """Lower-case ``text`` and reduce each word to its Porter stem.

    Args:
        text: Whitespace-separated words.

    Returns:
        The stemmed words joined by single spaces.
    """
    stem = PorterStemmer().stem
    words = text.lower().split()
    return " ".join(map(stem, words))

# Clean data
def clean_text(text):
    """Run the full cleaning pipeline on ``text``.

    The steps are applied in this order: strip HTML, strip URLs, replace
    non-letter characters with spaces, then lower-case and stem.

    Args:
        text: Raw input text.

    Returns:
        The cleaned, stemmed text.
    """
    without_markup = remove_url(remove_html(text))
    letters_only = remove_not_words(without_markup)
    return stemming(letters_only)



######## Flask
class Form(Form):
    """Form with one required text area, ``subreddit_text``.

    NOTE(review): this class rebinds the imported ``Form`` name; the name is
    kept because the view functions in this file refer to it.
    """

    subreddit_text = TextAreaField(
        label='',
        validators=[
            validators.DataRequired(),
            validators.Length(min=1),
        ],
    )

@app.route('/')
def index():
    """Render ``form.html`` with a form bound to the request's form data."""
    return render_template('form.html', form=Form(request.form))

@app.route('/results', methods=['POST'])
def results():
    """Classify the submitted text and render the outcome.

    When the form fails validation, ``form.html`` is rendered again with
    the bound form; otherwise ``results.html`` is rendered with the
    submitted text and its predicted label.
    """
    submitted = Form(request.form)

    # Guard clause: anything that is not a valid POST goes back to the form.
    if request.method != 'POST' or not submitted.validate():
        return render_template('form.html', form=submitted)

    content = request.form['subreddit_text']
    prediction = classify(content)
    return render_template('results.html',
                           content=content,
                           prediction=prediction)

######## run the app
if __name__ == '__main__':
    # Start the server only when this module is executed directly,
    # not when it is imported.
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [16/Jul/2020 18:03:21] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [16/Jul/2020 18:03:21] "GET /static/style.css HTTP/1.1" 404 -
127.0.0.1 - - [16/Jul/2020 18:03:29] "POST /results HTTP/1.1" 200 -
127.0.0.1 - - [16/Jul/2020 18:03:29] "GET /static/style.css HTTP/1.1" 404 -
127.0.0.1 - - [16/Jul/2020 18:03:32] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [16/Jul/2020 18:03:32] "GET /static/style.css HTTP/1.1" 404 -
127.0.0.1 - - [16/Jul/2020 18:03:40] "POST /results HTTP/1.1" 200 -
127.0.0.1 - - [16/Jul/2020 18:03:40] "GET /static/style.css HTTP/1.1" 404 -
