In [25]:
from flask import Flask, render_template, request
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import pandas as pd
import pickle
from sklearn.feature_extraction.text import HashingVectorizer

In [26]:
# Load the pickled classifier used by model_predict().
# SECURITY: pickle.load can execute arbitrary code while deserialising, so
# 'logReg_model' must only ever come from a trusted source.
# The context manager guarantees the file handle is closed once loading finishes
# (or fails), instead of leaking it for the lifetime of the kernel.
with open('logReg_model', 'rb') as model_file:
    clf2 = pickle.load(model_file)

In [27]:
# Load the training abstracts and preview the first rows.
# The encoding name is written in its canonical form; the previous literal had
# stray quote characters inside the string ("'iso-8859-1'") and only resolved
# because Python normalises codec names before lookup.
# NOTE(review): the preview shows 'Unnamed: 0' / 'Unnamed: 0.1' columns, which
# look like row indexes written out by earlier to_csv calls — confirm upstream.
X_train = pd.read_csv("raw_data_econ/X_train_econ.csv", encoding="iso-8859-1")
X_train.head()

Unnamed: 0.1,Unnamed: 0,abstract
0,1788,Conventional explanations post-1991 growth Ind...
1,2167,We review nonparametric `` revealed preference...
2,976,We study role domestic global factors payoffs ...
3,2132,This paper part conference `` Beyond Pillar 3 ...
4,1389,We study effects securitization interbank lend...


In [28]:
# English stop words, held in a set so each membership check is a hash lookup
stop_words = set(stopwords.words('english'))


def clean_text(text):
    """Strip English stop words from ``text``.

    The text is split with NLTK's ``word_tokenize``; every token that is not in
    ``stop_words`` is kept, and the survivors are joined back together with
    single spaces. Matching is exact (case-sensitive), so a capitalised token
    such as "This" is kept, and punctuation tokens are kept as well.

    Args:
        text: The raw string to clean.

    Returns:
        str: The remaining tokens separated by single spaces.
    """
    kept_tokens = [token for token in word_tokenize(text) if token not in stop_words]
    return ' '.join(kept_tokens)

# Smoke test for clean_text(): lower-case stop words ("is", "only", "a", "and")
# should disappear, while the capitalised "This" and the punctuation tokens stay.
# `cleaned` is reused further down as the sample input for model_predict().
test = "This is only a test! Testing, testing, and a 1, and a 2, and a 3!"
cleaned = clean_text(test)
cleaned

'This test ! Testing , testing , 1 , 2 , 3 !'

In [29]:
# Function to predict if an input string is likely to be in top journal
# note: copy/pasted from Econ_machineLearn.ipynb
# HashingVectorizer is stateless: tokens are hashed straight to column indices,
# so there is no vocabulary to learn and no fit() call is required before
# transform(). (The former fit(X_train) call was a no-op, and iterating a
# DataFrame yields its column names rather than the abstracts anyway.)
# NOTE(review): these settings presumably mirror the ones used when clf2 was
# trained — confirm against Econ_machineLearn.ipynb.
hash_vectorizer = HashingVectorizer(analyzer='word', ngram_range=(1, 2))


def model_predict(s):
    """Score a single piece of text with ``clf2`` and describe the outcome.

    Args:
        s: The text to classify (the notebook passes the output of
            ``clean_text``). It is vectorised as a one-document batch.

    Returns:
        str: A sentence stating whether the text is predicted to be in the
        top 20 Economics journals, followed by the largest class probability
        expressed as a percentage rounded to two decimals.
    """
    # transform() expects an iterable of documents, so wrap the single string.
    vectorized = hash_vectorizer.transform([s])
    # Highest class probability for this document, as a percentage.
    probab = round(max(clf2.predict_proba(vectorized)[0]) * 100, 2)
    prediction = clf2.predict(vectorized)[0]
    if prediction == 1:
        result = "Predicted to be in the top 20 Economics journals"
    else:
        result = "Predicted to NOT be in the top 20 Economics journals"
    return result + " with a probability of " + str(probab) + "%."

# Smoke test for model_predict(): score the cleaned sample sentence produced by
# the clean_text() test cell and display the resulting message.
prediction = model_predict(cleaned)
prediction

'Predicted to NOT be in the top 20 Economics journals with a probability of 67.06%.'

In [30]:
# Create the Flask application object that the route decorators below attach to
app = Flask(__name__)

In [31]:
# Homepage route
@app.route("/")
def render_index():
    """Render and return the ``index_econ.html`` template for the ``/`` route."""
    page = render_template('index_econ.html')
    return page

In [32]:
# When user submits text, returns prediction
@app.route("/", methods=["POST"])
def post_form():
    """Handle a POST to ``/``: clean the submitted text and return its prediction.

    Reads the ``text`` form field, removes stop words with ``clean_text`` and
    passes the result to ``model_predict``.

    Returns:
        The prediction sentence from ``model_predict`` on success, or an
        explanatory message with HTTP status 400 when the submission contains
        nothing to score (field missing, blank, or only stop words).
    """
    text = request.form.get('text', '')
    text_clean = clean_text(text)
    # With no tokens left the feature vector is all zeros, so the model would
    # return a confident-looking prediction that says nothing about the input.
    # Reject the request instead of reporting that number.
    if not text_clean:
        return "Please enter some abstract text to get a prediction.", 400
    return model_predict(text_clean)
    

In [None]:
# Launch the app, but only when this code is executed directly (not imported)
if __name__ == '__main__':
    # Start Flask's built-in server with its default settings.
    app.run()