In [21]:
from flask import Flask,render_template,url_for,request
import pandas as pd
import numpy as np
from keras import Input, Model
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam
from keras.layers.embeddings import Embedding
from keras.preprocessing.text import Tokenizer
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, SpatialDropout1D, Bidirectional, Dense, \
    LSTM, Conv1D, Dropout, concatenate

from preprocessor import clean_txt
import nltk
# Fetch the NLTK 'wordnet' and 'stopwords' corpora up front; when they are
# already cached this just reports "already up-to-date" (see output below).
# Presumably these are required by preprocessor.clean_txt -- TODO confirm.
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\10668186\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\10668186\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [4]:
from keras import backend as K
from keras import constraints, initializers, regularizers
from keras.engine import Layer


class Attention(Layer):
    """Attention pooling that collapses the step axis of a 3D input.

    Every step of the input is scored with a learned weight vector (plus an
    optional per-step bias).  The scores are passed through ``tanh``,
    exponentiated, optionally masked, normalised over the step axis and used
    to take a weighted sum of the steps.  A 3D input therefore becomes a 2D
    output of shape ``(input_shape[0], input_shape[-1])``.

    Args:
        step_dim: Length of axis 1 of the input.  ``call`` reshapes the
            scores to ``(-1, step_dim)``, so it must match the real input.
        W_regularizer: Regularizer identifier/instance for the weight vector.
        b_regularizer: Regularizer identifier/instance for the bias.
        W_constraint: Constraint identifier/instance for the weight vector.
        b_constraint: Constraint identifier/instance for the bias.
        bias: Whether to add a learned per-step bias to the scores.
        **kwargs: Forwarded to ``Layer.__init__``.
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # The layer consumes an incoming mask (see ``call``) instead of
        # rejecting masked inputs.
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        # Filled in by ``build`` once the input shape is known.
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the weight vector (and optional bias) for a 3D input shape."""
        assert len(input_shape) == 3

        # One weight per feature: used to score each step.
        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            # One bias per step (axis 1 of the input).
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        """Return ``None``: no mask is passed on to downstream layers."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        features_dim = self.features_dim
        step_dim = self.step_dim

        # Score every step: flatten to (-1, features), project onto W, then
        # fold back to one score per step.
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                              K.reshape(self.W, (features_dim, 1))), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)

        a = K.exp(eij)

        if mask is not None:
            # Zero the weight of masked steps before normalising.
            a *= K.cast(mask, K.floatx())

        # Normalise over the step axis; epsilon guards against a zero sum.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Output keeps the first input dimension and the feature dimension."""
        return input_shape[0], self.features_dim

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        Without this, a serialized model config would not carry the required
        ``step_dim`` argument and the layer could not be rebuilt from it.
        """
        config = super(Attention, self).get_config()
        config.update({
            'step_dim': self.step_dim,
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
        })
        return config

In [17]:
def build_model(max_features=15000, max_len=150, embed_size=300,
                embedding_path="glove_trained_embedding.csv",
                weights_path='final_clf_model.hdf5'):
    """Build the classifier, load its trained weights and compile it.

    The network embeds the token ids with a frozen embedding matrix, runs two
    stacked bidirectional LSTMs, attention-pools the embedding output and both
    LSTM outputs, concatenates the three pooled vectors and feeds them through
    two dense layers into an 8-unit sigmoid output.

    Args:
        max_features: Vocabulary size; the embedding has ``max_features + 1``
            rows.
        max_len: Length of each input sequence (also the attention step count).
        embed_size: Width of the embedding vectors.  The default matches the
            value the network has always been built with.
        embedding_path: Comma-separated file holding the embedding matrix.
        weights_path: File with the trained weights loaded into the model.

    Returns:
        The compiled Keras ``Model`` with weights loaded.
    """
    embedding_matrix = np.genfromtxt(embedding_path, delimiter=',')

    main_input1 = Input(shape=(max_len,), name='main_input1')
    x1 = Embedding(max_features + 1, embed_size, input_length=max_len,
                   weights=[embedding_matrix], trainable=False)(main_input1)
    x1 = SpatialDropout1D(0.4)(x1)
    x2 = Bidirectional(LSTM(75, dropout=0.5, return_sequences=True))(x1)
    x = Dropout(0.55)(x2)
    x = Bidirectional(LSTM(50, dropout=0.5, return_sequences=True))(x)

    # Pool each representation level separately, then combine them.
    hidden = concatenate([
        Attention(max_len)(x1),
        Attention(max_len)(x2),
        Attention(max_len)(x)
    ])
    hidden = Dense(32, activation='selu')(hidden)
    hidden = Dropout(0.5)(hidden)
    hidden = Dense(16, activation='selu')(hidden)
    hidden = Dropout(0.5)(hidden)
    output_lay1 = Dense(8, activation='sigmoid')(hidden)

    model = Model(inputs=[main_input1], outputs=output_lay1)
    model.load_weights(filepath=weights_path)
    model.compile(loss="binary_crossentropy", optimizer=Adam(), metrics=['binary_accuracy'])

    return model

In [None]:
# Flask application object; the @app.route decorators below register views on it.
app = Flask(__name__)

@app.route('/')
def home():
    """Serve the landing page rendered from the ``home.html`` template."""
    landing_page = render_template('home.html')
    return landing_page


@app.route('/predict',methods=['POST'])
def predict():
    """Score the posted ``message`` form field and render the result page.

    The message is cleaned with ``clean_txt``, tokenised, padded to
    ``max_len`` and scored by the model from ``build_model``.  The flattened
    scores are joined into a comma-separated string and passed to
    ``result.html`` as ``foobar``.

    The route only accepts POST, so no method check is needed here.
    """
    max_features = 15000
    max_len = 150

    message = request.form['message']
    # clean_txt is given the raw string, matching how the rest of this file
    # calls it; the cleaned text goes into a request-local list rather than a
    # shared global.
    input_txt = [clean_txt(message)]

    # The model is rebuilt on every request, as before.
    # NOTE(review): this re-reads the embedding and weight files each time --
    # confirm whether a cached model is safe with this Keras backend.
    model = build_model()

    # NOTE(review): the tokenizer is fit on the text being scored, so word
    # indices come from this input alone -- confirm they line up with the
    # vocabulary behind the embedding matrix used in build_model.
    # NOTE(review): oov_token=True is kept as-is; verify a string token was
    # not intended.
    tk = Tokenizer(lower=True, filters='', num_words=max_features, oov_token=True)
    tk.fit_on_texts(input_txt)
    tokenized = tk.texts_to_sequences(input_txt)
    x_test = pad_sequences(tokenized, maxlen=max_len)

    vpp = model.predict(x_test).flatten(order='C')
    vpp_str = ', '.join(map(str, vpp))

    return render_template('result.html', foobar = vpp_str)



if __name__ == "__main__":
    # Start Flask's built-in server with debug mode switched on.
    # NOTE(review): debug mode is intended for local development only, and
    # its auto-reloader may not behave inside a notebook kernel -- confirm
    # how this cell is meant to be run.
    app.run(debug=True)
    
    
    


In [87]:
def predict(text):
    """Score a single piece of text with the classifier.

    Args:
        text: Raw input text; it is passed through ``clean_txt`` first.

    Returns:
        The model's scores for ``text`` as a flat 1-D array.
    """
    max_features = 15000
    max_len = 150

    model = build_model()
    # Keep the cleaned text in a local list instead of relying on (and
    # appending to) a notebook-level ``input_txt``.
    input_txt = [clean_txt(text)]

    # NOTE(review): the tokenizer is fit on the text being scored, so word
    # indices come from this input alone -- confirm they line up with the
    # vocabulary behind the embedding matrix used in build_model.
    tk = Tokenizer(lower=True, filters='', num_words=max_features, oov_token=True)
    tk.fit_on_texts(input_txt)
    tokenized = tk.texts_to_sequences(input_txt)
    x_test = pad_sequences(tokenized, maxlen=max_len)

    vpp = model.predict(x_test)
    return vpp.flatten(order='C')
    
    
    
    


In [20]:
# Build the classifier once so the cells below can reuse it for prediction.
model = build_model()

In [79]:
# Sample input used to exercise the pipeline end to end.
text = "if america had another years of obama ideology via hillary we would be well on our way to being shithole country"

# Run the sample through the project's preprocessor and show the result.
clean_text =  clean_txt(text)
print(clean_text)

if america had another years of obama ideology via hillary we would be well on our way to being shithole country


In [80]:
# The tokenizer expects a list of texts, so wrap the single cleaned sample.
input_txt = [clean_text]

In [81]:
# Vocabulary cap and sequence length; same values as used in build_model.
max_features = 15000
max_len = 150
# NOTE(review): the tokenizer is fit on the text being scored, so word indices
# come from this input alone -- confirm they line up with the vocabulary behind
# the embedding matrix loaded in build_model.
tk = Tokenizer(lower=True, filters='', num_words=max_features, oov_token=True)
tk.fit_on_texts(input_txt)
# Convert the text to integer ids, then pad/truncate to max_len columns.
tokenized = tk.texts_to_sequences(input_txt)
x_test = pad_sequences(tokenized, maxlen=max_len)


In [82]:
# Sanity check: one row (the single sample) with max_len columns.
x_test.shape

(1, 150)

In [108]:
# Score the padded sample; the model ends in an 8-unit sigmoid layer, so this
# yields 8 scores for the single input row.
vpp = model.predict(x_test)

In [121]:
# Show the scores as one comma-separated string.  np.ravel flattens the
# prediction whether it is still the 2-D model output or already 1-D, and
# `vpp` itself is left untouched so cells that expect an array keep working.
', '.join(map(str, np.ravel(vpp)))

'0.3548488, 0.7206501, 0.10172686, 0.015522897, 0.03900522, 0.2573474, 0.020830274, 0.55477875'

In [110]:
# Collapse the prediction to a flat 1-D vector of scores.  np.asarray keeps
# this working even when `vpp` has been turned into a plain list beforehand.
vpp = np.asarray(vpp).flatten(order='C')

In [86]:
# Report layout: (label, index into vpp, whether to print 1 - score).
# A row whose index is None is a section heading printed on its own.
report_rows = [
    ('    Incites Violence:', 0, False),
    ('    No Violence:', 0, True),
    ('  Scope:-', None, False),
    ('    Direct:', 1, False),
    ('    Generalized:', 1, True),
    ('  Hate Group:-', None, False),
    ('    Gender:', 2, False),
    ('    Race:', 3, False),
    ('    Origin:', 4, False),
    ('    Disability:', 5, False),
    ('    Religion:', 6, False),
    ('    Sexual Orientation:', 7, False),
]

print('Instance:', input_txt[0])
for row_label, score_idx, complement in report_rows:
    if score_idx is None:
        print(row_label)
    elif complement:
        print(row_label, 1-vpp[score_idx])
    else:
        print(row_label, vpp[score_idx])
print()

Instance: if america had another years of obama ideology via hillary we would be well on our way to being shithole country
    Incites Violence: 0.3548488
    No Violence: 0.6451511979103088
  Scope:-
    Direct: 0.7206501
    Generalized: 0.2793499231338501
  Hate Group:-
    Gender: 0.10172686
    Race: 0.015522897
    Origin: 0.03900522
    Disability: 0.2573474
    Religion: 0.020830274
    Sexual Orientation: 0.55477875



In [24]:
def predict():
    """Return the ready-to-use classifier.

    ``build_model`` already loads the trained weights and compiles the
    network, so repeating those steps here only re-read the weights file and
    rebuilt the optimizer.  The model is returned exactly as built.

    Note: this file defines more than one function named ``predict``; whichever
    definition runs last is the one bound to the name.
    """
    return build_model()