In [28]:
import os
import re
import string

import pandas as pd
import numpy as np

from collections import Counter

import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import regularizers
from tensorflow.keras import preprocessing
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns


import warnings
warnings.filterwarnings('ignore')


In [29]:
main_data=pd.read_csv("train.csv")
data=main_data.copy()
data.drop(columns=['id'],axis=1,inplace=True)


In [30]:
data1=data[data['label']==1]
data0=data[data['label']==0]
data=pd.concat([data,data1,data1], axis=0) #This line duplicated the minority class data


In [5]:
def remove_emoji(text):
    emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           u"\U00002702-\U000027B0"
                           u"\U000024C2-\U0001F251"
                           "]+", flags=re.UNICODE)
    
    return emoji_pattern.sub(r'', text)


def clean_text(text ): 
    delete_dict = {sp_character: '' for sp_character in string.punctuation} 
    delete_dict[' '] = ' ' 
    table = str.maketrans(delete_dict)
    text1 = text.translate(table)
    textArr= text1.split()
    text2 = ' '.join([w for w in textArr if ( not w.isdigit() and  ( not w.isdigit() and len(w)>2))]) 
    
    return text2.lower()

In [6]:
data['text'] = data['text'].apply(remove_emoji)
data['text'] = data['text'].apply(clean_text)
data['Num_words_text'] = data['text'].apply(lambda x:len(str(x).split())) 

train_data,test_data= train_test_split(data, test_size=0.2)
train_data.reset_index(drop=True,inplace=True)
test_data.reset_index(drop=True,inplace=True)

In [None]:
#train and validation dataset splitting
X_train, X_valid, y_train, y_valid = train_test_split(train_data['text'].tolist(),\
                                                      train_data['label'].tolist(),\
                                                      test_size=0.2,\
                                                      stratify = train_data['label'].tolist(),\
                                                      random_state=0)

In [9]:
num_words = 50000
tokenizer = Tokenizer(num_words=num_words,oov_token="unk")
tokenizer.fit_on_texts(X_train)

In [None]:
x_train = np.array( tokenizer.texts_to_sequences(X_train) )
x_valid = np.array( tokenizer.texts_to_sequences(X_valid) )
x_test  = np.array( tokenizer.texts_to_sequences(test_data['twtexteet'].tolist()) )

maxlen=50
x_train = pad_sequences(x_train, padding='post', maxlen=maxlen)
x_valid = pad_sequences(x_valid, padding='post', maxlen=maxlen)
x_test = pad_sequences(x_test, padding='post', maxlen=maxlen)


train_labels = np.asarray(y_train)
valid_labels = np.asarray(y_valid)
test_labels = np.asarray(test_data['label'].tolist())

train_ds = tf.data.Dataset.from_tensor_slices((x_train,train_labels))
valid_ds = tf.data.Dataset.from_tensor_slices((x_valid,valid_labels))
test_ds = tf.data.Dataset.from_tensor_slices((x_test,test_labels))

In [None]:
#model preparation
max_features =50000
embedding_dim =16
sequence_length = maxlen

model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(max_features +1, embedding_dim, input_length=sequence_length, embeddings_regularizer = regularizers.l2(0.005))) 
model.add(tf.keras.layers.Dropout(0.4))

model.add(tf.keras.layers.LSTM(embedding_dim,dropout=0.2, recurrent_dropout=0.2,return_sequences=True,kernel_regularizer=regularizers.l2(0.005),\
                                                             bias_regularizer=regularizers.l2(0.005)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu',kernel_regularizer=regularizers.l2(0.001),\
                                bias_regularizer=regularizers.l2(0.001),))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(8, activation='relu',kernel_regularizer=regularizers.l2(0.001),\
                                bias_regularizer=regularizers.l2(0.001),))
model.add(tf.keras.layers.Dropout(0.4))
model.add(tf.keras.layers.Dense(1,activation='sigmoid'))
model.compile(loss=tf.keras.losses.BinaryCrossentropy(),optimizer=tf.keras.optimizers.Adam(1e-3),metrics=[tf.keras.metrics.BinaryAccuracy()])

In [13]:
print(len(model.layers))

9


In [None]:
epochs = 10
# Fit the model using the train and test datasets.
history = model.fit(train_ds.shuffle(5000).batch(1024),
                    epochs= epochs ,
                    validation_data=valid_ds.batch(1024),
                    verbose=1)

In [16]:
x_test  = np.array( tokenizer.texts_to_sequences(test_data['text'].tolist()) )
x_test = pad_sequences(x_test, padding='post', maxlen=maxlen)
predictions = model.predict(x_test)

In [None]:
#plot predictions
f, (ax1, ax2) = plt.subplots(1, 2,figsize=(15,5))
ax1.scatter(predictions,range(0,len(predictions)),alpha=0.2)
ax2=sns.distplot(predictions)

In [None]:
cutoff=0.86
test_data['pred_sentiment']= predictions
test_data['pred_sentiment'] = np.where((test_data.pred_sentiment >= cutoff),1,test_data.pred_sentiment)
test_data['pred_sentiment'] = np.where((test_data.pred_sentiment < cutoff),0,test_data.pred_sentiment)

labels = [0, 1]
print(classification_report(test_data['label'].tolist(),test_data['pred_sentiment'].tolist(),labels=labels))

In [None]:
final_test=pd.read_csv("test.csv")
# final_test=pd.read_csv("train.csv")

ftest=final_test.copy()
ftest.drop(columns=['id'],axis=1,inplace=True)

ftest['text'] = ftest['text'].apply(remove_emoji)
ftest['text'] = ftest['text'].apply(clean_text)

f_test  = np.array( tokenizer.texts_to_sequences(ftest['text'].tolist()) )
f_test = pad_sequences(f_test, padding='post', maxlen=maxlen)

In [21]:
predictions = model.predict(f_test)

In [None]:
#plot predictions
f, (ax1, ax2) = plt.subplots(1, 2,figsize=(15,5))
ax1.scatter(predictions,ftest.index,alpha=0.2)
ax2=sns.distplot(predictions)

In [24]:
ftest['pred_sentiment']= predictions
ftest['pred_sentiment'] = np.where((ftest.pred_sentiment >= cutoff),1,ftest.pred_sentiment)
ftest['pred_sentiment'] = np.where((ftest.pred_sentiment < cutoff),0,ftest.pred_sentiment)

pd.set_option('display.max_colwidth', None)
ftest[ftest['pred_sentiment']==1]

In [27]:
# def test1(incoming_string):
# print("incoming_string (from server): ")
# incoming_string = input()

flg = 1
while flg:

  print("Input: ('0' to exit)")
  incoming_string = input()
  # os.system('clear')
  if incoming_string=='0':
    print("Exit successful!!")
    break

  # print("hereeeeee")
  a1 = 100
  id1 = np.int64(a1)
  a2 = 101
  id2 = np.int64(a2)


  # print(type(incoming_string))

  data = {'id': [id1,id2],
          'text': ['Example String',incoming_string]
          }

  final_test = pd.DataFrame(data)
  # final_test
  # products_list = df.values.tolist()
  # print(type(final_test))


  # final_test=pd.read_csv("test.csv")

  ftest2=final_test.copy()

  ftest2.drop(columns=['id'],axis=1,inplace=True)


  # print(type(ftest))

  ftest2['text'] = ftest2['text'].apply(remove_emoji)
  ftest2['text'] = ftest2['text'].apply(clean_text)
  # print(type(ftest['text'][0]))
  f_test  = np.array( tokenizer.texts_to_sequences(ftest2['text'].tolist()) )
  f_test = pad_sequences(f_test, padding='post', maxlen=maxlen)

  # display((x_test))
  # display((f_test))

  predictions = model.predict(f_test)




  #mapping prediction to 1 or 0
  cutoff = 0.82
  ftest2['pred_sentiment']= predictions

  # ftest['pred_sentiment'] = np.where((ftest.pred_sentiment >= cutoff),1,ftest.pred_sentiment)
  # ftest['pred_sentiment'] = np.where((ftest.pred_sentiment < cutoff),0,ftest.pred_sentiment)



  # ///////////////////////////////
  # ftest2['pred_sentiment'] = np.where((ftest2.pred_sentiment >= cutoff),1,ftest2.pred_sentiment)
  # ftest2['pred_sentiment'] = np.where((ftest2.pred_sentiment < cutoff),0,ftest2.pred_sentiment)
  # ///////////////////////////////////////////////////////


  # print((ftest['pred_sentiment'][0]))




  

  #processed texts categorized as hate speech
  # pd.set_option('display.max_colwidth', None)


  # ftest[ftest['pred_sentiment']==1]

  # final_test.iloc[ftest[ftest['pred_sentiment']==1].index]

  # p2 = predictions
  # # predictions
  # p2.sort()
  # print(type(predictions))
  # for i in range(100):
  print(ftest2)

Input: ('0' to exit)
             text  pred_sentiment
0  example string        0.001622
1           white        0.830708
Input: ('0' to exit)
             text  pred_sentiment
0  example string        0.001622
1            fuck        0.485885
Input: ('0' to exit)
0
Exit successful!!


In [None]:
# def test1(incoming_string):
# print("incoming_string (from server): ")
# incoming_string = input()

def Predict_input(str1):
  flg = 1
  # while flg:

  # print("Input: ('0' to exit)")
  incoming_string = str1
  # os.system('clear')
  # if incoming_string=='0':
    # print("Exit successful!!")
    # break

  # print("hereeeeee")
  a1 = 100
  id1 = np.int64(a1)
  a2 = 101
  id2 = np.int64(a2)


  # print(type(incoming_string))

  data = {'id': [id1,id2],
          'text': ['Example String',incoming_string]
          }

  final_test = pd.DataFrame(data)
  # final_test
  # products_list = df.values.tolist()
  # print(type(final_test))


  # final_test=pd.read_csv("test.csv")

  ftest2=final_test.copy()

  ftest2.drop(columns=['id'],axis=1,inplace=True)


  # print(type(ftest))

  ftest2['text'] = ftest2['text'].apply(remove_emoji)
  ftest2['text'] = ftest2['text'].apply(clean_text)
  # print(type(ftest['text'][0]))
  f_test  = np.array( tokenizer.texts_to_sequences(ftest2['text'].tolist()) )
  f_test = pad_sequences(f_test, padding='post', maxlen=maxlen)

  # display((x_test))
  # display((f_test))

  predictions = model.predict(f_test)




  #mapping prediction to 1 or 0
  cutoff = 0.82
  ftest2['pred_sentiment']= predictions

  # ftest['pred_sentiment'] = np.where((ftest.pred_sentiment >= cutoff),1,ftest.pred_sentiment)
  # ftest['pred_sentiment'] = np.where((ftest.pred_sentiment < cutoff),0,ftest.pred_sentiment)



  # ///////////////////////////////
  ftest2['pred_sentiment'] = np.where((ftest2.pred_sentiment >= cutoff),1,ftest2.pred_sentiment)
  ftest2['pred_sentiment'] = np.where((ftest2.pred_sentiment < cutoff),0,ftest2.pred_sentiment)
  # ///////////////////////////////////////////////////////


  # print((ftest['pred_sentiment'][0]))






  #processed texts categorized as hate speech
  # pd.set_option('display.max_colwidth', None)


  # ftest[ftest['pred_sentiment']==1]

  # final_test.iloc[ftest[ftest['pred_sentiment']==1].index]

  # p2 = predictions
  # # predictions
  # p2.sort()
  # print(type(predictions))
  # for i in range(100):
  ret = int(ftest2['pred_sentiment'][1])
  # print(ret)
  return ret
# r = Predict_input("fuck girl")

In [None]:
# !pip install flask-ngrok



In [None]:
from flask import Flask,request,Response
from flask_ngrok import run_with_ngrok
app = Flask(__name__)
run_with_ngrok(app)   

@app.route("/",methods=['GET', 'POST','DELETE', 'PATCH'])
def home():
  if request.method == 'POST':
    av = request.form.get('name')
    print("here->",av)
    model_out = Predict_input(av)
    model_out_str = str(model_out)
    # return Response("{'Hate':'1'}", status=201, mimetype='application/json')
    return Response(model_out_str, status=201, mimetype='application/json')

app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://21b1-35-245-153-21.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [20/Apr/2022 16:44:28] "[37mPOST / HTTP/1.1[0m" 201 -


here-> You are a racist person


127.0.0.1 - - [20/Apr/2022 16:44:41] "[37mPOST / HTTP/1.1[0m" 201 -


here-> You are a nice person


[2022-04-20 16:45:29,064] ERROR in app: Exception on / [GET]
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1953, in full_dispatch_request
    return self.finalize_request(rv)
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 1968, in finalize_request
    response = self.make_response(rv)
  File "/usr/local/lib/python3.7/dist-packages/flask/app.py", line 2098, in make_response
    "The view function did not return a valid response. The"
TypeError: The view function did not return a valid response. The function either returned None or ended without a return statement.
127.0.0.1 - - [20/Apr/2022 16:45:29] "[35m[1mGET / HTTP/1.1[0m" 500 -
127.0.0.1 - - [20/Apr/2022 16:46:53] "[37mPOST / HTTP/1.1[0m" 201 -


here-> Your a black nigga



127.0.0.1 - - [20/Apr/2022 16:47:16] "[37mPOST / HTTP/1.1[0m" 201 -


here-> Your a black nice nigerian



127.0.0.1 - - [20/Apr/2022 16:47:34] "[37mPOST / HTTP/1.1[0m" 201 -


here-> Your a looking like bnanana



127.0.0.1 - - [20/Apr/2022 16:47:46] "[37mPOST / HTTP/1.1[0m" 201 -


here-> Your a looking cool



127.0.0.1 - - [20/Apr/2022 16:48:04] "[37mPOST / HTTP/1.1[0m" 201 -


here-> Your a looking like a bitch



127.0.0.1 - - [20/Apr/2022 16:48:29] "[37mPOST / HTTP/1.1[0m" 201 -


here-> lets travel together and fuck eachother



127.0.0.1 - - [20/Apr/2022 16:51:20] "[37mPOST / HTTP/1.1[0m" 201 -


here-> You are a nice person


127.0.0.1 - - [20/Apr/2022 16:51:27] "[37mPOST / HTTP/1.1[0m" 201 -


here-> fuck


127.0.0.1 - - [20/Apr/2022 16:51:53] "[37mPOST / HTTP/1.1[0m" 201 -


here-> i will fuck you


127.0.0.1 - - [20/Apr/2022 16:52:00] "[37mPOST / HTTP/1.1[0m" 201 -


here-> fuck you


127.0.0.1 - - [20/Apr/2022 16:52:19] "[37mPOST / HTTP/1.1[0m" 201 -


here-> i will fuck you hard


127.0.0.1 - - [20/Apr/2022 16:52:39] "[37mPOST / HTTP/1.1[0m" 201 -


here-> i will fuck hard


127.0.0.1 - - [20/Apr/2022 16:52:48] "[37mPOST / HTTP/1.1[0m" 201 -


here-> fuck hard


127.0.0.1 - - [20/Apr/2022 16:53:09] "[37mPOST / HTTP/1.1[0m" 201 -


here-> never fuck anyone


127.0.0.1 - - [20/Apr/2022 16:53:28] "[37mPOST / HTTP/1.1[0m" 201 -


here-> always fuck girl


127.0.0.1 - - [20/Apr/2022 16:53:34] "[37mPOST / HTTP/1.1[0m" 201 -


here-> kiss my ass


127.0.0.1 - - [20/Apr/2022 16:53:53] "[37mPOST / HTTP/1.1[0m" 201 -


here-> first kiss was awesome


127.0.0.1 - - [20/Apr/2022 16:54:19] "[37mPOST / HTTP/1.1[0m" 201 -


here-> my first kiss in murree was awesome.


127.0.0.1 - - [20/Apr/2022 16:54:43] "[37mPOST / HTTP/1.1[0m" 201 -


here-> he forcefully kiss me 


127.0.0.1 - - [20/Apr/2022 16:55:46] "[37mPOST / HTTP/1.1[0m" 201 -


here-> asian racist 


127.0.0.1 - - [20/Apr/2022 16:55:57] "[37mPOST / HTTP/1.1[0m" 201 -


here-> asian nice people


127.0.0.1 - - [20/Apr/2022 16:56:11] "[37mPOST / HTTP/1.1[0m" 201 -


here-> white people are superior than black


127.0.0.1 - - [20/Apr/2022 16:57:56] "[37mPOST / HTTP/1.1[0m" 201 -


here-> white people are superior than black


127.0.0.1 - - [20/Apr/2022 16:58:40] "[37mPOST / HTTP/1.1[0m" 201 -


here-> whitee people are superior than black


127.0.0.1 - - [20/Apr/2022 16:59:00] "[37mPOST / HTTP/1.1[0m" 201 -


here-> kiss my ass
