In [1]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding
from tensorflow.keras.models import save_model




In [3]:
# Load the Jigsaw training split; the CSV sits inside a folder that is itself
# named 'train.csv'.
df = pd.read_csv(
    os.path.join(
        'jigsaw-toxic-comment-classification-challenge',
        'train.csv',
        'train.csv',
    )
)

In [4]:
# Preview the first five rows to check the column layout.
df.head(n=5)

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\r\nWhy the edits made under my use...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\r\nMore\r\nI can't make any real suggestions...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [5]:
# Names of the six label columns, in the order used to decode predictions.
categories_list = [
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]


In [6]:
# Model input: the raw comment strings.
X = df['comment_text']
# Targets: one 0/1 column per category. Selecting by name (instead of by column
# position) keeps the label order tied to `categories_list`, which is the order
# used later to translate prediction indices back into category names, and does
# not break if the CSV gains or loses a leading column.
y = df[categories_list].values

In [7]:
# Upper bound on the vocabulary size kept by the text vectorizer.
# NOTE(review): the embedding table is sized from this value as well; confirm
# that two million entries is intended.
MAX_FEATURES = 2_000_000

In [8]:
# Turns each comment into a fixed-length row of 1800 integer token ids
# (shorter comments are padded, longer ones truncated).
vectorizer = TextVectorization(
    max_tokens=MAX_FEATURES,
    output_mode='int',
    output_sequence_length=1800,
)




In [9]:
# Learn the vocabulary from every training comment.
vectorizer.adapt(data=X.values)




In [10]:
# Tokenize every comment up front; each row holds 1800 token ids (the
# vectorizer's output_sequence_length). The whole result is held in memory.
vectorized_text = vectorizer(X.values)

In [None]:
# Input pipeline: (token ids, labels) pairs -> cache -> shuffle -> batches of 16.
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
dataset = dataset.cache()
# The train/val/test subsets are carved out of this dataset with take()/skip(),
# and every subset iterates the pipeline from the start. By default shuffle()
# draws a new order on each iteration, so the subsets would overlap (test
# batches would contain training examples). Freezing the order with
# reshuffle_each_iteration=False keeps the three subsets disjoint, and the fixed
# seed makes the split reproducible across kernel restarts.
# If training runs for several epochs, reshuffle the training subset itself.
dataset = dataset.shuffle(160000, seed=42, reshuffle_each_iteration=False)
dataset = dataset.batch(16)
dataset = dataset.prefetch(8)  # overlap batch preparation with training

In [None]:
# 70 % / 20 % / 10 % split, measured in batches.
# NOTE: the three subsets are only disjoint if `dataset` yields its batches in
# the same order every time it is iterated.
n_batches = len(dataset)
n_train = int(n_batches * .7)
n_val = int(n_batches * .2)
n_test = int(n_batches * .1)

train = dataset.take(n_train)
val = dataset.skip(n_train).take(n_val)
test = dataset.skip(int(n_batches * .9)).take(n_test)

In [None]:
# Embedding -> bidirectional LSTM -> three dense layers -> six sigmoid outputs
# (one independent probability per category).
model = Sequential([
    Embedding(MAX_FEATURES + 1, 32),
    Bidirectional(LSTM(32, activation='tanh')),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(6, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy treats each of the six sigmoid outputs as its own
# yes/no problem.
model.compile(
    optimizer='Adam',
    loss='BinaryCrossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Single training pass over the training batches, validated on `val`.
history = model.fit(
    x=train,
    epochs=1,
    validation_data=val,
)

In [None]:
# CategoricalAccuracy stays in the import list only so that any other cell
# referring to it keeps working; it is no longer used here.
from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall, CategoricalAccuracy

# Streaming metrics, accumulated over every batch of the test subset.
pre = Precision()
re = Recall()
# The model emits six independent sigmoid probabilities (multi-label), so
# accuracy has to be measured per label at a 0.5 threshold. CategoricalAccuracy
# compares argmax positions instead, which on flattened vectors collapses each
# batch into a single meaningless comparison.
acc = BinaryAccuracy()
for batch in test.as_numpy_iterator():
    # Unpack the batch
    X_true, y_true = batch
    # Predict without printing a progress bar for every batch
    yhat = model.predict(X_true, verbose=0)

    # Flatten so every (sample, label) pair counts as one binary decision
    y_true = y_true.flatten()
    yhat = yhat.flatten()

    pre.update_state(y_true, yhat)
    re.update_state(y_true, yhat)
    acc.update_state(y_true, yhat)
print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')

In [None]:
# Persist the trained network. The text vectorizer is not a layer of `model`,
# so it is not stored in this file.
save_model(model, 'result-model.h5')

In [12]:
# Restore the trained network from disk. `vectorizer` is a separate object and
# must already exist in the kernel before predictions are made.
model = tf.keras.models.load_model(filepath='result-model.h5')

In [None]:
# Import tkinter under its own namespace. `from tkinter import *` also brings in
# tkinter's layout constants, including `X = 'x'` and `Y = 'y'`, which would
# overwrite the notebook's `X` series of comments.
import tkinter as tk


class GUI:
    """Small Tk window that classifies a typed sentence with the toxicity model.

    Relies on the notebook-level `vectorizer` and `model` objects, which must
    exist before `gui()` is called.
    """

    # Category names, indexed in the same order as the model's output vector.
    CATEGORIES = ('toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate')

    def __init__(self):
        """Create the top-level window."""
        self.root = tk.Tk()
        self.root.title('Pragateesh\'s project')

    def gui(self):
        """Build the widgets and run the Tk event loop until the window closes."""
        self.root.geometry('1920x1080')
        self.root.configure(background='#54a849')
        entery = tk.Entry(self.root, borderwidth=15, width=100, font=('BOLD', 12))
        entery.pack(padx=100, pady=100, ipady=10)

        # A single label reused for every verdict. It is created unpacked so
        # nothing is shown before the first submit; creating a new Label per
        # click would stack the old results underneath each other.
        result_label = tk.Label(self.root, padx=200, pady=160, fg='black',
                                bg='#DAF7A6', font=('BOLD', 12))

        def input_Taker():
            """Classify the text currently in the entry box and display the verdict."""
            text = entery.get()
            input_str = vectorizer(text)
            # The model expects a batch, so add a leading batch axis of size 1.
            res = model.predict(np.expand_dims(input_str, 0))

            # One sentence is submitted at a time, so only the first row matters.
            flagged = res[0] > 0.5
            result = [name for name, hit in zip(self.CATEGORIES, flagged) if hit]

            if result:
                message = '\n'.join(f'The sentence is {category}' for category in result)
            else:
                message = 'its Normal sentence'

            print(res)
            print(result)

            result_label.configure(text=message)
            # Packing an already-packed widget is harmless; this makes the label
            # appear on the first submit and leaves it in place afterwards.
            result_label.pack()

        button = tk.Button(self.root, text='submit', padx=100, pady=10, command=input_Taker)
        button.place(y=180, x=520)
        self.root.mainloop()


obj = GUI()
obj.gui()

[[0.99973065 0.4055692  0.98139775 0.01615946 0.9416433  0.05600086]]
['toxic', 'obscene', 'insult']
