In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf

In [2]:
# Location of the training CSV, relative to the notebook's working directory.
train_csv_path = os.path.join('Dataset', 'train.csv')
df = pd.read_csv(train_csv_path)

In [None]:
# Preview the first rows of the loaded frame as a sanity check on the CSV read.
df.head()

In [4]:
from tensorflow.keras.layers import TextVectorization

In [5]:
# Model input: the raw comment strings.
x = df['comment_text']
# Targets: every column from the third one onward, as a plain NumPy matrix.
label_columns = df.columns[2:]
y = df[label_columns].to_numpy()

In [None]:
# Show the dimensions of the label matrix built from df.columns[2:].
y.shape

In [7]:
# Upper bound on the vocabulary size (number of distinct tokens kept).
MAX_FEATURES = 200_000

In [None]:
# Text -> integer-id layer: vocabulary capped at MAX_FEATURES tokens, and every
# comment padded or truncated to a fixed length of 1800 ids.
vectorizer = TextVectorization(
    output_mode='int',
    max_tokens=MAX_FEATURES,
    output_sequence_length=1800,
)

In [9]:
# Fit the vectorizer's vocabulary on the raw comment strings.
vectorizer.adapt(x.to_numpy())

In [10]:
# Run every comment through the adapted vectorizer in a single call.
raw_comments = x.to_numpy()
vectorized_text = vectorizer(raw_comments)

In [11]:
# Data pipeline: (token ids, labels) pairs -> cache -> shuffle -> batch -> prefetch.
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
dataset = dataset.cache()
# The train/val/test splits are carved out of this dataset with take()/skip().
# By default shuffle() draws a new order on every iteration, which would move
# examples between the splits from epoch to epoch and leak validation/test data
# into training. Pin the order (and seed it) so the splits stay disjoint and
# reproducible. Trade-off: batches are not reshuffled between epochs.
dataset = dataset.shuffle(160000, seed=42, reshuffle_each_iteration=False)
dataset = dataset.batch(16)
dataset = dataset.prefetch(8)

In [22]:
# Split the batched dataset by position: first 70% of batches for training,
# the next 20% for validation, and the final 10% for testing.
n_batches = len(dataset)
n_train = int(n_batches * 0.7)
n_val = int(n_batches * 0.2)
n_test = int(n_batches * 0.1)

train = dataset.take(n_train)
val = dataset.skip(n_train).take(n_val)
test = dataset.skip(int(n_batches * 0.9)).take(n_test)

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Bidirectional,Dropout,Dense,Embedding

In [None]:
# Multi-label classifier: embedding -> bidirectional LSTM -> dense stack ->
# six independent sigmoid outputs.
model = Sequential([
    # +1 on top of the vocabulary cap for the embedding's input dimension.
    Embedding(MAX_FEATURES + 1, 32),
    Bidirectional(LSTM(32, activation='tanh')),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(6, activation='sigmoid'),
])
          

In [27]:
# Binary cross-entropy across the sigmoid outputs, optimised with Adam.
model.compile(
    optimizer='Adam',
    loss='BinaryCrossentropy',
)

In [None]:
# Print the layer-by-layer overview of the compiled model.
model.summary()

In [None]:
# Train for 10 epochs, evaluating on the validation split after each one;
# the returned History object is kept for plotting.
history = model.fit(
    train,
    validation_data=val,
    epochs=10,
)

In [48]:
from matplotlib import pyplot as plt

In [None]:
# Plot the per-epoch values recorded by fit().
# figsize is passed to DataFrame.plot itself: with no `ax` argument pandas opens
# its own figure, so a separate plt.figure(figsize=...) call would only leave an
# empty extra figure while the real plot kept the default size.
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.show()

In [30]:
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy

In [31]:
# Streaming metrics that are updated batch by batch on the test split.
pre = Precision()
re = Recall()
# The model emits independent sigmoid scores per label and the evaluation
# flattens them, so accuracy must be computed per element with a 0.5 threshold.
# CategoricalAccuracy compares argmax positions instead, which is meaningless
# for multi-label output.
acc = tf.keras.metrics.BinaryAccuracy()

In [None]:
# Start from empty accumulators so that re-running this cell does not fold the
# previous run's counts into the new results.
for metric in (pre, re, acc):
    metric.reset_state()

# Feed every test batch through the model and update the streaming metrics.
for batch in test.as_numpy_iterator():
    x_true, y_true = batch
    # verbose=0: avoid printing one progress bar per batch.
    yhat = model.predict(x_true, verbose=0)
    # Flatten (batch, labels) to 1-D so each label decision counts as one sample.
    y_true = y_true.flatten()
    yhat = yhat.flatten()
    pre.update_state(y_true, yhat)
    re.update_state(y_true, yhat)
    acc.update_state(y_true, yhat)

In [None]:
# Pull the accumulated metric values out as NumPy scalars, then report them.
precision_value = pre.result().numpy()
recall_value = re.result().numpy()
accuracy_value = acc.result().numpy()
print(f'Precision: {precision_value}, Recall: {recall_value}, Accuracy: {accuracy_value}')

In [12]:
import tensorflow as tf
import gradio as gr

In [38]:
# Persist the trained model to an HDF5 file in the working directory.
# Note: the fitted `vectorizer` is not part of this file.
model.save(filepath='toxic_detector.h5')

In [None]:
# Reload the saved model from disk, replacing the in-memory `model`.
model = tf.keras.models.load_model(filepath='toxic_detector.h5')

In [14]:
def score_comment(comment):
    """Classify one comment and return a per-label True/False report.

    The comment is vectorized with the notebook-level `vectorizer`, scored by
    the notebook-level `model`, and each score is compared against 0.5.

    Args:
        comment: The text to score.

    Returns:
        A string with one "<label>: <True|False>" line per column in
        `df.columns[2:]`, each line terminated by a newline.
    """
    token_ids = vectorizer([comment])
    # Only one comment was submitted, so take the first (only) row of scores.
    scores = model.predict(token_ids)[0]

    report_lines = [
        '{}: {}\n'.format(label, scores[position] > 0.5)
        for position, label in enumerate(df.columns[2:])
    ]
    return ''.join(report_lines)

In [None]:
# Build the demo UI: a multi-line text input wired to score_comment, with the
# report shown as plain text.
# The gr.inputs namespace was deprecated in Gradio 3 and removed in Gradio 4,
# so use the top-level Textbox component when it exists and only fall back to
# the legacy location on old installs.
_textbox_cls = gr.Textbox if hasattr(gr, 'Textbox') else gr.inputs.Textbox
interface = gr.Interface(fn=score_comment,
                         inputs=_textbox_cls(lines=4, placeholder='comment here'),
                         outputs='text')

In [None]:
# Start the Gradio app. share=True requests a shareable link in addition to the
# local server, so the demo may be reachable from outside this machine.
interface.launch(share=True)