# *IMPORTING LIBRARIES*

In [None]:
%pip install tensorflow

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os

# *DATA PREPROCESSING*

In [None]:
# Load the training CSV. Column 1 is used as the model input and every
# column from index 2 onward is used as the target matrix.
dataset = pd.read_csv('train_toxic.csv')
x = dataset.iloc[:, 1]
y = dataset.iloc[:, 2:].values

In [2]:
from tensorflow.keras.layers import TextVectorization

# Vectorizer that maps raw text to integer token ids: the vocabulary is capped
# at 20000 entries and every sequence is padded/truncated to 1500 tokens.
tv = TextVectorization(
    max_tokens=20000,
    output_mode='int',
    output_sequence_length=1500,
)
# Learn the vocabulary from the input column. (The vocabulary is only
# meaningful after this call, so it is not queried beforehand.)
tv.adapt(x.values)

In [None]:
# Inspect the vocabulary held by the vectorizer; as the last expression of the
# notebook cell, the returned list is shown as the cell output.
tv.get_vocabulary()

In [None]:
# Run the whole input column through the vectorizer and show the result.
vectorized_text = tv(x.values)
print(vectorized_text)

In [None]:
# MCSHBAP: map, cache, shuffle, batch, prefetch
data = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
data = data.cache()
# The train/val/test partitions are carved out of this pipeline with
# take()/skip(). With the default reshuffle_each_iteration=True the order is
# redrawn on every pass, so examples would move between partitions from one
# epoch to the next and leak training data into validation/test. Pinning the
# shuffle order keeps the three partitions disjoint.
data = data.shuffle(160000, reshuffle_each_iteration=False)
data = data.batch(16)
data = data.prefetch(8)

In [None]:
# Size of the pipeline; because `data` is batched, this counts batches.
len(data)

In [None]:
# Pull a single batch out of the pipeline as NumPy arrays and display it.
batch_x, batch_y = next(data.as_numpy_iterator())
print(batch_x)
print(batch_y)

In [None]:
# Partition the batched pipeline: first 70% of the batches for training, the
# next 20% for validation, and the batches after the 90% mark for testing.
total_batches = len(data)
train = data.take(int(total_batches * 0.7))
val = data.skip(int(total_batches * .7)).take(int(total_batches * 0.2))
test = data.skip(int(total_batches * .9)).take(int(total_batches * 0.1))
# One size per line, in train / val / test order.
print(len(train), len(val), len(test), sep='\n')

# *BUILDING MODEL*

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,GRU,Dropout,GlobalMaxPooling1D,Bidirectional,Dense,Embedding,Input

In [None]:
# Sequence classifier: token ids -> embeddings -> bidirectional GRU ->
# max-pool over the sequence axis -> dense head with six sigmoid outputs.
model = Sequential([
    Input(shape=(1500,)),
    Embedding(20001, 64),
    Bidirectional(GRU(32, activation='tanh', return_sequences=True)),
    GlobalMaxPooling1D(),
    Dense(128, activation='relu'),
    Dense(6, activation='sigmoid'),
])


In [None]:
# Binary cross-entropy is applied to each sigmoid output; optimize with Adam.
model.compile(optimizer='Adam', loss='BinaryCrossentropy')

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

# *TRAINING THE MODEL*

In [None]:
# Train for three epochs, evaluating on the validation split after each one.
model.fit(train, epochs=3, validation_data=val)

# Persist the trained model to disk.
model.save('toxicity_model.h5')

# *TESTING THE MODEL*

In [None]:
# Vectorize one sample comment, list the label names, then score the comment.
text = tv("get lost bitch")
print(dataset.columns[2:])
# predict() expects a leading batch axis, so add one in front of the sequence.
model.predict(np.expand_dims(text, 0))

In [None]:
from tensorflow.keras.metrics import (
    BinaryAccuracy,
    CategoricalAccuracy,
    Precision,
    Recall,
)

# Streaming metrics accumulated over every batch of the test split.
pre = Precision()
re = Recall()
# The model emits independent sigmoid scores per label (multi-label), and the
# values are flattened below, so accuracy must be computed element-wise with a
# 0.5 threshold. CategoricalAccuracy would instead compare argmax positions,
# which is meaningless for flattened multi-label outputs.
acc = BinaryAccuracy()
for batch in test.as_numpy_iterator():
    # Unpack the batch into inputs and ground-truth labels
    X_true, y_true = batch
    # Score the batch
    yhat = model.predict(X_true)

    # Flatten so every (sample, label) pair is treated as one binary decision
    y_true = y_true.flatten()
    yhat = yhat.flatten()

    pre.update_state(y_true, yhat)
    re.update_state(y_true, yhat)
    acc.update_state(y_true, yhat)

In [None]:
# Report the metric values accumulated over the test batches.
print(f'Precision: {pre.result()}, Recall:{re.result()}, Accuracy:{acc.result()}')

# *USER INTERACTIVE INTERFACE*

In [None]:
%pip install gradio jinja2

In [None]:
import tensorflow as tf
%pip install gradio
import gradio as gr

In [None]:
# Reload the trained model from the file written by
# model.save('toxicity_model.h5') in the training section; the path here must
# match the path used when saving.
model = tf.keras.models.load_model('toxicity_model.h5')

In [None]:
def score_comment(comment):
    """Score one comment and report each label as a True/False line.

    The comment is vectorized with the module-level ``tv`` layer and passed
    to ``model``; each output score is compared against 0.5.

    Args:
        comment: The text to classify.

    Returns:
        A string with one "<label>: <bool>" line (newline-terminated) for
        every column of ``dataset`` from index 2 onward.
    """
    scores = model.predict(tv([comment]))[0]
    label_names = dataset.columns[2:]
    return ''.join(
        '{}: {}\n'.format(name, scores[pos] > 0.5)
        for pos, name in enumerate(label_names)
    )

In [None]:
# Two-line text box in, plain text out, backed by score_comment.
comment_box = gr.Textbox(lines=2, placeholder='Comment to score')
interface = gr.Interface(fn=score_comment, inputs=comment_box, outputs='text')
interface.launch()