In [None]:
import os
import tensorflow as tf
import pandas as pd
import numpy as np

In [None]:
# Load the comment dataset; the path is relative to the notebook's working directory.
dataset = pd.read_csv('sample_data/toxicityLevel.csv')

In [None]:
from tensorflow.keras.layers import TextVectorization

In [None]:
# Model input: the `comment_text` column.
X = dataset.loc[:, 'comment_text']
# Targets: every column from position 2 onward, as a 2-D NumPy array.
# NOTE(review): assumes the first two columns are not labels — confirm against the CSV.
y = dataset.iloc[:, 2:].values

In [None]:
# Upper bound on vocabulary size; consumed by the TextVectorization layer
# (max_tokens) and, plus one, by the Embedding layer's input dimension.
MAX_FEATURES = 200_000

In [None]:
# Text -> integer token ids, with the vocabulary capped at MAX_FEATURES and
# every output sequence fixed to a length of 1800 ids.
vectorizer = TextVectorization(
    output_mode='int',
    output_sequence_length=1800,
    max_tokens=MAX_FEATURES,
)

In [None]:
# Build the vectorizer's vocabulary from all comments in the dataset.
# NOTE(review): this includes rows that later land in the val/test splits.
vectorizer.adapt(X.values)

In [None]:
# Convert every comment to its token-id sequence in a single call.
vectorizedText = vectorizer(X.values)

In [None]:
# Input pipeline: pair token ids with labels, then cache, shuffle, batch, prefetch.
ds = tf.data.Dataset.from_tensor_slices((vectorizedText, y))
ds = ds.cache()
# Shuffle once and keep that order on every later pass. The train/val/test
# subsets are carved out of `ds` with take()/skip(); with the default
# reshuffle_each_iteration=True each pass would draw a new order, so the same
# example could show up in more than one subset.
# Trade-off: the training order is also fixed across epochs.
ds = ds.shuffle(160000, reshuffle_each_iteration=False)
ds = ds.batch(64)
# Keep up to 8 batches prepared ahead of the consumer.
ds = ds.prefetch(8)

In [None]:
# Carve the batched dataset into roughly 70% train / 20% validation / 10% test,
# counted in whole batches (each fraction is truncated by int()).
train, val, test = (
    ds.take(int(len(ds) * .7)),
    ds.skip(int(len(ds) * .7)).take(int(len(ds) * .2)),
    ds.skip(int(len(ds) * .9)).take(int(len(ds) * .1)),
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding

In [None]:
# Bidirectional-LSTM classifier, assembled in a single constructor call.
model = Sequential([
    # One 32-dimensional vector per token id; input size is the vocabulary cap + 1.
    Embedding(MAX_FEATURES+1, 32),
    Bidirectional(LSTM(32, activation='tanh')),
    # Fully connected feature extractor.
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    # Six sigmoid outputs, one score per output unit.
    Dense(6, activation='sigmoid'),
])

In [None]:
# Train with Adam against a binary cross-entropy loss.
model.compile(
    optimizer='Adam',
    loss='BinaryCrossentropy',
)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 32)          6400032   
                                                                 
 bidirectional (Bidirection  (None, 64)                16640     
 al)                                                             
                                                                 
 dense (Dense)               (None, 128)               8320      
                                                                 
 dense_1 (Dense)             (None, 256)               33024     
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 6)                 774       
                                                        

In [None]:
# Train for a single epoch on the train split, evaluating on the validation split.
history = model.fit(train, epochs=1, validation_data=val)



In [None]:
# Vectorize one hand-written comment for a quick manual check of the model.
inputText = vectorizer('You freaking suck! You better watch your back pal cause I am coming to get you')

In [None]:
# expand_dims adds a leading axis so the single sequence is passed as a batch of one.
res = model.predict(np.expand_dims(inputText, 0))



In [None]:
# Threshold the predicted scores at 0.5 to get a 0/1 flag per output.
(res > 0.5).astype(int)

array([[1, 0, 1, 0, 1, 0]])

In [None]:
# Pull the first batch the test split yields, as NumPy arrays.
batch_X, batch_y = next(test.as_numpy_iterator())

In [None]:
# Predict on the sampled test batch and threshold every score at 0.5.
(model.predict(batch_X) > 0.5).astype(int)



array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [1, 0, 1, 0, 1, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
 

In [None]:
# Inspect the shape of the single-comment prediction held in `res`.
res.shape

(1, 6)

In [None]:
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy

In [None]:
# Streaming metrics that accumulate state across the evaluation batches.
pre = Precision()
re = Recall()
# The model emits six independent sigmoid scores (multi-label), and the
# evaluation loop feeds flattened 0/1 labels and scores. CategoricalAccuracy
# compares argmax positions, which is meaningless for that input; BinaryAccuracy
# thresholds each score at 0.5 and compares it with its label.
acc = tf.keras.metrics.BinaryAccuracy()

In [None]:
# Accumulate precision, recall and accuracy over every batch of the test split.
for batch in test.as_numpy_iterator():
  XTrue, yTrue = batch
  yHat = model.predict(XTrue)
  # Flatten both arrays so every label/score pair counts as one sample.
  yTrue = yTrue.flatten()
  yHat = yHat.flatten()
  pre.update_state(yTrue, yHat)
  # Recall needs its own update; without it `re.result()` stays at 0.0.
  re.update_state(yTrue, yHat)
  acc.update_state(yTrue, yHat)



In [None]:
# Report the metric values accumulated in `pre`, `re` and `acc`.
print(f'Precision: {pre.result().numpy()}, Recall: {re.result().numpy()}, Accuracy: {acc.result().numpy()}')

Precision: 0.8122542500495911, Recall: 0.0, Accuracy: 0.1927710771560669


In [None]:
!pip install gradio==



In [None]:
import tensorflow as tf
import gradio as gr

In [None]:
# Persist the trained model to a single .h5 file.
# NOTE(review): only `model` is saved here; the fitted `vectorizer` is a separate object.
model.save('toxicityDetector.h5')

  saving_api.save_model(


In [None]:
# Reload the model from disk, replacing the in-memory `model` variable.
model = tf.keras.models.load_model('toxicityDetector.h5')

In [None]:
# Vectorize a second sample comment to exercise the reloaded model.
inputStr = vectorizer('Your intelligence is my common sense')

In [None]:
# Add a leading batch axis and predict with the reloaded model; overwrites the earlier `res`.
res = model.predict(np.expand_dims(inputStr, 0))



In [None]:
# Display the raw (un-thresholded) scores for the sample comment.
res

array([[4.0890370e-02, 9.9140343e-05, 3.5387066e-03, 8.9672097e-04,
        9.8835602e-03, 1.5751611e-03]], dtype=float32)

In [None]:
def scoreComment(comment):
  """Score one comment and report, per label column, whether its score exceeds 0.5.

  Relies on the notebook-level `vectorizer`, `model` and `dataset` objects.

  Args:
    comment: text to classify.

  Returns:
    A string with one "<column>: <True|False>" line per column in
    `dataset.columns[2:]`.
  """
  tokenIds = vectorizer([comment])
  # Single-item batch in, so only the first row of the prediction is needed.
  scores = model.predict(tokenIds)[0]
  labelNames = dataset.columns[2:]
  return ''.join(
      '{}: {}\n'.format(name, scores[pos]>0.5)
      for pos, name in enumerate(labelNames)
  )

In [None]:
# Gradio UI: a two-line text box feeding scoreComment, with plain-text output.
interface = gr.Interface(
    fn=scoreComment,
    inputs=gr.Textbox(lines=2, placeholder='Comment to score'),
    outputs='text',
)

In [None]:
# Start the app; share=True asks Gradio for a publicly reachable URL.
interface.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://e321287f4ecf07cdaa.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


