In [1]:
!pip install tensorflow tensorflow-gpu pandas matplotlib sklearn



In [2]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np

In [3]:
df=pd.read_csv(os.path.join('comment_toxicity','train.csv','train.csv'))

In [4]:
df.head()

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [5]:
# Preprocessing


In [6]:
from tensorflow.keras.layers import TextVectorization

In [7]:
df.columns

Index(['id', 'comment_text', 'toxic', 'severe_toxic', 'obscene', 'threat',
       'insult', 'identity_hate'],
      dtype='object')

In [8]:
X=df['comment_text']
y=df[df.columns[2:]].values

In [9]:
print(X)
print(y)

0         Explanation\nWhy the edits made under my usern...
1         D'aww! He matches this background colour I'm s...
2         Hey man, I'm really not trying to edit war. It...
3         "\nMore\nI can't make any real suggestions on ...
4         You, sir, are my hero. Any chance you remember...
                                ...                        
159566    ":::::And for the second time of asking, when ...
159567    You should be ashamed of yourself \n\nThat is ...
159568    Spitzer \n\nUmm, theres no actual article for ...
159569    And it looks like it was actually you who put ...
159570    "\nAnd ... I really don't think you understand...
Name: comment_text, Length: 159571, dtype: object
[[0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 ...
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]]


In [10]:
MAX_FEATURES=200000

In [11]:
vectorizer=TextVectorization(max_tokens=MAX_FEATURES,output_sequence_length=1800,output_mode='int')

In [12]:
vectorizer.adapt(X.values)

In [13]:
vectorized_text=vectorizer(X.values)

In [14]:
vectorized_text

<tf.Tensor: shape=(159571, 1800), dtype=int64, numpy=
array([[  643,    76,     2, ...,     0,     0,     0],
       [    1,    54,  2506, ...,     0,     0,     0],
       [  425,   440,    70, ...,     0,     0,     0],
       ...,
       [32141,  7329,   383, ...,     0,     0,     0],
       [    5,    12,   533, ...,     0,     0,     0],
       [    5,     8,   130, ...,     0,     0,     0]], dtype=int64)>

In [15]:
dataset=tf.data.Dataset.from_tensor_slices((vectorized_text,y))


In [16]:
dataset

<TensorSliceDataset element_spec=(TensorSpec(shape=(1800,), dtype=tf.int64, name=None), TensorSpec(shape=(6,), dtype=tf.int64, name=None))>

In [17]:
dataset=dataset.cache()
dataset=dataset.shuffle(160000)
dataset=dataset.batch(16)
dataset=dataset.prefetch(8)

In [18]:
batch_X,batch_y=dataset.as_numpy_iterator().next()

In [19]:
batch_X.shape

(16, 1800)

In [20]:
batch_y.shape

(16, 6)

In [21]:
train=dataset.take(int(len(dataset)*.7))
val=dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
test=dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))

In [22]:
train_generator=train.as_numpy_iterator()

In [23]:
train_generator.next()

(array([[   51,  1149,    35, ...,     0,     0,     0],
        [15747,  2281,   223, ...,     0,     0,     0],
        [    5,    14,     9, ...,     0,     0,     0],
        ...,
        [    8,  3025,    12, ...,     0,     0,     0],
        [    2,  5470,    11, ...,     0,     0,     0],
        [   46,    33,    15, ...,     0,     0,     0]], dtype=int64),
 array([[0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 1, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 1, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0]], dtype=int64))

In [24]:
##Creating Sequential Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dropout,Bidirectional,Dense,Embedding

In [25]:
model=Sequential()
model.add(Embedding(MAX_FEATURES+1,32))
model.add(Bidirectional(LSTM(32,activation='tanh')))
model.add(Dense(128,activation='relu'))
model.add(Dense(256,activation='relu'))
model.add(Dense(128,activation='relu'))
model.add(Dense(6,activation='softmax'))



In [26]:
model.compile(loss='CategoricalCrossentropy',optimizer='Adam')

In [27]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 32)          6400032   
                                                                 
 bidirectional (Bidirectiona  (None, 64)               16640     
 l)                                                              
                                                                 
 dense (Dense)               (None, 128)               8320      
                                                                 
 dense_1 (Dense)             (None, 256)               33024     
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 6)                 774       
                                                        

In [28]:
history=model.fit(train,epochs=3,validation_data=val)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [29]:
history.history

{'loss': [684419.375, 5610167.5, 12385833.0],
 'val_loss': [901135.5625, 9929309.0, 21275404.0]}

In [32]:
import matplotlib.pyplot as plt


In [33]:
input_text=vectorizer('You freaking suck!')
batch_X,batch_Y=test.as_numpy_iterator().next()
batch_X
batch_Y
res=model.predict(np.expand_dims(input_text,0))
model.predict(batch_X)



array([[0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.]], dtype=float32)

In [34]:
!pip install gradio jinja2



Collecting gradio
  Downloading gradio-3.0.24-py3-none-any.whl (5.1 MB)
Collecting httpx
  Downloading httpx-0.23.0-py3-none-any.whl (84 kB)
Collecting markdown-it-py[linkify,plugins]
  Downloading markdown_it_py-2.1.0-py3-none-any.whl (84 kB)
Collecting python-multipart
  Downloading python-multipart-0.0.5.tar.gz (32 kB)
Collecting pydantic
  Downloading pydantic-1.9.1-cp39-cp39-win_amd64.whl (2.0 MB)
Collecting fastapi
  Downloading fastapi-0.78.0-py3-none-any.whl (54 kB)
Collecting uvicorn
  Downloading uvicorn-0.18.2-py3-none-any.whl (57 kB)
Collecting orjson
  Downloading orjson-3.7.6-cp39-none-win_amd64.whl (188 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Collecting analytics-python
  Downloading analytics_python-1.4.0-py2.py3-none-any.whl (15 kB)
Collecting pycryptodome
  Downloading pycryptodome-3.15.0-cp35-abi3-win_amd64.whl (1.9 MB)
Collecting ffmpy
  Downloading ffmpy-0.3.0.tar.gz (4.8 kB)
Collecting aiohttp
  Downloading aiohttp-3.8.1-cp39-c




In [35]:
import gradio as gr

In [36]:
model.save('toxicity.h5')

In [40]:
import tensorflow as tf
def score_comment(comment):
    vectorized_comment=vectorizer([comment])
    result=model.predict(vectorized_comment)
    res=tf.argmax(result,0)
    text=''
    if res==0:
        text='This comment is toxic'
    elif res==1:
        text='This comment is severely toxic'
    elif res==2:
        text='This comment is obscene'
    elif res==3:
        text='This comment is threat'
    elif res==4:
        text='This comment is insult'
    elif res==5:
        text='This comment is an identity threat'
    return text
    

In [41]:
interface =gr.Interface(fn=score_comment,inputs=gr.inputs.Textbox(lines=2,placeholder='Comment to score'),outputs='text')



In [42]:
interface.launch(share=True)

Running on local URL:  http://127.0.0.1:7861/
Running on public URL: https://51884.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<gradio.routes.App at 0x14e5c26b640>,
 'http://127.0.0.1:7861/',
 'https://51884.gradio.app')



Traceback (most recent call last):
  File "C:\Users\HP\anaconda3\lib\site-packages\gradio\routes.py", line 255, in run_predict
    output = await app.blocks.process_api(
  File "C:\Users\HP\anaconda3\lib\site-packages\gradio\blocks.py", line 548, in process_api
    predictions, duration = await self.call_function(fn_index, processed_input)
  File "C:\Users\HP\anaconda3\lib\site-packages\gradio\blocks.py", line 463, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "C:\Users\HP\anaconda3\lib\site-packages\anyio\to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "C:\Users\HP\anaconda3\lib\site-packages\anyio\_backends\_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "C:\Users\HP\anaconda3\lib\site-packages\anyio\_backends\_asyncio.py", line 867, in run
    result = context.run(func, *args)
  File "C:\Users\HP\anaconda3\lib\site-packages\gradio\interface.py", line 514, in <lambda>
