In [None]:
!pip install tensorflow tensorflow-gpu pandas

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np

In [4]:
df = pd.read_csv("/content/drive/MyDrive/Mini Project/train.csv")

In [5]:
df.head()

Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [6]:
from tensorflow.keras.layers import TextVectorization

In [7]:
X = df['comment_text']
y = df[df.columns[2:]].values

In [8]:
MAX_FEATURES = 200000

In [9]:
vectorizer = TextVectorization(max_tokens=MAX_FEATURES,
                               output_sequence_length=1800,
                               output_mode='int')

In [10]:
vectorizer.adapt(X.values)

In [11]:
vectorized_text = vectorizer(X.values)

In [12]:
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
dataset = dataset.cache()
dataset = dataset.shuffle(160000)
dataset = dataset.batch(16)
dataset = dataset.prefetch(8)

In [13]:
train = dataset.take(int(len(dataset)*.7))
val = dataset.skip(int(len(dataset)*.7)).take(int(len(dataset)*.2))
test = dataset.skip(int(len(dataset)*.9)).take(int(len(dataset)*.1))

In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding

In [15]:
model = Sequential()
# Create the embedding layer
model.add(Embedding(MAX_FEATURES+1, 32))
# Bidirectional LSTM Layer
model.add(Bidirectional(LSTM(32, activation='tanh')))
# Feature extractor Fully connected layers
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
# Final layer
model.add(Dense(6, activation='sigmoid'))

In [16]:
model.compile(loss='BinaryCrossentropy', optimizer='Adam')

In [17]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 32)          6400032   
                                                                 
 bidirectional (Bidirection  (None, 64)                16640     
 al)                                                             
                                                                 
 dense (Dense)               (None, 128)               8320      
                                                                 
 dense_1 (Dense)             (None, 256)               33024     
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dense_3 (Dense)             (None, 6)                 774       
                                                        

In [18]:
model = tf.keras.models.load_model('/content/drive/MyDrive/Mini Project/toxicity.h5')

In [19]:
batch = test.as_numpy_iterator().next

In [20]:
input_text = vectorizer('You freaking suck! I am going to hit you.')

In [21]:
input_text

<tf.Tensor: shape=(1800,), dtype=int64, numpy=array([   7, 7158,  397, ...,    0,    0,    0])>

In [22]:
res = model.predict(np.expand_dims(input_text,0))



In [23]:
res

array([[0.99812937, 0.4776002 , 0.9891026 , 0.0654356 , 0.88015753,
        0.11367781]], dtype=float32)

In [24]:
(res > 0.5).astype(int)

array([[1, 0, 1, 0, 1, 0]])

In [25]:
batch_X, batch_y = test.as_numpy_iterator().next()

In [26]:
res.shape

(1, 6)

In [27]:
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy

In [28]:
pre = Precision()
re = Recall()
acc = CategoricalAccuracy()

In [None]:
for batch in test.as_numpy_iterator():
    # Unpack the batch
    X_true, y_true = batch
    # Make a prediction
    yhat = model.predict(X_true)

    # Flatten the predictions
    y_true = y_true.flatten()
    yhat = yhat.flatten()

    pre.update_state(y_true, yhat)
    re.update_state(y_true, yhat)
    acc.update_state(y_true, yhat)

In [30]:
print(f'Precision: {pre.result().numpy()}, Recall:{re.result().numpy()}, Accuracy:{acc.result().numpy()}')

Precision: 0.8931565880775452, Recall:0.9477611780166626, Accuracy:0.4804413318634033


In [None]:
!pip install gradio

In [None]:
import gradio as gr
import time

In [None]:
input_str = vectorizer('hey i freaken hate you!')

In [None]:
res = model.predict(np.expand_dims(input_str,0))

In [None]:
res

In [37]:
def slow_toxicity_echo(comment, history):
    for i in range(len(comment)):
        time.sleep(0.15)  # Simulating a slow echo
        yield score_comment(comment[:i+1])

def score_comment(comment):
    vectorized_comment = vectorizer([comment])  # Assuming you have a vectorizer object defined
    results = model.predict(vectorized_comment)  # Assuming you have a model object defined

    text = ''
    for idx, col in enumerate(df.columns[2:]):  # Assuming df is your DataFrame
        text += '{}: {}\n'.format(col, results[0][idx] > 0.5)

    return text

# Launching the GUI interface
gr.ChatInterface(slow_toxicity_echo,
                 chatbot=gr.Chatbot(height=500),
                 theme="soft",
                 title="Comment Toxicity Classifier",
                 description="Enter comments to display the toxicity levels for each comment",
                 undo_btn="Undo",
                 clear_btn="Clear",
                 examples=["Hello", "You Suck!", "You are ugly"]).launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://298f6ac919de204394.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


