In [6]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random

In [7]:
import tensorflow
from tensorflow.keras.layers import Embedding, Dense, Input, LSTM, Dropout, Activation,Bidirectional, GlobalMaxPool1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from keras import initializers, regularizers, constraints, optimizers, layers

In [8]:
# Text-cleaning dependencies: regular expressions and the NLTK stop-word corpus
import re
from nltk.corpus import stopwords
import nltk

# Fetch the stop-word corpus that stopwords.words() reads from.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [9]:
# English stop words, kept in a set so the `not in` check in clean_text is a hash lookup.
stop_words = set(stopwords.words('english'))

def clean_text(text):
    """Reduce raw text to lower-case words with stop words removed.

    Steps, in order:
      1. delete every character that is neither an ASCII letter nor whitespace;
      2. lower-case what remains;
      3. split on whitespace and drop words found in the module-level
         ``stop_words`` collection;
      4. join the surviving words with single spaces.

    Args:
        text: the string to clean.

    Returns:
        The cleaned string; empty when no word survives.
    """
    # NOTE(review): apostrophes are removed before the stop-word check, so a
    # contraction such as "don't" is compared as "dont". If stop_words stores
    # contractions with their apostrophes they will never match - verify.
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text).lower()
    return ' '.join(
        token for token in letters_only.split() if token not in stop_words
    )


In [1]:
from tensorflow.keras.layers import TextVectorization

In [2]:
# Vocabulary cap for the vectorizer; the model's Embedding layer uses the same
# value as its input dimension.
max_words = 200_000

# Integer token ids, with every output sequence fixed at length 1800.
vectorizer = TextVectorization(
    output_mode='int',
    output_sequence_length=1800,
    max_tokens=max_words,
)

In [3]:
import pickle

# Rebind `vectorizer` to the object unpickled from Drive.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files whose origin you trust.
vectorizer_path = '/content/drive/MyDrive/com_tox_proj/vectorizer.pkl'
with open(vectorizer_path, 'rb') as pickled_file:
    vectorizer = pickle.load(pickled_file)

In [22]:
# Bidirectional-LSTM classifier over integer sequences of length 1800
# (the same length as the vectorizer's output_sequence_length).
# Layers are instantiated in a fixed order; keep that order when editing,
# since weights are loaded into this architecture from an .h5 file afterwards.
inp = Input(shape=(1800,))
embedded = Embedding(max_words, 32)(inp)

recurrent = LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1)
encoded = Bidirectional(recurrent)(embedded)

# Collapse the time axis by taking the maximum over the sequence.
pooled = GlobalMaxPool1D()(encoded)

hidden = Dense(50, activation="relu")(pooled)
hidden = Dropout(0.1)(hidden)

# Six sigmoid units, trained with binary cross-entropy.
x = Dense(6, activation="sigmoid")(hidden)

model = Model(inputs=inp, outputs=x)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [23]:
# Load the saved weights from Drive into the model built above.
weights_path = '/content/drive/MyDrive/com_tox_proj/trained/my_model_h.h5'
model.load_weights(weights_path)

# Interface using Gradio


In [12]:
!pip install gradio jinja2



In [24]:
import gradio as gr

def score_comment(comment, columns):
    """Score a single comment and format the result as plain text.

    Relies on the module-level ``vectorizer`` and ``model``.

    Args:
        comment: the text to score; it is wrapped in a one-element batch.
        columns: label names; the i-th name is paired with the i-th value of
            the first (only) prediction row.

    Returns:
        A string with one ``"<name>: <value> percent"`` line per entry in
        ``columns``, where the value is the model output times 100, rounded
        to two decimals.
    """
    # The comment is vectorized as-is; clean_text is not applied here.
    token_ids = vectorizer([comment])
    scores = model.predict(token_ids)[0]

    return ''.join(
        '{}: {} percent\n'.format(label, round(scores[position] * 100, 2))
        for position, label in enumerate(columns)
    )

# Label names reported next to the six model outputs, in output order.
# NOTE(review): presumably the same label order used at training time - verify.
relevant_columns = [
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]

comment_box = gr.Textbox(lines=2, placeholder='Comment to score')

# The lambda adapts score_comment to Gradio's single-input callback by
# supplying the label names.
interface = gr.Interface(
    fn=lambda comment: score_comment(comment, relevant_columns),
    inputs=comment_box,
    outputs='text',
)

# share=True requests a public URL; debug=True keeps the cell running so
# errors and logs stay visible.
interface.launch(debug=True, share=True)

IMPORTANT: You are using gradio version 3.35.2, however version 4.44.1 is available, please upgrade.
--------
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://55681a395d9694aedc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 866ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 142ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://55681a395d9694aedc.gradio.live


