In [1]:
!pip install gradio --upgrade



In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

model = tf.keras.models.load_model('drive/MyDrive/BiLSTMTrainedModel/BiLSTM.keras')

with open('drive/MyDrive/TrainedTokenizer/tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)

# Show the model architecture
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 300)         14297400  
                                                                 
 bidirectional (Bidirection  (None, 512)               1140736   
 al)                                                             
                                                                 
 dense (Dense)               (None, 8)                 4104      
                                                                 
Total params: 15442240 (58.91 MB)
Trainable params: 1144840 (4.37 MB)
Non-trainable params: 14297400 (54.54 MB)
_________________________________________________________________


In [4]:
!pip install neattext



In [5]:
import neattext.functions as nfx
import pandas as pd
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import nltk
nltk.download('wordnet')
nltk.download('punkt')

lemmatizer = WordNetLemmatizer()
tknzr = nltk.tokenize

def lemmatize_text(text):
    return [lemmatizer.lemmatize(w) for w in tknzr.word_tokenize(text)]

def preprocessing(Input):
  corpus = pd.DataFrame([[Input, 1]], columns=["value","index"])
  corpus = corpus["value"].astype(str)
  corpus = corpus.apply(nfx.remove_stopwords)
  corpus = corpus.apply(nfx.remove_special_characters)
  corpus = corpus.apply(nfx.remove_multiple_spaces)
  corpus = corpus.str.lower()
  corpus = corpus.apply(lemmatize_text)

  return corpus

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [6]:
def predict(Input):
  input = preprocessing(Input)
  input = tokenizer.texts_to_sequences(input)
  input = pad_sequences(input,maxlen= 300,padding='post', truncating='post')

  return model.predict(input)[0]

In [20]:
import numpy as np

categories = ["EMPOWERED VOICES","ENVIRONMENT","GENERAL","LIFESTYLE AND WELLNESS","MISC","PARENTING AND EDUCATION","SPORTS AND ENTERTAINMENT","WORLDNEWS"];


def output(predictedOutput):

  predicted_class = np.argmax(predictedOutput)

  return f"Category: {categories[predicted_class]} Probablity: {predictedOutput[predicted_class]}"


In [28]:
import altair as alt
import numpy as np
import pandas as pd

def radialChart(predictedOutput):

  print(len(predictedOutput))
  print(len(categories))
  source = pd.DataFrame({"values": predictedOutput,"labels": categories})

  base = alt.Chart(source).encode(
    theta=alt.Theta("values:Q", stack=True),
    radius=alt.Radius("values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)),
    color="values:N",
  )

  c1 = base.mark_arc(innerRadius=20, stroke="#fff")

  c2 = base.mark_text(radiusOffset=10).encode(text="labels")

  return c1 + c2

In [9]:
def predictTextToPrediciton(Input):

  return output(predict(Input))


In [10]:
def predictTextToProbablityDist(Input):

  return radialChart(predict(Input).ravel())

In [32]:
import gradio as gr

examples = [
    "The earth is getting warmer. The raise of global warming!",
    "10 tips to improve your lifestyle as a software developer.",
    "Americans proset black lives matter due gang related violance.",
]

ttp_demo = gr.Interface(
    fn=predictTextToPrediciton,
    inputs="text",
    outputs="text",
    title=None,
    examples=examples,
    description="Please give an input to get a news category with the use of BiLSTM.",
)

plot = gr.Plot()
ttpd_demo = gr.Interface(
    fn =predictTextToProbablityDist ,
    title=None,
    inputs="text",
    outputs=plot,
    examples=examples,
    description="Please give an input to get the probablity distribution with the use of BiLSTM.",
)

demo = gr.TabbedInterface([ttp_demo, ttpd_demo], ["Text-to-Prediction", "Text-to-Probability-Distribution"])

if __name__ == "__main__":
    demo.launch(debug=True)




Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://eeaf5c8f949bb11cf6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


8
8
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7861 <> https://eeaf5c8f949bb11cf6.gradio.live
