In [133]:
import gradio as gr
import cv2
from transformers import pipeline
import pickle
import numpy as np
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load Models and their Tokenizers

### GRU & GloVe Model

In [35]:
# Load the saved Keras model that predict_gru calls for inference.
# The path is relative, so the file must be in the notebook's working directory.
gru_model = load_model('best_Glove_model.keras')

In [36]:
# Restore the pickled tokenizer; predict_gru calls its texts_to_sequences method.
# NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source.
with open('tokenizer_GRU.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

In [64]:
# Sequence length read by both predict_gru (pad_sequences maxlen) and
# predict_transformer (tokenizer max_length).
max_len = 150

### RandomForest Model

In [39]:
# Restore the pickled classifier; predict_rf calls its predict_proba method.
# NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source.
with open('random_forest_model.pkl', 'rb') as f:
    rf_model = pickle.load(f)

In [42]:
# Restore the pickled vectorizer; predict_rf calls its transform method to build
# the feature matrix passed to rf_model.
# NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source.
with open('tfidf_vectorizer.pkl', 'rb') as f:
    tfidf = pickle.load(f)

### Transformer Model

In [90]:
# The tokenizer and the classification model are both read from the same local directory.
# NOTE(review): this is an absolute, machine-specific path; adjust it when running elsewhere.
transformer_dir = r"E:\NLP\content\fake_news_model"
transformer_tokenizer = AutoTokenizer.from_pretrained(transformer_dir)
transformer_model = AutoModelForSequenceClassification.from_pretrained(transformer_dir)
# Put the model in evaluation mode; as the cell's last expression this also displays the model.
transformer_model.eval()

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [88]:
# Re-declares the same value already assigned in the GRU section; predict_transformer
# reads this global as the tokenizer max_length.
max_len = 150

# Preprocessing & Prediction: Random Forest

In [66]:
def preprocessing_rf(text):
    """Normalise raw text into the space-joined token string fed to the TF-IDF vectorizer.

    Steps, in order: replace URLs, HTML-like tags and every non-letter character
    with a space, collapse whitespace, tokenize, lowercase, drop English stop
    words and lemmatize what remains.

    Args:
        text: Raw input string.

    Returns:
        The surviving lemmatized tokens joined by single spaces.
    """
    # Every cleaning rule substitutes a single space, so they can share one loop.
    for pattern in (r'https?://\S+|www\.\S+', r'<.*?>', r'[^a-zA-Z]', r'\s+'):
        text = re.sub(pattern, ' ', text)

    raw_tokens = word_tokenize(text.strip())
    english_stopwords = set(stopwords.words('english'))
    lemmatize = WordNetLemmatizer().lemmatize

    kept = []
    for token in raw_tokens:
        lowered = token.lower()
        if lowered not in english_stopwords:
            kept.append(lemmatize(lowered))
    return " ".join(kept)

In [68]:
def predict_rf(text):
    """Classify text with the random-forest model and format the verdict.

    The text is cleaned with preprocessing_rf, vectorized with the global
    ``tfidf`` and scored with the global ``rf_model``. Column 1 of
    ``predict_proba`` is treated as the probability of the "Fake" class.

    Args:
        text: Raw input string.

    Returns:
        A string such as ``"Fake (87.50%)"`` or ``"Real (64.10%)"``, where the
        percentage is the probability of the reported label.
    """
    vector = tfidf.transform([preprocessing_rf(text)])
    fake_prob = rf_model.predict_proba(vector)[0][1]

    is_fake = fake_prob >= 0.5
    label = "Fake" if is_fake else "Real"
    # Report the confidence of whichever label won.
    percentage = (fake_prob if is_fake else (1 - fake_prob)) * 100

    return f"{label} ({percentage:.2f}%)"

# Preprocessing & Prediction: GRU & GloVe Model

In [70]:
def preprocessing_gru(text):
    """Lowercase and clean raw text before it is tokenized for the GRU model.

    URLs and HTML-like tags are deleted outright (no space is left behind),
    every character that is not a lowercase letter or whitespace becomes a
    space, and whitespace runs are collapsed to one space.

    Args:
        text: Raw input string.

    Returns:
        The cleaned, stripped, lowercase string.
    """
    # (pattern, replacement) pairs, applied strictly in this order.
    rules = (
        (r'https?://\S+|www\.\S+', ''),
        (r'<.*?>', ''),
        (r'[^a-z\s]', ' '),
        (r'\s+', ' '),
    )
    cleaned = text.lower()
    for pattern, replacement in rules:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

In [80]:
def predict_gru(text):
    """Classify text with the GRU model and format the verdict.

    The text is cleaned with preprocessing_gru, converted to sequences by the
    global ``tokenizer``, padded to the global ``max_len`` and scored by the
    global ``gru_model``. The first output value is treated as the probability
    of the "Fake" class.

    Args:
        text: Raw input string.

    Returns:
        A string such as ``"Fake (87.50%)"`` or ``"Real (64.10%)"``, where the
        percentage is the probability of the reported label.
    """
    token_ids = tokenizer.texts_to_sequences([preprocessing_gru(text)])
    padded = pad_sequences(token_ids, maxlen=max_len)
    fake_prob = float(gru_model.predict(padded)[0][0])

    is_fake = fake_prob >= 0.5
    label = "Fake" if is_fake else "Real"
    # Report the confidence of whichever label won.
    percentage = (fake_prob if is_fake else (1 - fake_prob)) * 100

    return f"{label} ({percentage:.2f}%)"

# Preprocessing & Prediction: DistilBERT (Transformer)

In [92]:
def preprocessing_transformer(text):
    """Lowercase and clean raw text before it is tokenized for the transformer model.

    URLs and HTML-like tags are deleted outright, every character that is not
    a lowercase letter or whitespace becomes a space, and whitespace runs are
    collapsed to one space.

    Args:
        text: Raw input string.

    Returns:
        The cleaned, stripped, lowercase string.
    """
    lowered = text.lower()
    without_urls = re.sub(r'https?://\S+|www\.\S+', '', lowered)
    without_tags = re.sub(r'<.*?>', '', without_urls)
    letters_only = re.sub(r'[^a-z\s]', ' ', without_tags)
    return re.sub(r'\s+', ' ', letters_only).strip()

In [94]:
def predict_transformer(text):
    """Classify text with the transformer model and format the verdict.

    The text is cleaned with preprocessing_transformer, encoded by the global
    ``transformer_tokenizer`` (padded/truncated to the global ``max_len``) and
    scored by the global ``transformer_model`` with gradients disabled.
    Softmax index 1 is treated as the probability of the "Fake" class.

    Args:
        text: Raw input string.

    Returns:
        A string such as ``"Fake (87.50%)"`` or ``"Real (64.10%)"``, where the
        percentage is the probability of the reported label.
    """
    encoded = transformer_tokenizer(
        preprocessing_transformer(text),
        padding="max_length",
        truncation=True,
        max_length=max_len,
        return_tensors="pt",
    )

    # Inference only: no autograd bookkeeping is needed.
    with torch.no_grad():
        class_probs = torch.softmax(transformer_model(**encoded).logits, dim=1)[0]

    fake_prob = float(class_probs[1])

    is_fake = fake_prob >= 0.5
    label = "Fake" if is_fake else "Real"
    # Report the confidence of whichever label won.
    percentage = (fake_prob if is_fake else (1 - fake_prob)) * 100

    return f"{label} ({percentage:.2f}%)"

# Sentiment

In [136]:
# Sentiment classifier; top_k=3 asks for the scores of three labels per input
# instead of only the best one.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment",
    top_k=3,
)

# Weight given to each label when get_sentiment_score folds the scores into one
# value in [-1, 1].
# NOTE(review): presumably LABEL_0/1/2 are negative/neutral/positive; confirm against the model card.
label_mapping = {
    "LABEL_0": -1.0,
    "LABEL_1": 0.0,
    "LABEL_2": 1.0,
}

config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development





Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


vocab.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Device set to use cpu


In [153]:
def get_sentiment_score(text, classifier=None, weights=None):
    """Collapse per-label sentiment scores into one positivity value in [-1, 1].

    Each label's score is multiplied by its weight and the products are summed;
    labels without a weight are ignored. The sum is clamped to [-1.0, 1.0].

    The classifier is called with truncation enabled so that inputs longer
    than the model's limit (e.g. full news articles) are cut instead of
    raising an error inside the model.

    Args:
        text: Raw input string.
        classifier: Callable returning per-label ``{'label', 'score'}`` dicts
            for ``text``. Defaults to the global ``sentiment_pipeline``.
        weights: Mapping from label name to weight. Defaults to the global
            ``label_mapping``.

    Returns:
        The clamped weighted sum as a float.
    """
    if classifier is None:
        classifier = sentiment_pipeline
    if weights is None:
        weights = label_mapping

    # Token limit passed explicitly rather than relying on the tokenizer to
    # declare its own maximum length.
    max_tokens = 512
    results = classifier(text, truncation=True, max_length=max_tokens)

    # A single-string call may yield [[{...}, ...]] or [{...}, ...] depending
    # on the library version; accept both shapes.
    if results and isinstance(results[0], list):
        label_scores = results[0]
    else:
        label_scores = results

    positivity = sum(
        (weights[entry['label']] * entry['score']
         for entry in label_scores
         if entry['label'] in weights),
        0.0,
    )
    return max(-1.0, min(1.0, positivity))

# Procedural Smiley

In [324]:
def sentiment_label_from_score(score: float) -> str:
    """Map a positivity score to a human-readable sentiment label.

    Args:
        score: Positivity value; scores strictly above 0.3 are positive and
            scores strictly below -0.3 are negative.

    Returns:
        ``"Positive 🙂"``, ``"Negative 😢"`` or, for everything else
        (including exactly ±0.3), ``"Neutral 😐"``.
    """
    threshold = 0.3
    if score > threshold:
        return "Positive 🙂"
    if score < -threshold:
        return "Negative 😢"
    return "Neutral 😐"

In [320]:
def create_sentiment_image(positivity: float, image_size: tuple[int, int]=(200,200)) -> np.ndarray:
    """Draw a smiley whose mouth curvature follows ``positivity``.

    The face outline, two filled eyes and the mouth are drawn with
    ``(0, 0, 0, 255)`` on a 4-channel canvas initialised to zeros. The mouth is
    a parabola sampled at 100 points: positive values raise its corners (a
    smile), negative values lower them (a frown), zero gives a flat line.

    Args:
        positivity: Curvature factor; the caller in this notebook passes a
            value in [-1, 1].
        image_size: ``(width, height)`` of the output in pixels.

    Returns:
        A ``uint8`` array of shape ``(height, width, 4)``.
    """
    width, height = image_size
    shorter_side = min(image_size)

    ink = (0, 0, 0, 255)
    stroke = max(2, shorter_side // 30)

    cx, cy = width // 2, height // 2
    # Shrink the radius by the stroke width so the outline stays inside the canvas.
    face_radius = shorter_side // 2 - stroke

    canvas = np.zeros((height, width, 4), dtype=np.uint8)
    cv2.circle(canvas, (cx, cy), face_radius, ink, stroke)

    # Eyes: filled discs placed symmetrically above the centre (left, then right).
    eye_dx = face_radius // 3
    eye_y = cy - face_radius // 4
    for eye_x in (cx - eye_dx, cx + eye_dx):
        cv2.circle(canvas, (eye_x, eye_y), face_radius // 6, ink, -1)

    # Mouth: parabola y = positivity * t^2 over t in [-1, 1], below the centre.
    mouth_span = face_radius
    mouth_rise = face_radius // 3
    mouth_y = cy + face_radius // 3

    t = np.linspace(-1, 1, 100)
    curve = np.zeros_like(t) if positivity == 0 else positivity * (t ** 2)

    # Image y grows downwards, so subtracting the curve lifts the corners
    # for positive values.
    mouth_points = np.array(
        [
            (int(cx + tx * mouth_span // 2), int(mouth_y - ty * mouth_rise))
            for tx, ty in zip(t, curve)
        ],
        dtype=np.int32,
    ).reshape((-1, 1, 2))

    cv2.polylines(canvas, [mouth_points], isClosed=False, color=ink, thickness=stroke)

    return canvas


# Choose the model

In [326]:
def predict_model_live(model_choice, text):
    """Gradio callback: run the chosen fake-news model plus sentiment analysis.

    Args:
        model_choice: One of ``"RandomForest"``, ``"GRU+GloVe"`` or
            ``"Transformer"``; any other value yields the prediction text
            ``"Select a model"``.
        text: Text entered by the user.

    Returns:
        A tuple ``(prediction_text, smiley_image, sentiment_label)``. For
        blank or whitespace-only text this is ``(" ", black 200x200x3 uint8
        image, " ")`` and no model is invoked.
    """
    if not text.strip():
        return " ", np.zeros((200,200,3), dtype=np.uint8), " "

    predictors = (
        ("RandomForest", predict_rf),
        ("GRU+GloVe", predict_gru),
        ("Transformer", predict_transformer),
    )
    for choice, predictor in predictors:
        if model_choice == choice:
            news_pred = predictor(text)
            break
    else:
        # No entry matched (for example, nothing selected in the dropdown).
        news_pred = "Select a model"

    # Sentiment is computed regardless of which model was chosen.
    score = get_sentiment_score(text)
    return news_pred, create_sentiment_image(score), sentiment_label_from_score(score)

In [328]:
# Input widgets, in the positional order predict_model_live takes them: (model_choice, text).
input_components = [
    gr.Dropdown(["RandomForest", "GRU+GloVe", "Transformer"], label="Choose Model"),
    gr.Textbox(label="Enter your text here"),
]

# Output widgets, matching the (prediction, smiley image, sentiment label) tuple it returns.
output_components = [
    gr.Textbox(label="Prediction"),
    gr.Image(label="Sentiment Smiley"),
    gr.Textbox(label="Sentiment Label"),
]

iface = gr.Interface(
    fn=predict_model_live,
    inputs=input_components,
    outputs=output_components,
    live=False,
)

# Start the local web server for the demo.
iface.launch()

* Running on local URL:  http://127.0.0.1:7900

To create a public link, set `share=True` in `launch()`.


