In [None]:
!pip install gradio -q

In [None]:
!pip install langdetect lime -q

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import io
from collections import defaultdict
import warnings
warnings.filterwarnings("ignore")
from langdetect import detect
import torch
from lime.lime_text import LimeTextExplainer
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Code: model prediction and LIME explanation

In [None]:
# Pick the compute device: CUDA when torch can see a GPU, otherwise the CPU.
# NOTE(review): nothing else in the visible notebook references `device`.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_name = "Jannat24/finetuned_mbert_fakenews_bn_en"

# Tokenizer and sequence-classification model come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
    output_attentions=True,
)

In [None]:
# Sample text fed to the classifier pipeline and to the LIME explainer in later cells.
txt = "ছাত্র আন্দোলনের সময় ৩২০৪ জন পুলিশকে হত্যা করা হয়েছে- টাইমস ম্যাগাজিন।"
# Number of pieces obtained by splitting on single spaces (separator count + 1).
l = txt.count(" ") + 1

In [None]:
# Wrap the loaded model/tokenizer in a text-classification pipeline that
# reports a score for every label, then run it on the sample text.
pipeline_model = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    return_all_scores=True,
)
outputs = pipeline_model(txt)
print(outputs)

In [None]:
# Display the first element of the pipeline result (the per-label entries
# that the next cell iterates over).
outputs[0]

In [None]:
# Split the per-label results into parallel lists: label names and percentages.
logits_label = [output['label'] for output in outputs[0]]
# Scale to percent *before* rounding. The previous `round(score, 2) * 100`
# multiplied an already-rounded float, which leaks binary floating-point noise
# (e.g. 0.57 * 100 == 56.99999999999999). Values stay whole-percent floats.
logits_prob = [float(round(output['score'] * 100)) for output in outputs[0]]
print(logits_label)
print(logits_prob)

In [None]:
# Define a prediction function for LIME
def predict_proba(texts, batch_size=32):
    """Return class probabilities for one or more texts.

    Used as LIME's classifier function. LIME hands the classifier its whole
    set of perturbed samples in a single call (thousands by default, per the
    LIME documentation), so the texts are processed in mini-batches instead of
    one very large forward pass.

    Parameters
    ----------
    texts : str or sequence of str
        A single text or a collection of texts to score.
    batch_size : int, optional
        Maximum number of texts per forward pass (default 32).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(texts), num_labels)`` holding the softmax of the
        model logits, one row per input text.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # A bare string is one sample, not a sequence of characters.
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)
    if not texts:
        # Nothing to score; keep the (n_samples, n_labels) shape contract.
        return np.empty((0, model.config.num_labels), dtype=np.float32)

    batch_probs = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        # Tokenize input texts
        inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True, max_length=512)
        # Move input tensors to the same device as the model
        inputs = {key: val.to(model.device) for key, val in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
        batch_probs.append(torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy())
    return np.concatenate(batch_probs, axis=0)

# Initialize LIME Explainer
# class_names are positional (index 0 -> "Fake", index 1 -> "Real").
# NOTE(review): assumes the model's label 0 means "Fake" -- confirm against the checkpoint's label mapping.
# A fixed random_state makes LIME's random perturbation sampling, and therefore
# the resulting explanation, reproducible across runs.
explainer = LimeTextExplainer(class_names=["Fake", "Real"], random_state=42)


In [None]:
# Explain the prediction for the sample text, allowing up to `l` LIME features
# (`l` is the space-separated piece count computed from the sample text).
exp = explainer.explain_instance(
    txt,
    predict_proba,
    num_features=l,
)

In [None]:
# Render the LIME explanation in the notebook output (text=True is passed
# through to LIME's notebook renderer).
exp.show_in_notebook(text=True)

# For Gradio interface

In [None]:
# Load the checkpoint used by the Gradio app. This rebinds the global
# `model_name`, `tokenizer` and `model` names used by the functions below.
model_name = "Jannat24/finetuned_mbert_fakenews_bn_en_lang"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# .eval() returns the module itself, so the model is put into evaluation mode
# as part of the assignment.
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=2,
).eval()

# Prediction function for LIME
def predict_proba(texts, batch_size=32):
    """Return class probabilities for one or more texts.

    Used as LIME's classifier function. LIME hands the classifier its whole
    set of perturbed samples in a single call (thousands by default, per the
    LIME documentation), so the texts are processed in mini-batches instead of
    one very large forward pass.

    Parameters
    ----------
    texts : str or sequence of str
        A single text or a collection of texts to score.
    batch_size : int, optional
        Maximum number of texts per forward pass (default 32).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(texts), num_labels)`` holding the softmax of the
        model logits, one row per input text.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # A bare string is one sample, not a sequence of characters.
    if isinstance(texts, str):
        texts = [texts]
    texts = list(texts)
    if not texts:
        # Nothing to score; keep the (n_samples, n_labels) shape contract.
        return np.empty((0, model.config.num_labels), dtype=np.float32)

    batch_probs = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(batch, return_tensors="pt", truncation=True, padding=True, max_length=512)
        # Inputs must live on the same device as the model.
        inputs = {key: val.to(model.device) for key, val in inputs.items()}
        with torch.no_grad():
            outputs = model(**inputs)
        batch_probs.append(torch.nn.functional.softmax(outputs.logits, dim=-1).cpu().numpy())
    return np.concatenate(batch_probs, axis=0)

# Function to generate LIME visualization and return as PIL image
def generate_lime_visualization(text):
    """Classify ``text`` and explain the prediction with LIME.

    Parameters
    ----------
    text : str
        Text to classify.

    Returns
    -------
    tuple
        ``(fake_pct, real_pct, fake_words, real_words, image)``:

        * ``fake_pct`` / ``real_pct`` -- whole-percent strings such as
          ``"57.0 %"`` for class index 0 ("Fake") and index 1 ("Real");
        * ``fake_words`` -- LIME features whose weight is ``<= 0``;
        * ``real_words`` -- LIME features whose weight is ``> 0``;
        * ``image`` -- PIL image of the LIME bar chart.

        Blank input (``None``, empty or whitespace-only) yields
        ``("", "", [], [], None)``.
    """
    # Nothing to classify or perturb: return empty outputs rather than
    # handing LIME an empty document.
    if text is None or not text.strip():
        return "", "", [], [], None

    # Score the text with the same function LIME explains. This keeps the
    # displayed probabilities consistent with the explanation, applies the
    # same max_length=512 truncation to long articles, and avoids rebuilding
    # a transformers pipeline on every request.
    probs = predict_proba([text])[0]
    # Scale to percent before rounding; `round(p, 2) * 100` leaks float noise
    # such as 56.99999999999999. Output keeps the whole-percent "57.0 %" form.
    f = f"{round(float(probs[0]) * 100):.1f} %"
    r = f"{round(float(probs[1]) * 100):.1f} %"

    # Allow up to one LIME feature per whitespace-separated word (at least 1).
    num_features = max(1, len(text.split()))

    # Initialize LIME explainer; the fixed seed makes explanations repeatable.
    explainer = LimeTextExplainer(class_names=["Fake", "Real"], random_state=42)
    exp = explainer.explain_instance(text, predict_proba, num_features=num_features)

    # Render the explanation to an in-memory PNG; always release the figure,
    # even if saving fails.
    fig = exp.as_pyplot_figure()
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format='png')
    finally:
        plt.close(fig)
    buf.seek(0)
    pil_image = Image.open(buf)
    # Decode now so the returned image holds its pixel data.
    pil_image.load()

    # Split the explained words by the sign of their weight: non-positive
    # weights are listed under "Fake", positive ones under "Real".
    weights = exp.as_list()
    fake = [word for word, weight in weights if weight <= 0]
    real = [word for word, weight in weights if weight > 0]

    return f, r, fake, real, pil_image

# Define Gradio interface
def gradio_interface(text):
    """Gradio callback: delegate to generate_lime_visualization and return its result unchanged."""
    result = generate_lime_visualization(text)
    return result



In [None]:
# Build the Gradio UI: one text input, and five outputs matching the five
# values returned by gradio_interface (two probabilities, two word lists,
# one image).
gr_interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.Textbox(lines=5, placeholder="Enter text to classify and visualize explanation."),
    outputs=[
        gr.Textbox(label="Fake News Probability"),
        gr.Textbox(label="Real News Probability"),
        gr.Textbox(label="Fake Words"),
        gr.Textbox(label="Real Words"),
        gr.Image(type="pil", label="Visualization"),
    ],
    theme=gr.themes.Soft(),
    title="Bengali and English Languages Fake-news Identification",
    # Fixed user-facing typo: "Engish" -> "English".
    description="Enter text (Bengali or English news stories only)."
)

In [None]:
# Start the Gradio app built above; debug=True is passed through to launch().
gr_interface.launch(debug=True)