In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file under the input directory,
# then print them one per line in the order os.walk yields them.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install gradio --quiet


In [None]:
!pip install pymupdf --quiet

In [None]:
! pip install gradio PyMuPDF transformers torch

In [None]:
import gradio as gr
import torch
import torch.nn.functional as F
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import fitz  # PyMuPDF

# Load the Arabic and English detectors (tokenizer + classifier each).
# NOTE(review): both paths contain "flax" -- presumably the Kaggle Models
# framework slot; confirm the directories hold weights loadable by the
# PyTorch DistilBERT class used here.
arabic_model_path = "/kaggle/input/ara_model/flax/default/1"
english_model_path = "/kaggle/input/eng_model/flax/default/1"


def _load_detector(model_dir):
    """Return ``(tokenizer, classifier)`` loaded from ``model_dir``, classifier in eval mode."""
    tokenizer = DistilBertTokenizer.from_pretrained(model_dir)
    classifier = DistilBertForSequenceClassification.from_pretrained(model_dir)
    classifier.eval()  # inference only
    return tokenizer, classifier


arabic_tokenizer, arabic_model = _load_detector(arabic_model_path)
english_tokenizer, english_model = _load_detector(english_model_path)

# Extract text from PDF
def extract_text_from_pdf(file):
    """Return the concatenated text of every page of a PDF.

    Args:
        file: The uploaded PDF in any of the forms a Gradio ``File`` component
            may pass to a callback: a filesystem path (``str``), raw
            ``bytes``, a binary file-like object exposing ``read()``, or an
            object whose ``name`` attribute is the path of a temporary file.

    Returns:
        str: The text of all pages joined in page order. Empty when no page
        yields any text.

    Raises:
        TypeError: If ``file`` is none of the supported forms.
    """
    if isinstance(file, str):
        # Current Gradio releases hand the upload over as a path string, which
        # has no .read(); str() normalises str subclasses to a plain str.
        doc = fitz.open(str(file), filetype="pdf")
    elif isinstance(file, (bytes, bytearray)):
        doc = fitz.open(stream=bytes(file), filetype="pdf")
    elif hasattr(file, "read"):
        doc = fitz.open(stream=file.read(), filetype="pdf")
    elif hasattr(file, "name"):
        doc = fitz.open(str(file.name), filetype="pdf")
    else:
        raise TypeError(
            f"Unsupported PDF input of type {type(file).__name__!r}; "
            "expected a path, bytes, or a file-like object."
        )

    try:
        # join() avoids re-copying the accumulated string once per page.
        return "".join(page.get_text() for page in doc)
    finally:
        # Always release the document, even if a page fails to parse.
        doc.close()

# Prediction function
def predict(language, input_type, text, file):
    """Classify a piece of text as human- or AI-written.

    Args:
        language: ``"Arabic"`` selects the Arabic tokenizer/model pair; any
            other value selects the English pair.
        input_type: ``"PDF"`` classifies the text extracted from ``file``;
            ``"Text"`` classifies ``text`` directly.
        text: The text to classify when ``input_type`` is ``"Text"``.
        file: The uploaded PDF when ``input_type`` is ``"PDF"``.

    Returns:
        str: ``"Prediction: <label> (<confidence>%)"``, or
        ``"Please provide a valid input."`` when the input type is unknown,
        the PDF is missing, or there is no non-blank text to classify.
    """
    invalid_input_message = "Please provide a valid input."

    if input_type == "PDF" and file is not None:
        text = extract_text_from_pdf(file)
    elif input_type != "Text":
        # Unknown input type, or "PDF" selected without an uploaded file.
        return invalid_input_message

    # An empty textbox or a PDF without extractable text would otherwise be
    # tokenized to special tokens only and yield a meaningless prediction.
    if not isinstance(text, str) or not text.strip():
        return invalid_input_message

    if language == "Arabic":
        tokenizer, model = arabic_tokenizer, arabic_model
    else:
        tokenizer, model = english_tokenizer, english_model

    # truncation=True cuts long inputs to the tokenizer's maximum length, so
    # only the beginning of a long document is classified.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**inputs).logits

    probs = F.softmax(logits, dim=1)
    confidence, prediction = torch.max(probs, dim=1)
    # NOTE(review): assumes class index 0 means "Human" and any other index
    # means "AI" -- confirm against the label mapping used in training.
    label = "Human" if prediction.item() == 0 else "AI"
    return f"Prediction: {label} ({confidence.item()*100:.2f}%)"

# Feedback collection
def collect_feedback(feedback):
    """Build the acknowledgement message for a piece of user feedback.

    The feedback is only echoed back in the returned string; nothing is stored.
    """
    acknowledgement = "Thanks for your feedback: {}".format(feedback)
    return acknowledgement

# Gradio UI: choose language and input type, submit for a prediction, then
# optionally leave feedback.
with gr.Blocks() as demo:
    gr.Markdown("# AI vs Human Text Detector")

    language = gr.Radio(["Arabic", "English"], label="Select Language", value="English")
    input_type = gr.Radio(["Text", "PDF"], label="Choose Input Type", value="Text")

    # Only one of these two inputs is visible at a time (see toggle_inputs).
    text_input = gr.Textbox(label="Enter your text", visible=True)
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"], visible=False)

    result_output = gr.Textbox(label="Prediction Result", interactive=False)
    # Feedback widgets start hidden and are revealed by the handlers below.
    feedback_button = gr.Button("Provide Feedback", visible=False)
    feedback_box = gr.Textbox(label="Your Feedback", visible=False)
    feedback_result = gr.Textbox(label="", visible=False)

    submit = gr.Button("Submit")

    def toggle_inputs(input_choice):
        """Show the textbox for "Text" and the file upload for "PDF"."""
        return (
            gr.update(visible=input_choice == "Text"),
            gr.update(visible=input_choice == "PDF")
        )

    input_type.change(fn=toggle_inputs, inputs=input_type, outputs=[text_input, pdf_input])

    def on_submit(language, input_type, text_input, pdf_input):
        """Run the prediction and reveal the feedback button and feedback box."""
        result = predict(language, input_type, text_input, pdf_input)
        return result, gr.update(visible=True), gr.update(visible=True)

    submit.click(fn=on_submit, inputs=[language, input_type, text_input, pdf_input],
                 outputs=[result_output, feedback_button, feedback_box])

    # NOTE(review): on_submit already reveals feedback_box, so this click
    # handler currently has no visible effect -- confirm the intended flow.
    feedback_button.click(lambda: gr.update(visible=True), None, feedback_box)

    def on_feedback(feedback):
        """Acknowledge the feedback and reveal the acknowledgement box.

        feedback_result is created hidden; writing a value alone would leave
        the acknowledgement invisible, so visibility is switched on as well.
        """
        return gr.update(value=collect_feedback(feedback), visible=True)

    feedback_box.submit(fn=on_feedback, inputs=feedback_box, outputs=feedback_result)

demo.launch()
