In [1]:
import gradio as gr
import pickle
import pytesseract
from PIL import Image
import numpy as np
from tensorflow.keras.models import load_model

# Point pytesseract at the Tesseract executable. The path below is a
# Windows-specific install location; it must exist on the machine running
# this script or OCR calls will fail.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load the text classifier and its vectorizer from pickle files in the
# current working directory. The original note describes the classifier as a
# Random Forest; the code only requires that it expose `predict_proba` and
# that the vectorizer expose `transform` (see `classify_ad`).
# SECURITY NOTE: `pickle.load` can execute arbitrary code embedded in the
# file. Only load pickles that come from a trusted source.
with open("spam_model.pkl", "rb") as f:
    text_model = pickle.load(f)

with open("vectorizer.pkl", "rb") as f:
    vectorizer = pickle.load(f)

# Load the Keras model used for logo analysis from an HDF5 (.h5) file.
# NOTE(review): `classify_ad` feeds it a (1, 128, 128, 3) array — presumably
# that matches the model's input layer; confirm against the training code.
logo_model = load_model("cnn_fake_logo_model.h5")

# Weights used by `classify_ad` to blend the two "fake" probabilities into a
# single score. They sum to 1.0, so the blended score stays in [0, 1] as long
# as both inputs do.
w_text = 0.4  # weight applied to the text-model probability
w_logo = 0.6  # weight applied to the logo-model probability

def _format_verdict(fake_prob, threshold=0.5):
    """Render a "fake" probability as ``Fake (p)`` or ``Legitimate (1 - p)``."""
    if fake_prob >= threshold:
        return f"Fake ({fake_prob:.3f})"
    return f"Legitimate ({1 - fake_prob:.3f})"


def classify_ad(ad_image):
    """Classify an uploaded advertisement image as fake or legitimate.

    The image is run through OCR, the extracted text is scored by the text
    model, the image itself is scored by the logo model, and the two "fake"
    probabilities are blended using the module-level ``w_text`` / ``w_logo``
    weights.

    Args:
        ad_image: The uploaded image as a NumPy array or ``PIL.Image.Image``,
            or ``None`` when the form is submitted without an image.

    Returns:
        A 4-tuple of strings:
        ``(extracted_text, text_result, logo_result, final_result)``.
        Each result is ``"Fake (p)"`` or ``"Legitimate (1 - p)"`` with the
        probability rounded to three decimals. When ``ad_image`` is ``None``
        the tuple is ``("", "N/A", "N/A", "No image provided")``.
    """
    # Submitting the form with no upload yields None; report that instead of
    # letting the OCR call raise.
    if ad_image is None:
        return "", "N/A", "N/A", "No image provided"

    # Step 1: Extract text from the image using OCR (on the untouched input).
    extracted_text = pytesseract.image_to_string(ad_image)

    # Step 2: Vectorize the text and score it. A single `predict_proba` call
    # is enough; the hard label is derived from the probability below.
    # NOTE(review): assumes column 1 of `predict_proba` is the "Fake" class —
    # confirm against the label encoding used at training time.
    transformed_text = vectorizer.transform([extracted_text])
    text_prob = float(text_model.predict_proba(transformed_text)[0][1])

    # Step 3: Preprocess the image for the logo model. Forcing RGB guarantees
    # exactly three channels, so grayscale or RGBA uploads no longer break
    # the reshape below.
    if not isinstance(ad_image, Image.Image):
        ad_image = Image.fromarray(np.array(ad_image))
    resized = ad_image.convert("RGB").resize((128, 128))
    pixels = np.array(resized) / 255.0  # scale pixel values to [0, 1]
    batch = pixels.reshape(1, 128, 128, 3)  # add the batch dimension

    # Step 4: Score the image with the logo model.
    # NOTE(review): assumes the model's first output is the "Fake"
    # probability — confirm against the model's output layer.
    logo_prob = float(logo_model.predict(batch)[0][0])

    # Step 5: Blend both probabilities into the final score.
    final_score = (w_text * text_prob) + (w_logo * logo_prob)

    return (
        extracted_text,
        _format_verdict(text_prob),
        _format_verdict(logo_prob),
        _format_verdict(final_score),
    )

# Build the Gradio UI: one image upload in, four read-only text fields out
# (OCR text, text verdict, logo verdict, blended verdict).
interface = gr.Interface(
    fn=classify_ad,
    inputs=gr.Image(label="Upload Advertisement"),
    outputs=[
        gr.Textbox(label=caption)
        for caption in (
            "Extracted Text",
            "Text Prediction (With Probability)",
            "Logo Prediction (With Probability)",
            "Final Classification (With Probability)",
        )
    ],
    title="Fake Ad Scam Detection",
    description=(
        "Upload an ad image to detect if it's fake or real based on text and "
        "logo analysis using weighted decision-making. "
        "Each classification shows confidence probability."
    ),
)

# Start the local web server for the interface.
interface.launch()




* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 538ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 113ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 89ms/step
