In [6]:
import pandas as pd
import numpy as np
import re, math
from urllib.parse import urlparse
from collections import Counter
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import plotly.express as px
import gradio as gr

# Step 1: Feature Extraction
def extract_features(url):
    """Convert a URL string into the numeric feature dict used by the classifier.

    Parameters
    ----------
    url : str
        The URL to analyse. A missing scheme (``example.com/login``) is
        tolerated, and malformed input never raises.

    Returns
    -------
    dict
        Keys, in insertion order: ``url_length``, ``https``, ``num_dots``,
        ``suspicious_keywords``, ``entropy``, ``has_ip``, ``special_chars``.
        The order matters because the dict becomes the column order of the
        DataFrame handed to the model.
    """
    try:
        parsed = urlparse(url)
        scheme = parsed.scheme
        # Without a scheme, urlparse leaves .netloc empty and puts the host in
        # .path (or even .scheme); re-parse as a network-path reference so the
        # host-based features still see the host.
        if not parsed.netloc and '://' not in url:
            parsed = urlparse('//' + url)
        netloc = parsed.netloc
        hostname = parsed.hostname or ''
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. an unclosed IPv6
        # bracket); fall back to "no host" rather than crashing the caller.
        scheme = url.split('://', 1)[0].lower() if '://' in url else ''
        netloc = ''
        hostname = ''

    features = {}
    features['url_length'] = len(url)
    features['https'] = 1 if scheme == 'https' else 0
    features['num_dots'] = netloc.count('.')

    suspicious_keywords = ['credit', 'card', 'secure', 'login', 'verify', 'account']
    lowered = url.lower()
    features['suspicious_keywords'] = sum(1 for keyword in suspicious_keywords if keyword in lowered)

    # Shannon entropy (bits per character) of the URL's character distribution.
    # Every counted character has p > 0, and an empty URL skips the loop.
    entropy = 0
    total_chars = len(url)
    for count in Counter(url).values():
        p = count / total_chars
        entropy += -p * math.log2(p)
    features['entropy'] = entropy

    # Match the whole hostname (userinfo and port already stripped) so that
    # "1.2.3.4.evil.com" is not mistaken for an IP and "user@1.2.3.4" is caught.
    features['has_ip'] = 1 if re.fullmatch(r'\d+\.\d+\.\d+\.\d+', hostname) else 0
    features['special_chars'] = len(re.findall(r'[^a-zA-Z0-9.]', url))
    return features

# Step 2: Load or Train Model
# Reuse a previously saved model when one exists; otherwise train a small
# default model on the built-in examples and save it for the next run.
MODEL_PATH = 'phishing_detector.pkl'

try:
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load a model file that you created or trust.
    model = joblib.load(MODEL_PATH)
except FileNotFoundError:
    # Fallback: train a default model (label 0 = legitimate, 1 = phishing)
    data = {
        'url': [
            'https://secure.bank.com/login',
            'http://phishing-site.com/credit-card',
            'https://example.com',
            'http://malicious.site/verify',
            'https://legitimate-site.net'
        ],
        'label': [0, 1, 0, 1, 0]
    }
    df = pd.DataFrame(data)
    # One feature dict per URL -> one DataFrame row; built in a single pass
    # instead of constructing a pd.Series for every row.
    df_features = pd.DataFrame([extract_features(u) for u in df['url']])
    X = df_features
    y = df['label']
    # Fit on every example: the former 80/20 split held out one of the five
    # rows but never evaluated on it, so it only shrank the training set.
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X, y)
    joblib.dump(model, MODEL_PATH)

# Module-level log of every predicted label; predict_dashboard appends to it
# and the dashboard bar chart is built from its contents.
history = dict(predictions=[])

# Step 3: Prediction and Visualization
def _prediction_counts_figure():
    """Return a bar chart of the labels predicted so far, or None if there are none."""
    if not history['predictions']:
        return None
    count_series = pd.Series(history['predictions']).value_counts()
    return px.bar(x=count_series.index, y=count_series.values, title='Prediction Counts',
                  labels={'x':'Prediction','y':'Count'})


def predict_dashboard(url):
    """Classify a URL and build the figures shown in the app.

    Parameters
    ----------
    url : str
        URL text from the input box. Surrounding whitespace is ignored.

    Returns
    -------
    tuple
        ``(pred_label, pie_fig, bar_fig)``: the predicted label text, a pie
        chart of the class probabilities, and a bar chart of all predictions
        recorded in ``history``. For blank input the label is a prompt
        message, the pie chart is ``None``, and nothing is recorded.
    """
    url = (url or '').strip()
    if not url:
        # Do not classify (or log) an empty submission.
        return 'Please enter a URL', None, _prediction_counts_figure()

    features = extract_features(url)
    features_df = pd.DataFrame([features])
    proba = model.predict_proba(features_df)[0]

    # predict_proba columns follow model.classes_, so derive the display names
    # from it rather than assuming a fixed [0, 1] column order.
    class_names = {0: 'Legitimate', 1: 'Phishing'}
    classes = getattr(model, 'classes_', [0, 1])
    labels = [class_names.get(c, str(c)) for c in classes]

    pie_fig = px.pie(names=labels, values=proba, title='Prediction Probabilities')
    pred_index = int(np.argmax(proba))
    pred_label = labels[pred_index]
    history['predictions'].append(pred_label)
    bar_fig = _prediction_counts_figure()
    return pred_label, pie_fig, bar_fig

# Step 4: Create Gradio App
# Layout: a URL box with a Predict button, a text box for the predicted label,
# and two tabs holding the probability pie chart and the prediction-count chart.
with gr.Blocks() as demo:
    gr.Markdown("## Phishing URL Detection App")

    with gr.Row():
        url_input = gr.Textbox(
            label="Enter URL",
            placeholder="https://example.com/login",
        )
        predict_btn = gr.Button("Predict")

    with gr.Row():
        label_output = gr.Textbox(label="Prediction Result")

    with gr.Tabs():
        with gr.Tab("Probability Distribution"):
            pie_output = gr.Plot()
        with gr.Tab("Dashboard"):
            bar_output = gr.Plot()

    # Clicking the button sends the URL text to predict_dashboard and routes
    # its three return values to the label box and the two plots, in order.
    predict_btn.click(
        fn=predict_dashboard,
        inputs=[url_input],
        outputs=[label_output, pie_output, bar_output],
    )

# share=True asks Gradio for a public share link in addition to serving the app.
demo.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://05eef00379955f7aa5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [4]:
pip install gradio

Collecting gradio
  Downloading gradio-5.29.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.0 (from gradio)
  Downloading gradio_client-1.10.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6

# New Section