In [None]:
import tkinter as tk
import customtkinter as ctk
import pickle
import re
from urllib.parse import urlparse
import numpy as np

# Build the root window that every widget below is attached to.
app = ctk.CTk()
app.title("Phishing URL Detection")  # caption shown in the window title bar
app.geometry("600x500")              # initial size as "<width>x<height>"

# Load the models
# SECURITY NOTE: unpickling runs whatever code the file contains, so these
# .pkl files must only ever come from a trusted source.
def _load_model(path):
    """Unpickle and return the object stored in the file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


svm_model = _load_model('svm_model.pkl')
xgb_model = _load_model('xgb_model.pkl')
rf_model = _load_model('rf_model.pkl')
knn_model = _load_model('knn_model.pkl')

# Define the custom feature extraction functions
def count_special_chars(url):
    """Return how many characters of ``url`` are one of ``!@#$%^&*()-+=``."""
    pattern = r'[!@#\$%\^&\*\(\)\-\+=]'
    # Each regex match is exactly one character, so counting matches
    # counts the special characters.
    return sum(1 for _ in re.finditer(pattern, url))

def check_url(url, words):
    """Flag URLs that contain a hyphen AND at least one of the given words.

    Args:
        url: The URL string to inspect.
        words: Iterable of literal words to look for. Each word must appear
            delimited by regex word boundaries (``\\b``) to count as a match.

    Returns:
        1 if ``url`` contains ``'-'`` and at least one word from ``words``,
        otherwise 0.
    """
    # Without a hyphen the result is 0 regardless of the words.
    if '-' not in url:
        return 0

    # re.escape makes each word match literally: without it a word such as
    # "a.b" would also match "axb", and a word such as "(" would raise
    # re.error because it is not a valid pattern on its own.
    contains_word = any(
        re.search(rf'\b{re.escape(word)}\b', url) for word in words
    )
    return 1 if contains_word else 0

def extract_features(url):
    """Turn a URL string into the numeric feature vector passed to the models.

    The returned list has a fixed order:

        url_length, num_digits, num_letters, num_dots, url_depth,
        contains_https, contains_dash, num_subdomains, num_special_chars,
        prefix_length, suffix_length

    NOTE(review): the pickled models presumably expect exactly this order and
    these encodings -- confirm against the training code before changing any
    feature below.

    Args:
        url: The URL to analyse, as a string.

    Returns:
        A list of 11 integers in the order shown above.
    """
    # urlparse raises ValueError for some malformed inputs (for example an
    # unterminated IPv6 literal such as "http://[::1"). Treat those like a
    # URL without a domain instead of failing the whole extraction.
    try:
        domain = urlparse(url).netloc
    except ValueError:
        domain = ""

    features = {}

    # Extract features from the URL
    features['url_length'] = len(url)
    features['num_digits'] = sum(char.isdigit() for char in url)
    features['num_letters'] = sum(char.isalpha() for char in url)
    features['num_dots'] = url.count(".")
    features['url_depth'] = url.count("/")  # counts every "/", including "//"
    # NOTE(review): despite the name, this is 0 when "https" occurs anywhere
    # in the URL and 1 otherwise. Left as is because the trained models
    # presumably rely on this encoding -- confirm.
    features['contains_https'] = 0 if "https" in url else 1
    features['contains_dash'] = 1 if "-" in url else 0
    # Dot-separated pieces minus two; this is -1 for a URL without any dot.
    features['num_subdomains'] = len(url.split('.')) - 2
    features['num_special_chars'] = count_special_chars(url)

    # Prefix and Suffix lengths: the text before the domain and the text
    # after it. An empty domain (e.g. a URL without "//") cannot be used as a
    # separator -- str.split("") raises ValueError -- so it is handled
    # explicitly and yields 0/0.
    if domain and domain in url:
        # The split is on every occurrence of the domain, so when the domain
        # text repeats later in the URL the suffix is only the piece between
        # the first and second occurrence. Kept to preserve feature values.
        parts = url.split(domain)
        features['prefix_length'] = len(parts[0])
        features['suffix_length'] = len(parts[1])
    else:
        features['prefix_length'] = 0
        features['suffix_length'] = 0

    return list(features.values())  # dicts keep insertion order

# UI layout. Widgets are packed top to bottom in the order they appear here.
_RESULT_FONT = ("Arial", 18, "bold")

# Heading prompting the user for input
title = ctk.CTkLabel(
    app,
    text="Enter a URL to check for phishing:",
    font=("Arial", 24, "bold"),
)
title.pack(pady=20)

# Text field the URL is typed into
url_entry = ctk.CTkEntry(app, width=500)
url_entry.pack(pady=10)

# Label showing the combined verdict (initially blank)
output_label = ctk.CTkLabel(
    app,
    text="",
    font=_RESULT_FONT,
    fg_color=("gray", "white"),
    corner_radius=8,
)
output_label.pack(pady=20)

# One label per model, created and packed in display order
svm_label, xgb_label, rf_label, knn_label = (
    ctk.CTkLabel(app, text=_caption, font=_RESULT_FONT)
    for _caption in ("SVM: ", "XGBoost: ", "Random Forest: ", "KNN: ")
)
for _model_label in (svm_label, xgb_label, rf_label, knn_label):
    _model_label.pack(pady=5)

# Function to check the URL against the models
def check_url_action():
    """Classify the URL in the entry field and show the results in the UI.

    Reads the URL from ``url_entry``, extracts its features, asks each of the
    four loaded models for a prediction, updates the per-model labels, and
    sets ``output_label`` to the combined verdict. A prediction equal to 1 is
    treated as "phishing"; any other value is treated as "safe".
    """
    # Surrounding whitespace (e.g. from a paste) is not part of the URL, and
    # whitespace-only input counts as empty.
    url = url_entry.get().strip()

    if not url:
        output_label.configure(text="Please enter a URL!", text_color="red")
        return

    # Extract features from the URL; models take a 2D array (one row per URL)
    features = np.array([extract_features(url)])

    voters = (
        ("SVM", svm_model, svm_label),
        ("XGBoost", xgb_model, xgb_label),
        ("Random Forest", rf_model, rf_label),
        ("KNN", knn_model, knn_label),
    )

    # Run every model before touching the UI, so a failing model does not
    # leave the labels half updated.
    verdicts = [
        (name, label, bool(model.predict(features)[0] == 1))
        for name, model, label in voters
    ]

    # Display the result of each model
    for name, label, is_phishing in verdicts:
        label.configure(
            text=f"{name}: {'Phishing' if is_phishing else 'Safe'}",
            text_color=("red" if is_phishing else "green"),
        )

    # Count votes with the same "== 1" test the labels use, so the overall
    # verdict agrees with the per-model display whatever value the models use
    # for the non-phishing class.
    phishing_count = sum(1 for _, _, is_phishing in verdicts if is_phishing)

    # At least 3 of the 4 models must agree on phishing; a 2-2 split is
    # reported as safe.
    if phishing_count >= 3:
        output_label.configure(text="This URL is likely PHISHING!", text_color="red")
    else:
        output_label.configure(text="This URL seems SAFE.", text_color="green")

# Button that runs the classification; packed below the model labels.
check_button = ctk.CTkButton(
    app,
    text="Check URL",
    width=200,
    command=check_url_action,
)
check_button.pack(pady=20)

# Hand control to the GUI event loop; this call blocks until the window closes.
app.mainloop()

invalid command name "139836289058880update"
    while executing
"139836289058880update"
    ("after" script)
invalid command name "139836053300864check_dpi_scaling"
    while executing
"139836053300864check_dpi_scaling"
    ("after" script)
