In [1]:
import pickle
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf

# Restore the trained artifacts that the prediction cells rely on.
def _load_pickle(path):
    """Unpickle and return the object stored in the file at ``path``."""
    # NOTE: pickle.load can execute arbitrary code; only point this at
    # files you created yourself or otherwise fully trust.
    with open(path, "rb") as handle:
        return pickle.load(handle)


model = tf.keras.models.load_model("model.h5")
tokenizer = _load_pickle("tokenizer.pkl")
# Loaded for completeness; the cells visible here do not reference it.
label_encoder = _load_pickle("label_encoder.pkl")


In [2]:
def predict_url(url, max_length=150, threshold=0.5):
    """Classify a single URL as "phishing" or "legitimate".

    Relies on the module-level ``tokenizer`` and ``model`` objects loaded
    earlier in the notebook.

    Args:
        url: The URL to classify. Must be a non-empty string.
        max_length: Length the token sequence is padded/truncated to before
            it is fed to the model. Defaults to 150.
            NOTE(review): presumably this has to match the sequence length
            the model was trained with -- confirm before changing it.
        threshold: The URL is labelled "phishing" when the model output is
            strictly greater than this value. Defaults to 0.5.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is "phishing" or
        "legitimate"; ``confidence`` is a plain ``float`` percentage for the
        returned label (model output * 100 for "phishing", its complement
        for "legitimate"). Assumes the model emits a single probability in
        [0, 1] per input -- TODO confirm against the model definition.

    Raises:
        TypeError: If ``url`` is not a ``str``.
        ValueError: If ``url`` is empty or contains only whitespace.
    """
    # A list would be silently interpreted by the tokenizer as pre-tokenised
    # text, so reject anything that is not a plain string up front.
    if not isinstance(url, str):
        raise TypeError(f"url must be a str, got {type(url).__name__}")
    if not url.strip():
        raise ValueError("url must be a non-empty string")

    # Preprocess the input URL into a fixed-length integer sequence.
    # With pad_sequences' defaults, over-long sequences lose their leading
    # tokens and short ones are left-padded with zeros.
    sequence = tokenizer.texts_to_sequences([url])
    padded_sequence = pad_sequences(sequence, maxlen=max_length)

    # verbose=0 keeps the Keras progress bar out of the cell output; float()
    # turns the NumPy scalar into a plain Python float for callers.
    phishing_proba = float(model.predict(padded_sequence, verbose=0)[0][0])

    # Report the confidence of whichever class was chosen.
    if phishing_proba > threshold:
        return "phishing", phishing_proba * 100
    return "legitimate", (1 - phishing_proba) * 100

In [12]:
    
# Quick sanity check: classify one URL and report the outcome.
# To try a suspicious-looking address instead, set input_url to
# "http://2.bad-phishing-site.com".
input_url = "https://www.amazon.com"
result, prediction_percentage = predict_url(input_url)
print(
    f"The URL '{input_url}' is classified as {result} with a {prediction_percentage:.2f}% confidence."
)

The URL 'https://www.amazon.com' is classified as legitimate with a 99.97% confidence.
