In [11]:

import re
from sklearn.preprocessing import StandardScaler
import joblib

# Load the already-trained classifier and its feature scaler from disk.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load artifacts that come from a trusted source.
# NOTE(review): the /content/drive/... paths look like a mounted Google Drive
# in Colab; presumably the drive must be mounted before this runs -- confirm.

model = joblib.load('/content/drive/MyDrive/PHISHING_MODEL/model.pkl')
scaler = joblib.load('/content/drive/MyDrive/PHISHING_MODEL/scaler.pkl')

def extract_features(url):
    features = {
        'url_length': len(url),
        'has_ip': int(bool(re.search(r'(\d{1,3}\.){3}\d{1,3}', url))),
        'has_at_symbol': int('@' in url),
        'has_hyphen': int('-' in url),
        'num_dots': url.count('.'),
        'has_https': int('https' in url),
        'count_www': url.count('www'),
        'count_slash': url.count('/'),
        'count_percent': url.count('%'),
    }
    return list(features.values())

# Sample URL to score
sample_url = "https://google.com"

# Step 1: Turn the sample URL into its numeric feature vector
sample_features = extract_features(sample_url)

# Step 2: Scale the features with the loaded scaler
sample_features_scaled = scaler.transform([sample_features])  # Wrapped in a list: one row per sample, here a single row

# Step 3: Predict with the loaded model
prediction = model.predict(sample_features_scaled)
prediction_proba = model.predict_proba(sample_features_scaled)  # Per-class probabilities for the single sample

# Step 4: Output the result
# NOTE(review): label 1 is reported as "phishing" and column 1 of
# predict_proba is printed as its probability; this assumes the model's
# class order is [0, 1] with 1 = phishing -- confirm against the training code.
print(f"Sample URL: {sample_url}")
print(f"Prediction: {'phishing' if prediction[0] == 1 else 'legit'}")
print(f"Probability of being phishing: {prediction_proba[0][1]*100:.2f}%")

Sample URL: https://google.com
Prediction: legit
Probability of being phishing: 0.14%
