In [None]:
import ipaddress
import re
from urllib.parse import urlparse

import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# ----------------------------------------
# Step 1: Feature Extraction Function
# ----------------------------------------

# Hosts of well-known URL-shortening services (matched against the hostname).
shorteners = ['bit.ly', 'tinyurl.com', 'goo.gl', 't.co', 'ow.ly', 'buff.ly', 'is.gd', 'cutt.ly']
# Keywords whose presence anywhere in the URL is treated as a phishing hint.
suspicious_words = ['login', 'verify', 'secure', 'account', 'update', 'bank', 'free', 'click']


def _is_ip_address(host):
    """Return True when *host* is a literal IPv4 or IPv6 address."""
    try:
        ipaddress.ip_address(host)
    except ValueError:
        return False
    return True


def _is_shortener_host(host):
    """Return True when *host* is a known shortener or a subdomain of one."""
    # Exact/suffix matching on label boundaries: a plain substring test would
    # flag e.g. "microsoft.com" because it happens to contain "t.co".
    return any(host == short or host.endswith('.' + short) for short in shorteners)


def extract_url_features(url):
    """Turn a URL string into a fixed-order list of ten integer features.

    The returned list contains, in this order:

    0. ``url_length``          -- number of characters in ``url``
    1. ``has_ip``              -- 1 if the host is a literal IPv4/IPv6 address
    2. ``has_https``           -- 1 if the scheme is ``https``
    3. ``has_at``              -- 1 if ``@`` occurs anywhere in the URL
    4. ``num_dots``            -- number of ``.`` characters in the URL
    5. ``has_dash``            -- 1 if the hostname contains ``-``
    6. ``num_subdomains``      -- hostname labels beyond the last two
                                  (always 0 for IP-address hosts)
    7. ``has_suspicious_word`` -- 1 if any entry of ``suspicious_words``
                                  occurs in the lower-cased URL
    8. ``is_shortened``        -- 1 if the hostname is one of ``shorteners``
                                  or a subdomain of one
    9. ``ends_with_slash``     -- 1 if the URL ends with ``/``

    Args:
        url: The URL to analyse. A missing scheme (``bit.ly/abc``) is tolerated.

    Returns:
        A list of ten ints in the order given above.
    """
    parsed = urlparse(url)
    if not parsed.netloc and '://' not in url:
        # Without a scheme urlparse() puts the host into the path; re-parse as
        # a network-path reference so host-based features still work.
        try:
            parsed = urlparse('//' + url)
        except ValueError:
            # Malformed bracketed host: keep the first (host-less) parse
            # instead of failing on input the plain parse accepted.
            pass

    # .hostname is lower-cased and excludes userinfo, port and IPv6 brackets.
    host = (parsed.hostname or '').rstrip('.')
    host_is_ip = _is_ip_address(host)
    lowered_url = url.lower()

    features = {
        'url_length': len(url),
        'has_ip': int(host_is_ip),
        'has_https': 1 if parsed.scheme == 'https' else 0,
        'has_at': 1 if '@' in url else 0,
        'num_dots': url.count('.'),
        'has_dash': 1 if '-' in host else 0,
        # The octets of an IP address are not subdomain labels.
        'num_subdomains': 0 if host_is_ip else max(len(host.split('.')) - 2, 0),
        'has_suspicious_word': 1 if any(word in lowered_url for word in suspicious_words) else 0,
        'is_shortened': int(_is_shortener_host(host)),
        'ends_with_slash': 1 if url.endswith('/') else 0,
    }

    # Dicts preserve insertion order, so the feature order is stable.
    return list(features.values())

# ----------------------------------------
# Step 2: Sample Dataset
# ----------------------------------------

# Toy corpus of (url, label) pairs; label 1 marks a phishing URL, 0 a benign one.
data = [
    ("http://www.legitimatewebsite.com", 0),
    ("http://phishingsite.xyz/login", 1),
    ("https://secure.banking.com/verify", 1),
    ("http://192.168.1.100/update", 1),
    ("https://anotherlegitwebsite.org", 0),
    ("https://bit.ly/fakeupdate", 1),
    ("https://accounts.google.com", 0),
    ("http://freemoney.bogussite.com", 1),
    ("https://tinyurl.com/login-help", 1),
    ("http://my-bank-verification.com", 1),
]

# Separate the pairs into parallel tuples of URLs and labels.
urls = tuple(sample_url for sample_url, _ in data)
labels = tuple(sample_label for _, sample_label in data)

# One feature row per URL; the labels become the target vector.
X = np.array(list(map(extract_url_features, urls)))
y = np.array(labels)

# ----------------------------------------
# Step 3: Split Dataset
# ----------------------------------------

# Stratify on the labels: with only ten imbalanced samples an unstratified
# split can leave the three-sample test set without one of the classes,
# which would make the reported accuracy meaningless.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# ----------------------------------------
# Step 4: Build Neural Network Model
# ----------------------------------------

# Feed-forward binary classifier: two ReLU hidden layers (16 and 8 units)
# followed by a single sigmoid output unit.
model = Sequential()
model.add(Dense(16, activation='relu', input_shape=(X.shape[1],)))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train silently, holding back 10% of the training rows for validation.
model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=2,
    validation_split=0.1,
    verbose=0,
)

# ----------------------------------------
# Step 5: Evaluate the Model
# ----------------------------------------

# Score the trained model on the held-out split and report accuracy as a percentage.
evaluation = model.evaluate(X_test, y_test, verbose=0)
loss, accuracy = evaluation
print(f"\n✅ Model Accuracy: {accuracy * 100:.2f}%")

# ----------------------------------------
# Step 6: Real-Time URL Prediction
# ----------------------------------------

def predict_url(url):
    """Classify a single URL with the trained model and print the verdict.

    Args:
        url: The URL string to check.

    Returns:
        None. A warning is printed when the model's sigmoid output exceeds
        0.5, otherwise a "safe" message is printed.
    """
    # The model expects a 2-D batch, so wrap the single feature row in a list.
    features = np.array([extract_url_features(url)])
    # verbose=0 keeps the Keras progress bar out of the user-facing output.
    prediction = model.predict(features, verbose=0)[0][0]
    if prediction > 0.5:
        print("⚠️ Warning: This URL might be phishing!")
    else:
        print("✅ This URL appears to be safe.")

# ----------------------------------------
# Step 7: Test Real-Time Input
# ----------------------------------------

# Try any URL here
# Strip surrounding whitespace: a stray space or newline from a paste would
# otherwise change length-based features and break URL parsing.
user_url = input("\nEnter a URL to check: ").strip()
predict_url(user_url)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



✅ Model Accuracy: 100.00%

Enter a URL to check: http://www.legitimatewebsite.com
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
