# In [20]:
import re
import requests
import whois
import tldextract
from urllib.parse import urlparse

#  Check if the URL contains an IP address
def has_ip_address(url):
    """Return True when the host of *url* is a literal IP address.

    The host is taken from the parsed URL (userinfo, port and IPv6 brackets
    are stripped by ``urlparse``) and must be a complete IPv4 or IPv6
    address. A hostname that merely starts with dotted digits, such as
    ``1.2.3.4.example.com``, is not an IP address.

    Args:
        url: The URL to inspect. A URL without a scheme is parsed as if it
            started with ``//`` so that its host is still recognised.

    Returns:
        bool: True if the host is a valid IP literal, otherwise False
        (including when the URL has no host or cannot be parsed).
    """
    import ipaddress

    try:
        parsed_url = urlparse(url)
        if not parsed_url.netloc:
            # Scheme-less input ("10.0.0.1/login") has an empty netloc;
            # re-parse it as a network-path reference to expose the host.
            parsed_url = urlparse("//" + url)
        host = parsed_url.hostname
    except ValueError:
        # urlparse rejects malformed bracketed hosts; no usable host then.
        return False

    if not host:
        return False

    try:
        ipaddress.ip_address(host)
    except ValueError:
        return False
    return True

#  Check if the URL is using a shortening service
# Hostnames of well-known URL shortening services.
shorteners = ["bit.ly", "goo.gl", "tinyurl.com", "t.co", "ow.ly", "is.gd", "buff.ly"]


def is_shortened(url):
    """Return True when *url* is hosted on a known URL shortening service.

    Only the hostname is compared against ``shorteners``. A plain substring
    test on the whole URL is wrong because short names such as ``t.co``
    occur inside unrelated hosts (``microsoft.com``) and inside paths or
    query strings.

    Args:
        url: The URL to inspect; the scheme may be omitted.

    Returns:
        bool: True if the host equals a shortener domain or is a subdomain
        of one, otherwise False (including when the URL cannot be parsed).
    """
    try:
        parsed_url = urlparse(url)
        if not parsed_url.netloc:
            # Scheme-less input ("bit.ly/abc") has an empty netloc;
            # re-parse it as a network-path reference to expose the host.
            parsed_url = urlparse("//" + url)
        host = parsed_url.hostname or ""
    except ValueError:
        # urlparse rejects malformed bracketed hosts; no usable host then.
        return False

    # Ignore the trailing dot of a fully-qualified name ("bit.ly.").
    host = host.rstrip(".")
    return any(
        host == service or host.endswith("." + service)
        for service in shorteners
    )

#  Check for "@" symbol (used in phishing to redirect)
def has_at_symbol(url):
    """Return True if the character ``@`` occurs anywhere in *url*."""
    at_occurrences = url.count("@")
    return at_occurrences > 0

#  Check if domain has a hyphen (often used in phishing)
def has_hyphen(url):
    """Return True if the ``domain`` part extracted from *url* contains ``-``.

    The URL is split with ``tldextract.extract``; only its ``domain``
    component is checked, so hyphens elsewhere in the URL do not count.
    """
    domain_label = tldextract.extract(url).domain
    return domain_label.find("-") != -1

#  Check if the site is using HTTPS
def is_https(url):
    """Return True when the scheme parsed from *url* is exactly ``https``."""
    scheme = urlparse(url).scheme
    if scheme != "https":
        return False
    return True

#  Check number of subdomains
def subdomain_count(url):
    """Return how many dot-separated labels the subdomain of *url* has.

    The subdomain is the ``subdomain`` component reported by
    ``tldextract.extract``; an empty subdomain yields 0.
    """
    subdomain = tldextract.extract(url).subdomain
    if not subdomain:
        return 0
    # N labels are separated by N - 1 dots.
    return subdomain.count(".") + 1

#  Check WHOIS data (age and expiration of domain)
def get_domain_age(url):
    """Return the age of the domain behind *url* in days, per WHOIS.

    The age is the time elapsed between the WHOIS creation date and now.
    The span between creation and expiration is only the registration
    length: a domain registered yesterday for two years would report about
    730 days and wrongly pass an "older than a year" check.

    Args:
        url: URL or domain name handed to ``whois.whois``.

    Returns:
        int: Whole days since the creation date, or 0 when the lookup fails
        or no usable creation date is available (treated as low trust).
    """
    from datetime import datetime, timezone

    try:
        domain_info = whois.whois(url)

        creation_date = domain_info.creation_date
        if isinstance(creation_date, list):
            # Several dates may be reported; use the first, as before.
            creation_date = creation_date[0] if creation_date else None

        if not isinstance(creation_date, datetime):
            return 0  # Missing or unparsed date: assume low trust

        now = datetime.now(timezone.utc)
        if creation_date.tzinfo is None:
            # NOTE(review): naive WHOIS timestamps are assumed to be UTC;
            # confirm against the whois library version in use.
            now = now.replace(tzinfo=None)

        return (now - creation_date).days
    except Exception:
        # Deliberate best-effort: any lookup or parsing failure means the
        # age is unknown, which is scored the same as a brand-new domain.
        return 0  # WHOIS lookup failed

# Check number of redirects
def check_redirects(url):
    """Return the number of redirects followed when fetching *url*.

    Issues a GET request with a 5 second timeout and counts the entries in
    the response's ``history``. Any ``requests.RequestException`` is
    treated as "no redirects" and yields 0.
    """
    try:
        redirect_chain = requests.get(url, timeout=5).history
    except requests.RequestException:
        return 0  # If the request fails, assume no redirects
    return len(redirect_chain)

# Rule-based phishing detection
def check_url(url):
    """Score *url* against the phishing heuristics and return a verdict.

    Risk indicators add to the score and trust indicators subtract from it:

    * +2 host is an IP address
    * +2 URL uses a shortening service
    * +2 URL contains ``@``
    * +2 domain contains a hyphen
    * +1 more than two subdomain labels
    * +2 more than two redirects
    * -2 scheme is HTTPS
    * -2 domain age above 365 days

    Args:
        url: The URL to evaluate.

    Returns:
        str: The high-risk message for a score of 5 or more, the
        moderate-risk message for 3 or 4, otherwise the low-risk message.
    """
    score = 0
    score += 2 if has_ip_address(url) else 0
    score += 2 if is_shortened(url) else 0
    score += 2 if has_at_symbol(url) else 0
    score += 2 if has_hyphen(url) else 0
    score += 1 if subdomain_count(url) > 2 else 0
    score += 2 if check_redirects(url) > 2 else 0
    score -= 2 if is_https(url) else 0  # HTTPS is a good sign
    score -= 2 if get_domain_age(url) > 365 else 0  # Domains older than a year are safer

    # Determine result based on score
    if score >= 5:
        return "🚨 PHISHING URL (High Risk)"
    elif score >= 3:
        return "⚠️ SUSPICIOUS URL (Moderate Risk)"
    else:
        return "✅ SAFE URL (Low Risk)"

# ✅ User Input for URL Scanning
# Prompt only when run as a script or notebook cell, not when imported.
if __name__ == "__main__":
    # Strip stray whitespace from pasted input so it does not affect parsing.
    user_url = input("🔍 Enter URL to scan: ").strip()
    print(check_url(user_url))


# 🔍 Enter URL to scan: https://chatgpt.com/c/67c83df9-47dc-8004-b1f3-e7b7a8eb39ef
# ✅ SAFE URL (Low Risk)
