In [1]:
# The code below calls whois.whois(), which is provided by the "python-whois"
# distribution. The separately published "whois" distribution installs a module
# of the same name with a different API, so install python-whois instead.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install requests beautifulsoup4 python-whois



In [27]:
import os
import re
from urllib.parse import urlparse

import requests
import whois
from bs4 import BeautifulSoup

In [29]:
def is_suspicious_url(url):
    """Return True when ``url`` trips one of a few cheap lexical heuristics.

    Heuristics applied:
      * the host is a raw IP address instead of a domain name,
      * the host ends in the ``.xyz`` or ``.ru`` top-level domain,
      * the word "login" appears anywhere in the URL (case-insensitive).

    The host checks look at the parsed hostname only. Matching them against
    the whole URL string flags unrelated text such as ``www.ruby-lang.org``
    (contains ".ru") or a version number like ``/v1.2.3.4/`` in the path.

    Args:
        url: URL string, with or without a scheme.

    Returns:
        bool: True if any heuristic matches, or if the URL cannot be parsed.
    """
    suspicious_tlds = (".xyz", ".ru")

    # urlparse() only recognises a host that follows a "//" marker, so give
    # scheme-less input such as "example.xyz/path" one before parsing.
    has_authority = url.startswith("//") or re.match(r"[a-zA-Z][a-zA-Z0-9+.\-]*://", url)
    try:
        host = urlparse(url if has_authority else "//" + url).hostname or ""
    except ValueError:
        # Raised for malformed authorities (e.g. unbalanced IPv6 brackets);
        # a URL that cannot even be parsed is treated as suspicious.
        return True
    host = host.lower().rstrip(".")  # tolerate a fully-qualified trailing dot

    # Dotted-quad IPv4 host, or an IPv6 literal (hostname keeps its colons).
    if re.fullmatch(r"\d{1,3}(\.\d{1,3}){3}", host) or ":" in host:
        return True
    if host.endswith(suspicious_tlds):
        return True
    return "login" in url.lower()

In [31]:
def get_domain_info(url):
    """Report whether WHOIS returns a creation date for the URL's host.

    Only the bare hostname is sent to WHOIS: userinfo and port are stripped
    (``http://user@example.com:8080/`` is looked up as ``example.com``), and
    scheme-less input such as ``example.com/path`` is handled.

    Note that this only checks that a creation date *exists*; it does not
    measure how old the domain is.

    Args:
        url: URL string, with or without a scheme.

    Returns:
        bool: True if the WHOIS record has a creation date. False if it has
        none (``None`` or an empty list), if no host could be extracted, or
        if the lookup failed for any reason.
    """
    try:
        # urlparse() only recognises a host that follows a "//" marker.
        has_authority = url.startswith("//") or re.match(r"[a-zA-Z][a-zA-Z0-9+.\-]*://", url)
        host = urlparse(url if has_authority else "//" + url).hostname
        if not host:
            return False  # nothing to look up

        record = whois.whois(host)
        created = record.creation_date
    except Exception:
        # Deliberately broad: any lookup or parsing failure is treated the
        # same as "no registration data available".
        return False

    if created is None or (isinstance(created, list) and not created):
        return False  # record carries no creation date
    return True

In [35]:
def analyze_page(url, timeout=10):
    """Fetch ``url`` and look for forms whose target looks like a credential post.

    A form is considered suspicious when its ``action`` attribute contains
    "login" or "submit" (case-insensitive).

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unresponsive host would block the notebook forever.

    Returns:
        bool: True if a suspicious form is found, or if the page could not be
        fetched or parsed (failures are reported as suspicious); False
        otherwise.
    """
    try:
        # Fetch the page content
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Look for phishing signs in the HTML tags (e.g. fake forms)
        for form in soup.find_all('form'):
            # A form may have no action attribute at all; normalise to ''.
            action = str(form.get('action') or '').lower()
            if 'login' in action or 'submit' in action:
                print("Suspicious form found in page!")
                return True
        return False
    except Exception as e:
        # Best-effort by design: an unreachable or unparsable page is
        # reported and counted as suspicious rather than crashing the scan.
        print(f"Error fetching page: {e}")
        return True

# Check if the URL is in PhishTank (API check)
def check_with_phishtank(url, timeout=10):
    """Ask the PhishTank check-URL endpoint whether ``url`` is in its database.

    The API key is read from the ``PHISHTANK_API_KEY`` environment variable
    so that a real key never has to be stored in the notebook; the original
    placeholder value is sent when the variable is not set.

    Args:
        url: URL to look up.
        timeout: Seconds to wait for PhishTank before giving up.

    Returns:
        bool: True if the response reports ``results.in_database`` as truthy.
        False if the URL is not listed, or if the lookup failed (network
        error, HTTP error status, or a non-JSON body); a failure is printed
        and the check is skipped rather than aborting the whole scan.
    """
    phishtank_api_url = "https://checkurl.phishtank.com/checkurl/"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    data = {
        "url": url,
        "format": "json",
        "app_key": os.environ.get("PHISHTANK_API_KEY", "your_api_key_here"),
    }

    try:
        response = requests.post(phishtank_api_url, data=data, headers=headers, timeout=timeout)
        response.raise_for_status()
        result = response.json()  # raises a ValueError subclass on a non-JSON body
    except (requests.RequestException, ValueError) as exc:
        print(f"PhishTank: Lookup failed ({exc}); skipping this check.")
        return False

    # Guard against unexpected payload shapes before calling .get() on them.
    results = result.get('results') if isinstance(result, dict) else None
    # NOTE(review): the response may also carry "verified"/"valid" fields that
    # distinguish confirmed phish from mere submissions; confirm against the
    # PhishTank API docs before relying on "in_database" alone.
    if isinstance(results, dict) and results.get('in_database', False):
        print("PhishTank: This URL is flagged as phishing.")
        return True
    else:
        print("PhishTank: This URL seems safe.")
        return False

# Final phishing link scanner function
def phishing_link_scanner(url):
    """Run every phishing check on ``url`` in order and stop at the first hit.

    The checks run cheapest first: URL pattern, domain registration,
    PhishTank lookup, then page content. Later checks are not executed once
    an earlier one has flagged the URL.

    Args:
        url: The URL to analyse.

    Returns:
        bool: True as soon as any check flags the URL, False if all pass.
    """
    print(f"Analyzing URL: {url}")

    # Each branch is only evaluated when all the ones before it passed.
    if is_suspicious_url(url):
        # Step 1: URL pattern
        alert = "Suspicious URL pattern detected!"
    elif not get_domain_info(url):
        # Step 2: domain registration information
        alert = "Suspicious domain registration detected!"
    elif check_with_phishtank(url):
        # Step 3: PhishTank prints its own verdict, so nothing extra here
        alert = None
    elif analyze_page(url):
        # Step 4: page content
        alert = "Suspicious content found on the page!"
    else:
        print("The URL seems safe.")
        return False

    if alert is not None:
        print(alert)
    return True

In [37]:
# Read the URL interactively. Surrounding whitespace from typing or pasting
# is stripped before the URL is handed to the checks.
test_url = input("Enter URL to check: ").strip()

# Skip the scan for empty input instead of reporting it as phishing;
# `result` stays None in that case.
result = phishing_link_scanner(test_url) if test_url else None

if result is None:
    print("No URL entered; nothing to check.")
elif result:
    print("Warning! This is a phishing attempt.")
else:
    print("The URL appears to be safe.")

Enter URL to check:  http://example.com


Analyzing URL: http://example.com
Suspicious domain registration detected!
