In [97]:
#1-2. Web URL input and Feature Selection
print("Running web url input and Feature Selection")

import pandas as pd
import requests
from bs4 import BeautifulSoup
import whois
from datetime import datetime
import socket
import ssl
from urllib.parse import urlparse
import requests
from urllib3.exceptions import NewConnectionError, MaxRetryError
from requests.exceptions import ConnectionError
import time
import re

# Define headers to mimic a real browser request
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Referer': 'https://google.com',  # Optional

}

# Fetch URL with retries
def fetch_url(url, retries=2):
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx, 5xx)
            return response.content
        except (requests.exceptions.RequestException, NewConnectionError, MaxRetryError, ConnectionError):
            # Suppress error details and retry
            time.sleep(2)  # Delay between retries
    # If all retries fail, return None
    return None

                
#------------------------------------------------------------------------------------------------------------------
                                             #Domain-based Feature
# Function to check SSL certificate
def check_ssl(domain):
    try:
        context = ssl.create_default_context()
        with socket.create_connection((domain, 443)) as sock:
            with context.wrap_socket(sock, server_hostname=domain) as secure_sock:
                return True
    except:
        return False
#------------------------------------------------------------------------------------------------------------------
                                           

                                           
#-------------------------------------------------------------------------------------------------------------------
                                               #URL-based Feature
# Check for IP address in URL
def contains_ip(url):
    ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')
    return bool(ip_pattern.search(url))

#--------------------------------------------------------------------------------------------------------------------
                                              #URL-based feature
# Compile the shortening services regex pattern
shortening_services_pattern = re.compile(r"bit\.ly|goo\.gl|shorte\.st|go2l\.ink|x\.co|ow\.ly|t\.co|tinyurl|tr\.im|is\.gd|cli\.gs|"
                      r"yfrog\.com|migre\.me|ff\.im|tiny\.cc|url4\.eu|twit\.ac|su\.pr|twurl\.nl|snipurl\.com|"
                      r"short\.to|BudURL\.com|ping\.fm|post\.ly|Just\.as|bkite\.com|snipr\.com|fic\.kr|loopt\.us|"
                      r"doiop\.com|short\.ie|kl\.am|wp\.me|rubyurl\.com|om\.ly|to\.ly|bit\.do|t\.co|lnkd\.in|db\.tt|"
                      r"qr\.ae|adf\.ly|goo\.gl|bitly\.com|cur\.lv|tinyurl\.com|ow\.ly|bit\.ly|ity\.im|q\.gs|is\.gd|"
                      r"po\.st|bc\.vc|twitthis\.com|u\.to|j\.mp|buzurl\.com|cutt\.us|u\.bb|yourls\.org|x\.co|"
                      r"prettylinkpro\.com|scrnch\.me|filoops\.info|vzturl\.com|qr\.net|1url\.com|tweez\.me|v\.gd|"
                      r"tr\.im|link\.zip\.net")

# Count special characters in URL
def count_special_chars(url):
    return int(sum(not c.isalnum() and c not in ['.', '-', '_', ':', '/', '?', '&', '=', '%'] for c in url))

# Check for URL shortening services using the provided regex pattern
def shortening_services(url):
    return bool(re.search(shortening_services_pattern, url))

#----------------------------------------------------------------------------------------------------------------------

# Extract features from URL
def extract_features(url):
    features = {}
    domain = urlparse(url).netloc
    #-----------------------------------------------------------------------------------------------------------------
                                              #URL-based features
    features['url_length'] = int(len(url))
    features['contains_ip'] = int(contains_ip(url))
    features['shortening_services'] = int(shortening_services(url))
    features['special_chars'] = int(count_special_chars(url))
    #-----------------------------------------------------------------------------------------------------------------
    content = fetch_url(url)
    if content:
        soup = BeautifulSoup(content, 'html.parser')
        #-------------------------------------------------------------------------------------------------------------
                                              #Content-based features
        features['html_length'] = int(len(content))
        features['js_length'] = sum(len(s.string) for s in soup.find_all('script') if s.string)
        features['num_links'] = len(soup.find_all('a'))
        features['num_forms'] = len(soup.find_all('form'))
        
       
        #-------------------------------------------------------------------------------------------------------------
    
        try:
            #---------------------------------------------------------------------------------------------------------
                                              #Domain-based features
            domain_info = whois.whois(domain)
            creation_date = domain_info.creation_date
            updated_date = domain_info.updated_date
            expiration_date = domain_info.expiration_date
            
            if isinstance(creation_date, list):
                creation_date = creation_date[0]
            if isinstance(updated_date, list):
                updated_date = updated_date[0]
            if isinstance(expiration_date, list):
                expiration_date = expiration_date[0]
            
            features['domain_age'] = (datetime.now() - creation_date).days if creation_date else None
        except Exception as e:
            print(f"Error fetching domain info for {domain}: {e}")
            features['domain_age'] = None

        features['has_ssl'] = 1 if int(check_ssl(domain)) else 0
        #------------------------------------------------------------------------------------------------------------------

        features.update(features)
    else:
        features.update({
            'html_length': 0,
            'js_length': 0,
            'num_links': 0,
            'num_forms': 0,
            'contains_ip': 0,
            'shortening_services': 0,
            'url_length': len(url),
            'special_chars': sum(not c.isalnum() and c not in ['.', '-', '_'] for c in url),
            'domain_age': None,
            'has_ssl': 0,
        })
    return features

Running web url input and Feature Selection


In [99]:
                                              #3. Feature Vector
print("Extract features running")
# Function to process a single URL
def process_url(url):
    try:
        features = extract_features(url)
        return {'url': url, **features}
    except Exception as e:
        print(f"Error processing URL: {url}. Error: {e}")
        return None


Extract features running


In [101]:
                             #5. Data Preprocessing - Randomly process 200 URLs for each label
import pandas as pd
import random
from concurrent.futures import ThreadPoolExecutor, as_completed

print("Data preprocessing running")
def preprocess_data(df, processed_df, n=1000):
    # Separate URLs by label (0 for legitimate, 1 for phishing)
    legitimate_urls = df[df['status'] == 0]['url'].tolist()
    phishing_urls = df[df['status'] == 1]['url'].tolist()
    
    # Filter out URLs that have already been processed
    already_processed_urls = set(processed_df['url'].tolist()) if not processed_df.empty else set()
    legitimate_urls = [url for url in legitimate_urls if url not in already_processed_urls]
    phishing_urls = [url for url in phishing_urls if url not in already_processed_urls]

    # Randomly sample 1000 URLs from each group (or fewer if not enough remain)
    legitimate_sample = random.sample(legitimate_urls, min(n, len(legitimate_urls)))
    phishing_sample = random.sample(phishing_urls, min(n, len(phishing_urls)))

    all_samples = legitimate_sample + phishing_sample
    results = []
    
    # Multithreading to process URLs faster
    with ThreadPoolExecutor(max_workers=50) as executor:
        futures = {executor.submit(process_url, url): url for url in all_samples}
        for future in as_completed(futures):
            url = futures[future]
            try:
                result = future.result()
                if result:  # Only append if the URL is successfully processed
                    results.append(result)
                    print(f"Processed URL: {url}")
            except Exception as e:
                print(f"Error processing URL: {url}. Error: {e}")

    # Convert results to DataFrame
    X = pd.DataFrame(results)
    
    # Ensure the corresponding labels match the processed URLs
    y = df.loc[df['url'].isin(X['url']), 'status'].values

    return X, y

Data preprocessing running


In [103]:
print("Loading Historical Dataset")
import pandas as pd
import numpy as np

# Load datasets
df = pd.read_csv('legitimate_and_phishing_urls.csv')

# Replace "legitimate" with 0 and "phishing" with 1
# Use np.where() to replace "legitimate" with 0 and "phishing" with 1
df['status'] = np.where(df['status'] == 'legitimate', 0, 1)

#remove duplicates
df = df.drop_duplicates(subset='url')  # Remove duplicates

# Assuming we have a dataframe 'processed_df' of already processed URLs, or create an empty one
processed_df = pd.DataFrame(columns=['url'])

# Preprocess the data (sample 1000 legitimate and 200 phishing URLs for feature extraction)
X, y = preprocess_data(df, processed_df, n=1000)

# At this point, `X` contains the feature set and `y` contains the corresponding labels for training
print(f"Feature matrix shape: {X.shape}")
print(f"Labels shape: {len(y)}")
# X and y can now be used for model training

Loading Historical Dataset


2024-09-14 08:46:32,709 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://www.bastelitis.de/
Processed URL: http://jordanpost.com.jo
Processed URL: http://everything.explained.today/%5C/cladism/
Processed URL: http://www.battingcagesofmn.com/batting-cages/
Processed URL: http://musinfo.ch
Processed URL: https://en.wikipedia.org/wiki/MultiMediaCard
Processed URL: https://www.torproject.org/download/download
Error fetching domain info for www.cloudaccess.net: No match for "WWW.CLOUDACCESS.NET".
>>> Last update of whois database: 2024-09-14T07:46:14Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://www.gamereactor.se/
Processed URL: https://en.m.wikipedia.org/wiki/House_Intelligence_Committee
Processed URL: http://www.yourdictionary.com/provenance
Processed URL: https://www.facebook.com/TheGraphicsFairy/
Processed URL: http://www.wellfloured.com/
Processed URL: https://www.facebook.com/DiscFamily/
Processed URL: https://www.taphunter.com/
Processed URL: https://www.cloudaccess.net/cloud-control-panel-ccp/157-dns-management/319-resolving-dns-http-redirect-issues.html
Processed URL: http://www.newworldencyclopedia.org/entry/CD
Processed URL: http://stackoverflow.com/questions/6260756/how-to-stop-javascript-foreach
Processed URL: https://en.wikipedia.org/wiki/List_of_CNN_personnel
Processed URL: https://www.youtube.com/channel/UCLSjw7g5U48jgtjaeyjfwjQ
Processed URL: https://www.newlifepca.org/
Processed URL: https://www.ovcmt.com/
Processed URL: http://zlsa.github.io/atc/
Processed URL: http://www.iconmeals.com/product-category/snacks/
Processed URL: https://w

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://www.merriam-webster.com/dictionary/DNA
Processed URL: https://www.sociuswonen.nl/
Processed URL: https://www.chinesisches-horoskop.de/
Processed URL: http://www.lissyara.su/doc/rfc/rfc1459/
Processed URL: https://www.visitislesofscilly.com/
Processed URL: https://en.wikipedia.org/wiki/Gateway
Processed URL: https://en.wikipedia.org/wiki/Lists_of_network_protocols
Processed URL: http://tvtropes.org/pmwiki/pmwiki.php/Characters/TheAmazingRace
Processed URL: https://en.wikipedia.org/wiki/Software_as_a_service
Processed URL: http://elm-hashtoom.blog.ir
Processed URL: http://dictionary.sensagent.com/Select%20or%20special%20committee%20(United%20States%20Congress)/en-en/
Processed URL: http://housing.blogfa.com
Processed URL: https://collections.ushmm.org/search/catalog/irn3499
Processed URL: https://www.facebook.com/ImpactEventsGroupInc
Processed URL: https://www.w3schools.com/jsreF/jsref_obj_date.asp
Processed URL: http://www.definitions.net/definition/STANDARD%20ERR

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.
2024-09-14 08:46:52,400 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://speedtest.xfinity.com/
Processed URL: http://resources.infosecinstitute.com/session-hijacking-cheat-sheet/
Processed URL: http://www.explainthatstuff.com/streamingmedia.html
Processed URL: http://www.medicalook.com/human_anatomy/systems/Peripheral_nervous_system.html
Processed URL: https://es.wikipedia.org/wiki/Lucha_libre_mexicana
Processed URL: https://beta.znipe.tv/
Processed URL: http://www.dafont.com/computerfont.font
Error fetching domain info for mifiboltrisman.blogspot.com: No match for "MIFIBOLTRISMAN.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:46:46Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's 

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://www.goodreads.com/book/show/973368.The_Noble_Eightfold_Path
Processed URL: https://coachballer.tumblr.com/#_=_
Processed URL: https://www.wolves.co.uk/
Processed URL: https://www.rodoviariadotiete.com/deonibus
Processed URL: http://www.kreezcraft.com/
Processed URL: https://www.gtc.edu/
Processed URL: https://developer.mozilla.org/en-US/docs/Web/API/SpeechSynthesis
Error fetching domain info for exoteambd.blogspot.com: No match for "EXOTEAMBD.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:47:01Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: Yo

2024-09-14 08:47:35,704 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out


Processed URL: http://thejockstrapenthusiast.tumblr.com
Processed URL: http://stackoverflow.com/questions/5081025/php-session-fixation-hijacking
Error fetching domain info for abouthappybirthday.blogspot.com: No match for "ABOUTHAPPYBIRTHDAY.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:47:17Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the 

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://lavieshopping.pt/
Processed URL: https://twitter.com/vh1
Processed URL: https://www.aikido.org.au/
Error fetching domain info for www.ungerandkowitt.com: [Errno 104] Connection reset by peer
Processed URL: http://www.xe.com/currency/nio-nicaraguan-cordobaError fetching domain info for www.allopsm.fr: [Errno 104] Connection reset by peer

Processed URL: http://en.rfwiki.org/wiki/Voltaire_(musician)
Processed URL: http://www.iconfilms.co.uk/productions
Processed URL: http://rua.pp.ua
Processed URL: http://www.ungerandkowitt.com
Processed URL: https://www.allopsm.fr/
Error fetching domain info for www.tomsguide.com: [Errno 104] Connection reset by peer


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://www.432player.com/
Processed URL: https://trv.no/
Processed URL: http://www.tomsguide.com/us/iphones-dont-need-antivirus-software,news-23111.html
Processed URL: http://www.hbs.edu/faculty/Publication%20Files/08-017_1903b556-786c-49fb-8e95-ab9976da8b4b.pdf
Processed URL: https://www.popmech.ru/
Error fetching domain info for spn3.narod.ru: No entries found for the selected source(s).

>>> Last update of WHOIS database: 2024.09.14T10:47:38Z <<<

Processed URL: https://www.youtube.com/user/FoxwoodsOfficial
Processed URL: http://sjec.edu.in
Processed URL: https://msdn.microsoft.com/ru-ru/library/mt238290.aspx
Processed URL: https://msdn.microsoft.com/en-us/library/aa480039.aspx
Processed URL: http://www.byggmakkerpluss.no/
Processed URL: https://www.xyplorer.com/
Processed URL: http://docwiki.cisco.com/wiki/Internetworking_Case_Studies_--_Using_the_Border_Gateway_Protocol_for_Interdomain_Routing
Error fetching domain info for www.littlerockstore.com: [Errno 104] Conn

2024-09-14 08:47:48,911 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out


Processed URL: http://www.whitesandlakeresort.com/
Processed URL: https://www.monster.com/career-advice/article/sample-resume-graphic-designer
Processed URL: http://spn3.narod.ru/nat.pdf
Error fetching domain info for www.usedvancouver.com: [Errno 104] Connection reset by peer
Processed URL: https://www.littlerockstore.com/
Processed URL: https://napavintners.com/vineyards/provenance-vineyards/
Processed URL: https://www.brainyquote.com/quotes/authors/v/voltaire.html
Processed URL: http://www.silvergames.com/en/traffic-collision-2
Processed URL: http://fisioterapiayterapiaocupacional.blogspot.com/
Processed URL: https://tools.ietf.org/html/2535
Processed URL: https://en.wikipedia.org/wiki/Batting_average
Processed URL: https://www.populuslive.com/
Processed URL: https://www.rottentomatoes.com/m/the_help/
Error fetching domain info for penjual-mimpi.blogspot.com: No match for "PENJUAL-MIMPI.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:47:32Z <<<

NOTICE: The expiratio

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: http://www.brighthub.com/electronics/home-theater/articles/111893.aspx
Processed URL: https://grid.ac/
Processed URL: http://www.definitions.net/definition/PERIPHERAL%20NEUROPATHY
Processed URL: https://www.youtube.com/watch?v=_f6k_UIr86E
Processed URL: http://www.larapedia.com/definition_and_meaning_of/portable_document_format_definition_and_meaning.html
Processed URL: http://www.autoeurope.com/go/car-rental-nice/
Processed URL: https://www.pinterest.com/explore/command-hooks/
Processed URL: https://thesession.org/sessions
Processed URL: https://www.dbs.com/in/index/default.page
Processed URL: https://www.providenceiscalling.jobs/
Processed URL: https://www.hivos.org/
Processed URL: https://www.pgweb.cz/
Processed URL: https://www.australia.com/en-in
Processed URL: http://www.victorpest.com/advice/rodent-library/house-mouse
Processed URL: http://www.passivehousecanada.com/
Processed URL: https://www.betclic.fr/ui/legislation/restricted
Processed URL: http://www.nocleans

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Error fetching domain info for ezoteryka-magia-okultyzm.blogspot.com: No match for "EZOTERYKA-MAGIA-OKULTYZM.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:47:46Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to 

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: http://prekese.dadeschools.net/-docs/ForTeachers/TeacherHandbook/esy/ESY%20Self-Help%20List.pdf
Processed URL: https://shitpostgenerator.tumblr.com/#_=_
Processed URL: http://www.siemon.com/us/white_papers/07-03-01-demystifying.asp
Processed URL: https://www.pasokon-syobun.com/


2024-09-14 08:48:10,377 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: https://www.mymajors.com/career/computer-network-architects/salary/
Processed URL: http://www.sjsu.edu/faculty/watkins/coase.htm
Processed URL: https://en.wikipedia.org/wiki/List_of_computer_simulation_software
Processed URL: http://encyclopedia.kids.net.au/page/im/Impact_event
Processed URL: http://www1.salary.com/Graphic-Design-Specialist-Salary.html
Processed URL: https://www.savetheboundarywaters.org/
Processed URL: http://www.Cisco.com/c/en/us/td/docs/voice_ip_comm/cuipph/7960g_7940g/sip/7_5/english/administration/guide/ver7_5/sipaxc75.pdf
Processed URL: https://www.hgshop.hr/
Processed URL: http://staff.um.edu.mt/albert.gatt/teaching/dl/biber93.pdf
Processed URL: http://www.prismo.ch/comparisons/
Processed URL: https://enredesao.wordpress.com/
Processed URL: http://www.thp.org/
Processed URL: http://www.greennet.org.uk/support/open-source-vs-proprietary-software
Processed URL: https://syonyk.blogspot.com/
Processed URL: http://www.my-car.info/
Processed URL: https:

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: http://www.guidetoonlineschools.com/graphic_design
Processed URL: http://www.rogerebert.com/reviews/bridge-of-spies-2015
Error fetching domain info for tipsserbaserbi.blogspot.com: No match for "TIPSSERBASERBI.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:48:02Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign 

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: http://designation.io/blog/digital-and-graphic-designer-salary/
Processed URL: http://www.softsea.com/review/Text-File-Splitter.html
Processed URL: http://www.thebodyguardmusical.com/
Processed URL: http://www.diffen.com/difference/Mouse_vs_Rat
Processed URL: http://learningenglish.voanews.com/a/google-hidden-games/3614935.html
Processed URL: http://www.rokuhelp.com/now-you-can-stream-discovery-family-of-networks-on-roku/
Processed URL: http://www.tech-faq.com/linksys-router-password.html
Processed URL: http://grammarist.com/usage/province-vs-provenance/
Processed URL: https://www.liftmaster.com/
Processed URL: http://www.doityourself.com/stry/the-advantages-and-disadvantages-of-using-a-category-6-cable
Processed URL: http://mtgcommander.net/
Processed URL: http://computer.howstuffworks.com/bittorrent1.htm
Processed URL: https://www.tdameritrade.com/research/bonds-and-cds/cd-center.page
Processed URL: http://dailynylon.tumblr.com
Processed URL: http://www.fotosearch.com/

2024-09-14 08:48:33,959 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: https://www.samysprints2go.com/
Processed URL: https://www.handshq.com/
Processed URL: http://hypertext-transfer-protocol-with-privacy.downloads.filetransit.com/
Processed URL: https://www.hd-bits.com/
Processed URL: http://www.3starmy.com/
Processed URL: http://www.choose-piano-lessons.com/piano-keys.html
Processed URL: http://www.payscale.com/research/US/Job=Software_Architect/Salary
Processed URL: https://openclipart.org/tags/computer%20mouse
Processed URL: https://books.google.com.et/books?id=KpY1hpKKwdQC&printsec=frontcover&hl=tr&source=gbs_atb


2024-09-14 08:48:35,209 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://www.clickntake.com/
Processed URL: http://www.filmandfriends.com/
Processed URL: http://www.gettysburg.com/bog/address.htm
Processed URL: http://www.newworldencyclopedia.org/entry/Cladistics
Processed URL: http://www.kkoworld.com/
Processed URL: http://chel.media/
Error fetching domain info for thetrumpetstone.blogspot.ru: No entries found for the selected source(s).

>>> Last update of WHOIS database: 2024.09.14T10:48:27Z <<<

Processed URL: http://www.microwavejournal.com/
Processed URL: http://www.retroporn.sexy/
Processed URL: http://thetrumpetstone.blogspot.ru/2011/01/lds-temple-baptismal-font-styles.html
Processed URL: https://www.astrologyonline.eu/Astro_MemoNew/Profilo.asp
Processed URL: https://www.liberopensiero.eu/
Processed URL: https://www.facebook.com/VH1
Processed URL: http://www.getactive.gr/
Processed URL: http://www.balloonsaloon.com/fun-mylar-balloon-shapes/mega-number-balloons.html
Processed URL: https://ahyesmysavedstudystuff.tumblr.com/#_=_
P

2024-09-14 08:48:42,670 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: https://plevenzapleven.bg/


2024-09-14 08:48:42,818 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out


Processed URL: https://cbgverzamelingen.nl/
Processed URL: http://photos.toofab.com/galleries/mickey_mouse_club_cast_then__now
Processed URL: http://www.oliopizzeria.com/
Processed URL: https://www.online-calculator.com/online-maths-calculator/
Processed URL: https://www.academiamusical.com.pt/
Error fetching domain info for sherizeee.blogspot.com: No match for "SHERIZEEE.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:48:33Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic proces

2024-09-14 08:48:46,986 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://nbc4i.com/
Processed URL: https://www.marketingprofs.com/charts/2012/8560/email-open-and-click-rates-benchmarks-trends
Processed URL: http://www.bakerypublishing.com
Processed URL: http://xian.95191.com
Processed URL: https://www.uratex.com.ph/
Processed URL: http://www.melllawrencearchitects.com/
Processed URL: http://www.brighthub.com/computing/hardware/articles/68502.aspx
Processed URL: http://en.academic.ru/dic.nsf/enwiki/4160994
Processed URL: https://www.metropolitandecor.com
Processed URL: http://www.ehow.com/how_5910412_make-fingerprint-jewelry.html
Processed URL: http://military.wikia.com/wiki/United_States_Pacific_Fleet
Processed URL: https://www.porndex.com/
Processed URL: http://www.a10.com/multiplayer-games
Processed URL: http://www.protocols.com/
Processed URL: http://creative-punch.net/2014/10/intro-html5-speech-synthesis-api/
Processed URL: http://www.tablefortwoblog.com/
Processed URL: https://simpliciaty-cc.tumblr.com/#_=_
Processed URL: http://p

2024-09-14 08:48:50,309 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: https://en.wikipedia.org/wiki/Router_table_(woodworking)
Processed URL: http://www.ondecksports.com/netting/batting-cages
Processed URL: http://www.investopedia.com/terms/t/transactioncosts.asp
Processed URL: https://www.dnalc.org/resources/3d/04-mechanism-of-replication-advanced.html
Processed URL: http://mintian.tumblr.com
Processed URL: http://list.ly/list/pXy-open-source-enterprise-architecture-modeling-tools
Processed URL: http://www.truli.com/


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://motosaigon.vn/
Processed URL: http://www.hotnewhiphop.com/kendrick-lamar-dna-prod-by-mike-will-made-it-new-song.1973694.html


2024-09-14 08:48:51,967 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://www.theskicenter.com/
Processed URL: http://www.hartzine.com/
Processed URL: https://wordpress.org/plugins/free-cdn/
Processed URL: http://www.apkhere.com/app/com.smartprojects.ramoptimization
Processed URL: https://carmenlafox.tumblr.com/#_=_
Error fetching domain info for www.yasminka.cz: Your connection limit exceeded. Please slow down and try again later.

Processed URL: http://www.pianonanny.com/
Processed URL: http://internetserviceprovidersnyc.weebly.com/
Error fetching domain info for www.cintac.cl: unsupported operand type(s) for -: 'datetime.datetime' and 'str'
Processed URL: https://www.mytello.com/de/
Processed URL: http://www.yasminka.cz/
Processed URL: http://www.wikihow.com/Build-a-Bridge-with-Popsicle-Sticks
Processed URL: https://www.tibbsandbones.com/
Processed URL: http://www.mgn.ru/
Processed URL: https://en.wikipedia.org/wiki/Multiprotocol_Label_Switching
Error fetching domain info for betioke.blogspot.com: No match for "BETIOKE.BLOGSPOT.COM".

2024-09-14 08:48:53,700 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://betioke.blogspot.com/
Processed URL: https://totohz.world.tmall.com/
Processed URL: http://cgi.csc.liv.ac.uk/~ped/COMP104/COMP104-2015-16/104-Set8-Files.pdf
Processed URL: https://www.cintac.cl/
Processed URL: https://baohiemxahoidientu.vn/
Processed URL: http://www.wikihow.com/Use-a-Computer-Mouse
Processed URL: http://dictionary.sensagent.com/Public%20address/en-en/
Processed URL: http://en.academic.ru/dic.nsf/enwiki/295527
Processed URL: https://www.ekupi.ba/
Processed URL: http://www.nikutronics.eu/home/
Processed URL: http://dorkdiaries.co.uk
Processed URL: http://www.moongiant.com/calendar/
Processed URL: http://www.smartinsights.com/lead-generation/marketing-automation/definition-marketing-automation/
Processed URL: https://acls.com/free-resources/knowledge-base/acute-coronary-syndrome/myocardial-infarction-prognosis-and-predictors-of-mortality
Processed URL: http://www.goodhousekeeping.com/life/entertainment/g2932/mickey-mouse-club-cast-members-then-and-no

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://estacaoxxx.wordpress.com/
Processed URL: http://www.spanishdict.com/translate/so%C3%B1ar
Processed URL: http://www.letras.org.es/
Processed URL: http://quiluc.com.vn
Processed URL: https://www.newegg.com/Product/ProductList.aspx?Description=USB%20Parallel%20port&Submit=ENE
Processed URL: https://www.chuv.ch/fr/chuv-home/
Processed URL: https://www.hrkgame.com/en/
Processed URL: http://www.veranda.com/luxury-lifestyle/news/g1492/dynasty-tv-show-cast-today/
Processed URL: http://www.wikihow.com/Display-Bookmarks-in-Chrome
Processed URL: https://www.newegg.com/Video-Cards-Video-Devices/Category/ID-38
Processed URL: http://www.developershome.com/sms/smsWindows.asp
Processed URL: http://tipsareforkids.com/port-and-starboard-game/
Processed URL: https://www.magnumelectronics.com/Default.asp
Processed URL: http://dictionary.sensagent.com/Tabloid/da-da/
Processed URL: http://www.ldoceonline.com/search/?q=provenance
Processed URL: http://www.dictionary.com/browse/compact-

2024-09-14 08:49:15,492 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known
Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://www.vincheckup.com/
Processed URL: http://www.globalsqa.com/
Processed URL: http://www.zeroenergydesign.com/passive%20solar%20cooling.html
Processed URL: https://www.volley.de/
Processed URL: http://yulearse.tumblr.com
Processed URL: http://www.fonearena.com/blog/110043/vh1-app-released-for-android-iphone-ipad-and-windows-phone.html
Processed URL: https://siotranculpdi.files.wordpress.com/2015/09/osi-model-for-dummies-video.pdf
Processed URL: https://www.researchgate.net/publication/271074231_The_Open_Source_Modelica_Project
Processed URL: http://www.dictionary.com/browse/lan
Processed URL: http://www.downloadmoreram.com/
Processed URL: https://www.churchandchapel.com/
Processed URL: https://www.law.cornell.edu/wex/incorporation_doctrine
Processed URL: https://file.al/?op=login
Processed URL: http://www.freesheetpianomusic.com/
Processed URL: http://www.cannonscombine.com/
Processed URL: https://www.tumblr.com/safe-mode?url=https%3A%2F%2Fbuttercup-queen.tumblr.co

2024-09-14 08:49:21,012 - whois.whois - ERROR - Error trying to connect to socket: closing socket - timed out


Processed URL: https://ru.wikipedia.org/wiki/Walnut_Creek_CDROM
Processed URL: http://www.quillpad.in/index.html
Processed URL: http://www.authorstream.com/Presentation/ramendrakumar-1210196-6631741-transport-layer-security-tls-session-resumption/
Processed URL: http://reaco.com.mx
Processed URL: http://www.imdb.com/title/tt3294732/
Processed URL: https://senoraorso.wordpress.com/
Processed URL: https://www.advancefamilyplanning.org/
Processed URL: http://www.dosbox.com/wiki/Software
Processed URL: https://www.computerhope.com/jargon/m/memory.htm
Processed URL: https://electronics.stackexchange.com/questions/20079/do-electromagnetic-shielding-stickers-do-anything
Processed URL: https://chicowhitaker.wordpress.com/
Processed URL: http://www.digital-scrapbooking-storage.com/compact-disc-storage.html
Processed URL: http://www.thegeekstuff.com/2009/06/how-to-upgrade-linksys-wireless-router-firmware/
Processed URL: http://www.sexinfo101.com/sp_bodyguard.shtml
Processed URL: http://everythin

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://www.analyticsvidhya.com/
Processed URL: http://cmapspublic2.ihmc.us/rid=1174448387125_1274109684_5724/Transaction%20Processing%20Cycle.cmap
Processed URL: https://www.facebook.com/imdbpro/
Processed URL: https://www.wunderground.com/us/ca/fontana
Processed URL: http://www.yourdictionary.com/menu-bar
Processed URL: https://www.craftbeer.com/styles/session-beer
Processed URL: https://www.enotes.com/homework-help/explain-advantages-disadvantages-open-source-465755
Processed URL: https://headcamatu.files.wordpress.com/2015/10/schema-cable-serial-null-modem.pdf
Processed URL: https://www.sappi.com/
Processed URL: http://www.google.co.id/patents/US6219419
Processed URL: https://www.brainyquote.com/quotes/keywords/passivity.html
Processed URL: http://www.inetservicescloud.com/knowledgebase/how-to-disable-ctrl-alt-del-in-centos6rhel6/
Processed URL: https://www.solvedassignmentaiou.com/
Processed URL: http://www.baubauhaus.com/
Processed URL: https://www.droneshop.com/
P

2024-09-14 08:49:38,500 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://d2rights.blogspot.com
Processed URL: https://www.aydinlik.com.tr/
Processed URL: http://www.urbandictionary.com/define.php?term=session%20beer
Processed URL: http://whatis.techtarget.com/definition/speech-synthesis
Processed URL: http://www.dcup.com/t/04/?nats=MjAwMzcwLjYyLjEuMTI3LjEuMC4wLjAuMA
Processed URL: https://www.aace.com
Processed URL: http://tips4pc.com/computer-repair/your-computer-is-low-on-memory-4-easy-solutions.htm
Processed URL: http://helpdeskgeek.com/help-desk/fix-base-system-device-not-found-in-device-manager/
Processed URL: https://discussions.apple.com/thread/1728171?start=0&tstart=0
Processed URL: http://dapeco.com.om
Processed URL: http://www.wikiwand.com/en/MultiMediaCard
Processed URL: https://www.stgeorge.com.au/personal/bank-accounts/transaction-accounts


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://www.peopleshost.com/
Processed URL: https://www.mathpapa.com/algebra-calculator.html
Processed URL: https://www.showplaceicon.com/


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://www.zero.de/
Processed URL: https://www.merriam-webster.com/dictionary/cladistics
Processed URL: http://search.kna.kw
Processed URL: http://www.telegraph.co.uk/news/uknews/4508682/Oliver-Cromwells-death-mask-for-sale.html
Error fetching domain info for chesaudade.blogspot.com: No match for "CHESAUDADE.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:49:32Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as 

2024-09-14 08:49:53,650 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 104] Connection reset by peer


Processed URL: http://www.dictionary.com/browse/standard-error
Processed URL: https://freedomfromnarcissisticandemotionalabuse.weebly.com/
Processed URL: http://www.topbbwporn.com/
Processed URL: https://www.toltyarnandwool.com/
Processed URL: https://www.opentable.ae/
Processed URL: http://www.iranhiv.com/
Processed URL: https://www.americanvan.com/
Processed URL: https://www.programsgulf.com/
Processed URL: https://twitter.com/impact_events
Processed URL: https://kb.juniper.net/library/CUSTOMERSERVICE/technotes/Junos_NAT_Examples.pdf
Processed URL: https://ncatlab.org/nlab/show/connected%20space
Processed URL: http://ecorporateoffices.com/SaveALot-825
Processed URL: http://www.guitaretab.com/j/justin-timberlake/371296.html
Processed URL: http://www.thefreedictionary.com/traffic
Processed URL: https://www.quora.com/How-useful-is-IMDbPro
Processed URL: http://syria.news
Processed URL: http://www.qtoysaustin.com/
Processed URL: http://www.gdgsoft.com/gSplit/
Processed URL: http://invest

2024-09-14 08:49:59,323 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://www.wolterskluwer.it:80/
Processed URL: http://www.imdiet.com/
Processed URL: https://www.sxsw.com/
Processed URL: https://www.download.hr/category-multimedia.html


2024-09-14 08:50:00,636 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://www.crestonwood.com/router.php
Processed URL: https://www.macrumors.com/2014/06/03/ios-8-peer-to-peer-airplay/
Processed URL: https://en.wikipedia.org/wiki/Provenance
Processed URL: http://www.kln.ac.lk/
Processed URL: http://divorcesupport.about.com/od/passiveaggressive/qt/What-Causes-Passive-Aggressive-Behavior.htm
Processed URL: http://www.sshic.com/
Processed URL: https://en.wikibooks.org/wiki/Windows_Programming/Device_Driver_Introduction
Processed URL: https://www.army.mil/info/organization/unitsandcommands/commandstructure/USARPAC
Processed URL: http://topmeaning.com/english/provenance
Processed URL: https://en.wikipedia.org/wiki/Linksys_routers
Processed URL: http://www.larapedia.com/glossary_of_plants_terms/provenance_meaning_in_plants_terminology.html
Processed URL: http://everything.explained.today/Internet_radio_device/
Processed URL: http://www.cmxcinemas.com/
Processed URL: http://www.elecdude.com/2012/12/avr-spi-serial-pheripheral-interface.html
Pro

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: http://en.academic.ru/dic.nsf/enwiki/137612
Processed URL: https://www.parfumuri-timisoara.ro/?SID=f79h0tdqocm3a6iehmn0easiv5


  k = self.parse_starttag(i)


Processed URL: http://dfat.gov.au/people-to-people/australia-awards/Pages/australia-awards-scholarships-opening-and-closing-dates.aspx
Processed URL: http://www.techfaq360.com/viewTutorial.jsp?tutorialId=326
Processed URL: http://www.cbs.com/shows/cbs_this_morning/
Processed URL: http://www.opticsplanet.com/cameras.html
Processed URL: http://www.whio.com/news/local/thief-breaks-into-save-lot-steals-steaks-dayton/dX1YgaOZ7zmj18bPd4tU6L/
Processed URL: https://www.abv.bg/
Error fetching domain info for www.vratnepenize.cz: Your connection limit exceeded. Please slow down and try again later.



Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Error fetching domain info for www.cuatrovientoscye.cl: unsupported operand type(s) for -: 'datetime.datetime' and 'str'
Processed URL: https://www.tm.org/
Processed URL: https://www.vratnepenize.cz/
Processed URL: http://www.seltmannundsoehne.de/
Processed URL: http://www.atpworldtour.com/en/tournaments/australian-open/580/overview
Processed URL: http://en.academic.ru/dic.nsf/enwiki/112147
Processed URL: https://www.cuatrovientoscye.cl/
Processed URL: http://www8.hp.com/us/en/thin-clients/index.html
Processed URL: https://www.elsevier.com/books/passive-optical-networks/lam/978-0-12-373853-0
Processed URL: https://www.mesenvies.fr/
Processed URL: http://spie.org/documents/publications/00%20step%20module%2008.pdf
Processed URL: http://www.historyvshollywood.com/reelfaces/bridge-of-spies/


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: http://www.zebrakeys.com/lessons/
Processed URL: https://onlineradiotuner.com/
Processed URL: https://togetalsond.files.wordpress.com/2015/10/usb-to-parallel-port-converter-schematics.pdf
Processed URL: http://www.imdb.com/Title?0096641
Error fetching domain info for teploseti.zp.ua: unsupported operand type(s) for -: 'datetime.datetime' and 'str'
Processed URL: http://teploseti.zp.ua
Processed URL: https://www.dssresearch.com/Solutions/StrategyResearchSolutionsGroup/MarketSegmentation/MarketSegmentationTechniques
Processed URL: http://navelec.com/products
Processed URL: http://www.masscool.com/massmedia/softloads2/planet-waves-patch-cable-kit.html
Processed URL: http://www.federteep.org
Error fetching domain info for chandeliercakes.blogspot.com: No match for "CHANDELIERCAKES.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:50:17Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registrat

2024-09-14 08:50:59,397 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://ch.prunauneau.fr/3/
Processed URL: http://www.alpha-east.de/wp-folder/folder/nw/demp.php
Processed URL: http://dlakupujacych.allegro.secfence.lesavik.net
Processed URL: http://snoweco.com/tinmb3.html
Processed URL: https://foresta-mod.firebaseapp.com
Processed URL: http://mdmplus.com.ph/uphty/
Processed URL: http://gsk-mozirje.si/wp-content/plugins/wpsecone/icscards.nl/
Processed URL: https://onedrive.live.com/?authkey=%21ADgDSBYlMqMJyCE&amp;cid=B209490283DB4B3D&amp;id=B209490283DB4B3D%21113&amp;parId=root&amp;o=OneUp
Processed URL: https://safirbetgiristikla.blogspot.com/
Processed URL: http://sloaneandhyde.com/imm/new2015/document.php
Processed URL: http://www.amberexpeditions.com/plugins/search/contacts/a.htm
Processed URL: http://donald-repare.droppages.com/
Processed URL: http://provinciabrescia2.babyloweb.eu/info
Processed URL: https://smradvocates.com/voicemail/
Processed URL: http://vilanovacenter.com/templates/yoo_bigeasy/janetyolo/
Processed URL: http://

2024-09-14 08:51:03,678 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 111] Connection refused


Processed URL: http://m.hf203.com/
Processed URL: http://het-impp11.com/servi-mail-pro-webb111928270383jksddkjsdjks191171927282827383322/b32eb9b4e80af65ebc22006e39e5db9d
Processed URL: https://quke9.com/wp-content/plugins/apikey/?qzw0iniuGYTRYwy$vwedbygVDRwydgCCYyueuuyywUBYVYUb*uWu
Processed URL: https://eastviewcobbs.com.au/wp-includes/js/plupload/mtb2020/onlinebankingmtb
Processed URL: http://www.theironinnparlour.co.uk/includes/PayPal/app/index
Processed URL: https://seachromelb-my.sharepoint.com/:b:/g/personal/dstevens_seachrome_com/ES9rj0pB2hROugeHWPyK7Y4BYbjrsC3MjqSuCqo2_7Y0ew
Processed URL: https://stage.cwport.com/
Processed URL: http://www.bcp.futbolfinanciero.com.pe/
Processed URL: https://neocellularparts.com/amazon.deapsignin/a/.a/amzon2020/a1b2c3/41c1477d772848edbcb357bf159c8b0d/login/
Processed URL: http://www.neudorf-ole.irv.cc/4B374AA12276932A9FA724E6142E9BD1?sec=Wolfgang%20Meyerle
Processed URL: http://mail.feesthut.be/~u43730p39181/pki-validation
Processed URL: http:/

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: http://flow-wrap-machines.com/wp-includes/IXR/helpdesk/ss/?email=nobody@mycraftmail.com
Processed URL: https://mynailtechnique.com/wp-conf10/am19/PortailAS/appmanager/PortailAS/amelipro/
Processed URL: http://yovcxm.com/chase/Chase/9854c60851ce4be37638aaaabfd0f96c
Processed URL: http://secure.informations.ninjamarketingman.com/view/login?cmd=_signin&dispatch=9309bc66c8512ec0ca3b82ab7&locale=en_
Processed URL: https://drive.google.com/file/d/1uX4SJKJ2TtKdl63pqON1jY6e4YX4fHR-/edit
Processed URL: http://pinnerx.com/abc/KIQ7VGFbZhUOJq2ghNbGSa6GhZQE5ehPX7WTNB5NTZBDqETHs5okfu1xtRcy6to4AlXoX3cQ2Nu/
Processed URL: http://shadetreetechnology.com/V4/validation/86a85b9ac67ed06c86d4dd56ae8b2b04
Processed URL: http://mailupgrade2info.site44.com/redirect.html
Processed URL: http://alareentading-catalog.page.tl/


2024-09-14 08:51:56,416 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://www.imcreator.com/viewer/vbid-fa0f29d5-fpsjmms8
Processed URL: http://htl.li/kL5q30q5utK
Processed URL: https://healthandwellnessgel.com/wp-content/languages/b2020123/1/
Processed URL: https://sum.vn/UUxKg?idtrack=H8pSVzTL
Processed URL: https://forms.office.com/Pages/ResponsePage.aspx?id=9mkL-UB4bEKSg-BMXXMbpMOBEAB-n85CvYZfHJASiU5URVdOQjA3Q1VRQlYyVEhTQU9VNTFHVElQTC4u
Processed URL: http://www.piandizano.it/public/map/
Processed URL: http://email.m-aut.ru/dhl/autodhl/dhl/deliveryform.php
Processed URL: http://oohlalasings.com/redir/MsgCentre/msgLists/?ID=nobody@mycraftmail.com
Processed URL: http://sertyxese.myfreesites.net/
Processed URL: http://d2882xyni5hwu8.cloudfront.net/
Processed URL: http://foliar.pl/admin/js/lib/bricks.php
Processed URL: https://ecb9547832.nxcli.net/images/alibaba/cmd-login=2fa960a1990a231b18bfe6368da9c911/copyright.php
Processed URL: http://pinnerx.com/abc/Si0BvtOxDKySD1hCvjnGehqjSKPmEX9xga8LDXigHR9kTqWoKk5YCwhug6QV40skaB5Nyo4fgdA
Proces

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Processed URL: https://vacomsystems.com/wp-content/uploads/2020/07/online/chase/home/
Processed URL: https://visitor.constantcontact.com/manage/update/genemail?v=001MDKVK-KV0oGx0m9pen7VfbkCi3JbQH6CxWt2KJR3Sl7ocoW5oVQfUngYVZBl8V8_DuuodSn8LwhJ811FPM6KwxlApqDEYy1OdOJi3CtrgF7zXkEVPVJFFNna-XA9HcE5vELxaugXXt4=
Processed URL: http://electricitypak.com/fakt/css/data/17/index.php
Processed URL: http://tabloupersonalizat.ro/admin/secure/T0RVM05ETTRORE0yTnpFPQ==/
Processed URL: https://support-appleld.com.secureupdate.duilawyeryork.com/ap/799c6043d8e6110/?cmd=_update&dispatch=799c6043d8e611069&locale=_US
Processed URL: https://sites.google.com/site/libretyreserve
Processed URL: http://quantgbo.lviv.ua/cli/limted/~/user/~/sing%20in%20/eroor/update/-/myaccount/signin/
Processed URL: https://20200724065829-dot-s2pe7ed9y.rj.r.appspot.com/office/index.php
Processed URL: http://www.stockedsummit.com/droms/freemobile.fr/d1a6452554707057e3a4e4e07af7b6d1/moncompte/
Processed URL: http://email.m-aut.ru/1/b

2024-09-14 08:52:25,094 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno 111] Connection refused


Processed URL: http://www.iscbankofamerica.net
Processed URL: https://secppal.xyz/
Processed URL: http://instagram.com.login.s5df46ss21s.oficinasylocalesfiore.com/
Processed URL: http://frosty-nobel-ca7f68.netlify.app/
Processed URL: http://profelectrik.ru/update/e96288debace15316498395a8aac112fZjZmYmEzOGU3NTYyZWVkZjJmNTk2YWY2YmZmZDk5ODM=/myaccount/websc_success/
Processed URL: http://87.138.95.150:8080/index.php
Processed URL: http://manguita.es/images/albums/index.html
Processed URL: https://www.shortlink.net/VERIFY-ATM
Processed URL: https://support-appleld.com.secureupdate.duilawyeryork.com/ap/63e1cdcdc423bfb/?cmd=_update&dispatch=63e1cdcdc423bfb87&locale=_
Processed URL: http://www.support-appleld.com.secureupdate.duilawyeryork.com/ap/b2bf3837b8e3e8b?cmd=_update&dispatch=b2bf3837b8e3e8b61&locale=_US
Processed URL: http://mlcrosoft-0nedrive-portal.el.r.appspot.com/logi.html
Processed URL: http://mymwebza.usite.pro/Email-LoginMywebhtml.htm
Processed URL: https://support-appleld.com.

2024-09-14 08:52:40,730 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://harcomputer.com/http/match
Processed URL: https://coliissimo-livraison.com/colissimo%20new/colissimo/account/loading.html?id=300000716523000091
Processed URL: http://atpscan.global.hornetsecurity.com/index.php?atp_str=iwbJSC5r_fvjexWtZ0APVWAaNFBb5w5RaWbWfiVG2tJ7l3sMCHkJIzo6IzYwZmRmNjNlOWYwMSM6OiOB3TRcMGuLLky4VNdgcmDW
Processed URL: http://attys.yolasite.com/
Processed URL: https://financiallifecoaching.builderallwp.com/wp-content/themes/skanda/?qzwoiniuGYTRYwysvwedbygVDRwydgCCYyueuuyywUBYVYUbbxuWu
Processed URL: http://qubectravel.com/bpm44/
Processed URL: http://igsign00.000webhostapp.com/login.html
Processed URL: http://fbproject4df3409fkl342ef043.el.r.appspot.com/wp-content
Processed URL: http://www.goodhopeservices.com/update/
Processed URL: https://support-appleld.com.secureupdate.duilawyeryork.com/ap/e909aa5608f4fea/?cmd=_update&dispatch=e909aa5608f4fea21&locale=_US
Processed URL: http://mne.edu.vn/wp-includes/Netflix/LoginID/index.php
Processed URL: http://

2024-09-14 08:53:17,964 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: https://eoexpo.sa/boa/bankofamerica/next3.php
Error fetching domain info for interbahiss1.blogspot.com: No match for "INTERBAHISS1.BLOGSPOT.COM".
>>> Last update of whois database: 2024-09-14T07:53:05Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign f

2024-09-14 08:54:46,990 - whois.whois - ERROR - Error trying to connect to socket: closing socket - [Errno -2] Name or service not known


Processed URL: http://www.peoplemakingplaces.com/includes/Support/En/log/signin/customer_center/customer-IDPP00C644/myaccount/signin/
Processed URL: http://www.stolizaparketa.ru/wp-content/themes/twentyfifteen/css/read/chinavali/index.php?email=abuse@fit-online.com
Processed URL: http://www.shadetreetechnology.com/V4/validation/ef27f396982a200c0e512f941f4653c2
Processed URL: http://y9o5m.codesandbox.io/
Processed URL: http://dscsc.lk/images/comunicado-pbb.com.br/
Processed URL: http://apple.com.icloud-lost.co/
Processed URL: http://piponi.com/58CEBC670A93888DE49B5C0EE755326F?sec=Michaela%20Fellenberg
Processed URL: http://marcoislandfurnitureanddesign.com/wp-includes/Text/Diff/Renderer/index.php
Processed URL: http://starmak.com.tr/950CAAEA0281AA2BEBED8F9ECCA4BD30/?sec=KnelsPatrick
Processed URL: https://worldativelion.my-free.website/
Processed URL: https://wh494528.ispot.cc/index.php?email=daesankh@hanmail.net
Processed URL: http://labanquepostale-ssm-voscomptepostalessl.com/
Process

In [104]:
                                       #5. Data Preprocessing
print("Running Data Preprocessing")

from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Preprocess features
processed_df = pd.DataFrame(X)
processed_df['status']= y
processed_df.head()

Running Data Preprocessing


Unnamed: 0,url,url_length,contains_ip,shortening_services,special_chars,html_length,js_length,num_links,num_forms,domain_age,has_ssl,status
0,http://www.bastelitis.de/,25,0,0,0,63203,3155,28,0,,0,0
1,http://jordanpost.com.jo,24,0,1,0,1686,405,0,1,,1,0
2,http://everything.explained.today/%5C/cladism/,46,0,0,7,0,0,0,0,,0,0
3,http://www.battingcagesofmn.com/batting-cages/,46,0,0,5,0,0,0,0,,0,1
4,http://musinfo.ch,17,0,0,3,0,0,0,0,,0,1


In [178]:
                                        #5. Data Preprocessing
# Remove rows with any NaN values in the dataframe
processed_df = processed_df.dropna()

# Check if there are any NaN values left (should print 0 for all columns)
print(processed_df.isnull().sum())

# Verify if any NaN values remain
print(f"Remaining NaN values: {processed_df.isna().sum().sum()}")  # This should print 0 if no NaNs are left
#save to csv file

processed_df.head()

url            0
url_length     0
html_length    0
js_length      0
num_links      0
num_forms      0
domain_age     0
has_ssl        0
status         0
dtype: int64
Remaining NaN values: 0


Unnamed: 0,url,url_length,html_length,js_length,num_links,num_forms,domain_age,has_ssl,status
0,https://www.torproject.org/download/download,44,29592,876,109,0,6541.0,1,0
1,https://www.gamereactor.se/,27,520543,99233,449,1,7623.0,1,0
2,https://en.m.wikipedia.org/wiki/House_Intellig...,60,128572,23253,292,1,8645.0,1,0
3,http://www.yourdictionary.com/provenance,40,276433,16364,130,1,9098.0,1,0
4,http://www.wellfloured.com/,27,163,0,0,0,3414.0,0,0


In [121]:
#Drop specified columns from the DataFrame
processed_df = processed_df.drop(columns=['contains_ip', 'shortening_services', 'special_chars'])
processed_df.head()

Unnamed: 0,url,url_length,html_length,js_length,num_links,num_forms,domain_age,has_ssl,status
0,https://www.torproject.org/download/download,44,29592,876,109,0,6541.0,1,0
1,https://www.gamereactor.se/,27,520543,99233,449,1,7623.0,1,0
2,https://en.m.wikipedia.org/wiki/House_Intellig...,60,128572,23253,292,1,8645.0,1,0
3,http://www.yourdictionary.com/provenance,40,276433,16364,130,1,9098.0,1,0
4,http://www.wellfloured.com/,27,163,0,0,0,3414.0,0,0


In [176]:
                                              #5. Data Preprocessing
#Seperate the legitimate represented as 0 and phishing representd as 1 dataframes
legit_df = processed_df[processed_df['status'] == 0].reset_index(drop=True)  # Rows with status == 0
phish_df = processed_df[processed_df['status'] == 1].reset_index(drop=True)    # Rows with status == 1
#concat the legitimate and phishing
# Concatenate legitimate_df and phishing_df
processed_df = pd.concat([legit_df, phish_df], ignore_index=True)


In [207]:
                              #6-7. Feature Scaling and Feature Selection

from imblearn.over_sampling import SMOTE

try:
    # Split data
    X = processed_df.drop(columns=['url', 'status'])
    y = processed_df['status']
except: 
    print('url and staus have been dropped and not found in the dataframe')
finally:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [127]:
                #8. Machine Learning Models such as RandomForest, Support Vector, and GradentBoosting

#Define classifiers
classifiers = {
    'RandomForest': RandomForestClassifier(),
    'SVM': SVC(),
    'GradientBoosting': GradientBoostingClassifier()
}

In [129]:
                                            #9. Hyperparameter Tuning
param_grids = {
    'RandomForest': {'n_estimators': [100, 200], 'max_depth': [10, 20]},
    'SVM': {'C': [0.1, 1, 10], 'kernel': ['rbf', 'linear']},
    'GradientBoosting': {'n_estimators': [100, 200], 'learning_rate': [0.01, 0.1]}
}

In [131]:
                              #10. Model selection, training and evaluation metrics
best_model = None
best_score = 0

# Model selection, training and evaluation
for name, clf in classifiers.items():
    print(f"Training {name}...")
    
    grid_search = GridSearchCV(clf, param_grids[name], cv=5, scoring='accuracy')
    grid_search.fit(X_train_scaled, y_train)
    
    model = grid_search.best_estimator_
    y_pred = model.predict(X_test_scaled)
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    print(f"{name} - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}")
    
    if accuracy > best_score:
        best_score = accuracy
        best_model = model

Training RandomForest...
RandomForest - Accuracy: 0.563953488372093, Precision: 0.5802469135802469, Recall: 0.5340909090909091, F1 Score: 0.5562130177514792
Training SVM...
SVM - Accuracy: 0.4476744186046512, Precision: 0.40540540540540543, Recall: 0.17045454545454544, F1 Score: 0.24
Training GradientBoosting...
GradientBoosting - Accuracy: 0.5116279069767442, Precision: 0.5217391304347826, Recall: 0.5454545454545454, F1 Score: 0.5333333333333333


In [133]:
                                       #11. Final Model Selection
print(f"Best model: {best_model}")

Best model: RandomForestClassifier(max_depth=10)


In [140]:
                                       #12. Classification
def classify_url(url, model, scaler, feature_columns_order):
    features = extract_features(url)
    features_df = pd.DataFrame([features])
    # Ensure all required columns are present
    for col in feature_columns_order:
        if col not in features_df.columns:
            features_df[col] = 0
    
    features_df = features_df[feature_columns_order]
    
    # Remove any NaN values
    
    # Scale and predict
    features_scaled = scaler.transform(features_df)
    prediction = model.predict(features_scaled)
    print(prediction)
    return "Legitimate" if prediction == 0 else "Phishing"

In [191]:
# Enter new url
new_url = input("https://: ")


https://:  https://gmail.com


In [193]:
classification_result = classify_url(new_url, best_model, scaler, X.columns)
print(f"The URL '{new_url}' is classified as: {classification_result}")

[0]
The URL 'https://gmail.com' is classified as: Legitimate


In [209]:
#Import the model to use to create a web interface
import pickle

# Save the model
with open('model.pkl', 'wb') as model_file:
    pickle.dump(best_model, model_file)

In [211]:
# Save the scaler
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [215]:
X.to_csv('processed_data.csv', index=False)