In [1]:
#import for parsing url
from urllib.parse import urlparse
import requests
import ssl
import socket
import whois
from datetime import datetime
import time
import csv
from tqdm import tqdm
import dns.resolver

In [2]:
#url features
def qty_dot_url(url):
    """Count the number of dots in the URL."""
    return url.count('.')

def qty_hyphen_url(url):
    """Count the number of hyphens in the URL."""
    return url.count('-')

def qty_slash_url(url):
    """Count the number of slashes in the URL."""
    return url.count('/')

def length_url(url):
    """Get the length of the URL."""
    return len(url)

In [3]:
#domain features
def qty_dot_domain(url):
    """Count the number of dots in the domain."""
    domain = urlparse(url).netloc
    return domain.count('.')

def qty_hyphen_domain(url):
    """Count the number of hyphens in the domain."""
    domain = urlparse(url).netloc
    return domain.count('-')

def qty_vowels_domain(url):
    """Count the number of vowels in the domain."""
    domain = urlparse(url).netloc.lower()
    vowels = "aeiou"
    return sum(1 for char in domain if char in vowels)

def domain_length(url):
    """Get the length of the domain."""
    domain = urlparse(url).netloc
    return len(domain)

In [4]:
#directory features
def qty_dot_directory(url):
    """Count the number of dots in the directory path."""
    path = urlparse(url).path
    return path.count('.')

def qty_hyphen_directory(url):
    """Count the number of hyphens in the directory path."""
    path = urlparse(url).path
    return path.count('-')

def qty_underline_directory(url):
    """Count the number of underscores in the directory path."""
    path = urlparse(url).path
    return path.count('_')

def qty_slash_directory(url):
    """Count the number of slashes in the directory path."""
    path = urlparse(url).path
    return path.count('/')

def qty_at_directory(url):
    """Count the number of '@' symbols in the directory path."""
    path = urlparse(url).path
    return path.count('@')

def qty_comma_directory(url):
    """Count the number of commas in the directory path."""
    path = urlparse(url).path
    return path.count(',')

def qty_plus_directory(url):
    """Count the number of '+' symbols in the directory path."""
    path = urlparse(url).path
    return path.count('+')

def qty_percent_directory(url):
    """Count the number of '%' symbols in the directory path."""
    path = urlparse(url).path
    return path.count('%')

def directory_length(url):
    """Get the length of the directory path."""
    path = urlparse(url).path
    return len(path)

In [5]:
#file features
def qty_dot_file(url):
    """Count the number of dots in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('.')

def qty_hyphen_file(url):
    """Count the number of hyphens in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('-')

def qty_underline_file(url):
    """Count the number of underscores in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('_')

def qty_slash_file(url):
    """Count the number of slashes in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('/')

def qty_at_file(url):
    """Count the number of '@' symbols in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('@')

def qty_and_file(url):
    """Count the number of '&' symbols in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('&')

def qty_comma_file(url):
    """Count the number of commas in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count(',')

def qty_percent_file(url):
    """Count the number of '%' symbols in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('%')

def file_length(url):
    """Get the length of the file path."""
    path = urlparse(url).path.split('/')[-1]
    return len(path)

In [6]:
#param based features
def qty_dot_params(url):
    """Count the number of dots in the query parameters."""
    params = urlparse(url).query
    return params.count('.')

def qty_and_params(url):
    """Count the number of '&' in the query parameters."""
    params = urlparse(url).query
    return params.count('&')

def params_length(url):
    """Get the length of the query parameters."""
    params = urlparse(url).query
    return len(params)

In [7]:
#network based features

def time_response(url):
    """Measure the response time of a URL in seconds."""
    domain = urlparse(url).netloc
    try:
        start = time.time()
        socket.gethostbyname(domain)
        return time.time() - start
    except:
        return -1  # Indicates failure

def domain_spf(url):
    """Check if the domain has an SPF record."""
    domain = urlparse(url).netloc
    try:
        answers = dns.resolver.resolve(domain, 'TXT')
        for rdata in answers:
            txt_record = rdata.to_text()
            if "v=spf1" in txt_record.lower():  # SPF records start with 'v=spf1'
                return 1  # SPF record exists
        return 0  # No SPF record found
    except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.Timeout):
        return -1  # DNS resolution failed


def asn_ip(url):
    """Get ASN of the domain's IP."""
    domain = urlparse(url).netloc
    try:
        ip = socket.gethostbyname(domain)
        import requests
        response = requests.get(f"https://api.hackertarget.com/aslookup/?q={ip}")
        return response.text.split()[0] if response.ok else -1
    except:
        return -1  # Indicates failure


def get_domain_dates(domain):
    """Fetches domain activation and expiration times using WHOIS."""
    try:
        domain_info = whois.whois(domain)

        activation_date = domain_info.creation_date
        expiration_date = domain_info.expiration_date

        if isinstance(activation_date, list):
            activation_date = activation_date[0]
        if isinstance(expiration_date, list):
            expiration_date = expiration_date[0]

        activation_timestamp = int(activation_date.timestamp()) if activation_date else -1
        expiration_timestamp = int(expiration_date.timestamp()) if expiration_date else -1

        return activation_timestamp, expiration_timestamp

    except Exception as e:
        print(f"WHOIS lookup failed: {e}")
        return -1, -1

def time_domain_activation(url):
    """Get domain activation time."""
    try:
        domain = urlparse(url).netloc
        activation, _ = get_domain_dates(domain)
        return activation
    except:
        return -1

def time_domain_expiration(url):
    """Get domain expiration time."""
    try:
        domain = urlparse(url).netloc
        _, expiration = get_domain_dates(domain)
        return expiration
    except:
        return -1

In [8]:
#dns based features

def qty_ip_resolved(url):
    """Get number of resolved IPs for the domain."""
    try:
        domain = urlparse(url).netloc
        return len(socket.gethostbyname_ex(domain)[2])
    except:
        return 0

def qty_nameservers(url):
    """Get number of nameservers."""
    domain = urlparse(url).netloc
    try:
        import dns.resolver
        return len(dns.resolver.resolve(domain, 'NS'))
    except:
        return 0

def qty_mx_servers(url):
    """Get number of MX (mail) servers."""
    domain = urlparse(url).netloc
    try:
        import dns.resolver
        return len(dns.resolver.resolve(domain, 'MX'))
    except:
        return 0


In [9]:
def ttl_hostname(url):
    """Get the TTL (Time-To-Live) value for the domain."""
    domain = urlparse(url).netloc
    try:
        answers = dns.resolver.resolve(domain, 'A')
        return answers.rrset.ttl
    except:
        return -1  # Indicates failure



def tls_ssl_certificate(url):
    """Check if the domain has a valid TLS/SSL certificate."""
    domain = urlparse(url).netloc
    try:
        ctx = ssl.create_default_context()
        with ctx.wrap_socket(socket.socket(), server_hostname=domain) as s:
            s.connect((domain, 443))
            cert = s.getpeercert()
            return 1 if cert else 0
    except:
        return 0  # No valid SSL certificate

def qty_redirects(url):
    """Get the number of redirects when accessing the URL."""
    try:
        response = requests.get(url, allow_redirects=True, timeout=5)
        return len(response.history)
    except:
        return -1  # Indicates failure

In [10]:
import dns.resolver

def get_spf_record(domain: str) -> str:
    try:
        answers = dns.resolver.resolve(domain, 'TXT')
    except Exception as e:
        return ""

    for rdata in answers:
        for txt_string in rdata.strings:
            record = txt_string.decode() if isinstance(txt_string, bytes) else txt_string
            if record.lower().startswith("v=spf1"):
                return record
    return ""


In [11]:
domain = "google.com"
spf = get_spf_record(domain)
if spf:
    print(f"SPF record for {domain}: {spf}")
else:
    print(f"No SPF record found for {domain}.")

No SPF record found for google.com.


In [12]:
# Sample URL for Testing
test_url = "https://www.example.com/path/to/page.html?param1=value1&param2=value2"

# Running all feature extraction functions
features = {
    "qty_dot_url": qty_dot_url(test_url),
    "qty_hyphen_url": qty_hyphen_url(test_url),
    "qty_slash_url": qty_slash_url(test_url),
    "length_url": length_url(test_url),
    "qty_dot_domain": qty_dot_domain(test_url),
    "qty_hyphen_domain": qty_hyphen_domain(test_url),
    "qty_vowels_domain": qty_vowels_domain(test_url),
    "domain_length": domain_length(test_url),
    "qty_dot_directory": qty_dot_directory(test_url),
    "qty_hyphen_directory": qty_hyphen_directory(test_url),
    "qty_underline_directory": qty_underline_directory(test_url),
    "qty_slash_directory": qty_slash_directory(test_url),
    "qty_at_directory": qty_at_directory(test_url),
    "qty_comma_directory": qty_comma_directory(test_url),
    "qty_plus_directory": qty_plus_directory(test_url),
    "qty_percent_directory": qty_percent_directory(test_url),
    "directory_length": directory_length(test_url),
    "qty_dot_file": qty_dot_file(test_url),
    "qty_hyphen_file": qty_hyphen_file(test_url),
    "qty_underline_file": qty_underline_file(test_url),
    "qty_slash_file": qty_slash_file(test_url),
    "qty_at_file": qty_at_file(test_url),
    "qty_and_file": qty_and_file(test_url),
    "qty_comma_file": qty_comma_file(test_url),
    "qty_percent_file": qty_percent_file(test_url),
    "file_length": file_length(test_url),
    "qty_dot_params": qty_dot_params(test_url),
    "qty_and_params": qty_and_params(test_url),
    "params_length": params_length(test_url),
    "time_response": time_response(test_url),
    "domain_spf": domain_spf(test_url),
    "asn_ip": asn_ip(test_url),
    "time_domain_activation": time_domain_activation(test_url),
    "time_domain_expiration": time_domain_expiration(test_url),
    "qty_ip_resolved": qty_ip_resolved(test_url),
    "qty_nameservers": qty_nameservers(test_url),
    "qty_mx_servers": qty_mx_servers(test_url),
    "ttl_hostname": ttl_hostname(test_url),
    "tls_ssl_certificate": tls_ssl_certificate(test_url),
    "qty_redirects": qty_redirects(test_url),
}

for feature, value in features.items():
    print(f"{feature}: {value}")



qty_dot_url: 3
qty_hyphen_url: 0
qty_slash_url: 5
length_url: 69
qty_dot_domain: 2
qty_hyphen_domain: 0
qty_vowels_domain: 4
domain_length: 15
qty_dot_directory: 1
qty_hyphen_directory: 0
qty_underline_directory: 0
qty_slash_directory: 3
qty_at_directory: 0
qty_comma_directory: 0
qty_plus_directory: 0
qty_percent_directory: 0
directory_length: 18
qty_dot_file: 1
qty_hyphen_file: 0
qty_underline_file: 0
qty_slash_file: 0
qty_at_file: 0
qty_and_file: 0
qty_comma_file: 0
qty_percent_file: 0
file_length: 9
qty_dot_params: 0
qty_and_params: 1
params_length: 27
time_response: 0.10490679740905762
domain_spf: -1
asn_ip: "42.106.162.171","38266","42.106.162.0/24","VIL-AS-AP
time_domain_activation: 808353000
time_domain_expiration: 1755037800
qty_ip_resolved: 2
qty_nameservers: 0
qty_mx_servers: 0
ttl_hostname: 33
tls_ssl_certificate: 1
qty_redirects: 0


In [None]:
import csv
from urllib.parse import urlparse
import requests
import ssl
import socket
import whois
import time
import dns.resolver

# -----------------------------
# URL Features
# -----------------------------
def qty_dot_url(url):
    """Count the number of dots in the URL."""
    return url.count('.')

def qty_hyphen_url(url):
    """Count the number of hyphens in the URL."""
    return url.count('-')

def qty_slash_url(url):
    """Count the number of slashes in the URL."""
    return url.count('/')

def length_url(url):
    """Get the length of the URL."""
    return len(url)

# -----------------------------
# Domain Features
# -----------------------------
def qty_dot_domain(url):
    """Count the number of dots in the domain."""
    domain = urlparse(url).netloc
    return domain.count('.')

def qty_hyphen_domain(url):
    """Count the number of hyphens in the domain."""
    domain = urlparse(url).netloc
    return domain.count('-')

def qty_vowels_domain(url):
    """Count the number of vowels in the domain."""
    domain = urlparse(url).netloc.lower()
    vowels = "aeiou"
    return sum(1 for char in domain if char in vowels)

def domain_length(url):
    """Get the length of the domain."""
    domain = urlparse(url).netloc
    return len(domain)

# -----------------------------
# Directory Features
# -----------------------------
def qty_dot_directory(url):
    """Count the number of dots in the directory path."""
    path = urlparse(url).path
    return path.count('.')

def qty_hyphen_directory(url):
    """Count the number of hyphens in the directory path."""
    path = urlparse(url).path
    return path.count('-')

def qty_underline_directory(url):
    """Count the number of underscores in the directory path."""
    path = urlparse(url).path
    return path.count('_')

def qty_slash_directory(url):
    """Count the number of slashes in the directory path."""
    path = urlparse(url).path
    return path.count('/')

def qty_at_directory(url):
    """Count the number of '@' symbols in the directory path."""
    path = urlparse(url).path
    return path.count('@')

def qty_comma_directory(url):
    """Count the number of commas in the directory path."""
    path = urlparse(url).path
    return path.count(',')

def qty_plus_directory(url):
    """Count the number of '+' symbols in the directory path."""
    path = urlparse(url).path
    return path.count('+')

def qty_percent_directory(url):
    """Count the number of '%' symbols in the directory path."""
    path = urlparse(url).path
    return path.count('%')

def directory_length(url):
    """Get the length of the directory path."""
    path = urlparse(url).path
    return len(path)

# -----------------------------
# File Features
# -----------------------------
def qty_dot_file(url):
    """Count the number of dots in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('.')

def qty_hyphen_file(url):
    """Count the number of hyphens in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('-')

def qty_underline_file(url):
    """Count the number of underscores in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('_')

def qty_slash_file(url):
    """Count the number of slashes in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('/')

def qty_at_file(url):
    """Count the number of '@' symbols in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('@')

def qty_and_file(url):
    """Count the number of '&' symbols in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('&')

def qty_comma_file(url):
    """Count the number of commas in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count(',')

def qty_percent_file(url):
    """Count the number of '%' symbols in the file path."""
    path = urlparse(url).path.split('/')[-1]
    return path.count('%')

def file_length(url):
    """Get the length of the file path."""
    path = urlparse(url).path.split('/')[-1]
    return len(path)

# -----------------------------
# Parameter Based Features
# -----------------------------
def qty_dot_params(url):
    """Count the number of dots in the query parameters."""
    params = urlparse(url).query
    return params.count('.')

def qty_and_params(url):
    """Count the number of '&' in the query parameters."""
    params = urlparse(url).query
    return params.count('&')

def params_length(url):
    """Get the length of the query parameters."""
    params = urlparse(url).query
    return len(params)

# -----------------------------
# Network Based Features
# -----------------------------
def time_response(url):
    """Measure the response time of a URL in seconds."""
    domain = urlparse(url).netloc
    try:
        start = time.time()
        socket.gethostbyname(domain)
        return time.time() - start
    except:
        return -1  # Indicates failure

def domain_spf(url):
    """Check if the domain has an SPF record."""
    domain = urlparse(url).netloc
    try:
        answers = dns.resolver.resolve(domain, 'TXT')
        for rdata in answers:
            txt_record = rdata.to_text()
            if "v=spf1" in txt_record.lower():
                return 1  # SPF record exists
        return 0  # No SPF record found
    except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.Timeout):
        return -1  # DNS resolution failed

def asn_ip(url):
    """Get ASN of the domain's IP."""
    domain = urlparse(url).netloc
    try:
        ip = socket.gethostbyname(domain)
        response = requests.get(f"https://api.hackertarget.com/aslookup/?q={ip}")
        return response.text.split()[0] if response.ok else -1
    except:
        return -1

def get_domain_dates(domain):
    """Fetches domain activation and expiration times using WHOIS."""
    try:
        domain_info = whois.whois(domain)
        activation_date = domain_info.creation_date
        expiration_date = domain_info.expiration_date
        if isinstance(activation_date, list):
            activation_date = activation_date[0]
        if isinstance(expiration_date, list):
            expiration_date = expiration_date[0]
        activation_timestamp = int(activation_date.timestamp()) if activation_date else -1
        expiration_timestamp = int(expiration_date.timestamp()) if expiration_date else -1
        return activation_timestamp, expiration_timestamp
    except Exception as e:
        print(f"WHOIS lookup failed: {e}")
        return -1, -1

def time_domain_activation(url):
    """Get domain activation time."""
    try:
        domain = urlparse(url).netloc
        activation, _ = get_domain_dates(domain)
        return activation
    except:
        return -1

def time_domain_expiration(url):
    """Get domain expiration time."""
    try:
        domain = urlparse(url).netloc
        _, expiration = get_domain_dates(domain)
        return expiration
    except:
        return -1

# -----------------------------
# DNS Based Features
# -----------------------------
def qty_ip_resolved(url):
    """Get number of resolved IPs for the domain."""
    try:
        domain = urlparse(url).netloc
        return len(socket.gethostbyname_ex(domain)[2])
    except:
        return 0

def qty_nameservers(url):
    """Get number of nameservers."""
    domain = urlparse(url).netloc
    try:
        return len(dns.resolver.resolve(domain, 'NS'))
    except:
        return 0

def qty_mx_servers(url):
    """Get number of MX (mail) servers."""
    domain = urlparse(url).netloc
    try:
        return len(dns.resolver.resolve(domain, 'MX'))
    except:
        return 0

def ttl_hostname(url):
    """Get the TTL (Time-To-Live) value for the domain."""
    domain = urlparse(url).netloc
    try:
        answers = dns.resolver.resolve(domain, 'A')
        return answers.rrset.ttl
    except:
        return -1

def tls_ssl_certificate(url):
    """Check if the domain has a valid TLS/SSL certificate."""
    domain = urlparse(url).netloc
    try:
        ctx = ssl.create_default_context()
        with ctx.wrap_socket(socket.socket(), server_hostname=domain) as s:
            s.connect((domain, 443))
            cert = s.getpeercert()
            return 1 if cert else 0
    except:
        return 0

def qty_redirects(url):
    """Get the number of redirects when accessing the URL."""
    try:
        response = requests.get(url, allow_redirects=True, timeout=5)
        return len(response.history)
    except:
        return -1

# -----------------------------
# Feature Extraction Wrapper
# -----------------------------
def extract_features(url):
    return {
        # URL features
        "qty_dot_url": qty_dot_url(url),
        "qty_hyphen_url": qty_hyphen_url(url),
        "qty_slash_url": qty_slash_url(url),
        "length_url": length_url(url),
        # Domain features
        "qty_dot_domain": qty_dot_domain(url),
        "qty_hyphen_domain": qty_hyphen_domain(url),
        "qty_vowels_domain": qty_vowels_domain(url),
        "domain_length": domain_length(url),
        # Directory features
        "qty_dot_directory": qty_dot_directory(url),
        "qty_hyphen_directory": qty_hyphen_directory(url),
        "qty_underline_directory": qty_underline_directory(url),
        "qty_slash_directory": qty_slash_directory(url),
        "qty_at_directory": qty_at_directory(url),
        "qty_comma_directory": qty_comma_directory(url),
        "qty_plus_directory": qty_plus_directory(url),
        "qty_percent_directory": qty_percent_directory(url),
        "directory_length": directory_length(url),
        # File features
        "qty_dot_file": qty_dot_file(url),
        "qty_hyphen_file": qty_hyphen_file(url),
        "qty_underline_file": qty_underline_file(url),
        "qty_slash_file": qty_slash_file(url),
        "qty_at_file": qty_at_file(url),
        "qty_and_file": qty_and_file(url),
        "qty_comma_file": qty_comma_file(url),
        "qty_percent_file": qty_percent_file(url),
        "file_length": file_length(url),
        # Parameter based features
        "qty_dot_params": qty_dot_params(url),
        "qty_and_params": qty_and_params(url),
        "params_length": params_length(url),
        # Network based features
        "time_response": time_response(url),
        "domain_spf": domain_spf(url),
        "asn_ip": asn_ip(url),
        "time_domain_activation": time_domain_activation(url),
        "time_domain_expiration": time_domain_expiration(url),
        # DNS based features
        "qty_ip_resolved": qty_ip_resolved(url),
        "qty_nameservers": qty_nameservers(url),
        "qty_mx_servers": qty_mx_servers(url),
        "ttl_hostname": ttl_hostname(url),
        "tls_ssl_certificate": tls_ssl_certificate(url),
        "qty_redirects": qty_redirects(url),
    }

def main(input_csv, output_csv):
    with open(input_csv, 'r', newline='', encoding='utf-8') as csv_in:
        reader = csv.DictReader(csv_in)
        if 'url' not in reader.fieldnames:
            print("Input CSV must have a column named 'url'.")
            return


        rows = list(reader)
        total = len(rows)
    
    header_written = False
    import os
    if os.path.exists(output_csv) and os.path.getsize(output_csv) > 0:
        header_written = True

    with open(output_csv, 'a', newline='', encoding='utf-8') as csv_out:
        writer = csv.writer(csv_out)
        if not header_written:
            first_row = rows[0]
            first_url = first_row['url']
            first_features = extract_features(first_url)
            feature_names = list(first_features.keys())
            header = ['url'] + feature_names
            writer.writerow(header)
            csv_out.flush()  

        for row in tqdm(rows, total=total, desc="Extracting features", unit="url"):
            url = row['url']
            features = extract_features(url)
            writer.writerow([url] + [features[fn] for fn in features])
            csv_out.flush()  

input_csv = "../Generation/synthetic_urls_markov.csv"
output_csv = "../Generation/extracted.csv"

if __name__ == '__main__':
    main(input_csv, output_csv)


Extracting features:   0%|          | 10/2000 [00:56<4:34:49,  8.29s/url]

WHOIS lookup failed: No match for "PL-OFERTA-ID2387942371.NET".
>>> Last update of whois database: 2025-04-11T09:55:46Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to 

Extracting features:   1%|          | 11/2000 [00:58<3:29:25,  6.32s/url]

WHOIS lookup failed: No match for "PL-OFERTA-ID2387942371.NET".
>>> Last update of whois database: 2025-04-11T09:55:46Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to 

Extracting features:   1%|          | 12/2000 [00:59<2:42:08,  4.89s/url]

WHOIS lookup failed: No match for "WEEBLYSITE.NET".
>>> Last update of whois database: 2025-04-11T09:55:46Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain nam

Extracting features:   1%|          | 22/2000 [01:31<1:45:17,  3.19s/url]

WHOIS lookup failed: Domain not found.
>>> Last update of WHOIS database: 2025-04-11T09:56:41Z <<<

Terms of Use: Access to Public Interest Registry WHOIS information is provided to assist persons in determining the contents of a domain name registration record in the Public Interest Registry registry database. The data in this record is provided by Public Interest Registry for informational purposes only, and Public Interest Registry does not guarantee its accuracy. This service is intended only for query-based access. You agree that you will use this data only for lawful purposes and that, under no circumstances will you use this data to (a) allow, enable, or otherwise support the transmission by e-mail, telephone, or facsimile of mass unsolicited, commercial advertising or solicitations to entities other than the data recipient's own existing customers; or (b) enable high volume, automated, electronic processes that send queries or data to the systems of Registry Operator, a Registr

Extracting features:   1%|          | 23/2000 [01:33<1:38:20,  2.98s/url]

WHOIS lookup failed: Domain not found.
>>> Last update of WHOIS database: 2025-04-11T09:56:42Z <<<

Terms of Use: Access to Public Interest Registry WHOIS information is provided to assist persons in determining the contents of a domain name registration record in the Public Interest Registry registry database. The data in this record is provided by Public Interest Registry for informational purposes only, and Public Interest Registry does not guarantee its accuracy. This service is intended only for query-based access. You agree that you will use this data only for lawful purposes and that, under no circumstances will you use this data to (a) allow, enable, or otherwise support the transmission by e-mail, telephone, or facsimile of mass unsolicited, commercial advertising or solicitations to entities other than the data recipient's own existing customers; or (b) enable high volume, automated, electronic processes that send queries or data to the systems of Registry Operator, a Registr

Extracting features:   1%|▏         | 25/2000 [01:42<1:59:41,  3.64s/url]

WHOIS lookup failed: No match for "CL-POJD.COM".
>>> Last update of whois database: 2025-04-11T09:56:32Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain name r

Extracting features:   1%|▏         | 26/2000 [01:45<1:48:44,  3.31s/url]

WHOIS lookup failed: No match for "CL-POJD.COM".
>>> Last update of whois database: 2025-04-11T09:56:48Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain name r

Extracting features:   2%|▏         | 49/2000 [03:21<1:47:29,  3.31s/url]

WHOIS lookup failed: No match for "WEEBLYSITE.NET".
>>> Last update of whois database: 2025-04-11T09:58:17Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain nam

Extracting features:   2%|▎         | 50/2000 [03:24<1:39:24,  3.06s/url]

WHOIS lookup failed: No match for "WEEBLYSITE.NET".
>>> Last update of whois database: 2025-04-11T09:58:17Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain nam

Extracting features:   3%|▎         | 68/2000 [04:45<2:45:34,  5.14s/url]

WHOIS lookup failed: No match for "ESTAFETZM.NET".
>>> Last update of whois database: 2025-04-11T09:59:49Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain name

Extracting features:   3%|▎         | 69/2000 [04:48<2:22:38,  4.43s/url]

WHOIS lookup failed: No match for "ESTAFETZM.NET".
>>> Last update of whois database: 2025-04-11T09:59:49Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain name

Extracting features:   4%|▎         | 74/2000 [05:33<6:08:51, 11.49s/url]

WHOIS lookup failed: Domain not found.
>>> Last update of WHOIS database: 2025-04-11T10:00:44Z <<<

Terms of Use: Access to Public Interest Registry WHOIS information is provided to assist persons in determining the contents of a domain name registration record in the Public Interest Registry registry database. The data in this record is provided by Public Interest Registry for informational purposes only, and Public Interest Registry does not guarantee its accuracy. This service is intended only for query-based access. You agree that you will use this data only for lawful purposes and that, under no circumstances will you use this data to (a) allow, enable, or otherwise support the transmission by e-mail, telephone, or facsimile of mass unsolicited, commercial advertising or solicitations to entities other than the data recipient's own existing customers; or (b) enable high volume, automated, electronic processes that send queries or data to the systems of Registry Operator, a Registr

Extracting features:   4%|▍         | 79/2000 [06:23<5:12:05,  9.75s/url]

WHOIS lookup failed: No match for "RAINDROPSUARITMA.NET".
>>> Last update of whois database: 2025-04-11T10:01:18Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a doma

Extracting features:   4%|▍         | 82/2000 [06:38<3:41:57,  6.94s/url]

WHOIS lookup failed: No match for "PUB-C49A4FB9B3024270B6B23.WIXSITE.COM".
>>> Last update of whois database: 2025-04-11T10:01:33Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or 

Extracting features:   7%|▋         | 135/2000 [12:56<5:02:34,  9.73s/url] 

WHOIS lookup failed: No match for "WEEBLYSITE.NET".
>>> Last update of whois database: 2025-04-11T10:07:48Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain nam

Extracting features:   8%|▊         | 154/2000 [14:58<2:45:51,  5.39s/url]

WHOIS lookup failed: No match for "TICKETING-ATT.NET".
>>> Last update of whois database: 2025-04-11T10:09:48Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain 

Extracting features:   8%|▊         | 155/2000 [15:00<2:20:20,  4.56s/url]

WHOIS lookup failed: No match for "TICKETING-ATT.NET".
>>> Last update of whois database: 2025-04-11T10:09:48Z <<<

NOTICE: The expiration date displayed in this record is the date the
registrar's sponsorship of the domain name registration in the registry is
currently set to expire. This date does not necessarily reflect the expiration
date of the domain name registrant's agreement with the sponsoring
registrar.  Users may consult the sponsoring registrar's Whois database to
view the registrar's reported date of expiration for this registration.

TERMS OF USE: You are not authorized to access or query our Whois
database through the use of electronic processes that are high-volume and
automated except as reasonably necessary to register domain names or
modify existing registrations; the Data in VeriSign Global Registry
Services' ("VeriSign") Whois database is provided by VeriSign for
information purposes only, and to assist persons in obtaining information
about or related to a domain 

Extracting features:   8%|▊         | 165/2000 [16:24<5:57:12, 11.68s/url]

WHOIS lookup failed: [Errno 11001] getaddrinfo failed
WHOIS lookup failed: [Errno 11001] getaddrinfo failed
