In [20]:
import requests
import socket
import ipinfo
import ssl
from cryptography import x509
from cryptography.hazmat.backends import default_backend

ipinfo_api_key = '3f709cf966c756'


#### Gets the IP adress of the given website

In [25]:
def ip_address(website):
    if "://" in website:
        website = website.split("://")[1]

    parts = website.split("/")
    domain = parts[0]
    try:
        ip_address = socket.gethostbyname(domain)
        return (domain, ip_address)
    except socket.gaierror:
        return (domain, "Invalid domain or unable to resolve IP address")


In [24]:
ip_address("http://dsu.edu")

('dsu.edu', '23.99.192.132')

#### Gets the IP information of the given IP address

In [45]:
def ip_info(ip_address):
    handler = ipinfo.getHandler(ipinfo_api_key)

    details = handler.getDetails(ip_address)

 
    print(details.all)


In [46]:
ip_info("23.99.192.132")

{'ip': '23.99.192.132', 'city': 'Des Moines', 'region': 'Iowa', 'country': 'US', 'loc': '41.6005,-93.6091', 'org': 'AS8075 Microsoft Corporation', 'postal': '50307', 'timezone': 'America/Chicago', 'country_name': 'United States', 'isEU': False, 'country_flag_url': 'https://cdn.ipinfo.io/static/images/countries-flags/US.svg', 'country_flag': {'emoji': '🇺🇸', 'unicode': 'U+1F1FA U+1F1F8'}, 'country_currency': {'code': 'USD', 'symbol': '$'}, 'continent': {'code': 'NA', 'name': 'North America'}, 'latitude': '41.6005', 'longitude': '-93.6091'}


#### Gets the SSL certificate of the given IP adress

In [47]:
def get_ssl(hostname, port=443):
    
    context = ssl.create_default_context()
    conn = context.wrap_socket(socket.socket(socket.AF_INET, socket.SOCK_STREAM), server_hostname=hostname)
    conn.connect((hostname, port))

    certs = conn.getpeercert(True)
    return(x509.load_der_x509_certificate(certs, default_backend()))


In [30]:
get_ssl("dsu.edu")

<Certificate(subject=<Name(CN=dsu.edu)>, ...)>

#### Reverse DNS

In [53]:
def reverse_dns_lookup(ip_address):
    try:
        hostname, _, _ = socket.gethostbyaddr(ip_address)
        return hostname
    except socket.herror:
        return "No reverse DNS record found"

hostname = reverse_dns_lookup(ip_address)
print(f"Hostname: {hostname}")


Hostname: No reverse DNS record found


In [None]:
reverse_dns_lookup("23.99.192.132")

##### Get Records

In [48]:
def get_records(domain):
    results = {}
    ids = ['NONE', 'A', 'NS', 'MD', 'MF', 'CNAME', 'SOA', 'MB', 'MG', 'MR', 'NULL', 'WKS', 'PTR', 'HINFO', 'MINFO', 'MX', 'TXT', 'RP', 'AFSDB', 'X25', 'ISDN', 'RT', 'NSAP', 'NSAP-PTR', 'SIG', 'KEY', 'PX', 'GPOS', 'AAAA', 'LOC', 'NXT', 'SRV', 'NAPTR', 'KX', 'CERT', 'A6', 'DNAME', 'OPT', 'APL', 'DS', 'SSHFP', 'IPSECKEY', 'RRSIG', 'NSEC', 'DNSKEY', 'DHCID', 'NSEC3', 'NSEC3PARAM', 'TLSA', 'HIP', 'CDS', 'CDNSKEY', 'CSYNC', 'SPF', 'UNSPEC', 'EUI48', 'EUI64', 'TKEY', 'TSIG', 'IXFR', 'AXFR', 'MAILB', 'MAILA', 'ANY', 'URI', 'CAA', 'TA', 'DLV']
    
    for a in ids:
        try:
            answers = dns.resolver.resolve(domain, a)
            results[a] = [rdata.to_text() for rdata in answers]
        except Exception as e:
            continue
    
    return results

In [52]:
get_records("dsu.edu")

{}

#### Sitemap Parser

In [34]:
from urllib import parse, robotparser, request
import re
import json

def get_sitemaps(website):
    robotstxturl = parse.urljoin(website, "robots.txt")
    try:
        rp = robotparser.RobotFileParser()
        rp.set_url(robotstxturl)
        rp.read()
        sitemaps = rp.site_maps()
    except robotparser.RobotFileParserError as e:
        print(f"error: {e}")
    except Exception as e:
        print(f"Error: {e}")
        
    return sitemaps

def sitemap_parser(sitemap):
    r = request.urlopen(sitemap)
    xml = r.read().decode('utf8')
    elements = re.findall(r'<loc>(.*?)<\/loc>', xml, re.DOTALL)

    urls = []

    for element in elements:
        if element.endswith('.xml'):
            urls.extend(sitemap_parser(element))  # Recursively call sitemap_parser
        else:
            urls.append(element)

    return urls

def main():
    url = "https://dsu.edu/"
    sitemaps = get_sitemaps(url)
    
    all_urls = []
    
    for sitemap in sitemaps:
        all_urls.extend(sitemap_parser(sitemap))

    urls_json = json.dumps(all_urls, indent=4)
    
    print(urls_json)

if __name__ == "__main__":
    main()


[
    "https://dsu.edu/academics/majors-degrees.html",
    "https://dsu.edu/directory/degroot-aj.html",
    "https://dsu.edu/directory/spars-gail.html",
    "https://dsu.edu/directory/sewell-christina.html",
    "https://dsu.edu/directory/albers-shelly.html",
    "https://dsu.edu/directory/heflin-christy.html",
    "https://dsu.edu/directory/keyman-dayton.html",
    "https://dsu.edu/directory/stover-leslie.html",
    "https://dsu.edu/directory/graves-russell.html",
    "https://dsu.edu/directory/dececchi-alex.html",
    "https://dsu.edu/directory/kaabi-jihene.html",
    "https://dsu.edu/directory/spencer-daniel.html",
    "https://dsu.edu/directory/janke-megan.html",
    "https://dsu.edu/directory/lawson-evan.html",
    "https://dsu.edu/directory/randall-mark.html",
    "https://dsu.edu/directory/dehaai-rylan.html",
    "https://dsu.edu/directory/janusiak-ronald.html",
    "https://dsu.edu/directory/guo-peng.html",
    "https://dsu.edu/directory/schroeder-justin.html",
    "https://dsu