diff --git a/README.md b/README.md index 38a0348..dccc8d5 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,7 @@

-Static Badge Static Badge - +Static Badge Static Badge Static Badge

> Attention! DPULSE is a research tool. It is not intended for criminal activities! Use DPULSE only on allowed domains and for legal purposes! @@ -58,7 +57,7 @@ DPULSE is a software solution for conducting OSINT research in relation to a cer - CPEs, used web-technologies and so on - It also can download sitemap.xml and robots.txt files from a domain -2. ***PageSearch standard scan:*** extended subdomains deep search function, which starts in addition to basic scan and which can find: +2. ***PageSearch scan:*** extended deep subdomain search function, which runs in addition to the basic scan and can find: - more e-mail addresses - API keys - exposed passwords @@ -67,18 +66,17 @@ DPULSE is a software solution for conducting OSINT research in relation to a cer - documents, config files, databases files (and PageSearch can download them!) - specified words by user in PDF files -3. ***PageSearch Sitemap inspection scan:*** sitemap links crawler which starts in addition to basic scan and which can find even more e-mails - -4. ***Dorking scan:*** extended domain research function with prepared Google Dorking databases for different purposes, such as: +3. ***Dorking scan:*** extended domain research function with prepared Google Dorking databases for different purposes, such as: - IoT dorking - files dorking - admin panels dorking - web elements dorking - Moreover, this mode allows you to create your own custom Google Dorking database -6. ***API scan:*** extended domain research function with prepared functions for 3rd party APIs usage. Currently DPULSE supports these API: +4. ***API scan:*** extended domain research function with prepared functions for 3rd-party API usage. Currently DPULSE supports these APIs: - VirusTotal API (for brief domain information gathering) - SecurityTrails API (deep subdomains and DNS enumeration) + - HudsonRock API (for querying a database of computers compromised through global info-stealer campaigns) Finally, DPULSE compiles all found data into an easy-to-read HTML or XLSX report by category. It also saves all information about scan in local report storage database, which can be restored later.
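The new HudsonRock integration added further down in this patch is driven by a key-less public endpoint. As a quick orientation, here is a minimal, self-contained sketch of the same domain lookup that `apis/api_hudsonrock.py` wraps; `example.com` is a placeholder target, and the 10-second timeout is an assumption rather than part of the module:

```python
# Minimal sketch of the key-less HudsonRock domain lookup wrapped by
# apis/api_hudsonrock.py; "example.com" and the timeout are placeholders.
import requests

BASE_URL = "https://cavalier.hudsonrock.com/api/json/v2/osint-tools/"

def search_by_domain(domain: str) -> dict:
    # Mirrors make_request() in the module: errors come back as a dict
    # instead of raising, so the caller can still render them in a report.
    try:
        response = requests.get(f"{BASE_URL}search-by-domain?domain={domain}", timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as exc:
        return {"error": str(exc)}

if __name__ == "__main__":
    data = search_by_domain("example.com")
    # 'total' and 'totalStealers' are the counters the module reads for its Domain Data section.
    print(data.get("total", 0), "entries,", data.get("totalStealers", 0), "stealers")
```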
@@ -164,8 +162,8 @@ If you have problems with starting installer.sh, you should try to use `dos2unix # Tasks to complete before new release -- [ ] Add web pages snapshotting (with screenshots) -- [ ] Add web pages snapshotting (with web pages copying as HTML objects) +- [x] Add web pages snapshotting (with screenshots) +- [x] Add web pages snapshotting (with web pages copying as HTML objects) - [ ] Add web pages snapshotting (with Wayback Machine) # DPULSE mentions in social media diff --git a/apis/api_hudsonrock.py b/apis/api_hudsonrock.py new file mode 100644 index 0000000..4373ee1 --- /dev/null +++ b/apis/api_hudsonrock.py @@ -0,0 +1,150 @@ +import requests +from colorama import Fore, Style +import re + +def hudsonrock_html_prep(formatted_output): + # Strip ANSI colour codes and the console-only header before the text is embedded in the HTML report + formatted_output = re.sub(r'\x1b\[([0-9,A-Z]{1,2}(;[0-9]{1,2})?(;[0-9]{3})?)?[m|K]?', '', formatted_output) + start_marker = "=== HUDSONROCK API REPORT ===" + end_marker = "[+] Email Data:" + start_index = formatted_output.find(start_marker) + end_index = formatted_output.find(end_marker) + if start_index != -1 and end_index != -1: + formatted_output = formatted_output[:start_index] + formatted_output[end_index:] + return formatted_output + +def api_hudsonrock_get(email=None, username=None, domain=None, ip=None): + base_url = "https://cavalier.hudsonrock.com/api/json/v2/osint-tools/" + results = {} + + def make_request(url): + try: + response = requests.get(url, timeout=10) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + return {'error': str(e)} + + if email: + email_url = f"{base_url}search-by-email?email={email}" + results['email'] = make_request(email_url) + + if username: + username_url = f"{base_url}search-by-username?username={username}" + results['username'] = make_request(username_url) + + if domain: + domain_url = f"{base_url}search-by-domain?domain={domain}" + results['domain'] = make_request(domain_url) + + urls_by_domain_url = f"{base_url}urls-by-domain?domain={domain}" + results['urls_by_domain'] = make_request(urls_by_domain_url) + + if ip: + ip_url = f"{base_url}search-by-ip?ip={ip}" + results['ip'] = make_request(ip_url) + + return results + + +def api_hudsonrock_check(domain, ip, email, username): + results = api_hudsonrock_get(email, username, domain, ip) + formatted_output = Fore.LIGHTBLUE_EX + "\n=== HUDSONROCK API REPORT ===\n" + Style.RESET_ALL + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Provided Data:{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Domain:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{domain}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{ip}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}E-mail:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{email}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Username:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{username}{Style.RESET_ALL}\n" + + def format_section(title, data): + nonlocal formatted_output + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] {title}:{Style.RESET_ALL}\n" + if 'error' in data: + formatted_output += f"{Fore.RED}Error appeared when trying to get results for {title} requests.
Probably given data is incorrect.{Style.RESET_ALL}\n" + return + + if title == 'Email Data': + formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" + for i, stealer in enumerate(data.get('stealers', []), 1): + formatted_output += f"\n{Fore.GREEN}--- STEALER {i} ---{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Computer Name:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('computer_name', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}OS:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('operating_system', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Date Compromised:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('date_compromised', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Malware Path:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('malware_path', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('ip', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Top Passwords:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_passwords', []))}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Top Logins:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_logins', []))}{Style.RESET_ALL}\n" + + elif title == 'Username Data': + formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" + for i, stealer in enumerate(data.get('stealers', []), 1): + formatted_output += f"\n{Fore.GREEN}--- STEALER {i} ---{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Stealer Family:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('stealer_family', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Computer Name:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('computer_name', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}OS:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('operating_system', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Date Compromised:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('date_compromised', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Malware Path:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('malware_path', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('ip', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Top Passwords:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_passwords', []))}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Top Logins:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_logins', []))}{Style.RESET_ALL}\n" + + elif title == 'Domain Data': + formatted_output += f"{Fore.GREEN}Total Entries:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('total', 0)}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Total Stealers:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('totalStealers', 0)}{Style.RESET_ALL}\n" + formatted_output += f"\n{Fore.GREEN}Sample Employee URLs:{Style.RESET_ALL}\n" + employee_urls = data.get('data', {}).get('employees_urls', []) + if employee_urls: + for url_data in employee_urls[:10]: + formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('type', 'N/A')}{Style.RESET_ALL}" + 
formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}" + formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n" + else: + formatted_output += f"{Fore.RED}No employee URLs available.{Style.RESET_ALL}\n" + + elif title == 'Attack Surface Data': + formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" + formatted_output += f"\n{Fore.GREEN}Sample Employee URLs:{Style.RESET_ALL}\n" + employees = data.get('data', {}).get('employees_urls', []) + if employees: + for url_data in employees[:10]: + formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('type', 'N/A')}{Style.RESET_ALL}" + formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}" + formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n" + else: + formatted_output += f"{Fore.RED}No employee URLs available.{Style.RESET_ALL}\n" + formatted_output += f"\n{Fore.GREEN}Sample Client URLs:{Style.RESET_ALL}\n" + clients = data.get('data', {}).get('clients_urls', []) + if clients: + for url_data in clients[:10]: + formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('type', 'N/A')}{Style.RESET_ALL}" + formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}" + formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n" + else: + formatted_output += f"{Fore.LIGHTCYAN_EX}No client URLs available.{Style.RESET_ALL}\n" + + elif title == 'IP Data': + formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" + if data.get('stealers'): + for i, stealer in enumerate(data.get('stealers', []), 1): + formatted_output += f"\n{Fore.GREEN}--- STEALER {i} ---{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Computer Name:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('computer_name', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}OS:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('operating_system', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Date Compromised:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('date_compromised', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Malware Path:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('malware_path', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('ip', 'Not Found')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Top Passwords:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_passwords', []))}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Top Logins:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_logins', []))}{Style.RESET_ALL}\n" + formatted_output += "\n" + + if 'email' in results: + format_section('Email Data', results['email']) + if 'username' in results: + format_section('Username Data', results['username']) + if 'domain' in results: + format_section('Domain Data', results['domain']) + if 'urls_by_domain' in 
results: + format_section('Attack Surface Data', results['urls_by_domain']) + if 'ip' in results: + format_section('IP Data', results['ip']) + + print(formatted_output) + return formatted_output diff --git a/apis/api_keys.db b/apis/api_keys.db index 8534e92..aea7889 100644 Binary files a/apis/api_keys.db and b/apis/api_keys.db differ diff --git a/apis/api_keys_reference.db b/apis/api_keys_reference.db index 0ff949e..aea7889 100644 Binary files a/apis/api_keys_reference.db and b/apis/api_keys_reference.db differ diff --git a/apis/api_securitytrails.py b/apis/api_securitytrails.py index 3fc399f..06cbd84 100644 --- a/apis/api_securitytrails.py +++ b/apis/api_securitytrails.py @@ -1,73 +1,141 @@ import requests import sqlite3 +import re from colorama import Fore, Style -def api_securitytrails_check(domain): - conn = sqlite3.connect('apis//api_keys.db') - cursor = conn.cursor() - cursor.execute("SELECT api_name, api_key FROM api_keys") - rows = cursor.fetchall() - for row in rows: - api_name, api_key = row - if api_name == 'SecurityTrails': - api_key = str(row[1]) - print(Fore.GREEN + 'Got SecurityTrails API key. Starting SecurityTrails scan...\n') - - alive_subdomains = [] - txt_records = [] - a_records_list = [] - mx_records_list = [] - ns_records_list = [] - soa_records_list = [] +def securitytrails_html_prep(formatted_output): + formatted_output = re.sub(r'\x1b\[([0-9,A-Z]{1,2}(;[0-9]{1,2})?(;[0-9]{3})?)?[m|K]?', '', formatted_output) + start_marker = "=== SECURITYTRAILS API REPORT ===" + end_marker = "[+] Domain General Information:" + start_index = formatted_output.find(start_marker) + end_index = formatted_output.find(end_marker) + if start_index != -1 and end_index != -1: + formatted_output = formatted_output[:start_index] + formatted_output[end_index:] + return formatted_output + +def check_domain_securitytrails(domain, api_key): + api_key = api_key.strip() + api_key = re.sub(r'[\s\u200B\uFEFF]+', '', api_key) + subdomains_url = f"https://api.securitytrails.com/v1/domain/{domain}/subdomains?apikey={api_key}" - response = requests.get(subdomains_url) + general_url = f"https://api.securitytrails.com/v1/domain/{domain}?apikey={api_key}" - url = f"https://api.securitytrails.com/v1/domain/{domain}?apikey={api_key}" - general_response = requests.get(url) - general_data = general_response.json() + try: + general_response = requests.get(general_url) + general_data = general_response.json() + except Exception as e: + return Fore.RED + f"Error while parsing JSON: {e}" + Style.RESET_ALL - print(Fore.GREEN + "[DOMAIN GENERAL INFORMATION]\n") - print(Fore.GREEN + "Alexa Rank: " + Fore.LIGHTCYAN_EX + f"{general_data['alexa_rank']}") - print(Fore.GREEN + "Apex Domain: " + Fore.LIGHTCYAN_EX + f"{general_data['apex_domain']}") - print(Fore.GREEN + "Hostname: " + Fore.LIGHTCYAN_EX + f"{general_data['hostname']}" + Style.RESET_ALL) + formatted_output = Fore.LIGHTBLUE_EX + "=== SECURITYTRAILS API REPORT ===\n" + Style.RESET_ALL + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Domain General Information:{Style.RESET_ALL}\n" + formatted_output += ( + f"{Fore.GREEN}Alexa Rank: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{general_data.get('alexa_rank')}{Style.RESET_ALL}\n" + f"{Fore.GREEN}Apex Domain: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{general_data.get('apex_domain')}{Style.RESET_ALL}\n" + f"{Fore.GREEN}Hostname: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{general_data.get('hostname')}{Style.RESET_ALL}\n" + ) - print(Fore.GREEN + "\n[DNS RECORDS]" + Style.RESET_ALL) - for record_type, record_data in 
general_data['current_dns'].items(): - print(Fore.GREEN + f"\n[+] {record_type.upper()} RECORDS:" + Style.RESET_ALL) + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] DNS Records:{Style.RESET_ALL}\n" + current_dns = general_data.get('current_dns', {}) + for record_type, record_data in current_dns.items(): + formatted_output += f"\n{Fore.GREEN}[{record_type.upper()} RECORDS]:{Style.RESET_ALL}\n" for value in record_data.get('values', []): if record_type == 'a': - print(Fore.GREEN + "IP: " + Fore.LIGHTCYAN_EX + f"{value['ip']} " + Fore.GREEN + "| Organization: " + Fore.LIGHTCYAN_EX + f"{value['ip_organization']}") - a_records_list.append({'ip': value.get('ip', ''), 'organization': value.get('ip_organization', '')}) + ip = value.get('ip', '') + org = value.get('ip_organization', '') + formatted_output += ( + f"{Fore.GREEN}IP: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{ip}{Style.RESET_ALL} " + f"{Fore.GREEN}| Organization: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{org}{Style.RESET_ALL}\n" + ) elif record_type == 'mx': - print(Fore.GREEN + "Hostname: " + Fore.LIGHTCYAN_EX + f"{value['hostname']} " + Fore.GREEN + "| Priority: " + Fore.LIGHTCYAN_EX + f"{value['priority']} " + Fore.GREEN + "| Organization: " + Fore.LIGHTCYAN_EX + f"{value['hostname_organization']}") - mx_records_list.append({'mx_hostname': value.get('hostname', ''), 'mx_priority': value.get('priority', ''), 'mx_organization': value.get('hostname_organization', '')}) + hostname = value.get('hostname', '') + priority = value.get('priority', '') + org = value.get('hostname_organization', '') + formatted_output += ( + f"{Fore.GREEN}Hostname: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{hostname}{Style.RESET_ALL} " + f"{Fore.GREEN}| Priority: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{priority}{Style.RESET_ALL} " + f"{Fore.GREEN}| Organization: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{org}{Style.RESET_ALL}\n" + ) elif record_type == 'ns': - print(Fore.GREEN + "Nameserver: " + Fore.LIGHTCYAN_EX + f"{value['nameserver']} " + Fore.GREEN + "| Organization: " + Fore.LIGHTCYAN_EX + f"{value['nameserver_organization']}") - ns_records_list.append({'ns_nameserver': value.get('nameserver', ''), 'ns_organization': value.get('nameserver_organization', '')}) + nameserver = value.get('nameserver', '') + org = value.get('nameserver_organization', '') + formatted_output += ( + f"{Fore.GREEN}Nameserver: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{nameserver}{Style.RESET_ALL} " + f"{Fore.GREEN}| Organization: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{org}{Style.RESET_ALL}\n" + ) elif record_type == 'soa': - print(Fore.GREEN + "Email: " + Fore.LIGHTCYAN_EX + f"{value['email']} " + Fore.GREEN + "| TTL: " + Fore.LIGHTCYAN_EX + f"{value['ttl']}") - soa_records_list.append({'soa_email': value.get('email', ''), 'soa_ttl': value.get('ttl', '')}) + email = value.get('email', '') + ttl = value.get('ttl', '') + formatted_output += ( + f"{Fore.GREEN}Email: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{email}{Style.RESET_ALL} " + f"{Fore.GREEN}| TTL: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{ttl}{Style.RESET_ALL}\n" + ) elif record_type == 'txt': - print(Fore.GREEN + "Value: " + Fore.LIGHTCYAN_EX + f"{value['value']}") - txt_records.append(value['value']) - - if response.status_code == 200: - data = response.json() - print(Fore.GREEN + "\n[SUBDOMAINS DEEP ENUMERATION]\n") - print(Fore.GREEN + f"Found " + Fore.LIGHTCYAN_EX + f"{data['subdomain_count']} " + Fore.GREEN + "subdomains") - print(Fore.GREEN + "Subdomains list: ") - for i, subdomain in enumerate(data['subdomains'], start=1): - subdomain_url = 
f"http://{subdomain}.{domain}" - try: - response = requests.get(subdomain_url, timeout=5) - if response.status_code == 200: - print(Fore.GREEN + f"{i}. " + Fore.LIGHTCYAN_EX + f"{subdomain_url} " + Fore.GREEN + "is alive") - alive_subdomains.append(subdomain_url) - else: + txt_value = value.get('value', '') + formatted_output += ( + f"{Fore.GREEN}Value: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{txt_value}{Style.RESET_ALL}\n" + ) + + subdomains_response = requests.get(subdomains_url) + if subdomains_response.status_code == 200: + subdomains_data = subdomains_response.json() + sub_count = subdomains_data.get('subdomain_count', 0) + subdomains = subdomains_data.get('subdomains', []) + + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Subdomains Deep Enumeration:{Style.RESET_ALL}\n" + formatted_output += ( + f"{Fore.GREEN}Found {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{sub_count}{Style.RESET_ALL}" + f"{Fore.GREEN} subdomains.{Style.RESET_ALL}\n" + ) + + if subdomains: + formatted_output += f"{Fore.GREEN}Subdomains list:{Style.RESET_ALL}\n" + alive_count = 0 + for i, subdomain in enumerate(subdomains, start=1): + subdomain_url = f"http://{subdomain}.{domain}" + try: + r = requests.get(subdomain_url, timeout=5) + if r.status_code == 200: + alive_count += 1 + formatted_output += ( + f"{Fore.GREEN}{i}. {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{subdomain_url}{Style.RESET_ALL}" + f"{Fore.GREEN} is alive{Style.RESET_ALL}\n" + ) + except Exception: pass - except Exception: - pass + + if alive_count == 0: + formatted_output += (f"{Fore.RED}No alive subdomains found (by HTTP 200 check).{Style.RESET_ALL}\n") + else: + formatted_output += f"{Fore.RED}No subdomains found in SecurityTrails data.{Style.RESET_ALL}\n" else: - pass + formatted_output += (f"{Fore.RED}Error while gathering subdomains: {subdomains_response.status_code}{Style.RESET_ALL}\n") + + formatted_output += Fore.LIGHTBLUE_EX + "\n=== END OF SECURITYTRAILS API REPORT ===\n" + Style.RESET_ALL + return formatted_output + + +def api_securitytrails_check(domain): + conn = sqlite3.connect('apis//api_keys.db') + cursor = conn.cursor() + cursor.execute("SELECT api_name, api_key FROM api_keys") + rows = cursor.fetchall() + + api_key = None + for row in rows: + api_name, key = row + if api_name == 'SecurityTrails': + api_key = str(key) + api_key = api_key.strip() + api_key = re.sub(r'[\s\u200B\uFEFF]+', '', api_key) + print(Fore.GREEN + 'Got SecurityTrails API key. Starting SecurityTrails scan...\n' + Style.RESET_ALL) + break + + if not api_key: + print(Fore.RED + "SecurityTrails API key not found." 
+ Style.RESET_ALL) + conn.close() + return None - return general_data['alexa_rank'], general_data['apex_domain'], general_data['hostname'], alive_subdomains, txt_records, a_records_list, mx_records_list, ns_records_list, soa_records_list + formatted_output = check_domain_securitytrails(domain, api_key) + conn.close() + print(formatted_output) + return formatted_output diff --git a/apis/api_virustotal.py b/apis/api_virustotal.py index ea866dc..4d96235 100644 --- a/apis/api_virustotal.py +++ b/apis/api_virustotal.py @@ -1,51 +1,84 @@ +from datetime import datetime import requests import sqlite3 from colorama import Fore, Style +import re + +def virustotal_html_prep(formatted_output): + formatted_output = re.sub(r'\x1b\[([0-9,A-Z]{1,2}(;[0-9]{1,2})?(;[0-9]{3})?)?[m|K]?', '', formatted_output) + start_marker = "=== VIRUSTOTAL API REPORT ===" + end_marker = "[+] Domain Information:" + start_index = formatted_output.find(start_marker) + end_index = formatted_output.find(end_marker) + if start_index != -1 and end_index != -1: + formatted_output = formatted_output[:start_index] + formatted_output[end_index:] + return formatted_output def check_domain(domain, api_key): - url = "https://www.virustotal.com/vtapi/v2/domain/report" - params = { - 'domain': domain, - 'apikey': api_key - } + api_key = api_key.strip() + api_key = re.sub(r'[\s\u200B\uFEFF]+', '', api_key) - response = requests.get(url, params=params) + url = f"https://www.virustotal.com/api/v3/domains/{domain}" + headers = { + "x-apikey": api_key + } + response = requests.get(url, headers=headers) - if response.status_code == 200: - return response.json() - else: - print(Fore.RED + f"Error: {response.status_code}" + Style.RESET_ALL) + try: + result = response.json() + formatted_output = Fore.LIGHTBLUE_EX + "\n=== VIRUSTOTAL API REPORT ===\n" + Style.RESET_ALL + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Domain Information:{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Domain:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{domain}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Creation Date:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{datetime.fromtimestamp(result['data']['attributes']['creation_date']).strftime('%Y-%m-%d')}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Last Update:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{datetime.fromtimestamp(result['data']['attributes']['last_update_date']).strftime('%Y-%m-%d')}{Style.RESET_ALL}\n" + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] DNS Records:{Style.RESET_ALL}\n" + for record in result['data']['attributes']['last_dns_records']: + formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{record['type']:<6}{Style.RESET_ALL} " + formatted_output += f"{Fore.GREEN}TTL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{record['ttl']:<6}{Style.RESET_ALL} " + formatted_output += f"{Fore.GREEN}Value:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{record['value']}{Style.RESET_ALL}\n" + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Categories:{Style.RESET_ALL}\n" + for vendor, category in result['data']['attributes']['categories'].items(): + formatted_output += f"{Fore.GREEN}{vendor:<25}:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{category}{Style.RESET_ALL}\n" + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Analysis Stats:{Style.RESET_ALL}\n" + stats = result['data']['attributes']['last_analysis_stats'] + formatted_output += f"{Fore.GREEN}Harmless:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stats['harmless']}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Malicious:{Style.RESET_ALL} 
{Fore.LIGHTCYAN_EX}{stats['malicious']}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Suspicious:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stats['suspicious']}{Style.RESET_ALL}\n" + formatted_output += f"{Fore.GREEN}Undetected:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stats['undetected']}{Style.RESET_ALL}\n" + formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Detailed Analysis Results:{Style.RESET_ALL}\n" + results = result['data']['attributes']['last_analysis_results'] + categories = {'harmless': [], 'malicious': [], 'suspicious': [], 'undetected': []} + for engine, data in results.items(): + categories[data['category']].append(engine) + for category, engines in categories.items(): + if engines: + formatted_output += f"\n{Fore.GREEN}{category.title()} ({len(engines)}):{Style.RESET_ALL}\n" + for engine in sorted(engines): + formatted_output += f"{Fore.LIGHTCYAN_EX}- {engine}{Style.RESET_ALL}\n" + formatted_output += f"\n{Fore.LIGHTBLUE_EX}=== END OF VIRUSTOTAL API REPORT ==={Style.RESET_ALL}\n" + print(formatted_output) + return formatted_output + except Exception as e: + formatted_output = Fore.RED + f"Error while parsing JSON: {e}" + Style.RESET_ALL + print(formatted_output) return None - def api_virustotal_check(domain): conn = sqlite3.connect('apis//api_keys.db') cursor = conn.cursor() cursor.execute("SELECT api_name, api_key FROM api_keys") rows = cursor.fetchall() + api_key = None for row in rows: - api_name, api_key = row + api_name, key = row if api_name == 'VirusTotal': - api_key = str(row[1]) - print(Fore.GREEN + 'Got VirusTotal API key. Starting VirusTotal scan...\n') - - result = check_domain(domain, api_key) - - if result: - print(Fore.GREEN + "[VIRUSTOTAL DOMAIN REPORT]") - print(Fore.GREEN + f"Domain: {result.get('domain')}") - print(Fore.GREEN + f"Categories: {result.get('categories')}") - print(Fore.GREEN + f"Detected URLs: {len(result.get('detected_urls', []))}") - print(Fore.GREEN + f"Detected Samples: {len(result.get('detected_samples', []))}") - print(Fore.GREEN + f"Undetected Samples: {len(result.get('undetected_samples', []))}\n") - print(Fore.LIGHTGREEN_EX + "-------------------------------------------------\n" + Style.RESET_ALL) - conn.close() - return result.get('categories'), len(result.get('detected_urls', [])), len(result.get('detected_samples', [])), len(result.get('undetected_samples', [])) - else: - print(Fore.RED + "Failed to get domain report\n") - print(Fore.LIGHTGREEN_EX + "-------------------------------------------------\n" + Style.RESET_ALL) + api_key = key + print(Fore.GREEN + 'Got VirusTotal API key. 
Starting VirusTotal scan...') + break + if not api_key: + print(Fore.RED + "VirusTotal API key was not found.") conn.close() - return 'Got no information from VirusTotal API', 'Got no information from VirusTotal API', 'Got no information from VirusTotal API', 'Got no information from VirusTotal API' - pass - + return None + formatted_output = check_domain(domain, api_key) + return formatted_output
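The reworked `check_domain()` above moves from the legacy `/vtapi/v2` endpoint to the VirusTotal v3 REST API, authenticating via the `x-apikey` header. A minimal standalone sketch of that call, for reference while reviewing; `YOUR_API_KEY` and the example domain are placeholders, and the timeout is an added assumption:

```python
# Sketch of the VirusTotal v3 domain lookup performed by the reworked
# check_domain(); "YOUR_API_KEY" and "example.com" are placeholders.
import requests

def vt_domain_report(domain: str, api_key: str) -> dict:
    url = f"https://www.virustotal.com/api/v3/domains/{domain}"
    response = requests.get(url, headers={"x-apikey": api_key}, timeout=10)
    response.raise_for_status()
    return response.json()

if __name__ == "__main__":
    report = vt_domain_report("example.com", "YOUR_API_KEY")
    # The module formats this dict into the "[+] Analysis Stats" section.
    print(report["data"]["attributes"]["last_analysis_stats"])
```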
diff --git a/datagather_modules/data_assembler.py b/datagather_modules/data_assembler.py index 8bb7c14..384dc4c 100644 --- a/datagather_modules/data_assembler.py +++ b/datagather_modules/data_assembler.py @@ -2,15 +2,20 @@ sys.path.append('service') sys.path.append('pagesearch') sys.path.append('dorking') +sys.path.append('snapshotting') import crawl_processor as cp import dorking_handler as dp import networking_processor as np -from pagesearch_main import normal_search, sitemap_inspection_search +from pagesearch_main import normal_search from logs_processing import logging from api_virustotal import api_virustotal_check from api_securitytrails import api_securitytrails_check +from api_hudsonrock import api_hudsonrock_check, api_hudsonrock_get from db_creator import get_dorking_query +from screen_snapshotting import take_screenshot +from config_processing import read_config +from html_snapshotting import save_page_as_html try: import requests @@ -20,6 +25,7 @@ from colorama import Fore, Style import webbrowser import sqlite3 + import configparser except ImportError as e: print(Fore.RED + "Import error appeared. Reason: {}".format(e) + Style.RESET_ALL) sys.exit() @@ -68,7 +74,7 @@ def report_preprocessing(self, short_domain, report_file_type): os.makedirs(report_folder, exist_ok=True) return casename, db_casename, db_creation_date, robots_filepath, sitemap_filepath, sitemap_links_filepath, report_file_type, report_folder, files_ctime, report_ctime - def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, keywords, keywords_flag, dorking_flag, used_api_flag): + def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, keywords, keywords_flag, dorking_flag, used_api_flag, snapshotting_flag, username): casename, db_casename, db_creation_date, robots_filepath, sitemap_filepath, sitemap_links_filepath, report_file_type, report_folder, ctime, report_ctime = self.report_preprocessing(short_domain, report_file_type) logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} STARTS HERE') print(Fore.GREEN + "Started scanning domain" + Style.RESET_ALL) @@ -130,11 +136,11 @@ def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag, k accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = 'No results because no subdomains were found' ps_emails_return = "" pass - elif pagesearch_flag.lower() == 'si': - print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN START: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL) - ps_emails_return, total_links_counter, accessed_links_counter, emails_amount = sitemap_inspection_search(report_folder) - accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = "No results because Sitemap Inspection mode does not gather these categories" - print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN END: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL) + #elif pagesearch_flag.lower() == 'si': + #print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN START: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL) + #ps_emails_return, total_links_counter, accessed_links_counter, emails_amount = sitemap_inspection_search(report_folder) + #accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = "No results because Sitemap Inspection mode does not gather these categories" + #print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN END: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL) elif pagesearch_flag.lower() == 'n': ps_emails_return = "" accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = "No results because user did not select PageSearch for this scan" @@ -153,22 +159,43 @@ if used_api_flag != ['Empty']: print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN START: API SCANNING]\n" + Style.RESET_ALL) if '1' in used_api_flag: - vt_cats, vt_deturls, vt_detsamples, vt_undetsamples = api_virustotal_check(short_domain) + virustotal_output = api_virustotal_check(short_domain) api_scan_db.append('VirusTotal') if '2' in used_api_flag: - st_alexa, st_apex, st_hostname, st_alivesds, st_txt, a_records_list, mx_records_list, ns_records_list, soa_records_list = api_securitytrails_check(short_domain) + securitytrails_output = api_securitytrails_check(short_domain) api_scan_db.append('SecurityTrails') + if '3' in used_api_flag: + if username.lower() == 'n': + username = None + hudsonrock_output = api_hudsonrock_check(short_domain, ip, mails, username) + api_scan_db.append('HudsonRock') + else: + hudsonrock_output = api_hudsonrock_check(short_domain, ip, mails, username) + api_scan_db.append('HudsonRock') if '1' not in used_api_flag: - vt_cats = vt_deturls = vt_detsamples = vt_undetsamples = 'No results because user did not selected VirusTotal API scan' + virustotal_output = 'No results because user did not select VirusTotal API scan' if '2' not in used_api_flag: - st_alexa = st_apex = st_hostname = st_alivesds = st_txt = a_records_list = mx_records_list = ns_records_list = soa_records_list = 'No results because user did not selected SecurityTrails API scan' + securitytrails_output = 'No results because user did not select SecurityTrails API scan' + if '3' not in used_api_flag: + hudsonrock_output = 'No results because user did not select HudsonRock API scan' print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: API SCANNING]\n" + Style.RESET_ALL) else: - vt_cats = vt_deturls = vt_detsamples = vt_undetsamples = 'No results because user did not selected VirusTotal API scan' - st_alexa = st_apex = st_hostname = st_alivesds = st_txt = a_records_list = mx_records_list = ns_records_list = soa_records_list = 'No results because user did not selected SecurityTrails API scan' + virustotal_output = 'No results because user did not select VirusTotal API scan' + securitytrails_output = 'No results because user did not select SecurityTrails API scan' + hudsonrock_output = 'No results because user did not select HudsonRock API scan' api_scan_db.append('No') pass - + if snapshotting_flag.lower() in ['s', 'p', 'w']: + config_values = read_config() + installed_browser = config_values['installed_browser'] + print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN START: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL) + if snapshotting_flag.lower() == 's': + take_screenshot(installed_browser, url, report_folder + '//screensnapshot.png') + elif snapshotting_flag.lower() == 'p': + save_page_as_html(url, report_folder + '//domain_html_copy.html') + print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL) + else: + pass cleaned_dorking = [item.strip() for item in dorking_results if item.strip()] @@ -179,7 +206,7 @@ hostnames, cpes, tags, vulns, common_socials, total_socials, ps_emails_return, accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter, total_links_counter, accessed_links_counter, cleaned_dorking, - vt_cats, vt_deturls, vt_detsamples, vt_undetsamples, st_alexa, st_apex, st_hostname, st_alivesds, st_txt, a_records_list, mx_records_list, ns_records_list, soa_records_list] + virustotal_output, securitytrails_output, hudsonrock_output] elif report_file_type == 'html': if pagesearch_flag.lower() == 'y': @@ -197,11 +224,11 @@ accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = 'No results because no subdomains were found' keywords_messages_list = ['No data was gathered because no subdomains were found'] pass - elif pagesearch_flag.lower() == 'si': - print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN START: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL) - ps_emails_return, total_links_counter, accessed_links_counter, emails_amount = sitemap_inspection_search(report_folder) - accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = keywords_messages_list = "No results because Sitemap Inspection mode does not gather these categories" - print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN END: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL) + #elif pagesearch_flag.lower() == 'si': + #print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN START: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL) + #ps_emails_return, total_links_counter, accessed_links_counter, emails_amount = sitemap_inspection_search(report_folder) + #accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = keywords_messages_list = "No results because Sitemap Inspection mode does not gather these categories" + #print(Fore.LIGHTMAGENTA_EX + "\n[EXTENDED SCAN END: PAGESEARCH SITEMAP INSPECTION]\n" + Style.RESET_ALL) elif pagesearch_flag.lower() == 'n': accessible_subdomains = files_counter = cookies_counter = api_keys_counter = website_elements_counter = exposed_passwords_counter = total_links_counter = accessed_links_counter = emails_amount = keywords_messages_list = "No results because user did not select PageSearch for this scan" ps_emails_return = "" @@ -220,22 +247,43 @@ if used_api_flag != ['Empty']: print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN START: API SCANNING]\n" + Style.RESET_ALL) if '1' in used_api_flag: - vt_cats, vt_deturls, vt_detsamples, vt_undetsamples = api_virustotal_check(short_domain) + virustotal_output = api_virustotal_check(short_domain) api_scan_db.append('VirusTotal') if '2' in used_api_flag: - st_alexa, st_apex, st_hostname, st_alivesds, st_txt, a_records_list, mx_records_list, ns_records_list, soa_records_list = api_securitytrails_check(short_domain) + securitytrails_output = api_securitytrails_check(short_domain) api_scan_db.append('SecurityTrails') + if '3' in used_api_flag: + if username.lower() == 'n': + username = None + hudsonrock_output = api_hudsonrock_check(short_domain, ip, mails, username) + api_scan_db.append('HudsonRock') + else: + hudsonrock_output = api_hudsonrock_check(short_domain, ip, mails, username) + api_scan_db.append('HudsonRock') if '1' not in used_api_flag: - vt_cats = vt_deturls = vt_detsamples = vt_undetsamples = 'No results because user did not selected VirusTotal API scan' + virustotal_output = 'No results because user did not select VirusTotal API scan' if '2' not in used_api_flag: - st_alexa = st_apex = st_hostname = st_alivesds = st_txt = a_records_list = mx_records_list = ns_records_list = soa_records_list = 'No results because user did not selected SecurityTrails API scan' + securitytrails_output = 'No results because user did not select SecurityTrails API scan' + if '3' not in used_api_flag: + hudsonrock_output = 'No results because user did not select HudsonRock API scan' print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: API SCANNING]\n" + Style.RESET_ALL) else: - vt_cats = vt_deturls = vt_detsamples = vt_undetsamples = 'No results because user did not selected VirusTotal API scan' - st_alexa = st_apex = st_hostname = st_alivesds = st_txt = a_records_list = mx_records_list = ns_records_list = soa_records_list = 'No results because user did not selected SecurityTrails API scan' + virustotal_output = 'No results because user did not select VirusTotal API scan' + securitytrails_output = 'No results because user did not select SecurityTrails API scan' + hudsonrock_output = 'No results because user did not select HudsonRock API scan' api_scan_db.append('No') pass - + if snapshotting_flag.lower() in ['s', 'p', 'w']: + config_values = read_config() + installed_browser = config_values['installed_browser'] + print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN START: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL) + if snapshotting_flag.lower() == 's': + take_screenshot(installed_browser, url, report_folder + '//screensnapshot.png') + elif snapshotting_flag.lower() == 'p': + save_page_as_html(url, report_folder + '//domain_html_copy.html') + print(Fore.LIGHTMAGENTA_EX + f"\n[EXTENDED SCAN END: PAGE SNAPSHOTTING]\n" + Style.RESET_ALL) + else: + pass data_array = [ip, res, mails, subdomains, subdomains_amount, social_medias, subdomain_mails, sd_socials, subdomain_ip, issuer, subject, notBefore, notAfter, commonName, serialNumber, mx_records, @@ -244,7 +292,7 @@ hostnames, cpes, tags, vulns, common_socials, total_socials, ps_emails_return, accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter, total_links_counter, accessed_links_counter, keywords_messages_list, dorking_status, dorking_file_path, - vt_cats, vt_deturls, vt_detsamples, vt_undetsamples, st_alexa, st_apex, st_hostname, st_alivesds, st_txt, a_records_list, mx_records_list, ns_records_list, soa_records_list] + virustotal_output, securitytrails_output, hudsonrock_output] report_info_array = [casename, db_casename, db_creation_date, report_folder, ctime, report_file_type, report_ctime, api_scan_db, used_api_flag] logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} ENDS HERE')
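The snapshotting branches above call `take_screenshot()` and `save_page_as_html()` from the new `snapshotting` package, whose source is not part of this diff. A hypothetical stand-in for the HTML variant, inferred purely from the call `save_page_as_html(url, report_folder + '//domain_html_copy.html')`; the body is an assumption, not the actual module code:

```python
# Hypothetical sketch of a save_page_as_html(url, output_path) helper,
# inferred from how data_assembler.py calls it; the real html_snapshotting
# module in DPULSE may differ.
import requests

def save_page_as_html(url: str, output_path: str) -> None:
    # Fetch the page and store the raw HTML next to the other report files.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    with open(output_path, "w", encoding="utf-8") as html_file:
        html_file.write(response.text)
```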
diff --git a/docs/dpulse-docs/docs/api.md b/docs/dpulse-docs/docs/api.md index 9ce09bf..e86eee0 100644 --- a/docs/dpulse-docs/docs/api.md +++ b/docs/dpulse-docs/docs/api.md @@ -2,10 +2,11 @@ -Currently DPULSE supports two third-party APIs: +Currently DPULSE supports three third-party APIs: -* SecurityTrails API (securitytrails.com) for deep subdomains and DNS enumeration -* VirusTotal API (virustotal.com) for brief domain information gathering +* SecurityTrails API (securitytrails.com) for deep subdomains and DNS enumeration (this API requires a key) +* VirusTotal API (virustotal.com) for brief domain information gathering (this API requires a key) +* HudsonRock API (hudsonrock.com) for querying a domain against a database of over 30,821,440 computers compromised in global info-stealer campaigns run by threat actors (this API does not require a key) -## SecurityTrails API +## SecurityTrails API (key required) SecurityTrails API is used to gather information about a specified domain. It retrieves various types of DNS records, subdomains, and other details. SecurityTrails API in DPULSE returns these details about target domain: @@ -16,7 +17,7 @@ SecurityTrails API is used to gather information about a specified domain. It re * All subdomains list * Alive (pingable) subdomains list -## VirusTotal API +## VirusTotal API (key required) VirusTotal API is used to interact with the VirusTotal service programmatically and analyze files and URLs using multiple antivirus engines and website scanners, providing insights into whether they are malicious. VirusTotal API in DPULSE returns these details about target domain: @@ -25,6 +26,10 @@ VirusTotal API is used to interact with the VirusTotal service programmatically * Undetected samples * Detected URLs +## HudsonRock API (no key required) + +HudsonRock Cavalier API is based on forensic technologies and operational know-how developed at the IDF’s 8200 Unit to counter nation-state adversaries and professional threat actors. It is a unique cybercrime intelligence data source composed of millions of machines compromised in global malware-spreading campaigns. + ## API Keys database In order to ensure the functioning of API services individually for each DPULSE user, API keys storage database was created. Similar to report storage database, it is lightweight .db extension database with simple structure shown below: diff --git a/docs/dpulse-docs/docs/config.md b/docs/dpulse-docs/docs/config.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/dpulse-docs/docs/config.md @@ -0,0 +1 @@ + diff --git a/docs/dpulse-docs/docs/index.md b/docs/dpulse-docs/docs/index.md index 7fa7b9e..de7238b 100644 --- a/docs/dpulse-docs/docs/index.md +++ b/docs/dpulse-docs/docs/index.md @@ -8,12 +8,10 @@ DPULSE is a software solution for conducting OSINT research in relation to a cer 1. ***Basic scan:*** extracts general information about domain such as WHOIS information, subdomains, e-mail addresses, IP addresses, social medias links/posts/profiles, SSL certificate info, possible vulnerabilities, open ports, CPEs, used web-technologies and so on. It also can download sitemap.xml and robots.txt files from a domain -2.
***PageSearch standard scan:*** extended subdomains deep search function, which starts in addition to basic scan and which can find more e-mail addresses, API keys, exposed passwords, cookies, hidden forms of data and other web page elements, documents, config files, databases files (and PageSearch can download them!), specified words by user in PDF files +2. ***PageSearch scan:*** extended subdomains deep search function, which starts in addition to basic scan and which can find more e-mail addresses, API keys, exposed passwords, cookies, hidden forms of data and other web page elements, documents, config files, databases files (and PageSearch can download them!), specified words by user in PDF files -3. ***PageSearch Sitemap inspection scan:*** sitemap links crawler which starts in addition to basic scan and which can find even more e-mails +3. ***Dorking scan:*** extended domain research function with prepared Google Dorking databases for different purposes, such as IoT dorking, files dorking, admin panels dorking, web elements dorking. Moreover, this mode allows you to create your own custom Google Dorking database -4. ***Dorking scan:*** extended domain research function with prepared Google Dorking databases for different purposes, such as IoT dorking, files dorking, admin panels dorking, web elements dorking. Moreover, this mode allows you to create your own custom Google Dorking database - -5. ***API scan:*** extended domain research function with prepared functions for 3rd party APIs usage. Currently DPULSE supports VirusTotal API (for brief domain information gathering) and SecurityTrails API (deep subdomains and DNS enumeration) +4. ***API scan:*** extended domain research function with prepared functions for 3rd party APIs usage. Currently DPULSE supports VirusTotal API (for brief domain information gathering) and SecurityTrails API (deep subdomains and DNS enumeration) Finally, DPULSE compiles all found data into an easy-to-read HTML or XLSX report by category. It also saves all information about scan in local report storage database, which can be restored later. diff --git a/docs/dpulse-docs/docs/logging.md b/docs/dpulse-docs/docs/logging.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/docs/dpulse-docs/docs/logging.md @@ -0,0 +1 @@ + diff --git a/docs/dpulse-docs/docs/pagesearch_sitemap.md b/docs/dpulse-docs/docs/pagesearch_sitemap.md deleted file mode 100644 index 3f29b9f..0000000 --- a/docs/dpulse-docs/docs/pagesearch_sitemap.md +++ /dev/null @@ -1,9 +0,0 @@ -# PageSearch Sitemap Inspection mode - -PageSearch Sitemap Inspection is an additional extended sitemap links inspection function, which basically crawls into them and extract all e-mails from them. User can choose whether to use PageSearch Sitemap Inspection or not during pre-scan preparation steps. 
- -## PageSearch Sitemap Inspection results - -Basically PageSearch Sitemap Inspection mode returns lots of e-mails from all links, extracted from sitemap.xml - - diff --git a/docs/dpulse-docs/mkdocs.yml b/docs/dpulse-docs/mkdocs.yml index c9618f9..10fcb67 100644 --- a/docs/dpulse-docs/mkdocs.yml +++ b/docs/dpulse-docs/mkdocs.yml @@ -6,10 +6,11 @@ nav: - Getting started: getting_started.md - Basic scan: basic_scan.md - PageSearch: pagesearch.md - - PageSearch Sitemap Inspection: pagesearch_sitemap.md - Built-in automatic Dorking: dorking.md - Built-in API scanning: api.md - Reporting and report types: reporting.md + - Configuration file: config.md + - Logging: logging.md - Demo and use-cases: demo.md - Contact developer: contact_dev.md diff --git a/dpulse.py b/dpulse.py index 34a1127..4856f9e 100644 --- a/dpulse.py +++ b/dpulse.py @@ -7,6 +7,7 @@ sys.path.append('reporting_modules') sys.path.append('dorking') sys.path.append('apis') +sys.path.append('snapshotting') from config_processing import create_config, check_cfg_presence, read_config, print_and_return_config @@ -52,7 +53,7 @@ cli = cli_init.Menu() cli.welcome_menu() -def process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, pagesearch_flag, pagesearch_ui_mark, spinner_thread): +def process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username): import xlsx_report_creation as xlsx_rc import html_report_creation as html_rc from misc import time_processing @@ -60,16 +61,16 @@ def process_report(report_filetype, short_domain, url, case_comment, keywords_li try: start = time() if pagesearch_flag in ['y', 'si']: - data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), keywords_list, keywords_flag, dorking_flag.lower(), used_api_flag) + data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), keywords_list, keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username) else: - data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), '', keywords_flag, dorking_flag.lower(), used_api_flag) + data_array, report_info_array = data_processing.data_gathering(short_domain, url, report_filetype.lower(), pagesearch_flag.lower(), '', keywords_flag, dorking_flag.lower(), used_api_flag, snapshotting_flag, username) end = time() - start endtime_string = time_processing(end) if report_filetype == 'xlsx': - xlsx_rc.create_report(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, pagesearch_flag, endtime_string) + xlsx_rc.create_report(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, pagesearch_flag, endtime_string, snapshotting_ui_mark) elif report_filetype == 'html': - html_rc.report_assembling(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, pagesearch_flag, endtime_string) + html_rc.report_assembling(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, pagesearch_flag, endtime_string, snapshotting_ui_mark) finally: spinner_thread.do_run = False spinner_thread.join() @@ -114,14 +115,14 @@ def run(): print(Fore.RED + "Entered domain is not accessible. 
Scan is impossible" + Style.RESET_ALL) break case_comment = input(Fore.YELLOW + "Enter case comment >> ") - report_filetype = input(Fore.YELLOW + "Enter report file extension [XLSX/HTML] >> ") + report_filetype = input(Fore.YELLOW + "Enter report file extension [HTML] >> ") if not report_filetype: print(Fore.RED + "\nReport filetype cannot be empty") - if report_filetype.lower() not in ['xlsx', 'html']: - print(Fore.RED + '\nYou need to choose between XLSX or HTML report file types') + if report_filetype.lower() not in ['html']: # temporarily disabled since v1.2.1 (['xlsx', 'html']) + print(Fore.RED + '\nOnly the HTML report file type is currently supported') else: - print(Fore.GREEN + "[!] SI mode suppose you to have sitemap_links.txt file in report folder [!]\n[!] It'll visit every link from this file [!]") - pagesearch_flag = input(Fore.YELLOW + "Would you like to use PageSearch function? [Y/SI/N (for No)] >> ") + #print(Fore.GREEN + "[!] SI mode suppose you to have sitemap_links.txt file in report folder [!]\n[!] It'll visit every link from this file [!]") + pagesearch_flag = input(Fore.YELLOW + "Would you like to use PageSearch function? [Y/N (for No)] >> ") if pagesearch_flag.lower() == 'y': keywords_input = input(Fore.YELLOW + "Enter keywords (separated by comma) to search in files during PageSearch process (or write N if you don't need it) >> ") if keywords_input.lower() != "n": @@ -137,10 +138,10 @@ elif pagesearch_flag.lower() == 'n': keywords_list = None keywords_flag = 0 - elif pagesearch_flag.lower() == 'si': - keywords_list = None - keywords_flag = 0 - if report_filetype.lower() == 'xlsx' or report_filetype.lower() == 'html': + #elif pagesearch_flag.lower() == 'si': + #keywords_list = None + #keywords_flag = 0 + if report_filetype.lower() == 'html': #report_filetype.lower() == 'xlsx' or (temporarily disabled xlsx reporting) dorking_flag = input(Fore.YELLOW + "Select Dorking mode [Basic/IoT/Files/Admins/Web/Custom/N (for None)] >> ") api_flag = input(Fore.YELLOW + "Would you like to use 3rd party API in scan? [Y/N (for No)] >> ") if api_flag.lower() == 'y': @@ -149,6 +150,10 @@ print(Fore.GREEN + "Pay attention that APIs with red-colored API Key field cannot be used!\n") to_use_api_flag = input(Fore.YELLOW + "Select API IDs you want to use in scan (separated by comma) >> ") used_api_flag = [item.strip() for item in to_use_api_flag.split(',')] + if '3' in used_api_flag: + username = input(Fore.YELLOW + "If you know a username associated with this domain, enter it here (or N if not) >> " + Style.RESET_ALL) + else: + username = None if db.check_api_keys(used_api_flag): print(Fore.GREEN + 'Found API key.
Continuation') else: @@ -158,17 +163,19 @@ def run(): elif api_flag.lower() == 'n': used_api_ui = 'No' used_api_flag = ['Empty'] + username = None pass else: print(Fore.RED + "\nInvalid API usage mode" + Style.RESET_ALL) break - if pagesearch_flag.lower() == 'y' or pagesearch_flag.lower() == 'n' or pagesearch_flag.lower() == 'si': + snapshotting_flag = input(Fore.YELLOW + "Select Snapshotting mode [S(creenshot)/P(age Copy)/N (for None)] >> ") + if pagesearch_flag.lower() == 'y' or pagesearch_flag.lower() == 'n':# or pagesearch_flag.lower() == 'si': if pagesearch_flag.lower() == "n": pagesearch_ui_mark = 'No' elif pagesearch_flag.lower() == 'y' and keywords_flag == 1: pagesearch_ui_mark = f'Yes, with {keywords_list} keywords search' - elif pagesearch_flag.lower() == 'si': - pagesearch_ui_mark = 'Yes, in Sitemap Inspection mode' + #elif pagesearch_flag.lower() == 'si': + #pagesearch_ui_mark = 'Yes, in Sitemap Inspection mode' else: pagesearch_ui_mark = 'Yes, without keywords search' if dorking_flag.lower() not in ['basic', 'iot', 'n', 'admins', 'files', 'web', 'custom']: @@ -190,16 +197,27 @@ def run(): row_count = get_columns_amount(f'dorking//{custom_db_name}.db', 'dorks') dorking_ui_mark = f'Yes, Custom table dorking ({row_count} dorks)' dorking_flag = str(dorking_flag.lower() + f"+{custom_db_name}.db") - cli_init.print_prescan_summary(short_domain, report_filetype.upper(), pagesearch_ui_mark, dorking_ui_mark, used_api_ui, case_comment) + if snapshotting_flag.lower() not in ['s', 'p', 'w', 'n']: + print(Fore.RED + "\nInvalid Snapshotting mode. Please select mode among S/P/W or N") + break + else: + snapshotting_ui_mark = 'No' + if snapshotting_flag.lower() == 's': + snapshotting_ui_mark = "Yes, domain's main page snapshotting as a screenshot" + elif snapshotting_flag.lower() == 'p': + snapshotting_ui_mark = "Yes, domain's main page snapshotting as a .HTML file" + elif snapshotting_flag.lower() == 'w': # not supported at the moment + snapshotting_ui_mark = "Yes, domain's main page snapshotting using Wayback Machine" + cli_init.print_prescan_summary(short_domain, report_filetype.upper(), pagesearch_ui_mark, dorking_ui_mark, used_api_ui, case_comment, snapshotting_ui_mark) print(Fore.LIGHTMAGENTA_EX + "[BASIC SCAN START]\n" + Style.RESET_ALL) spinner_thread = ProgressBar() spinner_thread.start() - if report_filetype.lower() in ['xlsx', 'html']: + if report_filetype.lower() in ['html']: # ['xlsx'] temporarily disabled process_report(report_filetype, short_domain, url, case_comment, keywords_list, keywords_flag, dorking_flag, used_api_flag, - pagesearch_flag, pagesearch_ui_mark, spinner_thread) + pagesearch_flag, pagesearch_ui_mark, spinner_thread, snapshotting_flag, snapshotting_ui_mark, username) else: - print(Fore.RED + "\nUnsupported PageSearch mode. Please choose between Y, N or SI") + print(Fore.RED + "\nUnsupported PageSearch mode. 
Please choose between Y or N") elif choice == "2": import configparser diff --git a/pagesearch/pagesearch_deepsearch.py b/pagesearch/pagesearch_deepsearch.py deleted file mode 100644 index 37880a2..0000000 --- a/pagesearch/pagesearch_deepsearch.py +++ /dev/null @@ -1,52 +0,0 @@ -import requests -from colorama import Fore, Style -from bs4 import BeautifulSoup -import re -import os - -def sitemap_inspection(report_folder): - if os.path.exists(report_folder + '//03-sitemap_links.txt'): - try: - accessed_links_counter = 0 - print(Fore.GREEN + "Trying to access sitemap_links.txt file" + Style.RESET_ALL) - with open(report_folder + '//03-sitemap_links.txt', "r") as file: - links = file.readlines() - print(Fore.GREEN + "Reading file and forming links list" + Style.RESET_ALL) - ps_docs_path = report_folder + '//sitemap_inspection' - if not os.path.exists(ps_docs_path): - os.makedirs(ps_docs_path) - total_emails = [] - email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' - links = [link.strip() for link in links] - total_links_counter = len(links) - for url in links: - response = requests.get(url) - if response.status_code == 200: - accessed_links_counter += 1 - soup = BeautifulSoup(response.content, 'html.parser') - emails = re.findall(email_pattern, soup.text) - total_emails.append(emails) - - print(Fore.GREEN + f" Inspecting links: " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"Link #{accessed_links_counter}" + Style.RESET_ALL, end="\r") - - ds_emails_list = [x for x in total_emails if x] - ds_emails_cleaned = [', '.join(sublist) for sublist in ds_emails_list] - ds_emails_return = list(set(ds_emails_cleaned)) - print(Fore.GREEN + "PageSearch Sitemap Inspection successfully ended\n") - print(Fore.LIGHTGREEN_EX + "-------------------------------------------------") - print(Fore.GREEN + f"\nDuring PageSearch Sitemap Inspection process:\n[+] Total {total_links_counter} links were checked") - print(Fore.GREEN + f"[+] Among them, {accessed_links_counter} links were accessible") - print(Fore.GREEN + f"[+] In result, {len(ds_emails_return)} unique e-mail addresses were found") - with open(ps_docs_path + "//inspection_logs.txt", "w") as si_logs: - si_logs.write('# THIS IS PAGESEARCH SITEMAP INSPECTION LOGS' + '\n') - si_logs.write('# HERE YOU CAN FIND INFO THAT WAS DISCOVERED DURING SITEMAP INSPECTION PROCESS' + '\n' * 3) - si_logs.write(f'[+] LINKS: Received {total_links_counter} links. Amount of accessible links: {accessed_links_counter}' + '\n') - si_logs.write(f'[+] EMAILS: Returned and stored in PDF/XLSX report. 
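For clarity, here is a minimal sketch of how the comma-separated API-ID input above is parsed. The concrete input value is an example, and the mapping of ID '3' to HudsonRock is an assumption based on the username prompt that the CLI attaches to that ID:

```python
# Minimal, self-contained illustration of the API-ID parsing used above.
to_use_api_flag = "1, 3"  # example user input
used_api_flag = [item.strip() for item in to_use_api_flag.split(',')]
print(used_api_flag)  # ['1', '3']

if '3' in used_api_flag:  # HudsonRock selected -> optional username is collected
    username = "jdoe"     # stands in for the interactive input() call
else:
    username = None
```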
diff --git a/pagesearch/pagesearch_deepsearch.py b/pagesearch/pagesearch_deepsearch.py
deleted file mode 100644
index 37880a2..0000000
--- a/pagesearch/pagesearch_deepsearch.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import requests
-from colorama import Fore, Style
-from bs4 import BeautifulSoup
-import re
-import os
-
-def sitemap_inspection(report_folder):
-    if os.path.exists(report_folder + '//03-sitemap_links.txt'):
-        try:
-            accessed_links_counter = 0
-            print(Fore.GREEN + "Trying to access sitemap_links.txt file" + Style.RESET_ALL)
-            with open(report_folder + '//03-sitemap_links.txt', "r") as file:
-                links = file.readlines()
-            print(Fore.GREEN + "Reading file and forming links list" + Style.RESET_ALL)
-            ps_docs_path = report_folder + '//sitemap_inspection'
-            if not os.path.exists(ps_docs_path):
-                os.makedirs(ps_docs_path)
-            total_emails = []
-            email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
-            links = [link.strip() for link in links]
-            total_links_counter = len(links)
-            for url in links:
-                response = requests.get(url)
-                if response.status_code == 200:
-                    accessed_links_counter += 1
-                    soup = BeautifulSoup(response.content, 'html.parser')
-                    emails = re.findall(email_pattern, soup.text)
-                    total_emails.append(emails)
-
-                    print(Fore.GREEN + f" Inspecting links: " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"Link #{accessed_links_counter}" + Style.RESET_ALL, end="\r")
-
-            ds_emails_list = [x for x in total_emails if x]
-            ds_emails_cleaned = [', '.join(sublist) for sublist in ds_emails_list]
-            ds_emails_return = list(set(ds_emails_cleaned))
-            print(Fore.GREEN + "PageSearch Sitemap Inspection successfully ended\n")
-            print(Fore.LIGHTGREEN_EX + "-------------------------------------------------")
-            print(Fore.GREEN + f"\nDuring PageSearch Sitemap Inspection process:\n[+] Total {total_links_counter} links were checked")
-            print(Fore.GREEN + f"[+] Among them, {accessed_links_counter} links were accessible")
-            print(Fore.GREEN + f"[+] In result, {len(ds_emails_return)} unique e-mail addresses were found")
-            with open(ps_docs_path + "//inspection_logs.txt", "w") as si_logs:
-                si_logs.write('# THIS IS PAGESEARCH SITEMAP INSPECTION LOGS' + '\n')
-                si_logs.write('# HERE YOU CAN FIND INFO THAT WAS DISCOVERED DURING SITEMAP INSPECTION PROCESS' + '\n' * 3)
-                si_logs.write(f'[+] LINKS: Received {total_links_counter} links. Amount of accessible links: {accessed_links_counter}' + '\n')
-                si_logs.write(f'[+] EMAILS: Returned and stored in PDF/XLSX report. Total {len(ds_emails_return)} unique emails found' + '\n')
-            return ds_emails_return, total_links_counter, accessed_links_counter, len(ds_emails_return)
-        except FileNotFoundError:
-            print(Fore.RED + f"Cannot start PageSearch in Sitemap Inspection mode because sitemap_links.txt file doesn't exist" + Style.RESET_ALL)
-            return 'PageSearch Sitemap Inspection was not started because sitemap_links.txt file was not gathered', 'PageSearch Sitemap Inspection was not started because sitemap_links.txt file was not gathered', 'PageSearch Sitemap Inspection was not started because sitemap_links.txt file was not gathered', 'PageSearch Sitemap Inspection was not started because sitemap_links.txt file was not gathered'
-    else:
-        print(Fore.RED + f"Cannot start PageSearch in Sitemap Inspection mode because sitemap_links.txt file doesn't exist" + Style.RESET_ALL)
-        return 'PageSearch Sitemap Inspection was not started because sitemap_links.txt file was not gathered', 'PageSearch Sitemap Inspection was not started because sitemap_links.txt file was not gathered', 'PageSearch Sitemap Inspection was not started because sitemap_links.txt file was not gathered', 'PageSearch Sitemap Inspection was not started because sitemap_links.txt file was not gathered'
-
diff --git a/pagesearch/pagesearch_main.py b/pagesearch/pagesearch_main.py
index cf0f00a..7cd92cc 100644
--- a/pagesearch/pagesearch_main.py
+++ b/pagesearch/pagesearch_main.py
@@ -1,15 +1,7 @@
 from colorama import Fore, Style
 from pagesearch_parsers import subdomains_parser
-from pagesearch_deepsearch import sitemap_inspection

 def normal_search(to_search_array, report_folder, keywords, keywords_flag):
     print(Fore.GREEN + "Conducting PageSearch. Please, be patient, it may take a long time\n" + Style.RESET_ALL)
     ps_emails_return, accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter, keywords_messages_list = subdomains_parser(to_search_array[0], report_folder, keywords, keywords_flag)
     return ps_emails_return, accessible_subdomains, emails_amount, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter, keywords_messages_list
-
-def sitemap_inspection_search(report_folder):
-    print(Fore.GREEN + "Conducting PageSearch in Sitemap Inspection mode. Please, be patient, it will take a long time\n" + Style.RESET_ALL)
-    ds_emails_return, total_links_counter, accessed_links_counter, emails_amount = sitemap_inspection(report_folder)
-    return ds_emails_return, total_links_counter, accessed_links_counter, emails_amount
-
-
diff --git a/poetry.lock b/poetry.lock
index 4b233f6..fac3ab5 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -33,13 +33,13 @@ six = "*"

 [[package]]
 name = "certifi"
-version = "2024.12.14"
+version = "2025.1.31"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"},
-    {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"},
+    {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"},
+    {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
 ]

 [[package]]
@@ -516,13 +516,13 @@ files = [

 [[package]]
 name = "pygments"
-version = "2.18.0"
+version = "2.19.1"
 description = "Pygments is a syntax highlighting package written in Python."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"},
-    {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"},
+    {file = "pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c"},
+    {file = "pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f"},
 ]

 [package.extras]
diff --git a/pyproject.toml b/pyproject.toml
index 273a66f..0e4c847 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

 [tool.poetry]
 name = "dpulse"
-version = "1.2"
+version = "1.2.1"
 description = "Convenient,fast and user-friendly collector of domain information from Open-Sources"
 authors = ["OSINT-TECHNOLOGIES "]
 readme = "README.md"
@@ -41,6 +41,8 @@ dnspython = "2.6.1"
 lxml = "5.2.2"
 openpyxl = "3.1.2"
 PyMuPDF = "1.24.7"
+selenium = "4.28.1"
+webdriver-manager = "4.0.2"

 [tool.poetry.scripts]
 dpulse = 'dpulse.dpulse:run'
diff --git a/reporting_modules/html_report_creation.py b/reporting_modules/html_report_creation.py
index 3eb01ee..0b662e9 100644
--- a/reporting_modules/html_report_creation.py
+++ b/reporting_modules/html_report_creation.py
@@ -2,10 +2,14 @@
 sys.path.append('service')
 sys.path.append('service//pdf_report_templates')
+sys.path.append('apis')

 from logs_processing import logging
 import db_processing as db
 import files_processing as fp
+from api_hudsonrock import hudsonrock_html_prep
+from api_virustotal import virustotal_html_prep
+from api_securitytrails import securitytrails_html_prep

 try:
     from datetime import datetime
@@ -13,6 +17,7 @@
     import os
     from colorama import Fore, Style
     import sqlite3
+    import re
 except ImportError as e:
     print(Fore.RED + "Import error appeared. Reason: {}".format(e) + Style.RESET_ALL)
     sys.exit()
@@ -25,7 +30,7 @@ def generate_report(data, output_file, template_path):
         f.write(html_output)
         return True

-def report_assembling(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, pagesearch_keyword, end):
+def report_assembling(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, pagesearch_keyword, end, snapshotting_ui_mark):
     try:
         ip = data_array[0]
         res = data_array[1]
@@ -71,19 +76,20 @@ def report_assembling(short_domain, url, case_comment, data_array, report_info_a
         keywords_messages_list = data_array[42]
         dorking_status = data_array[43]
         dorking_file_path = data_array[44]
-        vt_cats = data_array[45]
-        vt_deturls = data_array[46]
-        vt_detsamples = data_array[47]
-        vt_undetsamples = data_array[48]
-        st_alexa = data_array[49]
-        st_apex = data_array[50]
-        st_hostname = data_array[51]
-        st_alivesds = data_array[52]
-        st_txt = data_array[53]
-        a_records_list = data_array[54]
-        mx_records_list = data_array[55]
-        ns_records_list = data_array[56]
-        soa_records_list = data_array[57]
+        virustotal_output = data_array[45]
+        #vt_deturls = data_array[46]
+        #vt_detsamples = data_array[47]
+        #vt_undetsamples = data_array[48]
+        securitytrails_output = data_array[46]
+        #st_apex = data_array[47]
+        #st_hostname = data_array[48]
+        #st_alivesds = data_array[49]
+        #st_txt = data_array[50]
+        #a_records_list = data_array[51]
+        #mx_records_list = data_array[52]
+        #ns_records_list = data_array[53]
+        #soa_records_list = data_array[54]
+        hudsonrock_output = data_array[47]
         casename = report_info_array[0]
         db_casename = report_info_array[1]
         db_creation_date = report_info_array[2]
@@ -92,40 +98,9 @@ def report_assembling(short_domain, url, case_comment, data_array, report_info_a
         api_scan_db = report_info_array[7]
         used_api_flag = report_info_array[8]

-        if '2' in used_api_flag:
-            st_a_combined = []
-            if len(a_records_list) > 0:
-                if len(a_records_list) == 1:
-                    record = a_records_list[0]
-                    st_a_combined = [f"IPv4 address: {record.get('ip', '')}, owned by {record.get('organization', '')}"]
-                else:
-                    st_a_combined = [f"IPv4 address: {record.get('ip', '')}, owned by {record.get('organization', '')}" for record in a_records_list]
-
-            st_mx_combined = []
-            if len(mx_records_list) > 0:
-                if len(mx_records_list) == 1:
-                    record = mx_records_list[0]
-                    st_mx_combined = [f"Hostname {record.get('mx_hostname', '')} with priority={record.get('mx_priority', '')}, owned by {record.get('mx_organization', '')}"]
-                else:
-                    st_mx_combined = [f"Hostname {record.get('mx_hostname', '')} with priority={record.get('mx_priority', '')}, owned by {record.get('mx_organization', '')}" for record in mx_records_list]
-
-            st_ns_combined = []
-            if len(ns_records_list) > 0:
-                if len(ns_records_list) == 1:
-                    record = ns_records_list[0]
-                    st_ns_combined = [f"Nameserver: {record.get('ns_nameserver', '')}, owned by {record.get('ns_organization', '')}"]
-                else:
-                    st_ns_combined = [f"Nameserver: {record.get('ns_nameserver', '')}, owned by {record.get('ns_organization', '')}" for record in ns_records_list]
-
-            st_soa_combined = []
-            if len(soa_records_list) > 0:
-                if len(soa_records_list) == 1:
-                    record = soa_records_list[0]
-                    st_soa_combined = [f"Email: {record.get('soa_email', '')}, TTL={record.get('soa_ttl', '')}"]
-                else:
-                    st_soa_combined = [f"Email: {record.get('soa_email', '')}, TTL={record.get('soa_ttl', '')}" for record in soa_records_list]
-        else:
-            st_soa_combined = st_ns_combined = st_mx_combined = st_a_combined = st_txt = st_alivesds = ['No results because user did not selected SecurityTrails API scan']
+        hudsonrock_output = hudsonrock_html_prep(hudsonrock_output)
+        virustotal_output = virustotal_html_prep(virustotal_output)
+        securitytrails_output = securitytrails_html_prep(securitytrails_output)

         pdf_templates_path = 'service//pdf_report_templates'
@@ -182,9 +157,8 @@ def report_assembling(short_domain, url, case_comment, data_array, report_info_a
                    'tags': tags, 'vulns': vulns, 'a_tsm': total_socials, 'pagesearch_ui_mark': pagesearch_ui_mark, 'dorking_status': dorking_status,
                    'add_dsi': add_dsi, 'ps_s': accessible_subdomains, 'ps_e': emails_amount, 'ps_f': files_counter, 'ps_c': cookies_counter, 'ps_a': api_keys_counter,
-                   'ps_w': website_elements_counter, 'ps_p': exposed_passwords_counter, 'ss_l': total_links_counter, 'ss_a': accessed_links_counter, 'vt_cats': vt_cats, 'vt_deturls': vt_deturls,
-                   'vt_detsampls': vt_detsamples, 'vt_undetsampls': vt_undetsamples, 'st_alexa': st_alexa, 'st_apex': st_apex, 'st_hostname': st_hostname, 'st_ip_combined': st_a_combined, 'st_val': st_txt, 'st_subds': st_alivesds, 'st_mx_combined': st_mx_combined,
-                   'st_ns_combined': st_ns_combined, 'st_soa_combined': st_soa_combined}
+                   'ps_w': website_elements_counter, 'ps_p': exposed_passwords_counter, 'ss_l': total_links_counter, 'ss_a': accessed_links_counter, 'hudsonrock_output': hudsonrock_output, "snapshotting_ui_mark": snapshotting_ui_mark,
+                   'virustotal_output': virustotal_output, 'securitytrails_output': securitytrails_output}

         html_report_name = report_folder + '//' + casename
         if generate_report(context, html_report_name, template_path):
diff --git a/reporting_modules/xlsx_report_creation.py b/reporting_modules/xlsx_report_creation.py
index 334e56c..cae3cc7 100644
--- a/reporting_modules/xlsx_report_creation.py
+++ b/reporting_modules/xlsx_report_creation.py
@@ -18,7 +18,7 @@
     print(Fore.RED + "Import error appeared. Reason: {}".format(e) + Style.RESET_ALL)
     sys.exit()

-def create_report(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, pagesearch_keyword, end):
+def create_report(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, pagesearch_keyword, end, snapshotting_ui_mark):
     try:
         ip = data_array[0]
         res = data_array[1]
diff --git a/requirements.txt b/requirements.txt
index 4d6f65e..ac21e71 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,3 +17,5 @@ dnspython==2.6.1
 lxml==5.2.2
 openpyxl==3.1.2
 PyMuPDF==1.24.7
+selenium==4.28.1
+webdriver-manager==4.0.2
diff --git a/service/cli_init.py b/service/cli_init.py
index 05f1de6..72d846a 100644
--- a/service/cli_init.py
+++ b/service/cli_init.py
@@ -20,7 +20,7 @@ def welcome_menu(self):
     fig = Figlet(font=wm_font)
     print('\n')
     self.console.print(fig.renderText('DPULSE'), style=preview_style)
-    print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2 stable] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL)
+    print(Fore.MAGENTA + Style.BRIGHT + '[DPULSE-CLI] - [v1.2.1 rolling] - [OSINT-TECHNOLOGIES]\n' + Style.RESET_ALL)
     print(Fore.MAGENTA + Style.BRIGHT + '[Visit our pages]\nGitHub repository: https://github.com/OSINT-TECHNOLOGIES\nPyPi page: https://pypi.org/project/dpulse/\nDocumentation: https://dpulse.readthedocs.io' + Style.RESET_ALL)

 def print_main_menu(self):
@@ -62,13 +62,14 @@ def api_manager(self):
     print(Fore.CYAN + "2. Restore reference API Keys DB")
     print(Fore.LIGHTRED_EX + "3. Return to main menu" + Style.RESET_ALL)

-def print_prescan_summary(short_domain, report_filetype, pagesearch_ui_mark, dorking_ui_mark, used_api_ui, case_comment):
+def print_prescan_summary(short_domain, report_filetype, pagesearch_ui_mark, dorking_ui_mark, used_api_ui, case_comment, snapshotting_ui_mark):
     print(Fore.LIGHTMAGENTA_EX + "\n[PRE-SCAN SUMMARY]\n" + Style.RESET_ALL)
     print(Fore.GREEN + "Determined target: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + short_domain + Style.RESET_ALL)
     print(Fore.GREEN + "Report type: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + report_filetype.lower() + Style.RESET_ALL)
     print(Fore.GREEN + "PageSearch conduction: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + pagesearch_ui_mark + Style.RESET_ALL)
     print(Fore.GREEN + "Dorking conduction: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + dorking_ui_mark + Style.RESET_ALL)
     print(Fore.GREEN + "APIs scan: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + used_api_ui + Style.RESET_ALL)
+    print(Fore.GREEN + "Snapshotting conduction: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + snapshotting_ui_mark + Style.RESET_ALL)
     print(Fore.GREEN + "Case comment: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + case_comment + Style.RESET_ALL + "\n")

 def print_api_db_msg():
diff --git a/service/config_processing.py b/service/config_processing.py
index 258f04c..c7a5d11 100644
--- a/service/config_processing.py
+++ b/service/config_processing.py
@@ -30,6 +30,7 @@ def create_config():
     config['LOGGING'] = {'log_level': 'info'}
     config['CLI VISUAL'] = {'preview_color': 'red', 'font': 'slant'}
     config['DORKING'] = {'dorking_delay (secs)': '2', 'delay_step': '5'}
+    config['SNAPSHOTTING'] = {'installed_browser': 'firefox', 'opera_browser_path': 'None'}
     config['USER-AGENTS'] = {}
     for i, agent in enumerate(basic_user_agents):
         config['USER-AGENTS'][f'agent_{i + 1}'] = agent
@@ -53,6 +54,9 @@ def read_config():
     delay_step = config.get('DORKING', 'delay_step')
     user_agents = [value for key, value in config['USER-AGENTS'].items()]
     proxies_file_path = config.get('PROXIES', 'proxies_file_path')
+    installed_browser = config.get('SNAPSHOTTING', 'installed_browser')
+    opera_browser_path = config.get('SNAPSHOTTING', 'opera_browser_path')
+
     config_values = {
         'logging_level': log_level,
@@ -61,7 +65,9 @@ def read_config():
         'dorking_delay (secs)': dorking_delay,
         'delay_step': delay_step,
         'user_agents': user_agents,
-        'proxies_file_path': proxies_file_path
+        'proxies_file_path': proxies_file_path,
+        'installed_browser': installed_browser,
+        'opera_browser_path': opera_browser_path
     }
     return config_values
diff --git a/service/db_processing.py b/service/db_processing.py
index b179977..2e4c9ce 100644
--- a/service/db_processing.py
+++ b/service/db_processing.py
@@ -133,6 +133,14 @@ def insert_blob(report_file_type, pdf_blob, db_casename, creation_date, case_com
         api_scan_insert = 'VirusTotal'
     elif 'SecurityTrails' in api_scan_db:
         api_scan_insert = 'SecurityTrails'
+    elif 'VirusTotal' in api_scan_db and 'SecurityTrails' in api_scan_db and 'HudsonRock' in api_scan_db:
+        api_scan_insert = 'SecurityTrails, HudsonRock and VirusTotal'
+    elif 'VirusTotal' in api_scan_db and 'HudsonRock' in api_scan_db:
+        api_scan_insert = 'VirusTotal and HudsonRock'
+    elif 'SecurityTrails' in api_scan_db and 'HudsonRock' in api_scan_db:
+        api_scan_insert = 'SecurityTrails and HudsonRock'
+    elif 'HudsonRock' in api_scan_db:
+        api_scan_insert = 'HudsonRock'

     sqlite_insert_blob_query = """INSERT INTO report_storage
         (report_file_extension, report_content, creation_date, target, comment, sitemap_file, robots_text, sitemap_text, dorks_results, api_scan)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
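As a usage sketch, the new [SNAPSHOTTING] section created above is surfaced through read_config(); reading it directly looks roughly like this (the config file path here is an assumption for illustration):

```python
# Sketch: reading the new SNAPSHOTTING options the way read_config() exposes them.
# Inside DPULSE the same values arrive as config_values['installed_browser'] and
# config_values['opera_browser_path'].
import configparser

config = configparser.ConfigParser()
config.read('service//config.ini')  # assumed location of the generated config

installed_browser = config.get('SNAPSHOTTING', 'installed_browser')    # e.g. 'firefox'
opera_browser_path = config.get('SNAPSHOTTING', 'opera_browser_path')  # 'None' unless Opera is configured
print(installed_browser, opera_browser_path)
```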
diff --git a/service/pdf_report_templates/default_report_temp.html b/service/pdf_report_templates/default_report_temp.html
index 713bd51..d9af0f3 100644
--- a/service/pdf_report_templates/default_report_temp.html
+++ b/service/pdf_report_templates/default_report_temp.html
@@ -22,10 +22,10 @@
 8. Dorking queries and results links (if was selected)
 [PAGESEARCH SCAN INFO]
 9. PageSearch results (if was selected)
-10. PageSearch Sitemap Inspection results (if was selected)
 [API SCAN INFO]
-11. VirusTotal API scan results (if was selected)
-12. SecurityTrails API scan results (if was selected)
+10. VirusTotal API scan results (if was selected)
+11. SecurityTrails API scan results (if was selected)
+12. HudsonRock API scan results (if was selected)

@@ -37,6 +37,7 @@
 Status of sitemap.xml links extraction: {{sitemap_links}}
 Google Dorking status: {{dorking_status}}
 PageSearch conduction: {{pagesearch_ui_mark}}
+Snapshotting conduction: {{snapshotting_ui_mark}}
 Report creation time: {{ctime}}

@@ -197,51 +198,21 @@
-PAGESEARCH SITEMAP INSPECTION RESULTS
+VIRUSTOTAL API SCAN RESULTS
-Total links amount: {{ss_l}}
-Amount of accessed links: {{ss_a}}
+{{ virustotal_output }}

-VIRUSTOTAL API SCAN RESULTS
+SECURITYTRAILS API SCAN RESULTS
-Categories: {{vt_cats}}
-Detected URLs: {{vt_deturls}}
-Detected samples: {{vt_detsampls}}
-Undetected samples: {{vt_undetsampls}}
+{{ securitytrails_output }}

-SECURITY TRAILS API SCAN RESULTS
+HUDSONROCK API SCAN RESULTS
-Alexa rank: {{st_alexa}}
-Apex domain: {{st_apex}}
-Hostname: {{st_hostname}}
-A records:
-{% for st_ip in st_ip_combined %}
-  • => {{ st_ip }}
-{% endfor %}
-MX records:
-{% for st_mx in st_mx_combined %}
-  • => {{ st_mx }}
-{% endfor %}
-NS records:
-{% for st_ns in st_ns_combined %}
-  • => {{ st_ns }}
-{% endfor %}
-SOA records:
-{% for st_soa in st_soa_combined %}
-  • => {{ st_soa }}
-{% endfor %}
-TXT records values:
-{% for val in st_val %}
-  • => {{ val }}
-{% endfor %}
-Subdomains list
-{% for subd in st_subds %}
-  • => {{ subd }}
-{% endfor %}
+{{ hudsonrock_output }}

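The `{{ virustotal_output }}`, `{{ securitytrails_output }}` and `{{ hudsonrock_output }}` placeholders above are filled from the `context` dict built in `report_assembling`. A minimal standalone sketch of that rendering step, assuming Jinja2 (which the `{% for %}` syntax used elsewhere in this template implies):

```python
# The template string is a stand-in for default_report_temp.html, and the
# context value is an example, not real API output.
from jinja2 import Template

template = Template("HUDSONROCK API SCAN RESULTS\n{{ hudsonrock_output }}")
context = {'hudsonrock_output': 'No compromised machines were found for this domain'}
print(template.render(context))
```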
diff --git a/snapshotting/__init__.py b/snapshotting/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/snapshotting/__init__.py
@@ -0,0 +1 @@
+
diff --git a/snapshotting/html_snapshotting.py b/snapshotting/html_snapshotting.py
new file mode 100644
index 0000000..bee2bab
--- /dev/null
+++ b/snapshotting/html_snapshotting.py
@@ -0,0 +1,13 @@
+import requests
+from colorama import Fore, Style
+
+def save_page_as_html(url, filename):
+    try:
+        print(Fore.GREEN + "Getting web page's content" + Style.RESET_ALL)
+        response = requests.get(url)
+        print(Fore.GREEN + "Creating .HTML file" + Style.RESET_ALL)
+        with open(filename, 'w', encoding='utf-8') as file:
+            file.write(response.text)
+        print(Fore.GREEN + ".HTML snapshot was successfully created" + Style.RESET_ALL)
+    except Exception as e:
+        print(Fore.RED + f"Error: {e}" + Style.RESET_ALL)
diff --git a/snapshotting/screen_snapshotting.py b/snapshotting/screen_snapshotting.py
new file mode 100644
index 0000000..c390f7a
--- /dev/null
+++ b/snapshotting/screen_snapshotting.py
@@ -0,0 +1,64 @@
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service as ChromeService
+from selenium.webdriver.firefox.service import Service as FirefoxService
+from selenium.webdriver.edge.service import Service as EdgeService
+from webdriver_manager.chrome import ChromeDriverManager
+from webdriver_manager.firefox import GeckoDriverManager
+from webdriver_manager.microsoft import EdgeChromiumDriverManager
+from selenium.webdriver.chrome.options import Options as ChromeOptions
+from selenium.webdriver.firefox.options import Options as FirefoxOptions
+from selenium.webdriver.edge.options import Options as EdgeOptions
+from colorama import Fore, Style
+import sys
+sys.path.append('snapshotting')
+
+def setup_driver(browser_name):
+    if browser_name == "chrome":
+        service = ChromeService(ChromeDriverManager().install())
+        options = ChromeOptions()
+        options.add_argument('--headless=new')
+        driver = webdriver.Chrome(service=service, options=options)
+    elif browser_name == "firefox":
+        service = FirefoxService(GeckoDriverManager().install())
+        options = FirefoxOptions()
+        options.add_argument('-headless')
+        driver = webdriver.Firefox(service=service, options=options)
+    elif browser_name == "edge":
+        service = EdgeService(EdgeChromiumDriverManager().install())
+        options = EdgeOptions()
+        options.add_argument('--headless=new')
+        driver = webdriver.Edge(service=service, options=options)
+    elif browser_name == "safari":
+        options = webdriver.SafariOptions()
+        driver = webdriver.Safari(options=options)
+    elif browser_name == "opera":
+        from config_processing import read_config
+        config_values = read_config()
+        service = ChromeService(ChromeDriverManager().install())
+        options = ChromeOptions()
+        options.add_argument('--headless=new')
+        options.binary_location = config_values['opera_browser_path']
+        driver = webdriver.Chrome(service=service, options=options)
+    else:
+        raise ValueError("Unsupported browser")
+    driver.set_window_size(1920, 1080)
+    return driver
+
+def take_screenshot(browser_name, url, screenshot_path):
+    try:
+        print(Fore.GREEN + f"Starting {browser_name} browser in headless mode..." + Style.RESET_ALL)
+        driver = setup_driver(browser_name)
+        print(Fore.GREEN + f"Going to {url}" + Style.RESET_ALL)
+        driver.get(url)
+        print(Fore.GREEN + "Taking screenshot..." + Style.RESET_ALL)
+        driver.save_screenshot(screenshot_path)
+        driver.quit()
+        print(Fore.GREEN + "Screenshot successfully saved in report folder" + Style.RESET_ALL)
+    except Exception as e:
+        print(Fore.RED + f"Error appeared: {str(e)}" + Style.RESET_ALL)
+        if 'driver' in locals():
+            driver.quit()
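A hedged sketch of how these new modules could be driven by the S/P snapshotting modes collected in the CLI; the dispatcher function and output file names are hypothetical, while `take_screenshot(browser_name, url, screenshot_path)` and `save_page_as_html(url, filename)` are the functions added above:

```python
# Hypothetical wiring between the CLI's snapshotting_flag and the new modules;
# the run_snapshotting() name and output paths are illustrative only.
import sys
sys.path.append('snapshotting')

from screen_snapshotting import take_screenshot
from html_snapshotting import save_page_as_html

def run_snapshotting(snapshotting_flag, url, report_folder, installed_browser):
    # installed_browser comes from the new [SNAPSHOTTING] config section
    if snapshotting_flag.lower() == 's':
        take_screenshot(installed_browser, url, f"{report_folder}//main_page.png")
    elif snapshotting_flag.lower() == 'p':
        save_page_as_html(url, f"{report_folder}//main_page.html")
    # 'w' (Wayback Machine) is not supported at the moment; 'n' skips snapshotting
```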