In [1]:
import requests
from bs4 import BeautifulSoup
import re
import tldextract
import hashlib
import ipaddress

# URL of the cyber threat report to scrape for indicators.
url = "https://www.secureworks.com/blog/opsec-mistakes-reveal-cobalt-mirage-threat-actors"

# Fetch the page. The timeout (in seconds) keeps the cell from hanging forever
# on an unresponsive server, and raise_for_status() turns an HTTP 4xx/5xx
# response into an exception so an error page is never parsed for indicators.
response = requests.get(url, timeout=30)
response.raise_for_status()
content = response.content

# Parse the HTML and reduce it to its text content.
soup = BeautifulSoup(content, 'html.parser')
text = soup.get_text()

# Regular expressions for the indicator types of interest.
# ip_regex only checks the dotted-quad shape; it still accepts out-of-range
# octets such as 999.1.1.1.
ip_regex = r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b'
# The single capture group makes re.findall return the URL string itself.
url_regex = r'(https?://\S+)'
# Exactly 64 hex characters (the length of a SHA-256 hex digest). The
# lookarounds reject runs that continue with more hex digits on either side,
# so a 64-character slice of a longer hex string is not reported as a hash.
hash_regex = r'(?<![A-Fa-f0-9])[A-Fa-f0-9]{64}(?![A-Fa-f0-9])'

# Collect every candidate match from the page text, in document order.
ip_addresses = re.findall(ip_regex, text)
urls = re.findall(url_regex, text)
hashes = re.findall(hash_regex, text)

# Keep only hash candidates that decode as hexadecimal. bytes.fromhex() is the
# actual check; hashing the decoded bytes added nothing, so that step is gone.
valid_hashes = []
for hash_value in hashes:
    try:
        bytes.fromhex(hash_value)
    except ValueError:
        # Not valid hex: drop this candidate and carry on with the rest.
        continue
    valid_hashes.append(hash_value)

# Keep only IP candidates that the ipaddress module accepts.
valid_ips = []
for ip in ip_addresses:
    try:
        ipaddress.ip_address(ip)
    except ValueError:
        # Not a valid address (e.g. an octet above 255): drop it.
        continue
    valid_ips.append(ip)

# Write the extracted IP addresses, URLs, and hashes to a text file, one
# indicator per line under a heading for each type.
# The encoding is set explicitly because open() otherwise uses a
# platform-dependent default, and matched URLs may contain non-ASCII characters.
with open('extracted_data.txt', 'w', encoding='utf-8') as f:
    f.write('IP Addresses:\n')
    f.writelines(f'{address}\n' for address in valid_ips)

    f.write('\nURLs:\n')
    # The generator variable is deliberately not named `url`: a plain
    # `for url in urls` loop here would overwrite the report URL held in the
    # module-level `url` variable.
    f.writelines(f'{found_url}\n' for found_url in urls)

    f.write('\nHashes:\n')
    f.writelines(f'{digest}\n' for digest in valid_hashes)


In [2]:
import pandas as pd

# Read every HTML table on the report page. The last one is used as the
# indicator table -- NOTE(review): this assumes the indicator table is the
# final table on the page and has "Indicator" and "Type" columns; confirm
# against the live page.
tables = pd.read_html("https://www.secureworks.com/blog/opsec-mistakes-reveal-cobalt-mirage-threat-actors")
rankings = tables[-1]

# Fail early with a readable message if the table is not the expected one.
missing_columns = {"Type", "Indicator"} - set(rankings.columns)
if missing_columns:
    raise KeyError(f"last table on the page lacks column(s): {sorted(missing_columns)}")

# mergesort is a stable sort, so rows that share a Type keep the order they
# have in the report and the output file is the same on every run.
rankings = rankings.sort_values(by="Type", kind="mergesort")

# (heading, row mask) for each section of the output file, in output order.
# na=False makes a missing Type count as "not a hash" instead of producing a
# mask containing NA, which cannot be used for boolean indexing.
sections = [
    ("Domain name:", rankings["Type"] == "Domain name"),
    ("IP address:", rankings["Type"] == "IP address"),
    ("Hashes:", rankings["Type"].str.contains("hash", case=False, na=False)),
]

with open("extracted_table.txt", "w", encoding="utf-8") as f_out:
    blocks = []
    for heading, mask in sections:
        # Drop missing indicators and coerce to str: "\n".join() raises a
        # TypeError on NaN or any other non-string value.
        indicators = rankings.loc[mask, "Indicator"].dropna().astype(str)
        blocks.append(heading + "\n" + "\n".join(indicators))
    # Sections are separated by one blank line, as in the original layout.
    f_out.write("\n\n".join(blocks))
