Skip to content

Commit

Permalink
Fix for issue #43
Browse files Browse the repository at this point in the history
Now allows parsing of base64-encoded strings when working with embedded JSON structures
  • Loading branch information
battleoverflow committed Mar 15, 2023
1 parent 10903fe commit 058a66d
Showing 1 changed file with 33 additions and 1 deletion.
34 changes: 33 additions & 1 deletion iocextract.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,14 +397,15 @@ def found_ws(s):

yield url

def extract_encoded_urls(data, refang=False, strip=False, delimiter=None):
def extract_encoded_urls(data, refang=False, strip=False, delimiter=None, parse_json=False):
"""
Extract only encoded URLs!
:param data: Input text
:param bool refang: Refang output
:param bool strip: Strip possible garbage from the end of URLs
:param bool delimiter: Continue extracting even after whitespace is detected
:param bool parse_json: Allows you to recursively parse JSON data to locate base64 strings
:rtype: Iterator[:class:`str`]
"""

Expand Down Expand Up @@ -448,6 +449,37 @@ def extract_encoded_urls(data, refang=False, strip=False, delimiter=None):

yield url

def validate_base64(b64_data):
    """
    Report whether the input round-trips cleanly through Base64.
    :param b64_data: Candidate Base64 value, either ``str`` or ``bytes``
    :rtype: bool
    """

    # Only text and raw bytes can be checked; anything else is rejected.
    if not isinstance(b64_data, (str, bytes)):
        return False

    try:
        if isinstance(b64_data, str):
            candidate = bytes(b64_data, "ascii")
        else:
            candidate = b64_data

        # Decoding and re-encoding must reproduce the input exactly;
        # otherwise the value is not canonical Base64.
        round_tripped = base64.b64encode(base64.b64decode(candidate))
        return round_tripped == candidate
    except Exception:
        # Non-ASCII text, bad padding, or any other failure means "not Base64".
        return False

if parse_json:
try:
try:
for json_data in json.loads(data):
for _, value in json_data.items():
if validate_base64(value):
yield base64.b64decode(value).decode("ascii")
except json.decoder.JSONDecodeError:
pass
except AttributeError:
pass

def extract_ips(data, refang=False):
"""
Extract IP addresses!
Expand Down

0 comments on commit 058a66d

Please sign in to comment.