In [3]:
import os
import re2 as re

## file name sanitizer

In [6]:
# Three hostile filenames the sanitizer cells below must neutralize.
# Each assignment overwrites the previous one, so only the last value is
# carried into the following cells.

# 1) Path traversal
# example: path = os.path.join(UPLOAD_DIR, filename) without validation
# os.path.join -> "/var/app/uploads/../../etc/passwd.mp3"
# This normalizes to "/var/etc/passwd.mp3": each ".." climbs one directory, so the path lands outside uploads
# Once outside the upload directory, the surrounding code may read, write, or overwrite sensitive files
filename = "../../etc/passwd.mp3"

# 2) Null-byte injection
# \x00 is a string terminator in many C/C++ APIs
# Extension bypass: validation sees .mp3, but the OS/tool executes evil.exe
filename = "evil.exe\x00.mp3"


# 3) HTML/script injection
# The filename itself can be treated as HTML/JS by the browser if it is inserted into a web page without escaping.
# e.g. <p>Uploaded file: test<script>alert(1)</script>.mp3</p>
# The browser sees <script>alert(1)</script> as a real script tag and runs it.
filename = "test<script>.mp3"

In [7]:
# Keep only the final path segment; any directory components are discarded.
filename = os.path.split(filename)[-1]
print(filename)

test<script>.mp3


In [8]:
# Remove embedded NUL characters: split on every "\x00" and glue the pieces back together.
safe_name = "".join(filename.split("\x00"))
print(safe_name)

test<script>.mp3


In [9]:
# path traversal patterns
# Separators are removed first. Stripping ".." before them would let an input
# such as ".\." (or "./.") collapse back into ".." once the separator is deleted.
# After the separators are gone, one pass over ".." is enough: a run of dots is
# reduced to at most a single ".", so no new ".." can appear.
safe_name = safe_name.replace("/", "").replace("\\", "").replace("..", "")
print(safe_name)

test<script>.mp3


In [10]:
# Whitelist pass: ASCII letters, digits, period, underscore and dash survive;
# every other character (spaces, angle brackets, quotes, ...) is swapped for "_".
# e.g. 'sample 3.mp3' -> 'sample_3.mp3' because the space is not allowed
disallowed_char = r"[^a-zA-Z0-9._-]"
safe_name = re.sub(disallowed_char, "_", safe_name)
print(safe_name)

test_script_.mp3


In [11]:
# Squeeze every run of consecutive underscores down to a single "_".
underscore_run = r"_+"
safe_name = re.sub(underscore_run, "_", safe_name)
print(safe_name)

test_script_.mp3


In [12]:
# Trim underscores and periods from both ends; interior ones are kept.
# Note: a name made up only of those characters ends up as "".
safe_name = safe_name.lstrip("_.").rstrip("_.")
print(safe_name)

test_script_.mp3


## sanitize search query

In [None]:
# SQL injection vulnerability (negative example)
# "fake_name';" closes the string literal and terminates the first statement
# "DROP TABLE users;" is injected as a second statement
# "--" starts a SQL comment, so the trailing "' AND active = true;"
# becomes a comment and is ignored
user_input = "fake_name'; DROP TABLE users; --"
sql = f"SELECT * FROM users WHERE email = '{user_input}' AND active = true;"  # negative example: never build SQL with an f-string; use parameterized queries or an ORM
print(sql)

SELECT * FROM users WHERE email = 'fake_name'; DROP TABLE users; --' AND active = true;


In [None]:
# Strip angle brackets, quotes, semicolons, parentheses, braces and backslashes from the input.
# With the quote and semicolons gone, 'fake_name DROP TABLE users --' stays inside
# the string literal and is only the value compared in the WHERE clause.
# The query is still assembled with an f-string here; parameterized queries or an
# ORM remain the preferred fix.
stripped_chars = r"[<>\"';(){}\\]"
sanitized = re.sub(stripped_chars, "", user_input)
sql = f"SELECT * FROM users WHERE email = '{sanitized}';"
print(sql)

SELECT * FROM users WHERE email = 'fake_name DROP TABLE users --';
