### 这是一个CGI (Common Gateway Interface) URL解码函数，用于将编码后的URL字符串转换回原始字符串。

主要处理两种情况：
- 将+符号转换为空格
- 将%xx格式的十六进制编码转换为对应的ASCII字符


### URL编码是将URL中的非ASCII字符和特殊字符转换为可以安全传输的格式。

### 为什么需要URL编码？
- URL只能使用ASCII字符集
- 某些字符在URL中有特殊含义（如 /, ?, &, = 等）
- 需要传输非英文字符（如中文、日文等）

In [None]:
def cgi_decode(s: str) -> str:
    """Decode the CGI-encoded string `s`:
       * replace '+' by ' '
       * replace "%xx" by the character with hex number xx.
       Return the decoded string.  Raise `ValueError` for invalid inputs."""

    # Mapping of hex digits to their integer values
    hex_values = {
        '0': 0, '1': 1, '2': 2, '3': 3, '4': 4,
        '5': 5, '6': 6, '7': 7, '8': 8, '9': 9,
        'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15,
        'A': 10, 'B': 11, 'C': 12, 'D': 13, 'E': 14, 'F': 15,
    }

    t = ""
    i = 0
    # decode the string
        # 逐字符遍历输入字符串
        # 遇到+转换为空格
        # 遇到%则读取后续两个字符作为十六进制数
        # digit_high和digit_low分别代表十六进制数的高位和低位
    while i < len(s):
        c = s[i]
        if c == '+':
            t += ' '
        elif c == '%':
            digit_high, digit_low = s[i + 1], s[i + 2]
            i += 2
            if digit_high in hex_values and digit_low in hex_values:
                v = hex_values[digit_high] * 16 + hex_values[digit_low]
                t += chr(v)
            else:
                raise ValueError("Invalid encoding")
        else:
            t += c
        i += 1
    return t

In [3]:
cgi_decode("hello+world") # "hello world"

'hello world'

In [4]:
import sys
# First code block - Trace function
def traceit(frame, event, arg):
   line = frame.f_lineno 
   variables = frame.f_locals
   function_name = frame.f_code.co_name
   print(f"{function_name}: {event} {line} {variables}")
   return traceit

# Second code block - Trace wrapper
def trace_cgi_decode(s):
   sys.settrace(traceit)
   ret = cgi_decode(s)  
   sys.settrace(None)
   return ret

In [5]:
trace_cgi_decode("hello+world") # "hello world"

cgi_decode: call 1 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 11 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 11 {'s': 'hello+w

'hello world'

In [6]:
from fuzzingbook.Coverage import Coverage
with Coverage() as cov:
    trace_cgi_decode("hello+world")

cgi_decode: call 1 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 9 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 10 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 11 {'s': 'hello+world'}
cgi_decode: line 8 {'s': 'hello+world'}
cgi_decode: line 11 {'s': 'hello+w

In [7]:
cov.trace()

[('trace_cgi_decode', 12), ('trace_cgi_decode', 13), ('trace_cgi_decode', 14)]