In [None]:
import hashlib
import base64
import pandas as pd

def derive_key(password: str, length: int) -> bytes:
    """Build a key of ``length`` bytes by repeating the SHA-256 digest of ``password``.

    Args:
        password: Text that is encoded with ``str.encode()`` and hashed.
        length: Number of key bytes to produce.

    Returns:
        The digest repeated as many whole times as fit into ``length``,
        followed by a prefix of the digest covering the remaining bytes.
    """
    digest = hashlib.sha256(password.encode()).digest()
    # Split the requested length into whole digest copies plus a partial tail.
    whole_copies, tail_len = divmod(length, len(digest))
    return digest * whole_copies + digest[:tail_len]

def decrypt(ciphertext_b64: str, password: str) -> str:
    """Decode a Base64 ciphertext and XOR it against a password-derived key.

    Args:
        ciphertext_b64: Base64-encoded ciphertext.
        password: Password handed to ``derive_key`` to build the key bytes.

    Returns:
        The XOR-ed bytes decoded to text with ``bytes.decode()``.
    """
    cipher_bytes = base64.b64decode(ciphertext_b64)
    # The key is generated to be exactly as long as the ciphertext.
    keystream = derive_key(password, len(cipher_bytes))
    plain_bytes = bytes([c ^ k for c, k in zip(cipher_bytes, keystream)])
    return plain_bytes.decode()

def decrypt_string(text: str, password: str) -> str:
    """Decrypt a single cell value, passing missing values through unchanged.

    Args:
        text: Cell value; anything ``pd.isna`` reports as missing is returned as-is.
        password: Password forwarded to ``decrypt``.

    Returns:
        ``text`` itself when it is missing, otherwise the decryption of ``str(text)``.
    """
    return text if pd.isna(text) else decrypt(str(text), password)

In [None]:
# Load the encrypted dataset.
encrypted_df = pd.read_csv('./data/browsecomp-zh-encrypted.csv')

# The 'canary' value of the first row is used as the password for every cell.
PASSWORD = encrypted_df['canary'].iloc[0]
print(f"使用密码: {PASSWORD}")

# Work on a copy so the encrypted frame stays available unchanged.
decrypted_df = encrypted_df.copy()
for col in decrypted_df.columns:
    # The 'canary' column is left as loaded; every other column is decrypted.
    if col == 'canary':
        continue
    decrypted_df[col] = decrypted_df[col].apply(
        lambda cell: decrypt_string(cell, PASSWORD)
    )

# Report the row count and show the third row as a spot check.
print(f"数据行数: {len(decrypted_df)}")
print(decrypted_df.iloc[2].to_dict())