In [142]:
# -*- coding: utf-8 -*-
from Crypto.Cipher import DES,AES,ARC4
from Crypto.Random import get_random_bytes,random
import hashlib
import random
import string
from Crypto import Random
from Crypto.Hash import SHA256,HMAC
from Crypto.Util.Padding import pad, unpad

In [143]:
# Build a random lowercase string whose length varies between two bounds.
def generate_random_string(min_length, max_length):
    """Return a random string made of ASCII lowercase letters.

    Args:
        min_length: Smallest allowed length (inclusive).
        max_length: Largest allowed length (inclusive).

    Returns:
        A string whose length is picked with ``random.randint(min_length,
        max_length)`` and whose characters are each picked with
        ``random.choice`` from ``string.ascii_lowercase``.

    Raises:
        ValueError: Propagated from ``random.randint`` when
            ``min_length > max_length``.
    """
    target_length = random.randint(min_length, max_length)
    alphabet = string.ascii_lowercase
    picked = []
    # One random.choice call per character, in order.
    while len(picked) < target_length:
        picked.append(random.choice(alphabet))
    return ''.join(picked)

In [144]:
# Compute the hash value that corresponds to a keyword.
def generate_hash(keyword):
    """Return the SHA-256 digest of ``keyword`` as a hexadecimal string.

    Args:
        keyword: Text to hash; it is converted to bytes with ``str.encode()``
            (default encoding) before hashing.

    Returns:
        The hex digest string produced by ``hashlib.sha256``.
    """
    hasher = hashlib.sha256()
    hasher.update(keyword.encode())
    return hasher.hexdigest()

In [145]:
# Derive the trapdoor that corresponds to an existing keyword.
def generate_trapdoor(keyword):
    """Return the trapdoor for ``keyword`` as a list of single characters.

    Args:
        keyword: Sequence of characters (typically a string).

    Returns:
        A list with one entry per element of ``keyword``: the first character
        of ``generate_hash`` applied to that element.
    """
    return [generate_hash(symbol)[0] for symbol in keyword]

In [146]:
# Encrypt a document (a list of words) with per-word trapdoors.
def encrypt_document(document, trapdoors):
    """Shift every character of every word by its trapdoor key character.

    Args:
        document: Sequence of words; each word is a sequence of characters.
        trapdoors: Sequence aligned with ``document``; ``trapdoors[i]`` is the
            key (sequence of characters) for ``document[i]``.

    Returns:
        A list of strings. Character ``j`` of word ``i`` becomes
        ``chr(ord(char) + ord(key[j % len(key)]))`` with ``key = trapdoors[i]``,
        so a key shorter than its word is reused cyclically.
    """
    def shift_forward(word_pos, char_pos, symbol):
        # Read the plaintext code point first, then look up the key character.
        plain_code = ord(symbol)
        key = trapdoors[word_pos]
        return chr(plain_code + ord(key[char_pos % len(key)]))

    return [
        ''.join(
            shift_forward(word_pos, char_pos, symbol)
            for char_pos, symbol in enumerate(word)
        )
        for word_pos, word in enumerate(document)
    ]

In [148]:
# Decrypt a document (a list of encrypted words) with per-word trapdoors.
def decrypt_document(document, trapdoors):
    """Undo the character shift applied by the matching encryption step.

    Args:
        document: Sequence of encrypted words; each word is a sequence of
            characters.
        trapdoors: Sequence aligned with ``document``; ``trapdoors[i]`` is the
            key (sequence of characters) for ``document[i]``.

    Returns:
        A list of strings. Character ``j`` of word ``i`` becomes
        ``chr(ord(char) - ord(key[j % len(key)]))`` with ``key = trapdoors[i]``,
        so a key shorter than its word is reused cyclically.
    """
    return [
        ''.join(
            chr(
                ord(symbol)
                - ord(trapdoors[word_pos][char_pos % len(trapdoors[word_pos])])
            )
            for char_pos, symbol in enumerate(word)
        )
        for word_pos, word in enumerate(document)
    ]

In [147]:
# Look up a keyword in the index and return the documents that contain it.
def retrieve_documents(keyword, index):
    """Return the ids of documents that contain every character of ``keyword``.

    Args:
        keyword: Sequence of characters to search for (typically a string).
        index: Mapping from a character to an iterable of document ids
            containing that character (ids may repeat).

    Returns:
        A list of document ids present in the posting list of *every*
        character of ``keyword``. The list has no duplicates and follows the
        order of first appearance in the first character's posting list, so
        the result is deterministic. An empty list is returned when
        ``keyword`` is empty or when any of its characters is missing from
        ``index``.
    """
    posting_lists = []
    for char in keyword:
        if char not in index:
            # A character that no document contains means no document can
            # contain the whole keyword; skipping it would yield false matches.
            return []
        posting_lists.append(index[char])

    if not posting_lists:
        return []

    # Deduplicate the first posting list while keeping its order, then keep
    # only the ids that also occur in all remaining posting lists.
    ordered_first = list(dict.fromkeys(posting_lists[0]))
    common = set(ordered_first).intersection(*posting_lists[1:])
    return [doc_id for doc_id in ordered_first if doc_id in common]

In [149]:
# Generate the demo document: a list of random lowercase words of varying length.
# The generator is seeded so that Restart & Run All reproduces the same words.
RANDOM_SEED = 42
NUM_WORDS = 10
MIN_WORD_LENGTH = 5   # inclusive lower bound on word length
MAX_WORD_LENGTH = 10  # inclusive upper bound on word length

random.seed(RANDOM_SEED)
document = [
    generate_random_string(MIN_WORD_LENGTH, MAX_WORD_LENGTH)
    for _ in range(NUM_WORDS)
]

In [150]:
# Show the plaintext words: heading on one line, the list on the next.
print("原始文档:", document, sep="\n")

原始文档:
['tvfuxpg', 'ruvbfxic', 'xhsjdblhoz', 'ehgctmd', 'pvjftaiclj', 'dzvata', 'gdruvo', 'kvmbizufs', 'szkjuifbo', 'zonvypg']


In [151]:
# Build one trapdoor per word of the document, in the same order as the words.
trapdoors = [generate_trapdoor(word) for word in document]

In [152]:
# Encrypt every word of the document with its matching trapdoor.
encrypted_document = encrypt_document(document=document, trapdoors=trapdoors)

In [153]:
# Build the lookup index: each encrypted character maps to the positions (in
# encrypted_document) of the words containing it. A position is appended once
# per occurrence, so it can repeat within a list.
index = {}
for word_pos, encrypted_word in enumerate(encrypted_document):
    for encrypted_char in encrypted_word:
        index.setdefault(encrypted_char, []).append(word_pos)

In [154]:
# Search for the first encrypted character of the first word and collect the
# positions of the words that contain it.
query = encrypted_document[0][0]
retrieved_documents = retrieve_documents(keyword=query, index=index)

In [155]:
# Announce the plaintext character being searched for. The query character and
# its key character are wrapped in lists so decrypt_document receives the same
# list-of-words form it is given elsewhere in this notebook.
query_plaintext = decrypt_document([query], [[trapdoors[0][0]]])[0]
print("要查询包含 %s 的文档" % query_plaintext)

要查询包含 t 的文档


In [156]:
# Decrypt each retrieved word with its own trapdoor; decrypt_document works on
# lists of words, so every word is wrapped in a one-element list and unwrapped.
decrypted_documents = [
    decrypt_document([encrypted_document[word_pos]], [trapdoors[word_pos]])[0]
    for word_pos in retrieved_documents
]

In [157]:
# Print the original words, the plaintext query character, and the decrypted
# search results.
print("原始文档:", document, sep="\n")
searched_char = decrypt_document([query], [[trapdoors[0][0]]])[0]
print("查询包含 %s 的文档" % searched_char)
print("查询到的解密后的文档:", decrypted_documents, sep="\n")

原始文档:
['tvfuxpg', 'ruvbfxic', 'xhsjdblhoz', 'ehgctmd', 'pvjftaiclj', 'dzvata', 'gdruvo', 'kvmbizufs', 'szkjuifbo', 'zonvypg']
查询包含 t 的文档
查询到的解密后的文档:
['tvfuxpg', 'ehgctmd', 'pvjftaiclj', 'dzvata']
