In [1]:
import hashlib
import json
import os
import time
import uuid
import argon2
import cityhash

class BloomFilter:
    def __init__(self):
        self.filter_size = 256
        self.hash_functions = 3
        self.filters = {}  # 存储普通布隆过滤器
        self.counter_filter = [0] * self.filter_size  # 计数布隆过滤器
        self.has_filter = False  # 标志位
        self.current_filter_index = 0

    def get_hash_indexes(self, uuid):
        indexes = []
        for i in range(self.hash_functions):
            hash_value = hashlib.sha256((uuid + str(i)).encode()).hexdigest()
            indexes.append(int(hash_value, 16) % self.filter_size)
        return indexes

    def create_new_filter(self):
        filter_array = [0] * self.filter_size
        self.filters[self.current_filter_index] = filter_array
        self.has_filter = True
        self.current_filter_index += 1
        return self.current_filter_index - 1

    def generate_key_hash(self, key):
        # 使用 Argon2 生成密钥哈希
        ph = argon2.PasswordHasher()
        derived_key1 = ph.hash(key)  # 使用 Argon2 生成第一个派生密钥

        # 使用 CityHash 生成第二个派生密钥
        derived_key2 = cityhash.CityHash64(derived_key1.encode())  # 使用 CityHash 生成第二个派生密钥
        return hex(derived_key2)  # 返回十六进制字符串

    def save_key_value_pairs(self, key_hash, filter_index, uuid):
        # 保存键值对文件1
        key_value_path1 = './data/key_bloom_map.json'
        key_map = {}
        if os.path.exists(key_value_path1):
            with open(key_value_path1, 'r', encoding='utf-8') as f:
                key_map = json.load(f)
        key_map[key_hash] = filter_index
        with open(key_value_path1, 'w', encoding='utf-8') as f:
            json.dump(key_map, f, ensure_ascii=False, indent=2)

        # 保存键值对文件2
        key_value_path2 = './data/bloom_uuid_map.json'
        uuid_map = {}
        if os.path.exists(key_value_path2):
            with open(key_value_path2, 'r', encoding='utf-8') as f:
                uuid_map = json.load(f)

        if filter_index not in uuid_map:
            uuid_map[filter_index] = {}

        indexes = self.get_hash_indexes(uuid)
        for index in indexes:
            uuid_map[filter_index][index] = uuid

        with open(key_value_path2, 'w', encoding='utf-8') as f:
            json.dump(uuid_map, f, ensure_ascii=False, indent=2)

    def load_key_value_pairs(self):
        key_value_path1 = './data/key_bloom_map.json'
        key_value_path2 = './data/bloom_uuid_map.json'

        key_map = {}
        uuid_map = {}

        if os.path.exists(key_value_path1):
            with open(key_value_path1, 'r', encoding='utf-8') as f:
                key_map = json.load(f)

        if os.path.exists(key_value_path2):
            with open(key_value_path2, 'r', encoding='utf-8') as f:
                uuid_map = json.load(f)

        return key_map, uuid_map

    def find_existing_uuid(self, uuid, indexes):
        _, uuid_map =  self.load_key_value_pairs()

        for filter_index, filter_locations in uuid_map.items():
            for index in indexes:
                if filter_locations.get(index) == uuid:
                    return True
        return False

    def check_uuid(self, uuid):
        if not self.has_filter:
            return False

        indexes = self.get_hash_indexes(uuid)
        return self.find_existing_uuid(uuid, indexes)

    def add_uuid(self, uuid, key):
        key_hash =  self.generate_key_hash(key)
        filter_index = self.create_new_filter()
        indexes = self.get_hash_indexes(uuid)

        # 更新布隆过滤器和计数器
        filter_array = self.filters[filter_index]
        for index in indexes:
            filter_array[index] = 1
            self.counter_filter[index] += 1

        # 保存键值对文件
        self.save_key_value_pairs(key_hash, filter_index, uuid)

        return True

    def verify_uuid_and_key(self, uuid, key):
        if not self.has_filter:
            return {'success': False, 'message': '未检索到您的身份，请先注册'}

        key_hash = self.generate_key_hash(key)
        filter_data = self.load_key_value_pairs()
        filter_index = filter_data[0].get(key_hash)

        if filter_index is None:
            print("检索失败")

        filter_array = self.filters[filter_index]
        indexes = self.get_hash_indexes(uuid)

        if all(filter_array[index] == 1 for index in indexes):
            print("身份认证成功")

        if all(self.counter_filter[index] == 0 for index in indexes):
            print("检索失败")

    def reset(self):
        self.filters.clear()
        self.counter_filter = [0] * self.filter_size
        self.has_filter = False
        self.current_filter_index = 0
        # 重置键值对文件为 {}
        key_value_path1 = './data/key_bloom_map.json'
        key_value_path2 = './data/bloom_uuid_map.json'

        with open(key_value_path1, 'w', encoding='utf-8') as f:
            json.dump({}, f, ensure_ascii=False, indent=2)  # 写入空字典

        with open(key_value_path2, 'w', encoding='utf-8') as f:
            json.dump({}, f, ensure_ascii=False, indent=2)  # 写入空字典

class UUIDKeyGenerator:
    @staticmethod
    def generate_uuid_key_pairs(count):
        pairs = []
        for _ in range(count):
            uuid_value = str(uuid.uuid4())
            key_value = os.urandom(8).hex()  # 生成64位的key
            pairs.append((uuid_value, key_value))
        return pairs


if __name__ == "__main__":
    bloom_filter = BloomFilter()
    
    exist_user_count = [0, 100, 1000, 10000, 100000]
    time_consume = []
    for num in exist_user_count:
        if num:
            bloom_filter.reset()
            for uuid_value, key_value in UUIDKeyGenerator.generate_uuid_key_pairs(num):
                bloom_filter.add_uuid(uuid_value, key_value) 
            start_time = time.perf_counter()
            for uuid_value, key_value in UUIDKeyGenerator.generate_uuid_key_pairs(1):
                bloom_filter.add_uuid(uuid_value, key_value)
            end_time = time.perf_counter()
            time_consume.append((end_time - start_time)*1000)
            print(time_consume)
        else:
            bloom_filter.reset()
            start_time = time.perf_counter()
            for uuid_value, key_value in UUIDKeyGenerator.generate_uuid_key_pairs(1):
                bloom_filter.add_uuid(uuid_value, key_value)
            end_time = time.perf_counter()
            time_consume.append((end_time - start_time)*1000)
            print(time_consume)
    print("耗时测试结果（单位: ms）：" + time_consume)

[30.268199974671006]
[30.268199974671006, 27.432700037024915]
[30.268199974671006, 27.432700037024915, 90.33140004612505]
[30.268199974671006, 27.432700037024915, 90.33140004612505, 66.59469997975975]
[30.268199974671006, 27.432700037024915, 90.33140004612505, 66.59469997975975, 439.02910000178963]


TypeError: can only concatenate str (not "list") to str