In [1]:
import pickle
import random
import hashlib
import decimal
import fractions
import uuid
import platform
import sys
import datetime

In [2]:
def describe_environment():
    """Print the host operating system and the running Python version.

    Writes two lines to standard output: the OS name and release as reported
    by the ``platform`` module, then the full ``sys.version`` string.
    Returns ``None``.
    """
    report_lines = (
        ("操作系统:", platform.system(), platform.release()),
        ("Python 版本:", sys.version),
    )
    for fields in report_lines:
        # print() joins the fields with single spaces, one report line each.
        print(*fields)

In [3]:
def hash_of(obj, protocol=pickle.HIGHEST_PROTOCOL):
    """Return the SHA-256 hex digest of the pickle byte stream of ``obj``.

    Args:
        obj: Object to serialize with ``pickle.dumps``.
        protocol: Pickle protocol number handed to ``pickle.dumps``. Defaults
            to ``pickle.HIGHEST_PROTOCOL``, whose value depends on the running
            interpreter. Pass a fixed number supported by every interpreter
            involved (e.g. ``4``) when digests produced by different Python
            versions are to be compared with each other.

    Returns:
        str: The 64-character hexadecimal digest on success. If serialization
        raises, the string ``"<PickleError: ...>"`` containing the exception
        message is returned instead; no exception propagates to the caller.
    """
    try:
        payload = pickle.dumps(obj, protocol=protocol)
        return hashlib.sha256(payload).hexdigest()
    except Exception as e:
        # Deliberately best-effort: the failure is reported as a value so a
        # fuzzing loop keeps running. Callers that compare two results should
        # be aware that two identical error strings also compare equal.
        return f"<PickleError: {e}>"

def is_hashable(obj):
    """Tell whether calling ``hash()`` on ``obj`` succeeds.

    Args:
        obj: Any object.

    Returns:
        bool: ``False`` if ``hash(obj)`` raises ``TypeError``, ``True`` if it
        completes. Any other exception raised by ``hash`` propagates.
    """
    try:
        hash(obj)
    except TypeError:
        return False
    else:
        return True

def generate_fuzzy_hashable_object(depth=0):
    """Build one random hashable object, possibly a nested container.

    Args:
        depth: Current nesting level. Containers generate their children with
            ``depth + 1``; once ``depth`` exceeds 3 only scalar values are
            produced, which bounds the recursion.

    Returns:
        One of: ``None``, ``bool``, ``int``, ``float``, ``str``, ``Decimal``,
        ``Fraction``, ``UUID``, ``date``, ``datetime``, ``complex``, or (only
        while ``depth <= 3``) a ``tuple`` / ``frozenset`` holding 0-3
        recursively generated objects. ``list`` and ``dict`` are never
        produced because they are not hashable.
    """
    if depth > 3:
        # Recursion limit reached: fall back to simple hashable values.
        # All candidates are built eagerly, in this order, before one is drawn.
        leaf_candidates = [None, True, False]
        leaf_candidates.append(random.randint(-9999, 9999))
        leaf_candidates.append(round(random.uniform(-9999, 9999), 4))
        leaf_candidates.append(str(uuid.uuid4()))
        leaf_candidates.append(decimal.Decimal(str(round(random.uniform(0, 100), 4))))
        leaf_numerator = random.randint(1, 10)
        leaf_denominator = random.randint(1, 10)
        leaf_candidates.append(fractions.Fraction(leaf_numerator, leaf_denominator))
        leaf_candidates.append(uuid.uuid4())
        leaf_candidates.append(datetime.date.today())
        leaf_candidates.append(datetime.datetime.now())
        leaf_real = random.uniform(-100, 100)
        leaf_imag = random.uniform(-100, 100)
        leaf_candidates.append(complex(leaf_real, leaf_imag))
        return random.choice(leaf_candidates)

    def _children():
        # The child count is drawn first, then each child is generated in turn.
        count = random.randint(0, 3)
        return [generate_fuzzy_hashable_object(depth + 1) for _ in range(count)]

    def _make_int():
        return random.randint(-1000, 1000)

    def _make_float():
        return random.uniform(-1e6, 1e6)

    def _make_flag_or_none():
        return random.choice([True, False, None])

    def _make_text():
        # A fresh UUID string is always created, even if a fixed string wins.
        options = ["abc", "中文", "", "null", str(uuid.uuid4())]
        return random.choice(options)

    def _make_tuple():
        return tuple(_children())

    def _make_frozenset():
        return frozenset(_children())

    def _make_decimal():
        rounded = round(random.uniform(0, 100), 4)
        return decimal.Decimal(str(rounded))

    def _make_fraction():
        numerator = random.randint(1, 10)
        denominator = random.randint(1, 10)
        return fractions.Fraction(numerator, denominator)

    def _make_uuid():
        return uuid.uuid4()

    def _make_datetime():
        return datetime.datetime.now()

    def _make_date():
        return datetime.date.today()

    def _make_complex():
        real_part = random.uniform(-100, 100)
        imag_part = random.uniform(-100, 100)
        return complex(real_part, imag_part)

    # Hashable types only -- no list and no dict.
    builders = (
        _make_int,
        _make_float,
        _make_flag_or_none,
        _make_text,
        _make_tuple,
        _make_frozenset,
        _make_decimal,
        _make_fraction,
        _make_uuid,
        _make_datetime,
        _make_date,
        _make_complex,
    )

    chosen_builder = random.choice(builders)
    return chosen_builder()

def test_pickle_consistency_fuzzing(num_tests=100):
    """Pickle randomly generated hashable objects twice and compare digests.

    For each of ``num_tests`` objects from ``generate_fuzzy_hashable_object``,
    ``hash_of`` is called twice on the same object. The first object seen of
    each top-level type is printed as an example, every digest mismatch is
    printed as it occurs, and a summary line closes the run.

    Args:
        num_tests: Number of objects to generate and check.

    Returns:
        None. All results are written to standard output.
    """

    def _report(header, sample, digest_a, digest_b):
        # Shared layout for both the per-type example and the mismatch report.
        print(header)
        print(f"  Object: {repr(sample)}")
        print(f"  Hash1:  {digest_a}")
        print(f"  Hash2:  {digest_b}\n")

    print(f"🔍 Running fuzzy pickle consistency test with only hashable types ({num_tests} cases)...\n")
    seen_type_names = set()
    inconsistencies = 0

    for _ in range(num_tests):
        candidate = generate_fuzzy_hashable_object()
        first_digest = hash_of(candidate)
        second_digest = hash_of(candidate)
        kind = type(candidate).__name__

        # Show one example per top-level type, the first time it appears.
        if kind not in seen_type_names:
            _report(f"[{kind}] Example:", candidate, first_digest, second_digest)
            seen_type_names.add(kind)

        if first_digest == second_digest:
            continue

        inconsistencies += 1
        _report(
            f"❌ Inconsistent pickle output detected for type: {kind}",
            candidate,
            first_digest,
            second_digest,
        )

    print(f"✅ Test complete. Total: {num_tests}, Inconsistencies: {inconsistencies}")

## 模糊测试 — 同一环境下，不同 Python 版本

In [4]:
# Print the OS and Python version so the results that follow can be attributed to this interpreter.
describe_environment()

操作系统: Windows 11
Python 版本: 3.12.4 | packaged by Anaconda, Inc. | (main, Jun 18 2024, 15:03:56) [MSC v.1929 64 bit (AMD64)]


In [5]:
# Run under Python 3.12.4: fuzz-test pickle consistency with 100 generated objects.
test_pickle_consistency_fuzzing(100)

🔍 Running fuzzy pickle consistency test with only hashable types (100 cases)...

[str] Example:
  Object: '中文'
  Hash1:  d4d57798c4c8c87ad3fc7b5b0e284bbaf8a80dc6aa11d6421abadff376629378
  Hash2:  d4d57798c4c8c87ad3fc7b5b0e284bbaf8a80dc6aa11d6421abadff376629378

[tuple] Example:
  Object: ('', Decimal('75.4675'), False)
  Hash1:  d92d661719048978f980cec0568080c8c522cbcc8b7d470937731cd872dd645d
  Hash2:  d92d661719048978f980cec0568080c8c522cbcc8b7d470937731cd872dd645d

[bool] Example:
  Object: False
  Hash1:  132915fa0f4abd3a7939610b8d088fbbcdff866e17b5cbb2c0bdcb37782f4da2
  Hash2:  132915fa0f4abd3a7939610b8d088fbbcdff866e17b5cbb2c0bdcb37782f4da2

[date] Example:
  Object: datetime.date(2025, 5, 21)
  Hash1:  0950a3d21957cb73da43aca43fc1b10624fc313b8cd8d338ea841e83c82a0438
  Hash2:  0950a3d21957cb73da43aca43fc1b10624fc313b8cd8d338ea841e83c82a0438

[frozenset] Example:
  Object: frozenset({UUID('92a2f526-2901-4aca-8d3a-6ad9545b4822')})
  Hash1:  3ec1c7d37e749bca5ece2509fdfa651cbf04d730f7

In [6]:
# Print the OS and Python version so the results that follow can be attributed to this interpreter.
describe_environment()

操作系统: Windows 10
Python 版本: 3.7.12 | packaged by conda-forge | (default, Oct 26 2021, 05:35:01) [MSC v.1916 64 bit (AMD64)]


In [7]:
# Run under Python 3.7.12: fuzz-test pickle consistency with 100 generated objects.
test_pickle_consistency_fuzzing(100)

🔍 Running fuzzy pickle consistency test with only hashable types (100 cases)...

[float] Example:
  Object: -203625.05270700692
  Hash1:  7d62dcbbe713b7c73a892ea8fc4f06a5bb38b136ed8d6c8e82234396a8094e5e
  Hash2:  7d62dcbbe713b7c73a892ea8fc4f06a5bb38b136ed8d6c8e82234396a8094e5e

[frozenset] Example:
  Object: frozenset({Fraction(5, 9), (58.84104121497205-0.10703980024848647j), Decimal('60.0785')})
  Hash1:  d9a6b9b81e30b259e01ee5c2df54e6857ba46ad0a8ddcd747a4a7056dc3e6270
  Hash2:  d9a6b9b81e30b259e01ee5c2df54e6857ba46ad0a8ddcd747a4a7056dc3e6270

[int] Example:
  Object: -103
  Hash1:  952a100c89f84492e78e94ddeb700fb1838b63573f2937c1b0a894e0c3441e52
  Hash2:  952a100c89f84492e78e94ddeb700fb1838b63573f2937c1b0a894e0c3441e52

[bool] Example:
  Object: False
  Hash1:  24a5341c9a6e30357187cbeaebee0a02714ee3b3d6cead951a613e96ffb746dc
  Hash2:  24a5341c9a6e30357187cbeaebee0a02714ee3b3d6cead951a613e96ffb746dc

[complex] Example:
  Object: (1.6147632524049982+77.19256387329705j)
  Hash1:  3f27b9

###  Conclusion on Pickle Serialization Stability

Fuzz testing of Python's `pickle` module with 100 randomly generated hashable objects per interpreter (Python 3.12.4 and Python 3.7.12) demonstrated **excellent serialization stability** when the same object is serialized twice within the same process.

#### Key Observations:

- ✅ All tested objects, including complex types such as `frozenset`, `Decimal`, `Fraction`, `datetime`, `UUID`, and `complex` numbers, produced **identical SHA256 hashes across repeated serializations**;
- ✅ Basic types like `int`, `float`, `bool`, `str`, `NoneType`, and compound types like `tuple` also showed perfect consistency;
- ✅ This indicates that `pickle.dumps()` produces **deterministic and repeatable serialized byte streams** for the same input object when it is called repeatedly within one interpreter session, i.e. with the same Python version and the same pickle protocol;
- ✅ No inconsistencies or hash mismatches were detected in any of the 100 test cases, indicating strong internal consistency in the serialization algorithm.

#### Implications:

- The deterministic nature of `pickle` serialization supports reliable caching, hashing, and comparison workflows based on serialized data;
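- Applications depending on byte-level stability of pickled objects can rely on repeated serializations of the same object state yielding identical outputs within a single Python version. The bytes are not stable across versions, however: in the runs above, `False` hashes to `132915fa…` under Python 3.12.4 but to `24a5341c…` under Python 3.7.12, because `pickle.HIGHEST_PROTOCOL` differs between the two interpreters;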
- This stability is crucial for debugging, distributed systems, and storage scenarios where serialized data integrity is important.

Overall, the results provide strong evidence that Python’s `pickle` serialization mechanism is robust and stable with respect to producing consistent outputs for identical objects when repeated under the same environment.
