In [1]:
import hashlib
import random
import os

# STEP 1: Create a sample file
# The same line repeated 100 times gives a small, predictable payload
# that spans many fixed-size blocks.
file_path = "sample_data.txt"
sample_text = "".join(["This is a sample file containing multiple lines.\n"] * 100)

with open(file_path, "w") as sample_file:
    sample_file.write(sample_text)

print(f"Sample file created at: {file_path}")

# STEP 2: Split the file into fixed-size blocks
def split_file_into_blocks(filepath, block_size=64):
    """Lazily yield the contents of a file as consecutive byte blocks.

    The file is opened in binary mode and read ``block_size`` bytes at a
    time. Every block has exactly ``block_size`` bytes except possibly the
    last one, which holds whatever remains. An empty file yields nothing.

    Args:
        filepath: Path of the file to read.
        block_size: Number of bytes per block; must be at least 1.

    Yields:
        bytes: The next block of the file, in file order.

    Raises:
        ValueError: If ``block_size`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A size of 0 would make read() return b"" immediately (no blocks at
    # all), and a negative size would make read() return the whole file as
    # a single block; neither is a meaningful fixed-size split.
    if block_size < 1:
        raise ValueError(f"block_size must be a positive integer, got {block_size!r}")

    with open(filepath, 'rb') as f:
        # iter(callable, sentinel) keeps calling read() until it returns
        # the empty-bytes sentinel that marks end of file.
        yield from iter(lambda: f.read(block_size), b"")

# STEP 3: Generate SHA-256 checksum for each block
def compute_checksum(block):
    """Return the SHA-256 digest of ``block`` as a hexadecimal string.

    Args:
        block: Bytes-like data to hash.

    Returns:
        str: The 64-character lowercase hex digest.
    """
    hasher = hashlib.sha256()
    hasher.update(block)
    return hasher.hexdigest()

# Read the pristine file once and record one reference checksum per block.
original_blocks = [*split_file_into_blocks(file_path)]
original_checksums = list(map(compute_checksum, original_blocks))

print(f"Total blocks: {len(original_blocks)}")

# STEP 4: Simulate corruption in 3 random blocks
# Work on a shallow copy so the pristine block list stays untouched.
corrupted_blocks = list(original_blocks)
block_count = len(corrupted_blocks)
corruption_indices = random.sample(range(block_count), k=3)

# Overwrite each chosen block with a fixed marker payload.
for position in corruption_indices:
    corrupted_blocks[position] = b"CORRUPTED_BLOCK_DATA"

print(f"Corrupted block indices: {corruption_indices}")

# STEP 5: Validate integrity by comparing checksums
def validate_blocks(blocks, original_checksums, checksum_fn=None):
    """Compare each block's checksum against its recorded reference value.

    Blocks and reference checksums are matched by position. A position is
    reported as valid only when a block and a reference checksum both exist
    there and the block's checksum equals the reference. Surplus blocks
    (no reference checksum) and missing blocks (a reference checksum with
    no block, e.g. a truncated file) are reported as invalid rather than
    raising ``IndexError`` or being silently left out of the report.

    Args:
        blocks: Iterable of bytes-like blocks to verify.
        original_checksums: Sequence of reference checksums, one per
            expected block, in block order.
        checksum_fn: Optional callable mapping a block to its checksum.
            Defaults to the module's ``compute_checksum``.

    Returns:
        list[tuple[int, bool]]: ``(index, is_valid)`` pairs in ascending
        index order, covering every index that has a block, a reference
        checksum, or both.
    """
    if checksum_fn is None:
        checksum_fn = compute_checksum

    expected_count = len(original_checksums)
    report = []
    checked = 0
    for i, block in enumerate(blocks):
        # Short-circuit on the bounds check so surplus blocks never index
        # past the end of the reference list.
        is_valid = i < expected_count and checksum_fn(block) == original_checksums[i]
        report.append((i, is_valid))
        checked = i + 1

    # Reference checksums with no corresponding block mean data is missing.
    report.extend((i, False) for i in range(checked, expected_count))
    return report

# Check the tampered blocks against the checksums of the pristine file.
validation_report = validate_blocks(corrupted_blocks, original_checksums)

# STEP 6: Show results in a table
import pandas as pd

# One row per block: its position and whether its checksum still matches.
table_columns = ["Block Index", "Is Valid"]
df = pd.DataFrame(data=validation_report, columns=table_columns)
print(df)


Sample file created at: sample_data.txt
Total blocks: 77
Corrupted block indices: [13, 5, 11]
    Block Index  Is Valid
0             0      True
1             1      True
2             2      True
3             3      True
4             4      True
..          ...       ...
72           72      True
73           73      True
74           74      True
75           75      True
76           76      True

[77 rows x 2 columns]
