In [9]:
import os
import re

SHIBBOLETH = b'shibboleth'
SPLIT_SIZE = 1024  # Bytes

def create_shibboleth_split(original_file, split_size=None, shibboleth=None):
    """Split a file into fixed-size chunks, each tagged with a trailing marker.

    Args:
        original_file: Path of the file to read (opened in binary mode).
        split_size: Number of content bytes per piece. Defaults to the
            module-level ``SPLIT_SIZE``. Must be positive.
        shibboleth: Marker bytes appended to every piece. Defaults to the
            module-level ``SHIBBOLETH``.

    Returns:
        A list of ``bytes`` objects in file order. Each piece is a slice of
        at most ``split_size`` content bytes followed by the marker; the
        last piece may hold fewer content bytes. An empty file yields an
        empty list.

    Raises:
        ValueError: If ``split_size`` is not positive.
        OSError: If the file cannot be opened or read.
    """
    # Resolve defaults at call time so the module constants are only looked
    # up when the caller does not pass explicit values.
    if split_size is None:
        split_size = SPLIT_SIZE
    if shibboleth is None:
        shibboleth = SHIBBOLETH

    # A negative step would make range() empty and silently drop the whole
    # file; zero would raise a less descriptive error from range() itself.
    if split_size <= 0:
        raise ValueError(f"split_size must be positive, got {split_size!r}")

    # The whole file is read into memory before slicing.
    with open(original_file, 'rb') as f:
        content = f.read()

    return [
        content[offset:offset + split_size] + shibboleth
        for offset in range(0, len(content), split_size)
    ]

def create_decoy_split(count, size):
    """Produce ``count`` decoy pieces of ``size`` random bytes each.

    The bytes come straight from ``os.urandom``; no shibboleth marker is
    appended to them.

    Returns:
        A list of ``count`` ``bytes`` objects, each ``size`` bytes long.
    """
    decoys = []
    for _ in range(count):
        decoys.append(os.urandom(size))
    return decoys

def save_pieces(directory, pieces, prefix="piece_"):
    """Write each piece to its own numbered ``.dat`` file inside ``directory``.

    Args:
        directory: Destination directory; created (with parents) if missing.
        pieces: Iterable of bytes-like objects to write.
        prefix: Filename prefix. Files are named ``{prefix}{index}.dat``
            with ``index`` counting from 1. A file that already has that
            name is overwritten.

    Raises:
        OSError: If the directory cannot be created or a file cannot be
            written.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)

    # NOTE: indices are not zero-padded, so a plain lexicographic sort of the
    # resulting filenames places "..._10.dat" before "..._2.dat".
    for index, piece in enumerate(pieces, start=1):
        target = os.path.join(directory, f"{prefix}{index}.dat")
        with open(target, 'wb') as f:
            f.write(piece)

def _natural_sort_key(filename):
    """Return a sort key that orders embedded digit runs by numeric value."""
    # re.split with a capturing group yields text, digits, text, digits, ...
    # so odd positions are always digit runs. Keeping that parity means keys
    # only ever compare str with str and int with int.
    parts = re.split(r'([0-9]+)', filename)
    return [int(part) if position % 2 else part
            for position, part in enumerate(parts)]


def reconstruct_shibboleth_file(directory, shibboleth=None):
    """Rebuild the original content from the marker-tagged pieces in a directory.

    Every regular file in ``directory`` is read; files whose content ends
    with the marker are kept (marker removed) and all others are ignored.
    Kept pieces are joined in natural filename order, so ``piece_2.dat``
    comes before ``piece_10.dat``.

    Args:
        directory: Directory holding the piece files (and possibly decoys).
        shibboleth: Marker bytes identifying real pieces. Defaults to the
            module-level ``SHIBBOLETH``.

    Returns:
        The concatenated content of all marker-tagged pieces as ``bytes``;
        ``b''`` if no file carries the marker.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
    """
    if shibboleth is None:
        shibboleth = SHIBBOLETH

    pieces = []
    # A plain lexicographic sort would yield 1, 10, 11, ..., 2 and scramble
    # any file split into more than nine pieces.
    for filename in sorted(os.listdir(directory), key=_natural_sort_key):
        path = os.path.join(directory, filename)
        # Subdirectories and other non-regular entries cannot be pieces, and
        # opening them would raise.
        if not os.path.isfile(path):
            continue
        with open(path, 'rb') as f:
            piece = f.read()
        if piece.endswith(shibboleth):
            # Slice by explicit length: piece[:-0] would be empty if the
            # marker were empty.
            pieces.append(piece[:len(piece) - len(shibboleth)])

    return b''.join(pieces)

## Deconstruct the file and save the pieces along with decoy files

In [10]:
# Split the source file into marker-tagged pieces and build one decoy per real piece
pieces = create_shibboleth_split("large_file.dat")
decoys = create_decoy_split(count=len(pieces), size=SPLIT_SIZE)

# Real pieces and decoys land in the same directory; only the filename prefix differs
for batch, batch_prefix in ((pieces, "piece_"), (decoys, "decoy_")):
    save_pieces("pieces_directory", batch, prefix=batch_prefix)

## Reassemble the pieces of the file

In [11]:
# Gather only the marker-tagged pieces from the directory (markers stripped)
reconstructed_content = reconstruct_shibboleth_file("pieces_directory")

# Persist the reassembled bytes
with open("reconstructed.dat", 'wb') as out_file:
    out_file.write(reconstructed_content)

## Demonstration of naive reassembly of the split file

In [12]:
def incorrect_reassemble(directory):
    """Naively join every file in ``directory``, without any shibboleth check.

    Files are read in sorted filename order and concatenated as-is: nothing
    is filtered out and a trailing shibboleth, if present, is intentionally
    left in place to show the error.
    """
    def read_raw(name):
        # Return the file's bytes untouched.
        with open(os.path.join(directory, name), 'rb') as handle:
            return handle.read()

    ordered_names = sorted(os.listdir(directory))
    return b''.join(read_raw(name) for name in ordered_names)

In [13]:
# Join every file in the directory with no shibboleth check
incorrect_content = incorrect_reassemble("pieces_directory")

# Save the naive result to its own output file
with open("incorrect_reconstructed.dat", 'wb') as out_file:
    out_file.write(incorrect_content)