In [34]:
import itertools
import json

# Function to calculate Hamming distance
def hamming_distance(seq1, seq2):
    """Count the positions at which ``seq1`` and ``seq2`` differ.

    The two sequences are paired element by element with ``zip``, so when
    their lengths differ only the leading positions shared by both are
    compared; the surplus tail of the longer one is ignored.
    """
    mismatches = 0
    for left, right in zip(seq1, seq2):
        if left != right:
            mismatches += 1
    return mismatches

# Function to generate empty barcodes
def generate_empty_barcodes(barcodes, min_distance, count):
    """Greedily pick unused barcodes that stay far from every known barcode.

    Candidates are all strings over ``'ACGT'`` with the same length as the
    first entry of ``barcodes``, visited in the order produced by
    ``itertools.product``. A candidate is accepted when its Hamming
    distance to every existing barcode, and to every candidate accepted
    before it, is at least ``min_distance``.

    Args:
        barcodes: Existing barcode strings. The input is not modified.
        min_distance: Minimum Hamming distance required between an
            accepted candidate and every barcode already in use.
        count: Maximum number of barcodes to return. A value of zero or
            less yields an empty list.

    Returns:
        A list of at most ``count`` accepted barcode strings. Fewer are
        returned when the candidate space is exhausted first.

    Raises:
        IndexError: If ``barcodes`` is empty, since the barcode length is
            taken from its first entry.
    """
    # Track used barcodes on a private copy so the caller's list is not
    # extended with the generated entries.
    taken = list(barcodes)
    length = len(taken[0])

    if count <= 0:
        return []

    empty_barcodes = []

    # Walk the product lazily: the loop normally stops long before all
    # 4**length candidates are needed, so there is no reason to build them.
    for letters in itertools.product('ACGT', repeat=length):
        candidate = ''.join(letters)
        if all(hamming_distance(candidate, used) >= min_distance for used in taken):
            empty_barcodes.append(candidate)
            # Later candidates must also keep their distance from this one.
            taken.append(candidate)
            if len(empty_barcodes) == count:
                break

    return empty_barcodes


def codebook_with_emptyBarcode(codebook_path, output_path, min_distance=2, count=5):
    """Extend a JSON codebook with "empty" barcodes and write the result.

    The codebook at ``codebook_path`` is loaded, each mapping's codeword is
    turned into a barcode string (round ``r`` selects the position, channel
    ``c`` selects the letter from ``'ACGT'``), and ``generate_empty_barcodes``
    is asked for additional barcodes. Each generated barcode is appended to
    ``codebook['mappings']`` with targets ``Fake1``, ``Fake2``, ... and the
    updated codebook is printed and written to ``output_path``.

    Args:
        codebook_path: Path of the JSON codebook to read.
        output_path: Path the updated JSON codebook is written to.
        min_distance: Minimum Hamming distance passed to
            ``generate_empty_barcodes``.
        count: Number of empty barcodes requested from
            ``generate_empty_barcodes``.
    """
    # Use a context manager so the input file handle is closed promptly.
    with open(codebook_path, encoding='utf-8') as infile:
        codebook = json.load(infile)

    mappings = codebook['mappings']

    # Derive the number of rounds from the codebook instead of assuming 4,
    # so codebooks with more rounds no longer fail with an IndexError.
    n_rounds = max(
        (code['r'] for mapping in mappings for code in mapping['codeword']),
        default=-1,
    ) + 1

    # Extract existing barcodes from the codebook
    barcodes = []
    for mapping in mappings:
        barcode = [''] * n_rounds
        for code in mapping['codeword']:
            barcode[code['r']] = 'ACGT'[code['c']]
        barcodes.append(''.join(barcode))

    empty_barcodes = generate_empty_barcodes(barcodes, min_distance, count)

    # Add new empty barcodes to the codebook
    for i, empty_barcode in enumerate(empty_barcodes, start=1):
        codeword = [
            {"r": idx, "c": "ACGT".index(nuc), "v": 1.0}
            for idx, nuc in enumerate(empty_barcode)
        ]
        mappings.append({"codeword": codeword, "target": f"Fake{i}"})

    # Print the updated codebook
    print(json.dumps(codebook, indent=4))

    with open(output_path, 'w', encoding='utf-8') as outfile:
        json.dump(codebook, outfile, indent=4)

# Input codebook and the destination for the copy extended with empty barcodes
codebook_path = '/hpc/scratch/hdd2/nv066607/iss-nf/postcode_example/codebook_org.json'
output_path = '/hpc/scratch/hdd2/nv066607/iss-nf/postcode_example/codebook.json'
codebook_with_emptyBarcode(
    codebook_path=codebook_path,
    output_path=output_path,
    min_distance=2,
    count=5,
)

{
    "version": "0.0.0",
    "mappings": [
        {
            "codeword": [
                {
                    "r": 0,
                    "c": 0,
                    "v": 1.0
                },
                {
                    "r": 1,
                    "c": 0,
                    "v": 1.0
                },
                {
                    "r": 2,
                    "c": 1,
                    "v": 1.0
                },
                {
                    "r": 3,
                    "c": 2,
                    "v": 1.0
                }
            ],
            "target": "Adra1b"
        },
        {
            "codeword": [
                {
                    "r": 0,
                    "c": 3,
                    "v": 1.0
                },
                {
                    "r": 1,
                    "c": 2,
                    "v": 1.0
                },
                {
                    "r": 2,
                    "c": 2,
                    "v