In [1]:
# User-configurable parameters; these are the arguments passed to
# generate_dna_barcodes() in the driver cell at the end of the notebook.
num_barcodes = 200 # How many barcodes to generate; change this to generate a different number
length = 8 # Number of bases in each barcode; change this to generate a different barcode length
min_hamming_distance = 3 # Minimum number of nucleotide differences between any two barcodes in the set

In [2]:
import random

def generate_dna_barcodes(num_barcodes, length, min_hamming_distance,
                          *, max_consecutive_failures=1_000_000):
    """
    Generate random DNA barcodes that are pairwise separated by at least a
    given Hamming distance.

    Candidates are drawn uniformly at random over the bases A, T, G, C and
    accepted greedily: a candidate is kept only if it differs from every
    barcode accepted so far in at least ``min_hamming_distance`` positions.
    The result depends on the state of the global ``random`` generator, so
    call ``random.seed(...)`` beforehand for reproducible output.

    :param num_barcodes: Number of barcodes to generate.
    :param length: Length (number of bases) of each DNA barcode.
    :param min_hamming_distance: Minimum Hamming distance between any two barcodes.
    :param max_consecutive_failures: Give up after this many rejected
        candidates in a row (keyword-only). Guards against looping forever
        once the greedy set can no longer be extended.
    :return: List of DNA barcodes, in the order they were accepted.
    :raises ValueError: If the request is provably impossible, i.e.
        ``num_barcodes`` exceeds the Singleton bound
        ``4 ** (length - min_hamming_distance + 1)``.
    :raises RuntimeError: If ``max_consecutive_failures`` candidates in a row
        were rejected before ``num_barcodes`` barcodes were found.
    """

    def hamming_distance(s1, s2):
        """Calculate the Hamming distance between two equal-length strings."""
        return sum(c1 != c2 for c1, c2 in zip(s1, s2))

    bases = ['A', 'T', 'G', 'C']

    # Fail fast on requests that no code can satisfy. The Singleton bound caps
    # the size of any code of length n and minimum distance d over q symbols
    # at q ** (n - d + 1); when d > n at most one barcode can exist. With a
    # distance below 1 duplicates are allowed, so no upper limit applies.
    if min_hamming_distance >= 1:
        max_possible = len(bases) ** max(length - min_hamming_distance + 1, 0)
        if num_barcodes > max_possible:
            raise ValueError(
                f"Cannot generate {num_barcodes} barcodes of length {length} "
                f"with minimum Hamming distance {min_hamming_distance}: "
                f"at most {max_possible} can exist."
            )

    barcodes = []
    consecutive_failures = 0

    while len(barcodes) < num_barcodes:
        new_barcode = ''.join(random.choices(bases, k=length))
        if all(hamming_distance(new_barcode, existing_barcode) >= min_hamming_distance
               for existing_barcode in barcodes):
            barcodes.append(new_barcode)
            consecutive_failures = 0
        else:
            # The greedy set may saturate below the theoretical maximum; a
            # long unbroken run of rejections means we are (almost surely)
            # stuck, so stop instead of spinning forever.
            consecutive_failures += 1
            if consecutive_failures >= max_consecutive_failures:
                raise RuntimeError(
                    f"Gave up after {consecutive_failures} consecutive rejected "
                    f"candidates with {len(barcodes)} of {num_barcodes} barcodes "
                    f"found; relax the constraints or retry."
                )

    return barcodes

def write_barcodes_to_fasta(barcodes, filename):
    """
    Save DNA barcodes as FASTA records.

    Each barcode becomes a two-line record: a header line ``>barcodeN``
    (N counts from 1 in list order) followed by the sequence itself.
    An existing file at ``filename`` is overwritten.

    :param barcodes: List of DNA barcodes.
    :param filename: Name of the file to save the barcodes.
    """
    records = (
        f">barcode{number}\n{sequence}\n"
        for number, sequence in enumerate(barcodes, start=1)
    )
    with open(filename, 'w') as handle:
        handle.writelines(records)

In [3]:
# Destination FASTA file for the generated set
filename = 'dna_barcodes.fasta'

# Build the barcode set from the user variables defined in the first cell
barcodes = generate_dna_barcodes(
    num_barcodes,
    length,
    min_hamming_distance,
)

# Save one FASTA record per barcode
write_barcodes_to_fasta(barcodes, filename)