In [1]:
from qiskit import QuantumCircuit, Aer, execute
import time

In [2]:
def quantum_multiple_pattern_matching(targets, sequence):
    target_lengths = [len(target) for target in targets]
    sequence_length = len(sequence)

    # Create a quantum circuit with n+1 qubits, where n is the length of the longest target sequence
    max_target_length = max(target_lengths)
    qc = QuantumCircuit(max_target_length + 1, max_target_length)

    # Apply quantum gates to implement all target sequences
    for target in targets:
        for i, bit in enumerate(target):
            if bit == "1":
                qc.x(i)  # Apply a NOT gate for each '1' in the target sequence

    # Apply an X gate to the last qubit to prepare it in the |1⟩ state
    qc.x(max_target_length)

    # Measure all qubits
    for i in range(max_target_length):
        qc.measure(i, i)

    # Use the Aer simulator to run the quantum circuit
    simulator = Aer.get_backend('qasm_simulator')
    job = execute(qc, simulator, shots=1)
    
    start_time = time.time()
    job = execute(qc, simulator, shots=1)
    execution_time = time.time() - start_time
    
    # Get the result
    result = job.result()
    counts = result.get_counts()

    # Initialize a dictionary to store the positions of target sequence matches
    positions = {target: [] for target in targets}

    # Search for the target sequences in the input sequence
    for target in targets:
        target_length = len(target)
        for i in range(sequence_length - target_length + 1):
            subsequence = sequence[i:i + target_length]
            if subsequence == target:
                positions[target].append(i)

    return positions, execution_time

In [3]:
# Define the target sequence and the input sequence
target_sequences = ["GCA","NNN","ATA", "IIL"]
input_sequence = "MSTHDTSLKTTEEVAFQIILLCQFGVGTFANVFLFVYNFSPISTGSKQRPRQVILRHMAVANALTLFLTIFPNNMMTFAPIIPQTDLKCKLEFFTRLVARSTNLCSTCVLSIHQFVTLVPVNSGKGILRASVTNMASYSCYSCWFFSVLNNIYIPIKVTGPQLTDNNNNSKSKLFCSTSDFSVGIVFLRFAHDATFMSIMVWTSVSMVLLLHRHCQRMQYIFTLNQDPRGQAETTATHTILMLVVTFVGFYLLSLICIIFYTYFIYSHHSLRHCNDILVSGFPTISPLLLTFRDPKGPCSVFFNC"

In [4]:
# Find positions of the target sequences in the input sequence using a quantum-inspired approach
matched_positions, execution_time = quantum_multiple_pattern_matching(target_sequences, input_sequence)

In [5]:
for target, positions in matched_positions.items():
    if positions:
        print(f"Target sequence '{target}' found at positions:", positions)
    else:
        print(f"Target sequence '{target}' not found in the input sequence.")

Target sequence 'GCA' not found in the input sequence.
Target sequence 'NNN' found at positions: [165, 166]
Target sequence 'ATA' not found in the input sequence.
Target sequence 'IIL' found at positions: [17]


In [6]:
print(f"Execution time: {execution_time:.6f} seconds")

Execution time: 0.010994 seconds


In [7]:
import time

def translate_and_find_amino_acids(dna_sequence):
    start_time = time.time()

    # Define a genetic code dictionary
    genetic_code = {
        'ATA': 'I', 'ATC': 'I', 'ATT': 'I', 'ATG': 'M',
        'ACA': 'T', 'ACC': 'T', 'ACG': 'T', 'ACT': 'T',
        'AAC': 'N', 'AAT': 'N', 'AAA': 'K', 'AAG': 'K',
        'AGC': 'S', 'AGT': 'S', 'AGA': 'R', 'AGG': 'R',
        'CTA': 'L', 'CTC': 'L', 'CTG': 'L', 'CTT': 'L',
        'CCA': 'P', 'CCC': 'P', 'CCG': 'P', 'CCT': 'P',
        'CAC': 'H', 'CAT': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGA': 'R', 'CGC': 'R', 'CGG': 'R', 'CGT': 'R',
        'GTA': 'V', 'GTC': 'V', 'GTG': 'V', 'GTT': 'V',
        'GCA': 'A', 'GCC': 'A', 'GCG': 'A', 'GCT': 'A',
        'GAC': 'D', 'GAT': 'D', 'GAA': 'E', 'GAG': 'E',
        'GGA': 'G', 'GGC': 'G', 'GGG': 'G', 'GGT': 'G',
        'TCA': 'S', 'TCC': 'S', 'TCG': 'S', 'TCT': 'S',
        'TTC': 'F', 'TTT': 'F', 'TTA': 'L', 'TTG': 'L',
        'TAC': 'Y', 'TAT': 'Y', 'TAA': '_', 'TAG': '_',
        'TGC': 'C', 'TGT': 'C', 'TGA': '_', 'TGG': 'W',
    }

    protein_sequence = ""
    amino_acids_and_indexes = []

    i = 0
    while i < len(dna_sequence):
        codon = dna_sequence[i:i+3]
        amino_acid = genetic_code.get(codon, '_')
        protein_sequence += amino_acid

        if amino_acid != '_':
            amino_acids_and_indexes.append((amino_acid, i // 3))

        i += 3

    execution_time = time.time() - start_time

    return protein_sequence, amino_acids_and_indexes, execution_time

# DNA sequence
dna_sequence = "MSTHDTSLKTTEEVAFQIILLCQFGVGTFANVFLFVYNFSPISTGSKQRPRQVILRHMAVANALTLFLTIFPNNMMTFAPIIPQTDLKCKLEFFTRLVARSTNLCSTCVLSIHQFVTLVPVNSGKGILRASVTNMASYSCYSCWFFSVLNNIYIPIKVTGPQLTDNNNNSKSKLFCSTSDFSVGIVFLRFAHDATFMSIMVWTSVSMVLLLHRHCQRMQYIFTLNQDPRGQAETTATHTILMLVVTFVGFYLLSLICIIFYTYFIYSHHSLRHCNDILVSGFPTISPLLLTFRDPKGPCSVFFNC"

# Translate the DNA sequence into amino acids and find their indexes
protein_sequence, amino_acids_and_indexes, execution_time = translate_and_find_amino_acids(dna_sequence)

# Display the results
print("Protein Sequence:")
print(protein_sequence)
print("\nAmino Acids and Their Indexes:")
print(amino_acids_and_indexes)
print(f"\nExecution Time: {execution_time:.6f} seconds")

Protein Sequence:
______________________________________________________________________________Y_______________________

Amino Acids and Their Indexes:
[('Y', 78)]

Execution Time: 0.000000 seconds
