# DNA Sequence Comparison Analysis

This notebook demonstrates how to compare DNA sequences using quantum computing techniques, built with the Qiskit framework and executed through the IBM Quantum runtime service.

In [1]:
# Install necessary libraries
!pip install qiskit qiskit-aer pylatexenc qiskit-ibm-runtime numpy matplotlib ipywidgets

In [2]:
# Import necessary libraries
import os

import matplotlib.pyplot as plt
import numpy as np
from qiskit import QuantumCircuit, QuantumRegister, ClassicalRegister, transpile
from qiskit.visualization import plot_circuit_layout, circuit_drawer
from qiskit_ibm_runtime import QiskitRuntimeService, Session, Sampler

In [3]:
# Connect to IBM Quantum.
# The API token is read from the IBM_QUANTUM_TOKEN environment variable so no
# credential is stored in the notebook. When the variable is unset, the account
# already saved on this machine is used as-is instead of being overwritten
# with a placeholder value.
# NOTE(review): accepted channel names depend on the installed
# qiskit-ibm-runtime version — confirm "ibm_quantum" is valid for yours.
ibm_quantum_token = os.environ.get("IBM_QUANTUM_TOKEN")
if ibm_quantum_token:
    QiskitRuntimeService.save_account(channel="ibm_quantum", token=ibm_quantum_token, overwrite=True)
service = QiskitRuntimeService()

# Select the backend (ibm_sherbrooke in this case)
backend_name = 'ibm_sherbrooke'
# backend() is the supported lookup; get_backend() is deprecated.
backend_real = service.backend(backend_name)
print(f"Selected backend: {backend_real.name}")

In [4]:
# Encode the DNA sequences as quantum states
def encode_dna_sequence(dna_sequence, max_qubits=127):
    """Translate a DNA sequence into a bit string, two bits per base.

    Bases map as A -> '00', T -> '01', C -> '10', G -> '11'. Lowercase bases
    are accepted and treated like their uppercase form.

    Args:
        dna_sequence: Iterable of single-character bases (typically a str).
        max_qubits: Upper bound on the number of bits returned.

    Returns:
        The encoded bit string, truncated to at most ``max_qubits`` bits.
        Truncation always happens on a base boundary: with an odd limit the
        dangling bit is dropped, so the result never ends with half a base.

    Raises:
        KeyError: If the sequence contains a character other than A, T, C, G.
        ValueError: If ``max_qubits`` is negative.
    """
    if max_qubits < 0:
        raise ValueError("max_qubits must be non-negative")

    mapping = {'A': '00', 'T': '01', 'C': '10', 'G': '11'}
    binary_sequence = ''.join(mapping[base.upper()] for base in dna_sequence)

    # Each base occupies two bits; round the limit down to an even number so
    # the last kept base is complete.
    usable_bits = max_qubits - (max_qubits % 2)
    return binary_sequence[:usable_bits]

In [5]:
# Create a quantum circuit based on the encoded DNA sequence
def create_dna_circuit(dna_binary_sequence, num_qubits):
    """Build the preparation circuit for one encoded sequence.

    A quantum and a classical register of ``num_qubits`` each are allocated.
    Every position of ``dna_binary_sequence`` holding '1' gets an X gate on
    the qubit with the same index, after which a Hadamard is applied to the
    whole quantum register.

    Returns:
        Tuple ``(circuit, quantum_register, classical_register)``.
    """
    quantum_reg = QuantumRegister(num_qubits)
    classical_reg = ClassicalRegister(num_qubits)
    circuit = QuantumCircuit(quantum_reg, classical_reg)

    # Positions of the '1' bits, in sequence order.
    set_positions = [pos for pos, bit in enumerate(dna_binary_sequence) if bit == '1']
    for pos in set_positions:
        circuit.x(quantum_reg[pos])

    circuit.h(quantum_reg)  # Apply Hadamard to create superposition
    return circuit, quantum_reg, classical_reg

In [6]:
# Calculate the similarity between two quantum states
def calculate_similarity(qc1, qc2, qr, cr):
    """Build a measured overlap (compute-uncompute) circuit for two preparations.

    The returned circuit runs ``qc1`` followed by the inverse of ``qc2`` and
    then measures ``qr`` into ``cr``. If ``qc1`` prepares |psi1> and ``qc2``
    prepares |psi2> from the all-zero state, the probability of reading all
    zeros equals |<psi2|psi1>|^2.

    Appending ``qc2`` itself (not its inverse) does not produce an overlap:
    the reference would merely be applied on top of the first state.

    Args:
        qc1: Circuit preparing the state under test; its registers are kept.
        qc2: Circuit preparing the reference state. It must be invertible
            (no measurements or other non-unitary operations).
        qr: Quantum register of ``qc1`` to measure.
        cr: Classical register of ``qc1`` receiving the measurement.

    Returns:
        A new circuit; ``qc1`` and ``qc2`` are not modified.
    """
    qc = qc1.compose(qc2.inverse())  # uncompute the reference preparation
    qc.measure(qr, cr)
    return qc

In [7]:
# Sample DNA sequences: each one is a four-base motif repeated 32 times
YEAST, PROTOZOAN, BACTERIAL, REFERENCE = (
    motif * 32 for motif in ("ATCG", "ATGC", "TGCA", "GCTA")
)

In [8]:
# Encode the sequences, using the encoder's default qubit limit
encoded_yeast, encoded_protozoan, encoded_bacterial, encoded_reference = map(
    encode_dna_sequence, (YEAST, PROTOZOAN, BACTERIAL, REFERENCE)
)

In [9]:
# Determine the number of qubits.
# Size the registers for the longest encoding so that no sequence sets a bit
# past the end of its register. Shorter encodings simply leave their trailing
# qubits without an X gate.
num_qubits = max(
    len(encoded_yeast),
    len(encoded_protozoan),
    len(encoded_bacterial),
    len(encoded_reference),
)

In [10]:
# Create the DNA quantum circuits (one circuit plus its two registers per sequence)
qc_yeast, qr_yeast, cr_yeast = create_dna_circuit(
    dna_binary_sequence=encoded_yeast, num_qubits=num_qubits
)
qc_protozoan, qr_protozoan, cr_protozoan = create_dna_circuit(
    dna_binary_sequence=encoded_protozoan, num_qubits=num_qubits
)
qc_bacterial, qr_bacterial, cr_bacterial = create_dna_circuit(
    dna_binary_sequence=encoded_bacterial, num_qubits=num_qubits
)
qc_reference, qr_reference, cr_reference = create_dna_circuit(
    dna_binary_sequence=encoded_reference, num_qubits=num_qubits
)

In [11]:
# Compare sequences with the reference; each comparison is measured through
# the registers of the sequence's own circuit
qc_similarity_yeast = calculate_similarity(
    qc1=qc_yeast, qc2=qc_reference, qr=qr_yeast, cr=cr_yeast
)
qc_similarity_protozoan = calculate_similarity(
    qc1=qc_protozoan, qc2=qc_reference, qr=qr_protozoan, cr=cr_protozoan
)
qc_similarity_bacterial = calculate_similarity(
    qc1=qc_bacterial, qc2=qc_reference, qr=qr_bacterial, cr=cr_bacterial
)

In [12]:
# Transpile circuits for the selected backend.
# The three names are rebound to their transpiled versions, in the same order.
qc_similarity_yeast, qc_similarity_protozoan, qc_similarity_bacterial = (
    transpile(circuit, backend=backend_real)
    for circuit in (qc_similarity_yeast, qc_similarity_protozoan, qc_similarity_bacterial)
)

In [13]:
# Use the IBM Runtime Sampler to execute the quantum circuits in one session
with Session(backend=backend_real) as session:
    # `mode=` is the supported way to bind a primitive to a session;
    # the older `session=` keyword is deprecated.
    sampler = Sampler(mode=session)

    # Run the sampler: submit all three jobs before waiting on any of them
    job_yeast = sampler.run([qc_similarity_yeast])
    job_protozoan = sampler.run([qc_similarity_protozoan])
    job_bacterial = sampler.run([qc_similarity_bacterial])

    # Extract results (each call blocks until its job has finished)
    result_yeast = job_yeast.result()
    result_protozoan = job_protozoan.result()
    result_bacterial = job_bacterial.result()

In [14]:
# Extract BitArrays
def extract_bitarray(result):
    """Return the measurement data of the first entry of a sampler result.

    The data container of ``result[0]`` is scanned and the first field whose
    name starts with 'c' is returned.
    NOTE(review): presumably this prefix matches the auto-generated name of
    the unnamed classical register — confirm against the circuits submitted.

    Args:
        result: Sampler result, indexable by position; each entry exposes a
            ``data`` container supporting ``keys()`` and item access.

    Returns:
        The matching field, or None when no field name starts with 'c'.
    """
    # Index the result through its public interface rather than the private
    # `_pub_results` attribute.
    data = result[0].data
    for key in data.keys():
        if key.startswith('c'):
            return data[key]
    return None

# Pull the measurement data out of each of the three results
bitarray_yeast, bitarray_protozoan, bitarray_bacterial = map(
    extract_bitarray, (result_yeast, result_protozoan, result_bacterial)
)

In [15]:
# Convert BitArray to counts
def bitarray_to_counts(bitarray):
    """Return the result of calling ``get_counts()`` on ``bitarray``."""
    counts = bitarray.get_counts()
    return counts

# Turn each measurement record into its outcome counts
counts_yeast, counts_protozoan, counts_bacterial = (
    bitarray_to_counts(bits)
    for bits in (bitarray_yeast, bitarray_protozoan, bitarray_bacterial)
)

In [16]:
# Calculate similarity scores
def calculate_similarity_scores(counts):
    """Express each outcome's count as a percentage of all counts.

    Args:
        counts: Mapping from measurement outcome to the number of times it
            was observed.

    Returns:
        A new dict with the same keys whose values are
        ``count / total * 100``. An empty mapping yields an empty dict.
    """
    shots = sum(counts.values())
    percentages = {}
    for outcome, hits in counts.items():
        percentages[outcome] = (hits / shots) * 100
    return percentages

# Percentage of shots per measurement outcome, for each comparison
similarity_scores_yeast = calculate_similarity_scores(counts=counts_yeast)
similarity_scores_protozoan = calculate_similarity_scores(counts=counts_protozoan)
similarity_scores_bacterial = calculate_similarity_scores(counts=counts_bacterial)

In [17]:
# Visualize similarity scores
def plot_similarity_scores(scores, title):
    """Show a bar chart with one bar per entry of ``scores``.

    Args:
        scores: Mapping whose keys label the bars and whose values set the
            bar heights.
        title: Text placed above the chart.
    """
    outcomes = scores.keys()
    percentages = scores.values()
    plt.bar(outcomes, percentages)
    plt.title(title)
    plt.xlabel('Measurement Outcomes')
    plt.ylabel('Similarity Score (%)')
    plt.show()

# One chart per comparison against the reference
plot_similarity_scores(scores=similarity_scores_yeast, title="Yeast Similarity Scores")
plot_similarity_scores(scores=similarity_scores_protozoan, title="Protozoan Similarity Scores")
plot_similarity_scores(scores=similarity_scores_bacterial, title="Bacterial Similarity Scores")