In [1]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import random

def reverse_complement(sequence):
    """Return the reverse complement of ``sequence`` as a plain string.

    The input string is wrapped in a Biopython ``Seq`` object, whose
    ``reverse_complement`` method does the actual work; the result is
    converted back to ``str`` before being returned.
    """
    forward_strand = Seq(sequence)
    reverse_strand = forward_strand.reverse_complement()
    return str(reverse_strand)

def introduce_mutation(sequence):
    """Return a copy of ``sequence`` with one randomly chosen base substituted.

    A position is picked uniformly at random and the character there is
    replaced by a different base drawn from A, C, G and T. The comparison
    against the original base is case-insensitive, so a lower-case
    (soft-masked) base is never "mutated" into its own upper-case form, and
    the replacement keeps the case of the character it replaces. A character
    outside ACGT (for example ``N``) may be replaced by any of the four bases.

    Args:
        sequence: Nucleotide sequence as a string.

    Returns:
        A new string of the same length that differs from ``sequence`` at
        exactly one position.

    Raises:
        ValueError: If ``sequence`` is empty (there is no position to mutate).
    """
    if not sequence:
        raise ValueError("cannot introduce a mutation into an empty sequence")

    position = random.randrange(len(sequence))
    original_nucleotide = sequence[position]

    # Compare in upper case so that 'a' -> 'A' is not counted as a mutation.
    candidates = [base for base in 'ACGT' if base != original_nucleotide.upper()]
    mutated_nucleotide = random.choice(candidates)

    # Keep soft-masking intact: a lower-case base is replaced by a lower-case base.
    if original_nucleotide.islower():
        mutated_nucleotide = mutated_nucleotide.lower()

    return sequence[:position] + mutated_nucleotide + sequence[position + 1:]

# Define file paths
primary_file = 'E:/Basesolve/my_primary.fasta'
output_file = 'E:/Basesolve/modified_primary.fasta'

# Read the primary multi-FASTA file. All records are loaded into memory
# because random.sample needs a sequence to draw from.
all_sequences = list(SeqIO.parse(primary_file, "fasta"))

# Records with an empty sequence have no positions to modify; picking a
# start position in them would raise ValueError ("empty range") and abort
# the whole run, so they are excluded before sampling.
non_empty_sequences = [record for record in all_sequences if len(record.seq) > 0]

# Randomly select up to 10 sequences (fewer if the file has fewer usable records)
selected_sequences = random.sample(non_empty_sequences, min(10, len(non_empty_sequences)))

processed_records = []

# Process the selected sequences
for sequence_record in selected_sequences:
    scaffold = sequence_record.id
    sequence = str(sequence_record.seq)

    # Randomly select start and end positions for modification.
    # start is a valid index and end > start, so the slice below is
    # never empty (end is an exclusive bound and may equal len(sequence)).
    start = random.randint(0, len(sequence) - 1)
    end = random.randint(start + 1, len(sequence))

    # Extract the subsequence for modification
    subsequence = sequence[start:end]

    # Reverse complement the extracted subsequence
    reversed_sequence = reverse_complement(subsequence)

    # Introduce a random mutation in the reversed subsequence
    processed_sequence = introduce_mutation(reversed_sequence)

    # Insert the processed subsequence back in place of the original span;
    # the overall sequence length is unchanged.
    modified_sequence = sequence[:start] + processed_sequence + sequence[end:]

    # Create a SeqRecord for the modified sequence. The empty description
    # keeps the FASTA header down to just the new id.
    processed_record = SeqRecord(Seq(modified_sequence), id=f"{scaffold}_processed", description="")
    processed_records.append(processed_record)

# Write the modified sequences to a new multi-FASTA file
with open(output_file, "w") as output:
    SeqIO.write(processed_records, output, "fasta")

print(f"Modified primary sequence saved to {output_file}")


Modified primary sequence saved to E:/Basesolve/modified_primary.fasta
