In [14]:
import random

def mutate(seq, mutation_rate=0.1):
    """
    Mutates a wild-type amino acid sequence by randomly substituting residues.
    
    Args:
        seq (str): The wild-type amino acid sequence.
        mutation_rate (float): Probability of mutating each residue (default 0.1).
    
    Returns:
        str: The mutated amino acid sequence.
    """
    # Define the set of valid amino acids
    amino_acids = 'ACDEFGHIKLMNPQRSTVWY'
    mutated_seq = []
    
    for aa in seq:
        # With probability equal to mutation_rate, substitute the amino acid
        if random.random() < mutation_rate:
            # Choose an alternative amino acid (different from the current one)
            alternatives = [a for a in amino_acids if a != aa]
            mutated_seq.append(random.choice(alternatives))
        else:
            mutated_seq.append(aa)
    return ''.join(mutated_seq)

# Example usage:

print("Mutated:   ", mutated)

Mutated:    GQSFTHPP


In [15]:
num_seq = 200
mutated_seqs = []
wt_seq = "GDSFTHP"
for i in range(num_seq):
    mutated_seqs.append(mutate(wt_seq, mutation_rate=0.1))



In [16]:
import pandas as pd

# Create DataFrame with mutated sequences and corresponding wild-type sequence
data = pd.DataFrame({"mutated_seq": mutated_seqs, "wt_seq": [wt_seq]*len(mutated_seqs)})

In [17]:
data.to_csv("EGFR_antigen_loop")