<a href="https://colab.research.google.com/github/PintoBI/mutagenic_primer_design/blob/main/generatemutagenicprimer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install Bio
!git clone https://github.com/PintoBI/mutagenic_primer_design.git

Collecting Bio
  Downloading bio-1.7.1-py3-none-any.whl (280 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.0/281.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting biopython>=1.80 (from Bio)
  Downloading biopython-1.84-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gprofiler-official (from Bio)
  Downloading gprofiler_official-1.0.0-py3-none-any.whl (9.3 kB)
Collecting mygene (from Bio)
  Downloading mygene-3.2.2-py2.py3-none-any.whl (5.4 kB)
Collecting biothings-client>=0.2.6 (from mygene->Bio)
  Downloading biothings_client-0.3.1-py2.py3-none-any.whl (29 kB)
Installing collected packages: biopython, gprofiler-official, biothings-client, mygene, Bio
Successfully installed Bio-1.7.1 biopython-1.84 biothings-client-0.3.1 gprofiler-official-1.0.0 mygene-3.2.2
Cloning into 'mutagenic_primer_design'...

In [2]:
from Bio.SeqUtils import MeltingTemp as mt
from Bio import Seq
import csv
from Bio import SeqIO
import random

def parse_fasta(file_path):
    """
    Read a FASTA file and return its first sequence as a plain string.

    Args:
        file_path (str): Path to the FASTA file.

    Returns:
        str or None: The sequence of the first record, or None when the
        file contains no records. Any further records are ignored.
    """
    with open(file_path, 'r') as handle:
        # Only the first record is of interest; pull it without looping.
        first_record = next(SeqIO.parse(handle, 'fasta'), None)
        if first_record is None:
            return None
        return str(first_record.seq)


def calculate_tm(primer):
    """
    Compute the melting temperature (Tm) of a mutagenic primer.

    Args:
        primer (str): The primer sequence with 'X' at every mismatched base.

    Returns:
        float: The melting temperature (Tm) in degrees Celsius.
    """
    # NOTE(review): valueset=2 is, per the Biopython docs, the QuikChange
    # formula, which accounts for the 'X' mismatch positions -- confirm.
    return mt.Tm_GC(primer, valueset=2)


def translate_dna(dna_sequence):
    """
    Translate a DNA sequence into its protein sequence.

    Args:
        dna_sequence (str): The coding DNA sequence.

    Returns:
        str: The translated protein sequence.
    """
    return str(Seq.Seq(dna_sequence).translate())


def find_codon_position(dna_sequence, protein_position, firstresid):
    """
    Locate the codon that encodes a given residue.

    Args:
        dna_sequence (str): The DNA sequence.
        protein_position (int): Residue number of the target amino acid.
        firstresid (int): Residue number encoded by the first codon of
            ``dna_sequence``.

    Returns:
        tuple: ``(codon_start, codon)`` where ``codon_start`` is the 0-based
        index of the codon's first base and ``codon`` is the (up to) three
        bases sliced from ``dna_sequence`` at that index.
    """
    # Each residue past the first one shifts the start by three bases.
    offset = 3 * (protein_position - firstresid)
    return offset, dna_sequence[offset:offset + 3]


def import_codon_table(csv_file):
    """
    Import the codon table from a CSV file.

    Each non-empty row is read as ``amino_acid, codon1, codon2, ...``.

    Args:
        csv_file (str): The path to the CSV file.

    Returns:
        dict: Maps the first cell of each row (the amino acid) to the list
        of the remaining cells (its codons, possibly including empty
        strings from padded rows).
    """
    genetic_code = {}
    # utf-8-sig drops a leading BOM; newline='' is what the csv module
    # expects so that it can handle line endings itself.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields [] for blank lines; indexing row[0]
                # on those would raise IndexError.
                continue
            amino_acid, *codons = row
            genetic_code[amino_acid] = codons
    return genetic_code


def get_codons(codon_table, amino_acid, current_codon):
    """
    Pick the codon for an amino acid that is closest to the current codon.

    Both ``amino_acid`` and ``current_codon`` are upper-cased before use.
    Empty entries in the codon list are ignored. Among the remaining codons
    the one with the fewest position-wise differences to ``current_codon``
    is chosen; on a tie the codon listed first wins.

    Args:
        codon_table (dict): Maps amino acid -> list of codons.
        amino_acid (str): The target amino acid.
        current_codon (str): The codon currently present in the sequence.

    Returns:
        str: The selected codon.
        None: If the amino acid is in the table but has no non-empty codon.
        list: An empty list if the amino acid is not in the table.
    """
    residue = amino_acid.upper()
    reference = current_codon.upper()

    if residue not in codon_table:
        return []

    def mismatch_count(candidate):
        # Position-wise comparison; zip stops at the shorter string.
        return sum(1 for base, ref_base in zip(candidate, reference) if base != ref_base)

    candidates = [entry for entry in codon_table[residue] if entry]
    # min() returns the first minimal element, so earlier codons win ties.
    return min(candidates, key=mismatch_count, default=None)

def design_primer(dna_sequence, codon_start, mutant_codon, length):
    """
    Build a mutagenic primer centred on a codon.

    The primer is the ``length`` bases upstream of the codon, followed by
    ``mutant_codon``, followed by the ``length`` bases downstream of it.
    Flanks are shortened where they would run past either end of
    ``dna_sequence``.

    Args:
        dna_sequence (str): The template DNA sequence.
        codon_start (int): 0-based index of the first base of the codon to
            replace (expected to be non-negative).
        mutant_codon (str): The codon to insert in place of the original.
        length (int): Number of flanking bases wanted on each side.

    Returns:
        Seq: The primer sequence.
    """
    # Clamp to 0: a negative slice start counts from the END of the string,
    # which would silently drop or corrupt the upstream flank.
    start_index = max(codon_start - length, 0)
    codon_end = codon_start + 3
    # Slicing past the end of the string is safe; it just truncates.
    end_index = codon_end + length
    primer_sequence = "".join([
        dna_sequence[start_index:codon_start],
        mutant_codon,
        dna_sequence[codon_end:end_index],
    ])
    return Seq.Seq(primer_sequence)


def save_primers_to_csv(primer_dict):
    """
    Write primer names and sequences to 'primers.csv' in the working directory.

    NOTE: a second function with this same name is defined further down in
    this file; once that definition runs it replaces this one.

    Args:
        primer_dict (dict): Maps primer name -> primer sequence.
    """
    output_path = 'primers.csv'  # fixed output location

    with open(output_path, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(['Primer Name', 'Sequence'])  # header row
        # One row per primer, in dictionary order.
        out.writerows([name, sequence] for name, sequence in primer_dict.items())



def parse_mutations(file_path):
    """
    Parse the mutations CSV file and extract the requested mutations.

    The file must have a header row containing the columns 'Position' and
    'Mutation'.

    Args:
        file_path (str): Path to the mutations CSV file.

    Returns:
        list: One ``(position, mutation)`` tuple per data row, both values
        as the strings read from the file.

    Raises:
        KeyError: If a required column is missing.
    """
    # utf-8-sig strips a leading BOM (common in spreadsheet exports), which
    # would otherwise turn the first header into '\ufeffPosition'. This
    # matches how import_codon_table opens its CSV. newline='' is what the
    # csv module expects.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as file:
        return [(row['Position'], row['Mutation']) for row in csv.DictReader(file)]

def save_primers_to_csv(primer_dict, file_path='primers.csv'):
    """
    Save the primer dictionary to a CSV file.

    Args:
        primer_dict (dict): Maps a mutation label to a
            ``(primer_sequence, tm)`` pair.
        file_path (str): Destination CSV path. Defaults to 'primers.csv'
            so the function can also be called with only the dictionary.
    """
    with open(file_path, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Mutation', 'Primer Sequence', 'Tm'])  # header row
        # str() so that sequence objects are written as plain text.
        writer.writerows(
            [mutation, str(primer_seq), tm]
            for mutation, (primer_seq, tm) in primer_dict.items()
        )

def convert_mismatches(primer_seq, target_seq):
    """
    Mark every primer base that differs from the target with 'X'.

    The two sequences are compared position by position; comparison stops
    at the end of the shorter one.

    Args:
        primer_seq: The primer sequence.
        target_seq: The template sequence the primer is compared against.

    Returns:
        str: The primer with matching bases kept and mismatches shown as 'X'.
    """
    return ''.join(
        primer_base if primer_base == target_base else 'X'
        for primer_base, target_base in zip(primer_seq, target_seq)
    )


In [4]:
# Example usage
# Codon table: high frequency codons for X_laevis
csv_file = '/content/mutagenic_primer_design/example/codontableXlaevis.csv'
codon_table = import_codon_table(csv_file)

# File path to a fasta sequence containing the coding DNA sequence
cds_sequence = parse_fasta("/content/mutagenic_primer_design/example/Shaker_FL.fa")
protein_sequence = translate_dna(cds_sequence)
starting_residue = 1  # residue number of the first codon

# Select mutations
mutations = parse_mutations("mutations.csv")  # File path to a CSV file containing the desired mutations
threshold_tm = 78.0  # Set the threshold temperature here (78 works ok)
max_length = 26  # max flank per side; max primer length is 2*max_length+3

# Create a dictionary to store the generated primers
primer_dict = {}

for position, mutation in mutations:
    codonstart, mutatecodon = find_codon_position(cds_sequence, int(position), starting_residue)
    if codonstart < 0 or len(mutatecodon) != 3:
        print(f"Skipping {position}_{mutation}: position is outside the coding sequence")
        continue

    mutant_codon = get_codons(codon_table, mutation, mutatecodon)
    if not mutant_codon:
        # get_codons returns [] or None when it cannot supply a codon.
        print(f"Skipping {position}_{mutation}: no codon available for this amino acid")
        continue

    # The upstream flank cannot be longer than the number of bases before
    # the codon; going further would make the slice start negative.
    flank_limit = min(max_length, codonstart)

    tm = 0.0  # Initialize tm to start the loop
    length = 7  # Flank length before the first increment (first tried: 8)
    primer_sequence = None
    # Grow both flanks one base at a time until the Tm threshold is met.
    # Testing "length < flank_limit" BEFORE the increment keeps the flank
    # at or below max_length, as promised by the comment on max_length.
    while tm < threshold_tm and length < flank_limit:
        length += 1  # Increase the length by 1
        primer_sequence = design_primer(cds_sequence, codonstart, mutant_codon, length).upper()
        primer_target = Seq.Seq(cds_sequence[codonstart - length:codonstart + 3 + length]).upper()
        mismatch_primer = convert_mismatches(primer_sequence, primer_target)
        tm = calculate_tm(mismatch_primer)

    if primer_sequence is None:
        print(f"Skipping {position}_{mutation}: codon is too close to the start of the sequence")
        continue

    # Only the final primer is kept, so the reverse complement is computed once.
    primer_reverse_complement = str(Seq.Seq(primer_sequence).reverse_complement())
    primer_dict['_'.join([position, mutation, 'Fw'])] = primer_sequence, tm
    primer_dict['_'.join([position, mutation, 'Rv'])] = primer_reverse_complement, tm

# Specify the desired file path to save the primers
csv_file_path = 'primers.csv'

# Call the function to save the primer dictionary as a CSV file
save_primers_to_csv(primer_dict, csv_file_path)
# Download results
from google.colab import files
files.download(csv_file_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>