In [2]:
## This script validates that all designed primers and guides are
## part of the original plasmid sequence.


import numpy as np
import pandas as pd
import re
import glob
import os
from snapgene_reader import snapgene_file_to_dict
from natsort import natsorted
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord



def Validate_primers_guides(plasmid_dna, Primer_file):
    """Report primers/guides that cannot be located in a plasmid sequence.

    Each row of the CSV is searched for (case-insensitively) in the plasmid
    sequence, first as written and then as its reverse complement.

    Parameters
    ----------
    plasmid_dna : str
        Plasmid sequence as a plain string.
    Primer_file : str
        Path to a CSV file that provides 'Name' and 'Sequence' columns.

    Returns
    -------
    pandas.DataFrame
        One row per primer/guide that was NOT found, with the columns
        'Name', 'Seqeunce' and 'Search_Status' (always "Not Found").
        The sequence is stored as an upper-case plain string.
        The frame is empty (but keeps these columns) when every entry was
        found or when the CSV file could not be read.
    """
    # NOTE: 'Seqeunce' is misspelled, but it is the column header this
    # function has always produced (and that ends up in NotFound.csv), so it
    # is kept as-is for backward compatibility.
    columns = ['Name', 'Seqeunce', 'Search_Status']

    try:
        primer_df = pd.read_csv(Primer_file)
    except FileNotFoundError:
        print('\n File not found:   ', Primer_file, '\n')
        return pd.DataFrame(columns=columns)
    except (OSError, ValueError) as err:
        # pandas' EmptyDataError / ParserError derive from ValueError.
        # An unreadable file is reported and skipped rather than aborting
        # the whole validation run.
        print('\n Could not read:   ', Primer_file, ' ({})'.format(err), '\n')
        return pd.DataFrame(columns=columns)

    # The sequence is concatenated with itself so that a primer spanning the
    # end/start junction can still be matched (presumably because plasmids
    # are circular).
    plasmid = Seq(plasmid_dna.upper() * 2)

    not_found = []
    for _, row in primer_df.iterrows():
        name = str(row['Name'])
        primer = Seq(str(row['Sequence']).upper())

        if plasmid.find(primer) != -1:
            continue
        # Not on the given strand: try the opposite strand.
        if plasmid.find(primer.reverse_complement()) != -1:
            continue

        not_found.append([name, str(primer), "Not Found"])

    # Build the frame once instead of growing it row by row with
    # DataFrame.append, which was removed in pandas 2.0.
    return pd.DataFrame(not_found, columns=columns)

    
def read_DNA_Primer_Guide_files():
    """Validate the primer/guide CSV of every SnapGene file in the cwd.

    For each '*.dna' file in the current working directory (processed in
    natural sort order) the matching 'IDT_<basename>.csv' file is checked
    with Validate_primers_guides. All primers/guides that could not be
    located are written to 'NotFound.csv'; the file is only written when
    there is at least one such entry. A summary line is printed at the end.

    Returns
    -------
    None
    """
    not_found_frames = []

    for snap_file in natsorted(glob.glob("*.dna")):
        # '1MZ.dna' -> 'IDT_1MZ.csv'
        Primer_file = 'IDT_' + os.path.splitext(snap_file)[0] + '.csv'
        print('\t Filename:  ', snap_file, '\t \t', Primer_file)

        dictionary = snapgene_file_to_dict(snap_file)
        Plasmid_seq = str(dictionary['seq'])

        temp_df = Validate_primers_guides(Plasmid_seq, Primer_file)
        # Only keep frames that actually report something; concatenating
        # empty frames adds nothing.
        if not temp_df.empty:
            not_found_frames.append(temp_df)

    if not_found_frames:
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        Not_Found_Primers = pd.concat(not_found_frames, ignore_index=True)
        Not_Found_Primers.to_csv('NotFound.csv', index = False)
        Len_df = len(Not_Found_Primers)
    else:
        # Also covers a directory without any '*.dna' file, where looking up
        # the 'Search_Status' column of a column-less frame would raise
        # KeyError.
        Len_df = 0

    if Len_df > 0:
        print('\n {} primers/guides could not be located in the original DNA files'.format(Len_df))
    else:
        print('\n All primers/guides found in original DNA files')
        
        
        
if __name__ == "__main__":
    # Print the working directory first: the '*.dna' files are globbed
    # relative to it.
    working_dir = os.getcwd()
    print(working_dir, '\n')

    read_DNA_Primer_Guide_files()
  
    

C:\Users\Nilmani\Desktop\Validations\Mammalian_validations 

	 Filename:   1MZ.dna 	 	 IDT_1MZ.csv
	 Filename:   2MZ.dna 	 	 IDT_2MZ.csv
	 Filename:   3MZ.dna 	 	 IDT_3MZ.csv
	 Filename:   4MZ.dna 	 	 IDT_4MZ.csv
	 Filename:   5MZ.dna 	 	 IDT_5MZ.csv
	 Filename:   6MZ.dna 	 	 IDT_6MZ.csv
	 Filename:   7MZ.dna 	 	 IDT_7MZ.csv
	 Filename:   8MZ.dna 	 	 IDT_8MZ.csv
	 Filename:   9MZ.dna 	 	 IDT_9MZ.csv
	 Filename:   10MZ.dna 	 	 IDT_10MZ.csv

 All primers/guides found in original DNA files
