# Run Sequence 


`IDT_Ordering_Module`
- Removes duplicate oligos and creates IDT ordering format. <br>

`Module 1` 
- Re-formats the IDT information sheet.  <br>

`Module 2` 
- Add water to primers/guides, create picklists for Fluent.
- Create a picklist for transfer to the 384-well Echo source plate for PCR setup.

`Module 3` 
- Picklist for guide mixing and phosphorylation. <br>

`Module 4`
- Picklist for primer and template mixing in Echo.
- There is an extension module for Module 4, intended for extension PCR.

`Module 5`
- Equimolar mixing of PCR fragments.

Each module depends on a successful run of the previous module.

# Module 4: PCR

#### `Module 3` must be run before this module.

- For detailed instructions on this module, please go to the end of this script.


In [2]:
import os, sys
import traceback
import pandas as pd
import re
import math
import numpy as np
from string import ascii_uppercase
import glob
from natsort import natsorted
import time

def write_to_subfolder(filename, dataframe_object, subdirectory):
    """Write a dataframe as CSV into a subfolder of the current directory.

    The subfolder is created when it does not exist yet. The working
    directory is never changed, so a failure while writing cannot leave the
    process stranded inside the subfolder.

    Args:
        filename: Name of the CSV file to create inside ``subdirectory``.
        dataframe_object: Object exposing ``to_csv(path, index=False)``
            (a pandas DataFrame in this notebook).
        subdirectory: Folder name, relative to the current working directory.
    """
    target_dir = os.path.join(os.getcwd(), subdirectory)

    # exist_ok avoids the previous "try mkdir / ignore every error" pattern
    # while still surfacing real problems such as missing permissions.
    os.makedirs(target_dir, exist_ok=True)

    print("\t File written:-   ", filename)
    dataframe_object.to_csv(os.path.join(target_dir, filename), index=False)


def Write_384ECHO_pos(df_primer, df_IDT_primers):
    """Add the 384-well Echo source position of every primer to ``df_primer``.

    A row counts as a primer when its ``Name`` contains ``FP`` or ``RP``.
    For primers, the ``Sequence`` is looked up by exact equality in
    ``df_IDT_primers`` and the ``384_well_position`` of the first matching
    row is used. Non-primer rows and primers without an exact match get an
    empty string, so the new column always has one entry per row.

    Args:
        df_primer: DataFrame with ``Name`` and ``Sequence`` columns. It is
            modified in place (column ``384well_pos_ECHO`` is added).
        df_IDT_primers: DataFrame with ``Sequence`` and
            ``384_well_position`` columns.

    Returns:
        The same ``df_primer`` object, with the ``384well_pos_ECHO`` column.
    """
    # Exact-match lookup table. setdefault keeps the FIRST well seen for a
    # duplicated sequence, which is the row the previous loop picked.
    well_by_sequence = {}
    for idt_sequence, well in zip(df_IDT_primers['Sequence'],
                                  df_IDT_primers['384_well_position']):
        well_by_sequence.setdefault(idt_sequence, well)

    Position_384well_list = []

    for Name, Sequence in zip(df_primer['Name'], df_primer['Sequence']):
        isPrimer = re.search("FP|RP", str(Name))

        if not isPrimer:
            Position_384well_list.append('')
            continue

        # The sequence is compared literally, never as a regular expression:
        # a prefix of a longer oligo must not count as a hit, and characters
        # such as '+' or '*' in a sequence must not be interpreted.
        if Sequence in well_by_sequence:
            Position_384well_list.append(well_by_sequence[Sequence])
        else:
            print('Primer not found in IDT order', Name, Sequence)
            Position_384well_list.append('')

    df_primer['384well_pos_ECHO'] = Position_384well_list

    return df_primer
    

def Read_Primer_Position_file():
    """Run the PCR picklist pipeline on the files found one folder down.

    Steps:
      1. Locate the ``*Pos_Primers_Guides_neb_*csv`` position files and the
         ``*IDT_Primers_*.csv`` file in the immediate subfolders.
      2. For every position file, add the 384-well Echo position of each
         primer, re-write the file into ``Position_files`` and build the
         PCR picklist for it.
      3. Write the combined picklist to ``PCR_files/All_PCRs_ECHO_setup.csv``
         and hand it to ``Add_96well_dest`` to split it into 96-well plates.

    Raises:
        SystemExit: When no position file or no IDT primer file is found.
    """
    # Natural sort so that e.g. "..._2GC" comes before "..._10GC".
    file_list = natsorted(glob.glob("**/*Pos_Primers_Guides_neb_*csv"))
    IDT_primers = glob.glob("**/*IDT_Primers_*.csv")

    if not file_list or not IDT_primers:
        print('Verify original files exists in subfolder!!!')
        # sys.exit raises SystemExit just like the interactive-only quit()
        # helper did, but is also available when the site module is not.
        sys.exit(1)

    # NOTE(review): when several IDT files match, the first one returned by
    # glob is used; glob does not guarantee an order.
    df_IDT_primers = pd.read_csv(IDT_primers[0])
    print(' Number of primers received from IDT:- ', len(df_IDT_primers))

    picklists = []

    for filename in file_list:
        df_plasmid = pd.read_csv(filename)
        df_primer = Write_384ECHO_pos(df_plasmid, df_IDT_primers)

        # basename works for both '/' and '\\' separated glob results.
        write_to_subfolder(os.path.basename(filename), df_primer, 'Position_files')

        picklists.append(Create_PCR_Picklist(df_primer))

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df_pick = pd.concat(picklists)

    filename = 'All_PCRs_ECHO_setup.csv'
    print('\n Combined list of all PCR')
    write_to_subfolder(filename, df_pick, 'PCR_files')
    Add_96well_dest(df_pick)

def read_PCR_file_with_template():
    """Load the first ``*All_PCRs_ECHO_with_template*.csv`` found one folder down.

    The parsed table is currently discarded and nothing is returned; the
    follow-up call to ``Add_96well_dest`` is disabled.

    Raises:
        IndexError: When no matching file exists.
    """
    # So far this file has been prepared by hand; an automated solution is
    # still needed.
    candidates = glob.glob('**/*All_PCRs_ECHO_with_template*.csv')
    first_match = candidates[0]

    pd.read_csv(first_match)
    # Add_96well_dest(...) would be called with the loaded table here.
    

def Create_PCR_Picklist(df_primers):
    """Build the Echo primer picklist for one primer position table.

    Rows are scanned in order. Whenever a row whose ``Name`` contains ``FP``
    is directly followed by a row whose ``Name`` contains ``RP`` and both
    carry the same fragment number (``FP_<n>`` / ``RP_<n>``), two picklist
    rows are produced: one for the forward and one for the reverse primer.
    ``Ta``, ``PCR size`` and ``Template_Name`` are filled on the forward row
    only; the reverse row carries empty strings there.

    Args:
        df_primers: DataFrame with the columns ``Name``,
            ``384well_pos_ECHO``, ``Ta``, ``Template Length`` and
            ``Template_Name``. The forward primer must be listed directly
            before its reverse primer.

    Returns:
        DataFrame with the columns ``Source Well``, ``Transfer Volume``,
        ``Destination Well``, ``Primer Name``, ``Ta``, ``PCR size`` and
        ``Template_Name``. ``Destination Well`` is left empty; it is filled
        in later by another function.
    """
    column_names = ["Source Well", "Transfer Volume", "Destination Well",
                    "Primer Name", "Ta", "PCR size", "Template_Name"]

    # Transfer volume per primer; presumably nanolitres for the Echo
    # (the template volume elsewhere in this file is commented as nl).
    Vol = 2500
    d_well = ''  # destination wells are assigned by another function

    names = df_primers['Name'].tolist()
    source_wells = df_primers['384well_pos_ECHO'].tolist()

    rows = []

    # Positional iteration over adjacent pairs: the last row has no
    # successor, hence len - 1.
    for pos in range(len(names) - 1):
        Fwd_primer = names[pos]
        Rev_primer = names[pos + 1]

        if not re.search("FP", str(Fwd_primer)):
            continue
        if not re.search("RP", str(Rev_primer)):
            continue

        # Both primers must belong to the same PCR fragment.
        Fragment_fwd = re.findall(r'FP_(\d{1,2})', Fwd_primer)
        Fragment_rev = re.findall(r'RP_(\d{1,2})', Rev_primer)
        if Fragment_fwd != Fragment_rev:
            continue

        rows.append([source_wells[pos], Vol, d_well, Fwd_primer,
                     df_primers['Ta'].iloc[pos],
                     df_primers['Template Length'].iloc[pos],
                     df_primers['Template_Name'].iloc[pos]])
        rows.append([source_wells[pos + 1], Vol, d_well, Rev_primer,
                     '', '', ''])

    # Build the frame in one go; DataFrame.append was removed in pandas 2.0.
    return pd.DataFrame(rows, columns=column_names)


def _construct_prefix(primer_name):
    """Return the leading construct tag of a primer name as a findall list.

    Tries ``<1-2 digits><letters>`` first (e.g. ``1GC``) and falls back to
    ``<letters><1-2 digits>`` (e.g. ``GC1``). The list is empty when neither
    pattern matches at the start of the name.
    """
    prefix = re.findall(r'^\d{1,2}[A-Za-z]{1,}', primer_name)
    if not prefix:
        prefix = re.findall(r'^[A-Za-z]{1,}\d{1,2}', primer_name)
    return prefix


def Find_next_same_PCRs(df_PCR_copy, start_ind):
    """Count consecutive rows, from ``start_ind``, that share a construct tag.

    Two neighbouring rows belong together when the leading tag of their
    ``Primer Name`` (see ``_construct_prefix``) is equal. Names without any
    recognisable tag compare equal to each other.

    Args:
        df_PCR_copy: DataFrame with a ``Primer Name`` column. Rows are
            addressed by position, which equals the label for the
            reset (0..n-1) index the caller passes in.
        start_ind: Position of the first row of the run.

    Returns:
        Number of rows in the run, at least 1.
    """
    primer_names = df_PCR_copy["Primer Name"]
    total_rows = len(df_PCR_copy)

    number_same_PCR = 1

    while total_rows > start_ind + 1:
        Curr_name = _construct_prefix(primer_names.iloc[start_ind])
        Next_name = _construct_prefix(primer_names.iloc[start_ind + 1])

        if Curr_name != Next_name:
            break

        start_ind = start_ind + 1
        number_same_PCR = number_same_PCR + 1

    return number_same_PCR
    
    
def Wellplate96_PCR():
    """List the usable wells of a 96-well PCR plate in row-major order.

    Wells A1, A2, H11 and H12 are left out so they stay free for a ladder
    or another standard when running on the FA; this leaves 92 wells per
    plate.

    Returns:
        List of well names such as ``'A3'``, row by row from A to H.
    """
    reserved_wells = ('A1', 'A2', 'H11', 'H12')
    plate_rows = ascii_uppercase[:ascii_uppercase.index('H') + 1]

    all_wells = [row + str(column)
                 for row in plate_rows
                 for column in range(1, 13)]

    return [well for well in all_wells if well not in reserved_wells]

    
def _write_plate_picklists(df_plate, dest_wells, plate_number):
    """Attach destination wells to one plate's rows and write its picklists."""
    df_plate = df_plate.reset_index(drop=True)
    df_plate['Destination Well'] = dest_wells
    print('\n Number of PCRs in this plate ----> ', int(len(df_plate)/2))
    filename = 'ECHO_PCR_setup_plate_'+str(plate_number)+'.csv'
    add_template(df_plate, filename)
    master_mix_picklist_ECHO(df_plate, plate_number, filename)


def Add_96well_dest(df_PCR):
    """Assign 96-well destination wells and write the per-plate picklists.

    The combined picklist holds two consecutive rows per PCR (forward
    primer row, then reverse primer row). Both rows of a PCR receive the
    same destination well, taken in order from ``Wellplate96_PCR()``. Rows
    that share a construct tag (see ``Find_next_same_PCRs``) are kept on
    the same plate: when the group starting at the current row would end
    past the running row limit, the rows collected so far are written out
    as one plate and a new plate is started.

    For every plate, ``add_template`` and ``master_mix_picklist_ECHO`` are
    called to write the corresponding files.

    Args:
        df_PCR: Combined picklist with at least the columns
            ``Primer Name`` and ``Destination Well``. The index is reset
            internally.
    """
    Wellplate96 = Wellplate96_PCR()

    Plate_counter = 1
    Well_index_96well = 0

    # NOTE(review): the row limit starts at 183 and grows by 183 per plate,
    # independent of the row at which a plate actually started. After an
    # early plate break, a later plate may therefore be offered more PCRs
    # than there are wells (IndexError below). Kept as in the original;
    # confirm the intended capacity rule before changing it.
    initial_cond = 183

    df_PCR = df_PCR.reset_index(drop=True)
    total_rows = len(df_PCR)

    group_end = 0       # position one past the last row of the current group
    plate_start = 0     # position of the first row of the current plate
    Dest_well_list = []

    for index in range(total_rows):

        # A new group of same-construct PCRs starts at this row.
        if index == group_end:
            group_end = index + Find_next_same_PCRs(df_PCR, index)

        # Check whether the whole group still fits on the current plate.
        if group_end > initial_cond:
            initial_cond = initial_cond + 183

            _write_plate_picklists(df_PCR.iloc[plate_start:index].copy(),
                                   Dest_well_list, Plate_counter)

            Dest_well_list = []
            plate_start = index
            Plate_counter = Plate_counter + 1
            Well_index_96well = 0

        # Every even position is the first (forward) row of a PCR: take the
        # next free well and record it for both rows of that PCR.
        if index % 2 == 0:
            dest_well = Wellplate96[Well_index_96well]
            Well_index_96well = Well_index_96well + 1
            Dest_well_list.extend([dest_well, dest_well])

    # Rows collected since the last plate break form the final plate.
    # Slicing once replaces the row-by-row DataFrame.append calls, which
    # pandas 2.0 no longer provides.
    if total_rows > 0:
        _write_plate_picklists(df_PCR.iloc[plate_start:].copy(),
                               Dest_well_list, Plate_counter)

    print('\n Number of 96well plates required for PCR ----->  ', Plate_counter)

    
def fluent_picklist():
    """Return an empty picklist table with the Fluent column layout."""
    return pd.DataFrame(columns=[
        "Source plate",
        "S-Well position",
        "Destination plate",
        "D-Well position",
        "Volume",
        "Primer",
    ])

    
def Fluent_96well_format():
    """List all 96 wells column by column as ``'<number> <well>'`` strings.

    Numbering starts at 1 and runs down each column (A..H) before moving to
    the next column, e.g. ``'1 A1'``, ``'2 B1'``, ..., ``'96 H12'``.
    """
    plate_rows = ascii_uppercase[:ascii_uppercase.index('H') + 1]

    column_major_wells = [row + str(column)
                          for column in range(1, 13)
                          for row in plate_rows]

    return [str(number) + ' ' + well
            for number, well in enumerate(column_major_wells, start=1)]


def Find_well_number(d_well, Wellplate96):
    """Return the well number that belongs to a well name.

    Args:
        d_well: Well name to look up, e.g. ``'B1'``.
        Wellplate96: List of ``'<number> <well>'`` strings, e.g. ``'2 B1'``.

    Returns:
        The number part (as a string) of the entry whose well name equals
        ``d_well``. If the name occurs more than once, the last entry wins.

    Raises:
        ValueError: When ``d_well`` is not present in ``Wellplate96``
            (previously this surfaced as an UnboundLocalError).
    """
    number_by_well = {}
    for item in Wellplate96:
        well_n = item.split(' ')
        number_by_well[well_n[1]] = well_n[0]

    if d_well not in number_by_well:
        raise ValueError('Well not found in plate layout: ' + str(d_well))

    return number_by_well[d_well]
    
    
def Source_Wells(total_PCRs, counter):
    """Return the 1-based numbers of the master-mix source wells needed.

    One source well is planned per 35 PCRs (rounded up).

    Args:
        total_PCRs: Number of PCRs to supply; may be a float.
        counter: Unused; kept so existing calls keep working.

    Returns:
        ``[1, 2, ..., n]`` with ``n = ceil(total_PCRs / 35)``; an empty list
        when ``total_PCRs`` is zero or negative.
    """
    # The previous version also built per-well master-mix volume lists that
    # were never returned or used; they are dropped here.
    pcrs_per_source_well = 35
    Source_well_n = int(math.ceil(total_PCRs / pcrs_per_source_well))

    return list(range(1, Source_well_n + 1))
    

def master_mix_picklist_ECHO(df_96pcr_plate,  counter, filename):
    """Write the Fluent master-mix picklist for one 96-well PCR plate.

    The plate table holds two consecutive rows per PCR; only the first row
    of each pair is used, so every destination well receives master mix
    exactly once. All transfers draw from well ``A1`` of the plate
    ``Master_mix[001]`` into ``PCR_reaction[001]``.

    Args:
        df_96pcr_plate: Plate table with the columns ``Destination Well``
            and ``Primer Name``.
        counter: Plate number; currently unused, kept for existing callers.
        filename: Name of the plate's Echo setup file, expected to look like
            ``ECHO_PCR_setup_plate_<n>.csv``. Everything after the first 15
            characters (the ``ECHO_PCR_setup_`` prefix) is reused in the
            output name ``Picklist_FLUENT_mastermix_<...>``.
    """
    volume = 45  # presumably microlitres of master mix per PCR -- confirm
    source_plate = 'Master_mix[001]'
    Dest_plate = "PCR_reaction[001]"

    # NOTE(review): every transfer uses source well A1; the number of source
    # wells is not taken into account here.
    S_well_number = 'A1'

    # Rows 0, 2, 4, ... are the first row of each PCR pair.
    first_rows = df_96pcr_plate.iloc[::2]

    rows = [
        [source_plate, S_well_number, Dest_plate, d_well, volume, Primer]
        for d_well, Primer in zip(first_rows['Destination Well'],
                                  first_rows['Primer Name'])
    ]

    # Built in one step; DataFrame.append was removed in pandas 2.0.
    df_temp = pd.DataFrame(rows, columns=fluent_picklist().columns)

    filename = 'Picklist_FLUENT_mastermix_'+filename[15:]
    write_to_subfolder(filename, df_temp, 'PCR_files')

def add_template(df_PCR, filename):
    """Insert template transfers into a plate picklist and write it to disk.

    The template source wells are read from the first
    ``*Original_Templates_384well*.csv`` found one folder down (columns
    ``Template_Name`` and ``384_Well``). The plate rows are copied in
    order; after each row, the ``Template_Name`` of the PREVIOUS row is
    looked up, so the template transfer for a PCR lands directly after its
    second (reverse primer) row and uses the destination well of that PCR.
    Rows whose predecessor has an empty template name get no extra row.

    The result is written to ``PCR_files/Picklist_<filename>``.

    Args:
        df_PCR: Plate table with at least ``Template_Name`` and
            ``Destination Well``; the first three columns are expected to
            be source well, transfer volume and destination well.
        filename: Base name of the plate file.
    """
    df_template = None
    File_template = glob.glob('**/*Original_Templates_384well*.csv')
    try:
        df_template = pd.read_csv(File_template[0])
    except (IndexError, OSError, ValueError):
        # No file, unreadable file or unparsable CSV. Report it and carry
        # on: every template is then reported as not found below, instead
        # of the run dying with a NameError.
        print('\n \t ERROR: Need a csv file for template positions \n')

    # Exact-name lookup; for duplicated names the last entry wins.
    well_by_template = {}
    if df_template is not None:
        well_by_template = dict(zip(df_template['Template_Name'].astype(str),
                                    df_template['384_Well']))

    df_PCR = df_PCR.reset_index(drop=True)
    column_list = list(df_PCR.columns)

    Volume = 500  #nl of Template to add
    # A template row only fills the first three columns.
    padding = [''] * (len(column_list) - 3)

    rows = []

    for pos in range(len(df_PCR)):

        rows.append(df_PCR.iloc[pos].tolist())

        if pos == 0:
            continue

        template = str(df_PCR['Template_Name'].iloc[pos - 1])

        if template == '':
            continue

        if template not in well_by_template:
            print('Template not found -->  ', template)
            continue

        Dest = df_PCR['Destination Well'].iloc[pos - 1]
        rows.append([well_by_template[template], Volume, Dest] + padding)

    # Built in one step; DataFrame.append was removed in pandas 2.0.
    df_temp = pd.DataFrame(rows, columns=column_list)

    filename = 'Picklist_'+ filename
    write_to_subfolder(filename, df_temp, 'PCR_files')
        
    
if __name__ == '__main__':

    # Run the whole Module 4 pipeline and report the wall-clock time taken.
    start_time = time.time()

    print('Wroking Directory: \t', os.getcwd(), '\n')
    Read_Primer_Position_file()

    elapsed = time.time() - start_time
    print("\n --- %s seconds ---" % elapsed)

    

Wroking Directory: 	 /Users/nilmani/Desktop/Python/High_GC_Picklist 

 Number of primers received from IDT:-  59
	 File written:-    Pos_Primers_Guides_neb_1GC.csv
	 File written:-    Pos_Primers_Guides_neb_2GC.csv
	 File written:-    Pos_Primers_Guides_neb_3GC.csv
	 File written:-    Pos_Primers_Guides_neb_4GC.csv
	 File written:-    Pos_Primers_Guides_neb_5GC.csv

 Combined list of all PCR
	 File written:-    All_PCRs_ECHO_setup.csv

 Number of PCRs in this plate ---->  33
	 File written:-    Picklist_ECHO_PCR_setup_plate_1.csv
	 File written:-    Picklist_FLUENT_mastermix_plate_1.csv

 Number of 96well plates required for PCR ----->   1

 --- 0.3586108684539795 seconds ---


In [6]:
import glob
import pandas as pd
import os

# Sanity check: count how many picklist rows reference each template in the
# combined PCR file(s) written by the cell above.
print(os.getcwd())

# Without recursive=True, '**' behaves like '*', so this only looks exactly
# one folder below the working directory.
File_template = glob.glob('**/*All_PCRs_ECHO_setup*')
print(File_template)

for filename in File_template:
    df_template = pd.read_csv(filename)
    # value_counts() skips missing values, so rows with an empty
    # Template_Name do not show up in the tally.
    v = df_template['Template_Name'].value_counts()
    print(filename, v)

/Users/nilmani/Desktop/Python/High_GC_Picklist
['PCR_files/All_PCRs_ECHO_setup.csv']
PCR_files/All_PCRs_ECHO_setup.csv Template_2    28
Template_1     5
Name: Template_Name, dtype: int64


### INSTRUCTIONS!!!

`Add 384-well position to all Pos_ files for primers`

Use the `384_well_position` column added to the `IDT_Primers_` file (this was done in Module 2) to add the 384-well position to the original primer/guide file for each position. The `Pos_` files are then re-written into the folder `Position_files`.


- Mark the position of the primers in the original `Pos_IDT_` files.
- Later, rewrite the `Pos_IDT_` files so that they contain the positions in the 384-well plate.

The position of the primers in the 384-well plate will be used for setting up the PCR. It is written to the original files as an additional column, `384well_pos_ECHO`.

PCR Master mix will need to be in a separate plate. <br>
The primers and the templates will be on one single Echo source plate. <br>
The primers and templates can be re-used. <br>
The csv file for position of template will have to be supplied by the user.


#### IMPORTANT: Extension PCR
-   For PCRs requiring extension PCR, run `Module_4_sub_extension` before proceeding.
