# Run Sequence 


`IDT_Ordering_Module`
- Removes duplicate oligos and creates IDT ordering format. <br>

`Module 1` 
- Re-formats the IDT information sheet.  <br>

`Module 2` 
- Add water to primers/guides, create picklists for Fluent.
- Create picklist for transfer to 384well Echo source plate for PCR setup.

`Module 3` 
- Picklist for guide mixing and phosphorylation. <br>

`Module 4`
- Picklist for primer and template mixing in Echo.
- There is an extension module for module 4 meant to be used for extension PCR. 

`Module 5`
- Equimolar mixing of PCR fragments.

Each module is dependent on successful run of the previous module. 

# Module 1
## Re-format the IDT file

######################################################<br><br>
**Format the IDT CSV file** <br>

This script will remove the unneccessary columns from the IDT file. <br>
- Remove spaces from DNA seqeunce e.g. ACG GTC TGC --> ACGGTCTGC. 
- Change A01 to A1 i.e. remove `0` from `A01` to make it `A1`.
- Write a new IDT file that starts with `edited_IDT_`<br>
- Split into multiple depending upon how many plate values it contains. 
- - number of plates = number of A1 in the well column

#####################################################

In [5]:
import os, sys
import traceback
import pandas as pd
import re
import math
import glob
import numpy as np
from string import ascii_uppercase


def Read_Original_IDT_file ():
            
    filename = glob.glob("Original_IDT_*.csv")
    
    if len(filename) > 1:
        print('\n \t Please combine all Original_IDT_ csv files into one file')
    
    elif not filename:
            print('\n \t Original IDT file not found')
    else: 
        print('\n IDT input file:-  ', filename, '\n')
        Rewrite_IDT_file(filename[0])  
                
                
def write_to_subfolder(filename, dataframe_object):
    
    curr_dir = os.getcwd()
    subdirectory = 'Generated_Files'
    new_dir = os.path.join(curr_dir,subdirectory)
    counter = 0
    
    if curr_dir.find(subdirectory) == -1:
        
        try:
            os.mkdir(subdirectory)
        except Exception:
            pass
    
        os.chdir(new_dir)
        print("File written :-   ", filename)
        dataframe_object.to_csv(filename, index=False)
        counter = 1
        os.chdir('..')
        
    else:
        os.chdir(new_dir)
        print("File written :-   ", filename)
        dataframe_object.to_csv(filename, index=False)
        os.chdir('..')
          
        
def Rewrite_IDT_file (filename):
    
    df_all = pd.read_csv(filename)
    keep = ['Sequence Name', 'Sequence', 'nmoles', 'Well Position']

    for column in df_all:
        if column in keep:
            continue
        else:
            df_all  = df_all.drop(column, axis=1)
            
    df_temp = pd.DataFrame()
    df_temp = df_all
    
    for index, row in df_all.iterrows():
        Sequence1 = str(df_all['Sequence'].loc[index])
        #print(Sequence1)
        Sequence = Sequence1.strip()
        Sequence = Sequence.replace(" ", "")
        df_temp['Sequence'].replace({Sequence1:Sequence}, inplace = True)
          
        Seq_name = df_all['Sequence Name'].loc[index]
        Seq_name1 = Seq_name.replace("NS_", "")
        df_temp['Sequence Name'].replace({Seq_name:Seq_name1}, inplace = True)
               
        Dest1 =  df_all['Well Position'].loc[index]
        
        try:
            temp_2 = Dest1.index('0')
            if temp_2 == 1:
                Dest = Dest1.replace('0','')
            else:
                Dest = Dest1                
        except:
            Dest = Dest1
            
        df_temp['Well Position'].replace({Dest1:Dest}, inplace = True)         
                       
        
    new_file = 'all_edited'+filename[8:]
    write_to_subfolder(new_file, df_temp)
    split_IDT_File (df_temp, filename)

    
def split_IDT_File (dataframe_object, filename_1):
    
    df_all = dataframe_object
    # This one macthes all that contain A1, even A12 and so on
    Match = df_all[df_all['Well Position'].str.match('A1')]
    # This does not match A1 specifically
    Match_2 = df_all[df_all['Well Position'].str.match('A1\d{1}')] 
        
    counter = 0
    
    length  = len(Match) - len(Match_2)
    print('\n This file contains information of {} plates. \n'.format(length))
    
    if length > 1:
    
        for index, row in Match.iterrows():
            
            pos = Match['Well Position'].loc[index]
            
            if pos != 'A1':
                continue
            
            if counter  == 0:
                start_ind  = index 
                counter = counter + 1
                continue    
                
            elif counter == 1:
                end_ind = index
            else:
                start_ind = end_ind
                end_ind = index
                    
            counter = counter + 1

            df_temp = pd.DataFrame()
            df_temp = df_all[start_ind:end_ind]
            df_temp = df_temp.reset_index(drop = True)
            Seq_name = df_temp['Sequence Name'].loc[1]
            Type = Filename_primers_guides(Seq_name)
                
            filename_2 = Type + str(counter) + '_' + filename_1[13:-4] + '.csv'
            print('File :- ',filename_2, "\t Number of items: \t", len(df_temp), '\n')
            write_to_subfolder(filename_2, df_temp)
            
        start_ind = end_ind
        df_temp = pd.DataFrame()
        df_temp = df_all[start_ind:]
        df_temp = df_temp.reset_index(drop = True)
        
        Seq_name = df_temp['Sequence Name'].loc[1]
        Type = Filename_primers_guides(Seq_name)

        filename_2 = Type + str(counter) + '_' + filename_1[13:-4] + '.csv'
        print('File :- ', filename_2, "\t Number of items: \t", len(df_temp), '\n')
        write_to_subfolder(filename_2, df_temp)
     
                    
    else:
        print('The file contains information about only one plate')
        
        
def Filename_primers_guides(Name):
        
    isGuide = re.search("[G][0-9]\S[j][0-9]", Name)
    isPrimer = ((re.search("[F][P]", Name) or re.search("[R][P]", Name))) 
    
    if isGuide:
        Type = 'IDT_Guides_'
    elif isPrimer:
        Type = 'IDT_Primers_'
        
    return Type


    
if __name__ == '__main__':
    
    print('Working in:- \t', os.getcwd(), '\n')
    Read_Original_IDT_file()
    

Working in:- 	 /Users/nilmani/Desktop/Python/High_GC_Picklist 


 IDT input file:-   ['Original_IDT_highGC_Oct21.csv'] 

File written :-    all_edited_IDT_highGC_Oct21.csv

 This file contains information of 2 plates. 

File :-  IDT_Primers_2_highGC_Oct21.csv 	 Number of items: 	 59 

File written :-    IDT_Primers_2_highGC_Oct21.csv
File :-  IDT_Guides_2_highGC_Oct21.csv 	 Number of items: 	 58 

File written :-    IDT_Guides_2_highGC_Oct21.csv


## Module 1 has been completed.

### Manual Task
Rename the correct output files in following format: <br>
`IDT_Primers_` <br>
`IDT_Guides_`<br>


**Proceed to Module 2**