In [None]:
# Notebook-wide imports and display options.
# primer3 provides the primer design call used in get_primer_df.
import primer3
import pandas as pd
from IPython.display import display
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"
# Allow up to 200 characters per cell before pandas truncates the text.
pd.set_option('max_colwidth', 200)
import os
from functools import partial
!ls

In [3]:
ls ../somVar/AMLMono7/Validation/primerselect_edit.csv

../somVar/AMLMono7/Validation/primerselect_edit.csv


In [4]:
# Read the tab-separated primer selection table.
test = pd.read_csv('../somVar/AMLMono7/Validation/primerselect_edit.csv', sep='\t')
# 'x != x' is only true for NaN, so this keeps the rows whose InsertRange is missing
# (presumably the mutations that have no primer pair yet - confirm).
no_primer = test.query('InsertRange != InsertRange')
# Write those rows (tab-separated, without the index) as input for make_primer_list.
no_primer.to_csv('doit.csv', sep='\t', index=False)

  interactivity=interactivity, compiler=compiler, result=result)


### Load mutation list

In [None]:
# PCR layout settings (notebook-specific, not primer3 tags); default pcr_config of make_primer_list.
PCR_config = {
    # get_primer_df cuts seq_len / 2 bases on each side of 'Pos' out of the chromosome as template
    'seq_len': 500,
    # halved in get_primer_df and subtracted from the target offset
    'mut_pad': 25,
    # lower bound of PRIMER_PRODUCT_SIZE_RANGE; half of it is also the basis of the target offset
    'prod_size_min': 100,
    # upper bound of PRIMER_PRODUCT_SIZE_RANGE
    'prod_size_max': 200
}

# primer3 global settings; default primer3_config of make_primer_list.
# make_primer_list combines them with PRIMER_PRODUCT_SIZE_RANGE and the PCR_config keys,
# and get_primer_df passes the result to primer3.bindings.designPrimers.
# See the primer3 manual for the meaning and unit of each tag.
Primer3_config = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_PICK_INTERNAL_OLIGO': 0,
        'PRIMER_INTERNAL_MAX_SELF_END': 8,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 55.0,
        'PRIMER_MAX_TM': 65.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        # NOTE(review): 100 looks like it effectively disables the poly-X limit - confirm against the primer3 manual
        'PRIMER_MAX_POLY_X': 100,
        'PRIMER_INTERNAL_MAX_POLY_X': 100,
        'PRIMER_SALT_MONOVALENT': 50.0,
        'PRIMER_DNA_CONC': 50.0,
        'PRIMER_MAX_NS_ACCEPTED': 0,
        'PRIMER_MAX_SELF_ANY': 12,
        'PRIMER_MAX_SELF_END': 8,
        'PRIMER_PAIR_MAX_COMPL_ANY': 12,
        'PRIMER_PAIR_MAX_COMPL_END': 8,
    }

def file2str(file):
    '''
    Read the text file at path `file` and return its whole content as a
    single upper-case string with every newline character removed.

    NOTE(review): nothing else is stripped, so a FASTA header line
    (e.g. '>chr1') would end up inside the returned sequence - confirm
    that the input files carry no header.
    '''

    with open(file, 'r') as handle:
        content = handle.read()
    # joining the newline-separated pieces drops the line breaks
    return ''.join(content.upper().split('\n'))
    
def get_primer_df(chrom_seq, config, row):
    '''
    Design the best primer pair around one mutation and attach it to the row.

    Parameters
    ----------
    chrom_seq : str
        Sequence of the chromosome the mutation lies on.
    config : dict
        Combined settings. 'seq_len', 'mut_pad' and 'prod_size_min' are read
        here; only the keys starting with 'PRIMER_' are handed to primer3 as
        global arguments.
    row : mapping with a 'Pos' entry (e.g. one row of the mutation DataFrame)
        'Pos' is used directly as index into chrom_seq.

    Returns
    -------
    A copy of `row` extended by 'fwd_seq', 'fwd_tmp', 'rev_seq', 'rev_tmp',
    'prod_size' and 'mut_location' (offset of the mutation from the start of
    the forward primer). All six hold '--' if primer3 returned no pair.
    The passed row itself is left untouched.
    '''

    primer_cols = ['fwd_seq', 'fwd_tmp', 'rev_seq', 'rev_tmp', 'prod_size', 'mut_location']
    row = row.copy()

    # load sequence
    pos = int(row['Pos'])
    half_seq = int(config['seq_len'] // 2)
    # clamp at 0: a negative slice start would wrap around to the chromosome
    # end and yield an empty or unrelated template
    seq_start = max(pos - half_seq, 0)
    seq_end = pos + half_seq
    seq = chrom_seq[seq_start:seq_end]
    # index of the mutation inside the template (== half_seq unless clamped)
    center = pos - seq_start

    pad = int(config['mut_pad'] // 2)
    half_size = int(config['prod_size_min'] // 2)

    # calculate the target range as offset around the mutation;
    # the 20 is presumably the primer length (PRIMER_OPT_SIZE) - TODO confirm
    offSet = half_size - 20 - pad
    target = [center - offSet, offSet * 2]

    setting = {
        'SEQUENCE_ID': 'asdf',
        'SEQUENCE_TEMPLATE': seq,
        'SEQUENCE_TARGET': target
    }
    # keep notebook-only keys such as 'seq_len' out of the primer3 global arguments
    primer3_args = {key: value for key, value in config.items() if key.startswith('PRIMER_')}
    primers = primer3.bindings.designPrimers(setting, primer3_args)

    # fill every primer column with '--' if nothing was found
    if primers['PRIMER_PAIR_NUM_RETURNED'] == 0:
        for col in primer_cols:
            row[col] = '--'
        return row

    left_start = primers['PRIMER_LEFT_0'][0]
    right_start = primers['PRIMER_RIGHT_0'][0]
    row['fwd_seq'] = primers['PRIMER_LEFT_0_SEQUENCE']
    row['fwd_tmp'] = primers['PRIMER_LEFT_0_TM']
    row['rev_seq'] = primers['PRIMER_RIGHT_0_SEQUENCE']
    row['rev_tmp'] = primers['PRIMER_RIGHT_0_TM']
    # the right primer position is its 5' (rightmost) base, so the amplicon
    # spans right_start - left_start + 1 bases; prefer primer3's own value
    row['prod_size'] = primers.get('PRIMER_PAIR_0_PRODUCT_SIZE', right_start - left_start + 1)
    row['mut_location'] = center - left_start

    return row


def make_primer_list(mut_file, pcr_config=PCR_config, primer3_config=Primer3_config, chrom_column='Chr', pos_column='Pos', keep_cols='all',
                     genome_dir='/Users/mahtin/Dropbox/Icke/Work/static/genome/gatk/hg38/split'):
    '''
    Design a primer pair for every mutation listed in `mut_file`.

    Parameters
    ----------
    mut_file : str
        Mutation list; '.xlsx' / '.xls' files are read with pd.read_excel,
        everything else as tab-separated text.
    pcr_config : dict
        Needs 'prod_size_min' and 'prod_size_max' (written into
        PRIMER_PRODUCT_SIZE_RANGE) plus the keys get_primer_df reads.
    primer3_config : dict
        primer3 global settings. The dict is copied, the caller's object is
        not modified.
    chrom_column, pos_column : str
        Names of the chromosome / position columns; they are renamed to
        'Chr' and 'Pos'.
    keep_cols : 'all' or int
        'all' keeps every original column and inserts the primer columns
        right after 'Clin_score' (at the end if there is no such column);
        an int keeps only that many leading original columns.
    genome_dir : str
        Directory holding one '<chrom>.fa' file per chromosome.

    Returns
    -------
    DataFrame with the primer columns added. It is also written to
    '<mut_file without extension>-primers.csv' with the pandas to_csv
    defaults (comma-separated, index included).
    '''

    primer_cols = ['fwd_seq', 'fwd_tmp', 'rev_seq', 'rev_tmp', 'prod_size', 'mut_location']

    # work on a private copy so neither the caller's dict nor the shared
    # default argument accumulates keys from one call to the next
    primer3_config = dict(primer3_config)
    # apply pcr size to primer3_config
    primer3_config['PRIMER_PRODUCT_SIZE_RANGE'] = [pcr_config['prod_size_min'], pcr_config['prod_size_max']]
    primer3_config.update(pcr_config)

    # get the mutation list with file-type switch
    if os.path.splitext(mut_file)[1] in ['.xlsx', '.xls']:
        mut_list = pd.read_excel(mut_file, header=0)
    else:
        mut_list = pd.read_csv(mut_file, sep='\t')

    # replace column names for chrom and pos
    mut_list = mut_list.rename(columns={chrom_column: 'Chr', pos_column: 'Pos'})
    mut_list['Chr'] = mut_list['Chr'].astype('str')
    org_cols = list(mut_list.columns)

    # cycle through the chromosomes:
    # + load the chromosome sequence once
    # + create the primer rows for all mutations on that chromosome
    df_list = []
    for chrom in mut_list['Chr'].unique():
        chrom_seq = file2str(os.path.join(genome_dir, f'{chrom}.fa'))
        chr_df = mut_list.query('Chr == @chrom')
        df_list.append(chr_df.apply(partial(get_primer_df, chrom_seq, primer3_config), axis=1))

    # pd.concat raises on an empty list, so an empty mutation list is passed through
    if df_list:
        primer_df = pd.concat(df_list, sort=True)
    else:
        primer_df = mut_list.copy()

    # a primer column can be absent when no row produced it; add the placeholder
    for col in primer_cols:
        if col not in primer_df.columns:
            primer_df[col] = '--'

    if keep_cols == 'all':
        # primer columns go right behind 'Clin_score', or last if it is missing
        if 'Clin_score' in org_cols:
            split_at = org_cols.index('Clin_score') + 1
        else:
            split_at = len(org_cols)
        ordered_cols = org_cols[:split_at] + primer_cols + org_cols[split_at:]
    else:
        ordered_cols = org_cols[:keep_cols] + primer_cols
    primer_df = primer_df[ordered_cols]

    primer_df.to_csv(f"{os.path.splitext(mut_file)[0]}-primers.csv")
    return primer_df

In [None]:
# Settings for this run. Compared with the configuration cell above, this differs in
# mut_pad (5 instead of 25), the product size range (140-210 instead of 100-200)
# and PRIMER_MIN_SIZE (16 instead of 18).
# Rebinding these names does not change the defaults of make_primer_list (they were
# bound when the function was defined), so both dicts are passed explicitly below.
PCR_config = {
    'seq_len': 500,
    'mut_pad': 5,
    'prod_size_min': 140,
    'prod_size_max': 210
}

Primer3_config = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_PICK_INTERNAL_OLIGO': 0,
        'PRIMER_INTERNAL_MAX_SELF_END': 8,
        'PRIMER_MIN_SIZE': 16,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 55.0,
        'PRIMER_MAX_TM': 65.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        'PRIMER_MAX_POLY_X': 100,
        'PRIMER_INTERNAL_MAX_POLY_X': 100,
        'PRIMER_SALT_MONOVALENT': 50.0,
        'PRIMER_DNA_CONC': 50.0,
        'PRIMER_MAX_NS_ACCEPTED': 0,
        'PRIMER_MAX_SELF_ANY': 12,
        'PRIMER_MAX_SELF_END': 8,
        'PRIMER_PAIR_MAX_COMPL_ANY': 12,
        'PRIMER_PAIR_MAX_COMPL_END': 8,
    }


# 'doit.csv' is the tab-separated file written by the filter cell above; its 'Start'
# column is used as position. make_primer_list also writes the result to 'doit-primers.csv'.
primer_list = make_primer_list('doit.csv', pcr_config=PCR_config, primer3_config=Primer3_config, pos_column='Start', keep_cols='all')
primer_list

In [None]:
# Scratch list for trying out list.index; note that it rebinds the name `test`
# that held the DataFrame read in the filter cell further up.
test = ['A', 'B', 'C']

In [None]:
# Scratch check: list.index returns the zero-based position of the first match.
test.index('C')