# Run test file

### Load a mutation file
+ input is a DataFrame with the columns 'Chr', 'Start', 'End', 'Ref', 'Alt', optionally preceded by id columns (everything left of 'Chr')
+ output is id columns + 'Chr', 'Start', 'End', 'Ref', 'Alt' + primer cols

In [None]:
# pandas is used from this first cell onwards; importing it here keeps the
# notebook runnable top-to-bottom on a fresh kernel (no NameError on `pd`).
import pandas as pd

# read the "mutations" sheet of the test workbook into a DataFrame
test_df = pd.read_excel('../testdata/mutations.xlsx', engine="openpyxl", sheet_name="mutations")
# tab-separated alternative to the Excel input:
# test_df = pd.read_csv('../testdata/mutations.csv', sep='\t')

# preview the first five rows
test_df[:5]

### import run_primer3 and set the split-genome folder
+ download the split genome for hg38 [here (from my Dropbox)](https://www.dropbox.com/sh/swvbzgp8uslmh9p/AADZofWT1kf6NJSQS-dxye44a?dl=0) and pass the path to the downloaded folder (split_genome_path) as the `chroms_folder` argument of `run_primer3`
+ PCR_config can be adjusted to specific requirements (see below) but defaults are tested to work in most cases
+ primer3_config has many parameters (for details see [here](https://htmlpreview.github.io/?https://github.com/libnano/primer3-py/master/primer3/src/libprimer3/primer3_manual.htm))

In [None]:
# some sensible settings for better output
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display

# run every top-level expression of a cell interactively (not only the last one)
InteractiveShell.ast_node_interactivity = "all"

# Use the full option names: pandas resolves abbreviated names by pattern
# matching, which can break once another option with a similar name exists.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 200)

# get the code from my package
import sys

# '../code' has to be on sys.path before the three imports below are executed
sys.path.append('../code')
from script_utils import show_output
from p3_run import run_primer3
from primer_DB import check_primerDB

In [None]:
# set the targeted product size and play around with the configs

# settings handed to run_primer3 as `pcr_config`
PCR_config = {
    'seq_len': 500,
    'center_offSet': 5,
    'prod_size_min': 120,
    'prod_size_max': 220,
}

# settings handed to run_primer3 as `primer3_config`
# (parameter names follow the primer3 manual)
primer3_config = {
    'PRIMER_OPT_SIZE': 20,
    'PRIMER_MIN_SIZE': 18,
    'PRIMER_MAX_SIZE': 25,
    'PRIMER_OPT_TM': 60.0,
    'PRIMER_MIN_TM': 55.0,
    'PRIMER_MAX_TM': 65.0,
    'PRIMER_MIN_GC': 20.0,
    'PRIMER_MAX_GC': 80.0,
    'PRIMER_PICK_INTERNAL_OLIGO': 0,
    'PRIMER_INTERNAL_MAX_SELF_END': 8,
    'PRIMER_MAX_POLY_X': 100,
    'PRIMER_INTERNAL_MAX_POLY_X': 100,
    'PRIMER_SALT_MONOVALENT': 50.0,
    'PRIMER_DNA_CONC': 50.0,
    'PRIMER_MAX_NS_ACCEPTED': 0,
    'PRIMER_THERMODYNAMIC_OLIGO_ALIGNMENT': 1,  # set 1 to actually use the thermodynamic calculations
    'PRIMER_MAX_SELF_ANY': 12,
    'PRIMER_MAX_SELF_END': 8,
    'PRIMER_WT_SELF_END': 1,  # use Primer_max_self_end
    'PRIMER_MAX_SELF_END_TH': 30,
    'PRIMER_WT_SELF_END_TH': 1,  # Primer_max_self_end_th
    # NOTE: this key used to be listed twice (12, then 8). A dict literal keeps
    # the last value given for a repeated key, so 8 has always been the
    # effective setting; the shadowed entry was removed.
    'PRIMER_PAIR_MAX_COMPL_ANY': 8,
    'PRIMER_PAIR_MAX_COMPL_END': 8,
    'PRIMER_PAIR_WT_COMPL_ANY': 2,
    'PRIMER_PAIR_MAX_COMPL_ANY_TH': 30,
    'PRIMER_PAIR_WT_COMPL_ANY_TH': 2,
    'PRIMER_MAX_HAIRPIN_TH': 47,
    'PRIMER_WT_HAIRPIN_TH': 1,
}

## run primer3

In [None]:
# folder holding the split genome (adjust this path to your own download)
genome_split_folder = "/Users/martinszyska/Dropbox/Icke/Work/static/genome/gatk/hg38/split"

# run primer3 on the mutation table with the configs defined above
test_primers = run_primer3(
    test_df,
    primer3_config=primer3_config,
    pcr_config=PCR_config,
    # path to split chrome folder
    chroms_folder=genome_split_folder,
    # multicore processing computes different chromosomes in parallel
    threads=8,
)


In [None]:
# preview the first five entries of the run_primer3 output
test_primers[:5]

## check primers against primer "database"

In [None]:
# compare the primers with the primer "database"; the call returns two objects
primers_with_hits_df, results_df = check_primerDB(
    # mutation_df (can also be the output from run_primer3 for chaining the
    # operations: check_primerDB(run_primer3(...), ))
    test_primers,
    # padding defines minimum distance of mutation from primer 3-prime sites
    padding=25,
    # the excel file used as primer database
    primer_DB="../testdata/test_primer_DB.xlsx",
)

In [None]:
# show only the rows whose DBhits value is greater than 0
primers_with_hits_df.query("DBhits > 0")

In [None]:
# display the second object returned by check_primerDB
results_df

### write result to excel file

In [None]:
# one workbook, one sheet per table; the index is left out of every sheet
sheet_tables = (
    ("mutations", test_df),
    ("primers", primers_with_hits_df),
    ("DBhits", results_df),
)
with pd.ExcelWriter("../testdata/primer_results.xlsx", mode="w", engine="openpyxl") as writer:
    for sheet_label, table in sheet_tables:
        table.to_excel(writer, sheet_name=sheet_label, index=False)