In [8]:
# Environment tweaks come first, ahead of the third-party imports below.
import os
# Cap the NumExpr thread count at 4. Presumably this has to be in the environment
# before numexpr is first loaded (e.g. indirectly through pandas) -- TODO confirm.
os.environ['NUMEXPR_MAX_THREADS'] = '4'
# Drop any inherited MPLBACKEND value; the None default makes this a no-op when unset.
# NOTE(review): looks intended to let matplotlib choose its own backend -- confirm.
os.environ.pop("MPLBACKEND", None)

import sys
# The sshicstuff command-line entry point, driven in-process further down via sys.argv.
from sshicstuff import main as _entrypoint
from Bio import SeqIO
import pandas as pd
from os.path import join

In [9]:
## DESIGN ARGS
# Paths: the genome FASTA that is read and the two oligo tables that are written.
DATA_DIR = "../test_data/design-output/"
GENOME = "S288c_DSB"
FASTA = join("../test_data", GENOME, f"{GENOME}.fa")
OUTPUT_SNP = join(DATA_DIR, f"{GENOME}_LY_Capture_N_Annealing_oligos.tsv")
OUTPUT_RAW = join(DATA_DIR, f"{GENOME}_LY_Capture_N_Annealing_oligos_initial.tsv")

# Command-line tokens for the design sub-command, declared as (flag, value) pairs
# and flattened into the flat list an argv-style parser expects. Order is preserved.
ARGS = [
    token
    for flag_and_value in (
        ("-f", FASTA),
        ("--forward-intervals", "chr5:118710-133000"),
        ("--reverse-intervals", "chr5:100000-118710"),
        ("--output-snp", OUTPUT_SNP),
        ("--output-raw", OUTPUT_RAW),
        ("--site", "GATC"),
        ("--secondary-sites", "CAATTG,AATATT"),
        ("--size", "80"),
        ("--site-start", "70"),
        ("--n-5-prime-deletion", "10"),
        ("--n-3-prime-deletion", "10"),
        ("--fragment-size", "150"),
        ("--fasta-line-length", "80"),
    )
    for token in flag_and_value
]

In [10]:
# Run the `sshicstuff design` sub-command in-process. The entry point is called
# without arguments, so the command line is handed over through sys.argv
# (presumably main() parses sys.argv itself -- confirm against sshicstuff).
_sys_argv = ["sshicstuff", "design"] + ARGS
_saved_argv = sys.argv
sys.argv = _sys_argv
try:
    _entrypoint.main()
finally:
    # sys.argv is process-global: restore the kernel's original value so the
    # fake command line does not leak into later cells, even if main() raises.
    sys.argv = _saved_argv

INFO :: Running backend : oligo4sshic --fasta ../test_data/S288c_DSB/S288c_DSB.fa --forward-intervals chr5:118710-133000 --reverse-intervals chr5:100000-118710 --output-snp ../test_data/design-output/S288c_DSB_LY_Capture_N_Annealing_oligos.tsv --output-raw ../test_data/design-output/S288c_DSB_LY_Capture_N_Annealing_oligos_initial.tsv --site GATC --secondary-sites CAATTG,AATATT --size 80 --site-start 70 --no-snp-zone 5 --complementary-size 7 --snp-number 5 --tries 20


rerverse oligo: 44 / 90


INFO :: Creating the artificial chromosome with the annealing oligo and the enzyme GATC
INFO :: Inserting the artificial chromosome at the end of the original genome .FASTA file
INFO :: Artificial chromosome created and inserted at the end of the genome .FASTA file
INFO :: Artificial chromosome coordinates saved to annealing_oligos_positions.csv
INFO :: Creation of capture oligos from annealing oligos done. 
INFO :: [Design] Capture file saved to ../test_data/design-output/capture_oligos_positions.csv


In [11]:
# Dump every record of the artificial-chromosome FASTA found in DATA_DIR
# (presumably the file produced by the design run above).
chr_artificial_path = join(DATA_DIR, "chr_artificial_ssDNA.fa")
for record in SeqIO.parse(chr_artificial_path, "fasta"):
    # Three labelled lines per record; the sequence is printed in full.
    print(
        f"ID: {record.id}",
        f"Description: {record.description}",
        f"Sequence: {record.seq}",
        sep="\n",
    )

ID: chr_artificial_ssDNA
Description: chr_artificial_ssDNA	 (4699 bp)
Sequence: NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCACCCTTTCCAATAACAATAAGAATGTTTATATTTTAATCTTGCAAAATAAGCTGTACGACTTTTTTGATCTTCGACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATCATTTCAACATTGTCAGGGGTTAAGTTTCCGGTAAACTTCGAACATGGGGACAAAAGACTAACTTGATCCAACTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN

In [18]:
# Load the annealing- and capture-oligo position tables from DATA_DIR.
# Both are comma-separated with the column names on the first row.
annealing_oligos_path = join(DATA_DIR, "annealing_oligos_positions.csv")
capture_oligos_path = join(DATA_DIR, "capture_oligos_positions.csv")
df_annealing, df_capture = (
    pd.read_csv(csv_path, sep=",", header=0)
    for csv_path in (annealing_oligos_path, capture_oligos_path)
)

In [19]:
# Preview the first five rows of the annealing-oligo table.
df_annealing.head(n=5)

Unnamed: 0,chr,start,end,length,chr_ori,start_ori,end_ori,orientation,type,name,sequence_original,sequence_modified
0,chr_artificial_ssDNA,73,446,373,chr5,121385.0,121464.0,w,ss,Probe_chr5_w_121385_121464,CCGCACCCTTTCCAATAACAATCAGAATATTTTTATTTTTATGTTG...,CCGCACCCTTTCCAATAACAATAAGAATGTTTATATTTTAATCTTG...
1,chr_artificial_ssDNA,446,819,373,chr5,121624.0,121703.0,w,ss,Probe_chr5_w_121624_121703,GTAATCATGTCAATATTGTCAGGGGTTAACTTTCCGGTAAACTTCA...,GTAATCATTTCAACATTGTCAGGGGTTAAGTTTCCGGTAAACTTCG...
2,chr_artificial_ssDNA,819,1192,373,chr5,126743.0,126822.0,w,ss,Probe_chr5_w_126743_126822,TATCGTCATATCTGTGCTTTCTGTTATCGTATTGGAAATATTTCCA...,TATCGTCATATCTGTGCTATCTCTCATCGTATTGGAAATCTTTCTA...
3,chr_artificial_ssDNA,1192,1565,373,chr5,130786.0,130865.0,w,ss,Probe_chr5_w_130786_130865,TACTGAAAAATACGTCCGTCAGGTCTCTAGAGAGGTACTGGAACCC...,TACTGAAAAATACGTCCGTCAGGTCTCTAGAGAGGTACTGGAACCC...
4,chr_artificial_ssDNA,1565,1938,373,chr5,132710.0,132789.0,w,ss,Probe_chr5_w_132710_132789,CGTTTTTAGAATATATTGTAATAAAACACAATTGATAATACAGTTC...,CGTTTTGAGAATATATTGTAATAAAACACAATCGATAATACAGTTC...


In [20]:
# Preview the first five rows of the capture-oligo table.
df_capture.head(n=5)

Unnamed: 0,chr,start,end,chr_ori,start_ori,end_ori,orientation,type,name,sequence
0,chr_artificial_ssDNA,73,446,chr5,121385.0,121464.0,w,ss,Probe_chr5_w_121385_121464,TCCAATAACAATAAGAATGTTTATATTTTAATCTTGCAAAATAAGC...
1,chr_artificial_ssDNA,446,819,chr5,121624.0,121703.0,w,ss,Probe_chr5_w_121624_121703,CAACATTGTCAGGGGTTAAGTTTCCGGTAAACTTCGAACATGGGGA...
2,chr_artificial_ssDNA,819,1192,chr5,126743.0,126822.0,w,ss,Probe_chr5_w_126743_126822,TCTGTGCTATCTCTCATCGTATTGGAAATCTTTCTAGCTCGGGTCG...
3,chr_artificial_ssDNA,1192,1565,chr5,130786.0,130865.0,w,ss,Probe_chr5_w_130786_130865,TACGTCCGTCAGGTCTCTAGAGAGGTACTGGAACCCATCTTAAGAT...
4,chr_artificial_ssDNA,1565,1938,chr5,132710.0,132789.0,w,ss,Probe_chr5_w_132710_132789,ATATATTGTAATAAAACACAATCGATAATACAGTTCTCCCTTCGTC...
