In [1]:
import os
# Pin NUMEXPR_MAX_THREADS to '4' for this kernel. The assignment is placed
# before the third-party imports below — presumably so that whichever library
# reads it at import time sees the value (TODO confirm which dependency uses it).
os.environ['NUMEXPR_MAX_THREADS'] = '4'
# Drop MPLBACKEND from the environment if it is set (the None default means no
# KeyError when it is absent).
# NOTE(review): looks intended to discard an inherited matplotlib backend
# override before plotting code is imported — confirm.
os.environ.pop("MPLBACKEND", None)

import sys
# Entry-point module of the sshicstuff package; aliased with a leading
# underscore so it reads as an internal handle in the notebook namespace.
from sshicstuff import main as _entrypoint
from Bio import SeqIO
import pandas as pd
from os.path import join

In [2]:
# === INPUTS ===
DATA_DIR = "../test_data/"
GENOME_NAME = "S288c_DSB"
# The FASTA file name is derived from GENOME_NAME so the two cannot drift apart.
FASTA = join(DATA_DIR, "inputs", f"{GENOME_NAME}.fa")

# === OUTPUT DIRECTORY ===
OUTDIR = join(DATA_DIR, "design-outputs")

# === OUTPUT FILENAMES ===
# Bare file names (no directory part); they are handed to the design command
# together with --outdir.
O4S_OUTPUT_RAW  = "output_o4s_raw.fa"
O4S_OUTPUT_SNP  = "output_o4s_snp.fa"
ANNEALING_CSV   = "annealing_oligo_positions.csv"
CAPTURE_CSV     = "capture_oligo_positions.csv"
CHR_ARTIFICIAL  = "chr_artificial_ssdna.fa"

# === DESIGN COMMAND ARGUMENTS ===
# Flat list of command-line tokens (flag, value, flag, value, ...). Every value
# is a string, numeric ones included, because the list is used as argv tokens.
ARGS = [
    "-f", FASTA,
    "--forward-intervals", "chr5:118710-133000",
    "--reverse-intervals", "chr5:100000-118710",
    "--site", "GATC",
    "--secondary-sites", "CAATTG,AATATT",
    "--size", "80",
    "--site-start", "70",
    "--n-5-prime-deletion", "10",
    "--n-3-prime-deletion", "10",
    "--fragment-size", "150",
    "--fasta-line-length", "80",
    "--outdir", OUTDIR,
    "--o4s-output-raw", O4S_OUTPUT_RAW,
    "--o4s-output-snp", O4S_OUTPUT_SNP,
    "--annealing-csv", ANNEALING_CSV,
    "--capture-csv", CAPTURE_CSV,
    "--chr-artificial", CHR_ARTIFICIAL,
]

In [3]:
# Run the `sshicstuff design` sub-command in-process. main() is called without
# arguments, so the command line is supplied through sys.argv (presumably that
# is what main() parses — confirm against the package).
_sys_argv = ["sshicstuff", "design"] + ARGS
_saved_argv = sys.argv
sys.argv = _sys_argv
try:
    _entrypoint.main()
finally:
    # Put the kernel's own argv back, even if main() raises, so the override
    # does not leak into later cells.
    sys.argv = _saved_argv

INFO :: [Design] Running backend: oligo4sshic --fasta ../test_data/inputs/S288c_DSB.fa --forward-intervals chr5:118710-133000 --reverse-intervals chr5:100000-118710 --site GATC --secondary-sites CAATTG,AATATT --size 80 --site-start 70 --no-snp-zone 5 --complementary-size 7 --snp-number 5 --tries 20 --output-raw /home/adminico/Documents/projects-src/sshicstuff/test_data/design-outputs/output_o4s_raw.fa --output-snp /home/adminico/Documents/projects-src/sshicstuff/test_data/design-outputs/output_o4s_snp.fa


rerverse oligo: 44 / 90


INFO :: [Design] Creating the artificial chromosome with the annealing oligo and the enzyme GATC
INFO :: [Design] Artificial chromosome coordinates saved to chr_artificial_ssdna.fa
INFO :: [Design] Inserting the artificial chromosome at the end of the original genome .FASTA file
INFO :: [Design] Creation of capture oligos from annealing oligos done. 
INFO :: [Design] Capture file saved to /home/adminico/Documents/projects-src/sshicstuff/test_data/design-outputs/capture_oligo_positions.csv


In [4]:
# Show every record of the artificial-chromosome FASTA written by the design
# step: identifier, full description line, and the sequence itself.
artificial_fasta_path = join(OUTDIR, CHR_ARTIFICIAL)
for record in SeqIO.parse(artificial_fasta_path, "fasta"):
    print(
        f"ID: {record.id}",
        f"Description: {record.description}",
        f"Sequence: {record.seq}",
        sep="\n",
    )

ID: chr_artificial_ssDNA
Description: chr_artificial_ssDNA	 (4699 bp)
Sequence: NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNCACCCTTTCCAATAGCAATCAGAACATTTGTATTTTTTTGTTCCAAAATAAGCTGTACGACTTTTTTGATCTTCGACNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNATCACGTCCATATTGTCAGGGGATAACTTTCCGGTAAACTTCAGAAATGGGGACAAAACACTAACTTGATCCAACTCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN

In [7]:
# Load the annealing- and capture-oligo position tables from the design output
# directory. The file names are taken from the configuration constants rather
# than repeated as literals, so this cell keeps reading the right files if the
# configured names change.
annealing_oligos_path = join(OUTDIR, ANNEALING_CSV)
capture_oligos_path = join(OUTDIR, CAPTURE_CSV)
# Comma-separated files whose first row holds the column names.
df_annealing = pd.read_csv(annealing_oligos_path, sep=",", header=0)
df_capture = pd.read_csv(capture_oligos_path, sep=",", header=0)

In [8]:
# Preview the first five rows of the annealing-oligo table.
df_annealing.head(n=5)

Unnamed: 0,chr,start,end,length,chr_ori,start_ori,end_ori,orientation,type,name,sequence_original,sequence_modified
0,chr_artificial_ssDNA,73,446,373,chr5,121385.0,121464.0,w,ss,Probe_chr5_w_121385_121464,CCGCACCCTTTCCAATAACAATCAGAATATTTTTATTTTTATGTTG...,CCGCACCCTTTCCAATAGCAATCAGAACATTTGTATTTTTTTGTTC...
1,chr_artificial_ssDNA,446,819,373,chr5,121624.0,121703.0,w,ss,Probe_chr5_w_121624_121703,GTAATCATGTCAATATTGTCAGGGGTTAACTTTCCGGTAAACTTCA...,GTAATCACGTCCATATTGTCAGGGGATAACTTTCCGGTAAACTTCA...
2,chr_artificial_ssDNA,819,1192,373,chr5,126743.0,126822.0,w,ss,Probe_chr5_w_126743_126822,TATCGTCATATCTGTGCTTTCTGTTATCGTATTGGAAATATTTCCA...,TATCGTCATATCTGTGCTTTCAGCTATCGTATTGGAAATATGTCTA...
3,chr_artificial_ssDNA,1192,1565,373,chr5,130786.0,130865.0,w,ss,Probe_chr5_w_130786_130865,TACTGAAAAATACGTCCGTCAGGTCTCTAGAGAGGTACTGGAACCC...,TACTGAAAAATAGGTCCGTCAGGTCTCTAGAGAGGTACTGGAACCC...
4,chr_artificial_ssDNA,1565,1938,373,chr5,132710.0,132789.0,w,ss,Probe_chr5_w_132710_132789,CGTTTTTAGAATATATTGTAATAAAACACAATTGATAATACAGTTC...,CGTTTTTAGAATATATTGTAATATGCCAGAATTGATAATACAGTAC...


In [9]:
# Preview the first five rows of the capture-oligo table.
df_capture.head(n=5)

Unnamed: 0,chr,start,end,chr_ori,start_ori,end_ori,orientation,type,name,sequence
0,chr_artificial_ssDNA,73,446,chr5,121385.0,121464.0,w,ss,Probe_chr5_w_121385_121464,TCCAATAGCAATCAGAACATTTGTATTTTTTTGTTCCAAAATAAGC...
1,chr_artificial_ssDNA,446,819,chr5,121624.0,121703.0,w,ss,Probe_chr5_w_121624_121703,CCATATTGTCAGGGGATAACTTTCCGGTAAACTTCAGAAATGGGGA...
2,chr_artificial_ssDNA,819,1192,chr5,126743.0,126822.0,w,ss,Probe_chr5_w_126743_126822,TCTGTGCTTTCAGCTATCGTATTGGAAATATGTCTAACTCGGGTCG...
3,chr_artificial_ssDNA,1192,1565,chr5,130786.0,130865.0,w,ss,Probe_chr5_w_130786_130865,TAGGTCCGTCAGGTCTCTAGAGAGGTACTGGAACCCATCTAAATAC...
4,chr_artificial_ssDNA,1565,1938,chr5,132710.0,132789.0,w,ss,Probe_chr5_w_132710_132789,ATATATTGTAATATGCCAGAATTGATAATACAGTACTCTCTTCGTC...
