# Basic Setup to Run Juicer on Biowulf
- **Author** - Frank Grenn
- **Date Started** - October 2019
- **Quick Description:** Basic setup for running the Juicer pipeline on Biowulf.

In [1]:
import os
import subprocess

In [2]:
# Every path and file listed below has to exist before the notebook is run.

# Location of the combined-lane fastqs.
FASTQDIR = "/data/CARD/HICtemp/temp_fastqs"

# Parent folder that will contain one sub-folder per sample for its output.
WRKDIR = "/data/CARD/HICtemp/juicer"

# Reference genome, pulled down from
# gs://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta
REF_GENOME_FASTA = "/data/LNG/Frank/juicer_runs/references/Homo_sapiens_assembly38.fasta"

# Modified version of the juicer script.
MAIN_JUICER_SCRIPT = "/data/LNG/Frank/HiC_Project/Juicer/new/juicer.sh"

# Handed to the juicer script as its -p and -y arguments respectively.
CHROM_SIZES = "/data/LNG/Frank/juicer_runs/mygenome.chrom.sizes"
RESTRICTION_ENZYME_FILE = "/data/LNG/Frank/juicer_runs/hg38_MboI.txt"


Set up directories per sample

In [4]:
# Collect the entries of FASTQDIR that are directories and whose name
# contains "HICS", in the order os.listdir reports them.
sample_directories = list(
    filter(
        lambda name: os.path.isdir(os.path.join(FASTQDIR, name)) and "HICS" in name,
        os.listdir(FASTQDIR),
    )
)

# Show how many samples were found, then the names themselves.
print(len(sample_directories))
print(sample_directories)

20
['HICS_CS25i_d0_S9', 'HICS_PPMI54144_3109_da0_v1_S7', 'HICS_PPMI3966_2813_da65_v1_S3', 'HICS_PPMI3453_7504_da0_v1_S5', 'HICS_PPMI50184_7165_da65_v1_S4', 'HICS_PPMI41471_4917_da0_v1_S8', 'HICS_PPMI54144_3109_da65_v1_S1', 'HICS_PPMI57670_3787_da65_v1_S1', 'HICS_PPMI56954_9190_da65_v1_S6', 'HICS_PPMI57670_3787_da0_v1_S2', 'HICS_PPMI51971_9029_da65_v1_S8', 'HICS_PPMI3966_2813_da0_v1_S2', 'HICS_CS25i_FBn_d25_S6', 'HICS_PPMI51971_9029_da0_v1_S7', 'HICS_PPMI3452_7426_da65_v1_S10', 'HICS_PPMI56954_9190_da0_v1_S5', 'HICS_PPMI3666_3014_da65_v1_S4', 'HICS_PPMI3452_7426_da0_v1_S10', 'HICS_PPMI3666_3014_da0_v1_S9', 'HICS_PPMI50184_7165_da0_v1_S3']


In [5]:
# For every sample, create <WRKDIR>/<sample>/fastq and symlink the sample's
# R1 and R2 fastq files from FASTQDIR into it.
#
# This uses os calls instead of `!mkdir` / `!ln -s` so that the cell can be
# re-run: directories and links that already exist are left untouched rather
# than producing "File exists" shell errors, and paths are never re-parsed by
# a shell. Genuine failures (e.g. permission errors) now raise an exception
# instead of being printed and ignored.
for sample in sample_directories:
    fastq_link_dir = os.path.join(WRKDIR, sample, "fastq")
    # Creates <WRKDIR>/<sample> as well when it is missing.
    os.makedirs(fastq_link_dir, exist_ok=True)

    for read in ("R1", "R2"):
        fastq_name = f"{sample}_{read}_001.fastq.gz"
        link_path = os.path.join(fastq_link_dir, fastq_name)
        # lexists is also true for a dangling link, so an existing link is
        # never overwritten.
        if not os.path.lexists(link_path):
            os.symlink(os.path.join(FASTQDIR, sample, fastq_name), link_path)

Run this command on Biowulf after loading the juicer module:

In [3]:
# Emit the juicer command line assembled from the configured paths; print's
# default single-space separator joins the pieces.
print(
    MAIN_JUICER_SCRIPT,
    "-z", REF_GENOME_FASTA,
    "-p", CHROM_SIZES,
    "-y", RESTRICTION_ENZYME_FILE,
)

/data/LNG/Frank/HiC_Project/Juicer/new/juicer.sh -z /data/LNG/Frank/juicer_runs/references/Homo_sapiens_assembly38.fasta -p /data/LNG/Frank/juicer_runs/mygenome.chrom.sizes -y /data/LNG/Frank/juicer_runs/hg38_MboI.txt


# TESTING! DON'T USE

In [37]:
# Load the juicer environment module through a shell escape.
# NOTE(review): a `!` line runs in its own shell, so the loaded module
# presumably does not carry over to later cells or to subprocess calls — confirm.
!module load juicer

[+] Loading juicer  1.5.6 


In [6]:
# Keep only the first sample for the test run. This rebinds the full list, so
# re-run the discovery cell to get every sample back.
sample_directories = sample_directories[:1]
sample_directories

['HICS_CS25i_d0_S9']

In [7]:
# Start the custom juicer.sh once per sample, with the sample's folder under
# WRKDIR as the working directory. Popen does not wait for the script, and `p`
# ends up referring only to the most recently started process.
for sample in sample_directories:
    wdir = f"{WRKDIR}/{sample}"
    p = subprocess.Popen(
        [
            "/data/LNG/Frank/juicer_runs/custom_juicer_scripts/custom_week_rm_chrM_run/juicer.sh",
            "-z", "/data/LNG/Frank/juicer_runs/references/Homo_sapiens_assembly38.fasta",
            "-p", "/data/LNG/Frank/juicer_runs/mygenome.chrom.sizes",
            "-y", "/data/LNG/Frank/juicer_runs/hg38_MboI.txt",
            "-S", "early",
        ],
        cwd=wdir,
    )

In [17]:
# Load the juicer environment module through a shell escape.
# NOTE(review): a `!` line runs in its own shell, so the loaded module
# presumably does not carry over to later cells or to subprocess calls — confirm.
!module load juicer

[+] Loading juicer  1.5.6 


In [34]:
# Working directory of the single sample used for this test run.
wdir = "/data/CARD/HICtemp/regenBAM/HICS_CS25i_d0_S9"

# The juicer invocation as one shell string; adjacent literals are joined by
# the parser, so the value is a single space-separated command line.
cmd = (
    "/data/LNG/Frank/juicer_runs/custom_juicer_scripts/custom_week_rm_chrM_run/juicer.sh"
    " -z /data/LNG/Frank/juicer_runs/references/Homo_sapiens_assembly38.fasta"
    " -p /data/LNG/Frank/juicer_runs/mygenome.chrom.sizes"
    " -y /data/LNG/Frank/juicer_runs/hg38_MboI.txt"
    " -S early"
)

In [36]:
# Start the custom juicer.sh with `wdir` as its working directory; Popen
# returns immediately and `p` is the handle to the running script.
p = subprocess.Popen(
    [
        "/data/LNG/Frank/juicer_runs/custom_juicer_scripts/custom_week_rm_chrM_run/juicer.sh",
        "-z", "/data/LNG/Frank/juicer_runs/references/Homo_sapiens_assembly38.fasta",
        "-p", "/data/LNG/Frank/juicer_runs/mygenome.chrom.sizes",
        "-y", "/data/LNG/Frank/juicer_runs/hg38_MboI.txt",
        "-S", "early",
    ],
    cwd=wdir,
)

In [35]:
        # One shell escape that chains three commands inside a single
        # parenthesised group: load the juicer module, cd into the Python
        # variable `wdir` (interpolated via {wdir}), then run the custom
        # juicer.sh with the reference (-z), chrom sizes (-p), restriction
        # site file (-y) and "-S early".
        !(module load juicer; \
          cd {wdir}; \
     /data/LNG/Frank/juicer_runs/custom_juicer_scripts/custom_week_rm_chrM_run/juicer.sh -z /data/LNG/Frank/juicer_runs/references/Homo_sapiens_assembly38.fasta -p /data/LNG/Frank/juicer_runs/mygenome.chrom.sizes -y /data/LNG/Frank/juicer_runs/hg38_MboI.txt -S early)

[+] Loading juicer  1.5.6 
Running juicer version 1.5.6
(-: Looking for fastq files...fastq files exist:
lrwxrwxrwx 1 grennfp CARD 80 Apr  2 13:24 /data/CARD/HICtemp/regenBAM/HICS_CS25i_d0_S9/fastq/HICS_CS25i_d0_S9_R1_001.fastq.gz -> /data/CARD/HICtemp/temp_fastqs/HICS_CS25i_d0_S9/HICS_CS25i_d0_S9_R1_001.fastq.gz
lrwxrwxrwx 1 grennfp CARD 80 Apr  2 13:24 /data/CARD/HICtemp/regenBAM/HICS_CS25i_d0_S9/fastq/HICS_CS25i_d0_S9_R2_001.fastq.gz -> /data/CARD/HICtemp/temp_fastqs/HICS_CS25i_d0_S9/HICS_CS25i_d0_S9_R2_001.fastq.gz
(-: Aligning files matching /data/CARD/HICtemp/regenBAM/HICS_CS25i_d0_S9/fastq/*_R*.fastq*
 in queue norm to genome hg38 with site file /data/LNG/Frank/juicer_runs/hg38_MboI.txt
(-: Created /data/CARD/HICtemp/regenBAM/HICS_CS25i_d0_S9/splits and /data/CARD/HICtemp/regenBAM/HICS_CS25i_d0_S9/aligned.
    Submitted job 54348249 which will wait for fastq file splitjobs to finish. START: Thu Apr  2 13:27:15 EDT 2020
^C
