# Setup cLoops for annotating loops in HiC data processed with Juicer or HiCPro

notebook by Frank Grenn  

Juicer pipeline by the Aiden lab:  
[Juicer github](https://github.com/aidenlab/juicer)  
HiC-Pro by Nicolas Servant:  
[HiC-Pro github](https://github.com/nservant/HiC-Pro)  
cLoops pipeline by YaqiangCao:  
[cLoops github](https://github.com/YaqiangCao/cLoops)



In [1]:
# Directory containing the conda directory which will hold the custom python environment.
# "$USER" is kept as literal text here; it is not expanded by Python.
USERDIR = "/somepath/$USER"

## (1) Create environment for cLoops

In [9]:
# Show the shell command that changes into the user directory.
print("cd", USERDIR)

cd /somepath/$USER


In [2]:
# Show the shell commands, one per line, that source conda's profile script
# from USERDIR, activate the base environment, report the active python,
# and then run the conda update and clean commands.
print(
    "source {}/conda/etc/profile.d/conda.sh".format(USERDIR),
    "conda activate base",
    "which python",
    "conda update conda",
    "conda clean --all --yes",
    sep="\n"
)

source /somepath/$USER/conda/etc/profile.d/conda.sh
conda activate base
which python
conda update conda
conda clean --all --yes


In [8]:
# Show the shell commands, one per line, that clone the cLoops repository
# into the "temp" directory and create the "cLoops" conda environment from
# the cLoops_env.yaml file.
print(
    "cd temp",
    "git clone https://github.com/YaqiangCao/cLoops",
    "cd cLoops",
    "conda env create -n cLoops --file cLoops_env.yaml",
    sep="\n"
)

cd temp
git clone https://github.com/YaqiangCao/cLoops
cd cLoops
conda env create -n cLoops --file cLoops_env.yaml


---
### troubleshooting:
If this fails on the ```joblib``` package, open cLoops_env.yaml, delete the joblib line, and rerun the ```conda env create -n cLoops --file cLoops_env.yaml``` command above.

Then run:

In [9]:
# Show the shell commands, one per line, that install joblib with pip
# inside the activated cLoops environment.
print("\n".join([
    "conda activate cLoops",
    "which pip",
    "pip install joblib",
]))

conda activate cLoops
which pip
pip install joblib


That should fix it (although joblib is already on Biowulf...).

---

In [1]:
# Show the shell commands that activate the cLoops environment and run the
# setup.py install.
print("conda activate cLoops\n" "python setup.py install")


conda activate cLoops
python setup.py install


---
### troubleshooting:
If this fails, you may need to set the PYTHONPATH environment variable and add --prefix to the install command:
    

In [3]:
# Show the fallback install commands: export PYTHONPATH pointing at the
# cLoops environment's site-packages, then install with an explicit --prefix.
# Both lines are filled in from USERDIR.
print(
    "export PYTHONPATH={0}/conda/envs/cLoops/lib/python2.7/site-packages/\n"
    "python setup.py install --prefix={0}/conda/envs/cLoops".format(USERDIR)
)

export PYTHONPATH=/somepath/$USER/conda/envs/cLoops/lib/python2.7/site-packages/
python setup.py install --prefix=/somepath/$USER/conda/envs/cLoops


---

## (2) Write bash job script to convert the output from Juicer (a) or HiC-Pro (b) to a bedpe file for cLoops



### (a) Juicer  
Uses processed HiC data from the Juicer pipeline in [long format](https://github.com/aidenlab/juicer/wiki/Pre#file-format), usually named "merged_nodups.txt".

In [4]:
# Locations for the Juicer-based run.
SAMPLES_DIR = "/path/to/juicer/sample/output"
SAMPLE = "35236"

# Input file read from the sample's "aligned" directory.
INPUT_FILE = SAMPLES_DIR + "/" + SAMPLE + "/aligned/merged_nodups.txt"

# bedpe file to create, placed under <SAMPLES_DIR>/cLoops/<SAMPLE>/.
BEDPE_FILE = "/".join([SAMPLES_DIR, "cLoops", SAMPLE, SAMPLE + "_PET.bedpe"])

In [None]:
%%bash -s "$SAMPLES_DIR" "$SAMPLE"
# Create the per-sample cLoops output directory.
#   $1 - samples directory
#   $2 - sample name
SAMPLES_DIR="${1}"
SAMPLE="${2}"
# -p also creates the parent "cLoops" directory and does not fail when the
# directories already exist, so this cell can be re-run safely. The path is
# quoted so that directories containing spaces are handled.
mkdir -p "${SAMPLES_DIR}/cLoops/${SAMPLE}"

In [83]:
# Write the bash job script that converts the Juicer file (INPUT_FILE) into
# the bedpe file (BEDPE_FILE) with cLoops' juicerLong2bedpe.py script.
# Named format fields make it explicit which value lands where.
# Lines are joined with " \n" so the script text is unchanged from before.
script_lines = [
    "#!/bin/bash",
    "source {userdir}/conda/etc/profile.d/conda.sh",
    "module load python",
    "conda activate cLoops",
    "export PYTHONPATH={userdir}/conda/envs/cLoops/lib/python2.7/site-packages/",
    "{userdir}/temp/cLoops/scripts/juicerLong2bedpe.py -i {input_file} -o {bedpe_file}",
    "echo 'done'",
]
script_text = " \n".join(script_lines).format(
    userdir=USERDIR, input_file=INPUT_FILE, bedpe_file=BEDPE_FILE
)

# The "with" block closes the file on exit; no explicit close() is needed.
with open("{}/cLoops/{}/juicerLong2bedpe_{}.sh".format(SAMPLES_DIR, SAMPLE, SAMPLE), "w") as text_file:
    print(script_text, file=text_file)

In [5]:
# Show the sbatch command that submits the Juicer-to-bedpe job script.
print(
    "sbatch --mem=200g --cpus-per-task=10 --mail-type=ALL --time=24:00:00 "
    "{0}/cLoops/{1}/juicerLong2bedpe_{1}.sh".format(SAMPLES_DIR, SAMPLE)
)

sbatch --mem=200g --cpus-per-task=10 --mail-type=ALL --time=24:00:00 /path/to/juicer/sample/output/cLoops/35236/juicerLong2bedpe_35236.sh


### (b) HiC-Pro  
Uses processed HiC data from the HiC-Pro pipeline, saved as a .allValidPairs file.

In [6]:
# Locations for the HiC-Pro-based run.
SAMPLES_DIR = "/path/to/HiCPro/sample/output"
SAMPLE = "35236"

# Input .allValidPairs file read from the sample's hic_results/data directory.
INPUT_FILE = "/".join(
    [SAMPLES_DIR, SAMPLE, "hic_results", "data", SAMPLE, SAMPLE + ".allValidPairs"]
)

# bedpe file to create, placed under <SAMPLES_DIR>/cLoops/<SAMPLE>/.
BEDPE_FILE = SAMPLES_DIR + "/cLoops/" + SAMPLE + "/" + SAMPLE + ".allValidPairs.bedpe.gz"


In [None]:
%%bash -s "$SAMPLES_DIR" "$SAMPLE"
# Create the per-sample cLoops output directory.
#   $1 - samples directory
#   $2 - sample name
SAMPLES_DIR="${1}"
SAMPLE="${2}"
# -p also creates the parent "cLoops" directory and does not fail when the
# directories already exist, so this cell can be re-run safely. The path is
# quoted so that directories containing spaces are handled.
mkdir -p "${SAMPLES_DIR}/cLoops/${SAMPLE}"

In [None]:
# Write the bash job script that runs hicpropairs2bedpe on the HiC-Pro
# .allValidPairs file (INPUT_FILE), with -o set to <SAMPLES_DIR>/cLoops/<SAMPLE>.
# Named format fields make it explicit which value lands where.
# Lines are joined with " \n" so the script text is unchanged from before.
script_lines = [
    "#!/bin/bash",
    "source {userdir}/conda/etc/profile.d/conda.sh",
    "module load python",
    "conda activate cLoops",
    "export PYTHONPATH={userdir}/conda/envs/cLoops/lib/python2.7/site-packages/",
    "hicpropairs2bedpe -o {samples_dir}/cLoops/{sample} {input_file}",
    "echo 'done'",
]
script_text = " \n".join(script_lines).format(
    userdir=USERDIR, samples_dir=SAMPLES_DIR, sample=SAMPLE, input_file=INPUT_FILE
)

# The "with" block closes the file on exit; no explicit close() is needed.
with open("{}/cLoops/{}/HiCProOutput2bedpe_{}.sh".format(SAMPLES_DIR, SAMPLE, SAMPLE), "w") as text_file:
    print(script_text, file=text_file)

In [7]:
# Show the sbatch command that submits the HiC-Pro-to-bedpe job script.
print(
    "sbatch --mem=200g --cpus-per-task=10 --mail-type=ALL --time=24:00:00 "
    "{0}/cLoops/{1}/HiCProOutput2bedpe_{1}.sh".format(SAMPLES_DIR, SAMPLE)
)

sbatch --mem=200g --cpus-per-task=10 --mail-type=ALL --time=24:00:00 /path/to/HiCPro/sample/output/cLoops/35236/HiCProOutput2bedpe_35236.sh


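## (3) Write bash job script to run cLoops

Note: this section uses the `SAMPLES_DIR`, `SAMPLE` and `BEDPE_FILE` values set by whichever of sections (2a) or (2b) was run most recently.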

In [85]:
# Write the bash job script that changes into <SAMPLES_DIR>/cLoops/<SAMPLE>
# and runs cLoops on BEDPE_FILE, using SAMPLE as the -o value.
# Named format fields make it explicit which value lands where.
# Lines are joined with " \n" so the script text is unchanged from before.
script_lines = [
    "#!/bin/bash",
    "source {userdir}/conda/etc/profile.d/conda.sh",
    "module load python",
    "conda activate cLoops",
    "export PYTHONPATH={userdir}/conda/envs/cLoops/lib/python2.7/site-packages/",
    "cd {samples_dir}/cLoops/{sample}",
    "cLoops -f {bedpe_file} -o {sample} -w -j -s -m 3 -plot -p -1",
    "echo 'done'",
]
script_text = " \n".join(script_lines).format(
    userdir=USERDIR, samples_dir=SAMPLES_DIR, sample=SAMPLE, bedpe_file=BEDPE_FILE
)

# The "with" block closes the file on exit; no explicit close() is needed.
with open("{}/cLoops/{}/run_cLoops_{}.sh".format(SAMPLES_DIR, SAMPLE, SAMPLE), "w") as text_file:
    print(script_text, file=text_file)

In [8]:
# Show the sbatch command that submits the cLoops job script.
print(
    "sbatch --mem=800g --partition=largemem --cpus-per-task=10 --mail-type=ALL --time=10-0 "
    "{0}/cLoops/{1}/run_cLoops_{1}.sh".format(SAMPLES_DIR, SAMPLE)
)

sbatch --mem=800g --partition=largemem --cpus-per-task=10 --mail-type=ALL --time=10-0 /path/to/HiCPro/sample/output/cLoops/35236/run_cLoops_35236.sh
