### Trackhub for Matt Gemberling

Create unified peak files (peaks merged across replicates, one file per sample group) for the trackhub.

In [4]:
%%bash
# Merge the per-replicate narrowPeak calls of every sample group into a single
# BED file and convert it to bigBed, one Slurm array task per sample group.
module load bedtools2
cd /data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/Matt_5756_190620B1-se-with-control

# Sample groups = first two dot-separated fields of column 1 of qc.txt
# (header line skipped), de-duplicated while keeping file order.
SAMPLES=($(cut -d, -f1 qc.txt | tail -n+2 | cut -d. -f1-2 | uniq))

if [ "${#SAMPLES[@]}" -eq 0 ]; then
    echo "No samples found in qc.txt; nothing to submit" >&2
    exit 1
fi

# Bash arrays are 0-indexed, so the task ids must run from 0 to N-1.
# A range of 1..N-1 silently skips the first sample group.
sbatch \
    -p new,all \
    --array=0-$((${#SAMPLES[@]}-1)) \
    <<EOF
#!/bin/bash
#SBATCH --output=/data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/logs/merge_peaks.%a.out

# The sample list is expanded at submission time; each task picks its own entry.
SAMPLES=(${SAMPLES[@]})
SAMPLE=\${SAMPLES[\${SLURM_ARRAY_TASK_ID}]}

# Concatenate whichever of rep1..rep12 exist (cat only warns about missing ones),
# clamp column 5 to at most 1000, sort by chromosome/start and merge overlaps.
cat \${SAMPLE}.rep{1,2,3,4,5,6,7,8,9,10,11,12}.masked.dedup.sorted_peaks.narrowPeak \
| awk -vOFS="\t" '\$5>1000{\$5=1000}{print}' \
| sort -k1,1 -k2,2n \
| bedtools merge \
    -i stdin \
> \${SAMPLE}.merged_peaks.bed \
&& echo "\${SAMPLE}.merged_peaks.bed created" \
|| echo "\${SAMPLE}.merged_peaks.bed failed!"

# Convert the merged BED to bigBed using the mm10 chromosome sizes.
/data/reddylab/software/bin/bedToBigBed \
    \${SAMPLE}.merged_peaks.bed \
    /data/reddylab/Reference_Data/Genomes/mm10/GRCm38.sizes \
    \${SAMPLE}.merged_peaks.bb \
&& echo "\${SAMPLE}.merged_peaks.bb Done" \
|| echo "\${SAMPLE}.merged_peaks.bb Failed!"

EOF

Submitted batch job 9403039


In [7]:
%%bash
# Make sure the output directory for the rendered hub files exists.
HUB_DIR=/data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/trackhub/matt_p300_krab
mkdir -p "${HUB_DIR}"

In [10]:
%%writefile /data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/scripts/create_chipseq_tracks.matt_p300_krab.py
#!/usr/bin/env python

from trackhub import Hub, GenomesFile, Genome, TrackDb, Track, ViewTrack, \
    SuperTrack, AggregateTrack, CompositeTrack, SubGroupDefinition
import os
from operator import add
from palettable.cmocean.sequential import *
from palettable.cartocolors.qualitative import *
from palettable.colorbrewer.diverging import BrBG_6, PRGn_6
    
# One palette per entry of `factors` (same order, same index).
# Within a track group the replicate position selects the colour from its palette.
colors_palettes = (
    # mmLiver_KRAB: three gray groups, then the same blue/green trio twice
    [Gray_6] * 3
    + [Ice_12, Haline_10, Deep_10] * 2
    # mmLiver_p300: three gray groups, then the same warm trio twice
    + [Gray_6] * 3
    + [Solar_6, Amp_6_r, Matter_6_r] * 2
    # mmTh0_p300 groups
    + [Gray_6, Bold_5_r, Bold_6_r]
)

# --- Locations -------------------------------------------------------------
# OUTDIR: where hub.txt / genomes file / trackDb.txt are rendered locally.
# URLBASE: public web root under which the hub is served.
# REMOTE_DATA_DIR / LOCAL_DATA_DIR: server-side and cluster-side data folders.
GENOME = 'mm10'
OUTDIR = '/data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/trackhub/matt_p300_krab'
URLBASE = 'http://trackhub.genome.duke.edu/reddylab/'
REMOTE_DATA_DIR = '/nfs/trackhub/reddylab/collab/matt_p300_krab/'
LOCAL_DATA_DIR = '/data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/Matt_5756_190620B1-se-with-control/'

# --- Hub skeleton ----------------------------------------------------------
# The hub id and both labels share the same string.
hub = Hub(
    email='alejandro.barrera@duke.edu',
    **dict.fromkeys(('hub', 'short_label', 'long_label'), 'matt_p300_krab'))
hub.local_fn = os.path.join(OUTDIR, 'hub.txt')

genomes_file = GenomesFile()
genome = Genome(GENOME)

# trackDb.txt lives in a per-genome subdirectory of the hub.
trackdb = TrackDb()
trackdb.local_fn = os.path.join(OUTDIR, GENOME, 'trackDb.txt')

# `factors` lists the track groups (one aggregate track each) and `replicates`
# lists, at the same index, the replicate numbers that belong to that group.
#
# The liver groups follow a regular layout: for each experiment, every mark is
# combined with every condition, and the replicate numbers depend only on the
# condition. Each design row is (experiment, marks, ((condition, replicate ids), ...)).
_LIVER_DESIGN = (
    ('mmLiver_KRAB', ('Input', 'flag', 'K9me3'), (
        ('PBS', (1, 2, 4, 12)),
        ('targeted', (3, 5, 6, 7)),
        ('scram', (8, 9, 10, 11)),
    )),
    ('mmLiver_p300', ('input', 'flag', 'K27ac'), (
        ('PBS', (4, 10, 11, 12)),
        ('scram', (1, 2, 3, 5)),
        ('targeted', (6, 7, 8, 9)),
    )),
)

factors = []
replicates = []

for experiment, marks, conditions in _LIVER_DESIGN:
    for mark in marks:
        for condition, rep_ids in conditions:
            factors.append('%s.%s.%s' % (experiment, mark, condition))
            replicates.append(list(rep_ids))

# The Th0 groups all use replicates 1-3.
for th0_group in ('CREpos.Foxp3_ntgRNA', 'CREneg.Foxp3_g5', 'CREpos.Foxp3_g5'):
    factors.append('mmTh0_p300.K27ac.%s' % th0_group)
    replicates.append([1, 2, 3])
# `factors`, `replicates` and `colors_palettes` are indexed in parallel by `fi`.
# Fail up front with a clear message instead of an IndexError half-way through.
if len(replicates) < len(factors) or len(colors_palettes) < len(factors):
    raise ValueError(
        'Need one replicate list and one palette per factor: '
        '%d factors, %d replicate lists, %d palettes'
        % (len(factors), len(replicates), len(colors_palettes)))

# Single super track that groups every aggregate (overlay) track of the hub.
supertrack = SuperTrack(
    visibility='full',
    name="matt_p300_krab",
    short_label="matt_p300_krab",
    long_label="matt_p300_krab")


for fi, factor in enumerate(factors):
    # One transparent-overlay container per factor; its replicates are drawn
    # on top of each other with a fixed 0-2000 view range.
    aggregate_track = AggregateTrack(
        name="agg%s" % (factor.replace('.', '_')),
        short_label="%02d_%s" % (fi, factor),
        long_label="%02d_%s" % (fi, factor),
        tracktype='bigWig',
        showSubtrackColorOnUi='on',
        visibility='full',
        viewLimits='0:2000',
        autoScale='off',
        maxHeightPixels='100:32:8',
        alwaysZero='on',
        aggregate='transparentOverlay',
        subgroups={'factor': factor}
    )
    for rep_ix, rep in enumerate(replicates[fi]):
        # Replicate 8 of this group is deliberately left out of the hub
        # (the reason is not recorded in this script).
        if 'mmLiver_KRAB.flag.scram' == factor and rep == 8:
            continue
        # Input and Th0 groups use the plain RPKM signal; every other group
        # uses the control-subtracted signal.
        if 'input' in factor.lower() or 'Th0' in factor:
            sample = "%s.rep%d.masked.dedup.sorted.rpkm.bw" % (factor, rep)
        else:
            sample = "%s.rep%d.masked.dedup.sorted.rpkm.ctrl_subtracted.bw" % (factor, rep)
        sample_name = "%02d_%s_rep%02d" % (fi, factor.replace('.', '_'), rep)
        # Th0 replicates all share the first colour of their palette; other
        # groups step through the palette by replicate position.
        # NOTE(review): remote_fn has no GENOME/data component while url does;
        # confirm which layout the server expects if remote_fn is ever used.
        tr = Track(
                name=sample_name,
                short_label=sample_name,
                long_label=sample_name,
                local_fn=os.path.join(LOCAL_DATA_DIR, sample),
                remote_fn=os.path.join(REMOTE_DATA_DIR, sample),
                url=os.path.join(URLBASE,
                                 'collab',
                                 'matt_p300_krab',
                                 GENOME,
                                 'data',
                                 sample),
                tracktype='bigWig',
                color=','.join([str(cc) for cc in colors_palettes[fi].colors[rep_ix if 'Th0' not in factor else 0]]),
                visibility='full',
                maxHeightPixels='100:50:8',
            )
        aggregate_track.add_subtrack(tr)

    # NOTE: per-replicate narrowPeak and per-factor merged-peak bigBed tracks
    # (*.narrowPeak.bb, *.merged_peaks.bb) are not added to the hub here.
    supertrack.add_track(aggregate_track)

# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print(supertrack)
trackdb.add_tracks(supertrack)

genome.add_trackdb(trackdb)
genomes_file.add_genome(genome)
hub.add_genomes_file(genomes_file)

# Write hub.txt, the genomes file and trackDb.txt to their local_fn paths.
hub.render()

Overwriting /data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/scripts/create_chipseq_tracks.matt_p300_krab.py


In [11]:
%%bash
# Run the trackhub-rendering script as a Slurm job inside the "alex" conda env.
source /data/reddylab/software/miniconda2/bin/activate alex
CHIP_DIR=/data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq
sbatch \
    --partition=new,all \
    --output="${CHIP_DIR}/logs/create_chipseq_tracks.out" \
    --wrap="python ${CHIP_DIR}/scripts/create_chipseq_tracks.matt_p300_krab.py"

Submitted batch job 23761983


In [2]:
%%bash
# Create the hub's per-genome data directory and move into it.
DATA_DIR=/data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/trackhub/matt_p300_krab/mm10/data
mkdir -p "${DATA_DIR}"
cd "${DATA_DIR}"

# The commands below populate the data directory with symlinks to the signal and
# peak files, and rename the Th0 files to the mmTh0_p300.* names used by the hub.
# They are left commented out; uncomment them to rebuild the directory from scratch.
# ln -s /data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/Matt_5756_190620B1-se-with-control/*.masked.dedup.sorted.rpkm.bw ./
# ln -s /data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/Matt_5756_190620B1-se-with-control/*.merged_peaks.bb ./
# ln -s /data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/Matt_5756_190620B1-se-with-control/*masked.dedup.sorted_peaks.trunked_scores.narrowPeak.bb ./
# ln -s /data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/Matt_5756_190620B1-se-with-control/*.masked.dedup.sorted.rpkm.ctrl_subtracted.bw ./
# ln -s /data/reddylab/Keith/collab/200924_Gemberling/processing/chip_seq/Siklenka_6621_201109A5-pe/KS157.CREneg.g5*rpkm.bw ./
# ln -s /data/reddylab/Keith/collab/200924_Gemberling/processing/chip_seq/Siklenka_6683_201201A5-pe/KS162_CrePos*rpkm.bw ./
# rename KS162_CrePos_NTC_ mmTh0_p300.K27ac.CREpos.Foxp3_ntgRNA. KS162_CrePos_NTC_*
# rename KS162_CrePos_g5_ mmTh0_p300.K27ac.CREpos.Foxp3_g5. KS162_CrePos_g5_*
# rename KS157.CREneg.g5.K27ac mmTh0_p300.K27ac.CREneg.Foxp3_g5 KS157.CREneg.g5.K27ac*


In [12]:
%%bash
# Push the rendered hub (and the symlinked data, via --copy-links) to the
# trackhub server from the transfer node.
#
# The commands are passed to ssh explicitly through a quoted here-document.
# A bare "ssh host" line only works because the cell is fed to bash on stdin and
# ssh then consumes the remaining lines; that is fragile and easy to misread.
# -T disables pseudo-terminal allocation, which is not needed for a scripted session.
ssh -T hardac-xfer.genome.duke.edu <<'EOF'
# Abort if the hub directory is missing so rsync never uploads "*" from elsewhere.
cd /data/reddylab/Alex/collab/20190701_Matt/processing/chip_seq/trackhub/matt_p300_krab || exit 1
rsync -rvz --copy-links -e ssh --update \
    * \
    trackhub.genome.duke.edu:/nfs/trackhub/reddylab/collab/matt_p300_krab
EOF


sending incremental file list
hub.txt
matt_p300_krab.genomes.txt
mm10/trackDb.txt

sent 1051 bytes  received 431 bytes  988.00 bytes/sec
total size is 2517054098  speedup is 1698417.07


Pseudo-terminal will not be allocated because stdin is not a terminal.


http://genome.ucsc.edu/cgi-bin/hgTracks?db=mm10&hubUrl=http://trackhub.genome.duke.edu/reddylab/collab/matt_p300_krab/hub.txt