# Epigenetic feature enrichment signal
Create bigWig files showing feature enrichment.

These data will be used for the metaplots and for the overall feature distribution plot made with pyGenomeTracks.

The input source for each target was chosen based on the similarity between the replicates (see [peak_calling_v2.ipynb](peak_calling_v2.ipynb)).

In [2]:
import os

# Process bam file replicates to single bigwig file
step 1 - enriched target vs input

step 2 - replicate 1 vs replicate 2

In [3]:
# Prefix for the input BAM files; process_bams appends
# <target or control name><replicate>.bam to it.
bam_dir = 'data/chip-seq/bam_files/'
# Prefix for every bigWig written by this notebook (per-replicate
# comparisons and the per-target average).
output_dir = 'data/chip-seq/compared_bigwig/'
# Replicate suffixes used to build the BAM and bigWig file names.
replicates = ['R1', 'R2']
# exist_ok=True replaces the check-then-create pattern: no race between the
# existence test and the creation, and re-running the cell is a no-op.
os.makedirs(output_dir, exist_ok=True)

def _ensure_bam_index(bam_path) -> None:
    """Run ``samtools index`` on *bam_path* unless ``<bam_path>.bai`` exists."""
    import shlex

    if os.path.exists(bam_path + '.bai'):
        return
    # Quote the path so spaces or shell metacharacters cannot split or
    # alter the command line handed to the shell.
    command = 'samtools index ' + shlex.quote(bam_path)
    print(command)
    os.system(command)


def compare_bam(enriched, input, output) -> None:
    """Write a bamCompare bigWig of an enriched BAM against its control BAM.

    Both BAM files are indexed first if no ``.bai`` file sits next to them.
    Every shell command is printed before it is executed.

    Args:
        enriched: Path to the BAM file of the enriched (target) sample.
        input: Path to the BAM file of the control sample. The name shadows
            the ``input`` builtin but is kept so keyword callers still work.
        output: Path of the bigWig file to write. The combined stdout/stderr
            of bamCompare is redirected to ``<output>.log``.

    Returns:
        None. The exit status of the commands is not inspected; the trailing
        ``|| true`` makes the bamCompare command line always succeed, so
        failures are only visible in the log file.
    """
    import shlex

    for bam_path in (enriched, input):
        _ensure_bam_index(bam_path)

    log = output + '.log'
    # Paths are shell-quoted; plain paths (letters, digits, '/', '.', '-',
    # '_') come out unchanged, so the command is identical for them.
    command = (
        'bamCompare -b1 {} -b2 {} -o {} -p max/2 --ignoreDuplicates '
        '--normalizeUsing RPKM --binSize 20 --smoothLength 60 --centerReads '
        '--extendReads 220 --scaleFactorsMethod None > {} 2>&1 || true'
    ).format(*(shlex.quote(path) for path in (enriched, input, output, log)))
    print(command)
    os.system(command)

def average_bigwigs(bigwig1, bigwig2, output) -> None:
    """Average two bigWig files into one by running ``bigwigAverage``.

    The command is printed before it is executed through the shell.

    Args:
        bigwig1: Path to the first bigWig file.
        bigwig2: Path to the second bigWig file.
        output: Path of the averaged bigWig file to write.

    Returns:
        None. The exit status of the command is not inspected.
    """
    import shlex

    # Quote each path so spaces or shell metacharacters cannot split or
    # alter the command; plain paths are emitted unchanged.
    command = 'bigwigAverage -b {} {} -o {}'.format(
        shlex.quote(bigwig1), shlex.quote(bigwig2), shlex.quote(output)
    )
    print(command)
    os.system(command)

def process_bams(target, input_type, replicates):
    """Build per-replicate enrichment bigWigs for a target and average them.

    For each replicate suffix, ``<bam_dir><target><replicate>.bam`` is
    compared against ``<bam_dir><input_type><replicate>.bam`` with
    ``compare_bam``, writing
    ``<output_dir><target><replicate>_vs_<input_type><replicate>.bw``.
    The bigWigs of the first two replicates are then averaged into
    ``<output_dir><target>_average.bw`` with ``average_bigwigs``.

    Args:
        target: Name prefix of the enriched sample's BAM files.
        input_type: Name prefix of the control sample's BAM files.
        replicates: Iterable of replicate suffixes; at least two are needed.

    Raises:
        ValueError: If fewer than two replicates are given. Checked before
            any comparison runs, so the failure does not come only after
            the long-running bamCompare step.
    """
    replicates = list(replicates)
    if len(replicates) < 2:
        raise ValueError(
            'process_bams needs at least two replicates to average, got {}'.format(
                len(replicates)
            )
        )

    output_paths = []
    for replicate in replicates:
        enriched = bam_dir + target + replicate + '.bam'
        # Named `control` rather than `input` to avoid shadowing the builtin.
        control = bam_dir + input_type + replicate + '.bam'
        output = output_dir + target + replicate + '_vs_' + input_type + replicate + '.bw'
        output_paths.append(output)
        compare_bam(enriched, control, output)

    # NOTE: only the first two replicate bigWigs are averaged; any further
    # replicates are compared above but left out of the average.
    average_bigwigs(output_paths[0], output_paths[1], output_dir + target + '_average.bw')


In [9]:
# Each entry pairs an enriched target with the control BAM prefix it is
# compared against; all pairs are processed over the same replicates.
for target, control in (('CenH3', 'input'), ('H3K4', 'noaba'), ('H3K9', 'input')):
    process_bams(target, control, replicates)



bamCompare -b1 data/chip-seq/bam_files/CenH3R1.bam -b2 data/chip-seq/bam_files/inputR1.bam -o data/chip-seq/compared_bigwig/CenH3R1_vs_inputR1.bw -p max/2 --ignoreDuplicates --normalizeUsing RPKM --binSize 20 --smoothLength 60 --centerReads --extendReads 220 --scaleFactorsMethod None > data/chip-seq/compared_bigwig/CenH3R1_vs_inputR1.bw.log 2>&1 || true


# Enrichment profiles (metaplots)


## compute matrix

```bash
computeMatrix scale-regions -S CenH3_average.bw H3K4_average.bw H3K9_average.bw -R Luzula_sylvatica_genes.bed Lusy1_merged_15k.bed Lusy2_merged_15k.bed LTR-TEs.bed -o epigenetic_marks.matrix.gz  -m 4000 -b 2000 -a 2000 -p 16 --smartLabels
```

## plot profile
```bash
plotProfile -m epigenetic_marks.matrix.gz --startLabel start --endLabel end -out profile.svg --plotFileFormat svg --regionsLabel genes Lusy1 Lusy2 LTR-TEs --samplesLabel CenH3 H3K4me3 H3K9me2 --numPlotsPerRow 1 --colors "#0004f2ff" "#d42afff1" "#008203ff" "#fba604ff"
```
