# 20 vs 20 signal processing

### There are two ways to process the signal
* Based on exact tag counts using fragment size
    UNIQUE_BAM -> PILEUP_BED -> TAGS -> intersect with given regions bed and compute intersection
* Based on bigwigs
    UNIQUE_BAM -> BIGWIG -> bigWigAverageOverBed

### Interesting LOCI
/mnt/stripe/bio/raw-data/aging/loci_of_interest/

# Prepare BigWigs
```
# Collect the bigWig tracks of every data type into per-type sub-directories
# of the signal directory.
# Safe to re-run: directories are created with `-p`, and every destination is
# an absolute path, so nothing is copied or removed elsewhere if `cd` fails.
TRACKS_DIR=/mnt/stripe/bio/experiments/configs/Y20O20/browsers/browser_tracks
SIGNAL_DIR=/mnt/stripe/bio/experiments/signal
mkdir -p "$SIGNAL_DIR" && cd "$SIGNAL_DIR"

# Each modification directory receives the modification's own unique tracks
# plus the unique input tracks.
for MARK in k27ac k27me3 k36me3 k4me1 k4me3; do
    MARK_DIR="$SIGNAL_DIR/H3K${MARK#k}"   # k27ac -> H3K27ac
    mkdir -p "$MARK_DIR"
    cp "$TRACKS_DIR"/*"$MARK"*_unique.bw "$MARK_DIR"/
    cp "$TRACKS_DIR"/*input*_unique.bw "$MARK_DIR"/
done

mkdir -p "$SIGNAL_DIR/meth"
cp "$TRACKS_DIR"/*meth*.bw "$SIGNAL_DIR/meth/"
# Remove outliers (tracks whose names contain od5, od17 or yd9)
rm "$SIGNAL_DIR"/meth/*od5* "$SIGNAL_DIR"/meth/*od17* "$SIGNAL_DIR"/meth/*yd9*

mkdir -p "$SIGNAL_DIR/rnaseq"
cp "$TRACKS_DIR"/*transcription* "$SIGNAL_DIR/rnaseq/"

mkdir -p "$SIGNAL_DIR/mirna"
cp "$TRACKS_DIR"/*mirna* "$SIGNAL_DIR/mirna/"
```

# Process signals and build PCA/graphics/diffbind scores
```
# Run signals_bw.sh for every (data-type directory, regions .bed) pair and
# mirror the output into log.txt inside $DIR.
export PYTHONPATH="/mnt/stripe/washu:$PYTHONPATH"
DIR=/mnt/stripe/bio/experiments/signal
# The loop only starts if `cd` succeeds; otherwise `find .` and log.txt would
# operate on whatever directory the shell happens to be in.
cd "$DIR" &&
while IFS= read -r -d '' M <&3; do
    M=${M#./}                       # ./H3K27ac -> H3K27ac
    echo "Processing $DIR/$M"
    while IFS= read -r -d '' F <&4; do
        echo "Processing regions $F"
        N=${F%.bed}                 # strip the extension ...
        N=${N##*/}                  # ... and the directory part
        echo "$N"
        # Skip region sets whose result directory already exists
        # (presumably created by signals_bw.sh on an earlier run — confirm).
        if [ ! -d "$DIR/$M/$N" ]; then
            bash /mnt/stripe/washu/parallel/signals_bw.sh "$DIR/$M" "$F" "$N" /mnt/stripe/bio/genomes/hg19/hg19.chrom.sizes
        fi
    # Paths are read NUL-delimited on descriptors 3 and 4, so names with
    # whitespace survive and stdin stays untouched for signals_bw.sh.
    done 4< <(find /mnt/stripe/bio/raw-data/aging/loci_of_interest/ -name "*.bed" -print0)
done 3< <(find . -mindepth 1 -maxdepth 1 -type d -print0) | tee log.txt
```


### Create summary fit error table
```
# Build result.tsv in the current directory: one row per *_fit_error.csv found
# below it, with the columns
#   modification (first path component), file (basename), the CSV values,
#   and e_min = the minimum of the four error columns.
# Rows are streamed straight into awk, so no temporary file is left behind and
# a stale result.tsv.tmp from an interrupted run can no longer leak into the
# table. Progress messages go to stderr because stdout carries the rows.
# Assumes every CSV holds a single line of four comma-separated values — TODO confirm.
T=$'\t'
echo "modification${T}file${T}e${T}e_scaled${T}e_log${T}e_scaled_log${T}e_min" > result.tsv
find . -name "*_fit_error.csv" -print0 |
    while IFS= read -r -d '' F; do
        N=${F#./}
        M=${N%%/*}                  # modification = top-level directory
        R=${N##*/}                  # file = basename of the CSV
        echo "$M" >&2
        echo "$R" >&2
        L=$(tr ',' '\t' < "$F")
        printf '%s\t%s\t%s\n' "$M" "$R" "$L"
    done |
    awk -v OFS='\t' '{min=$3; for(j=4;j<=6;j++){min=($j<min)?$j:min}; print($1,$2,$3,$4,$5,$6,min)}' >> result.tsv
```

# Signals over weak consensus to mark outliers

## Copy all the unique.bw
```
# Copy the unique bigWig tracks of every histone modification into its own
# sub-directory of the signal_outliers directory.
# The destination is derived from the same name used for `mkdir`, so the
# H3K27me3 tracks land in h3k27me3/ (they were previously sent to a
# non-existent h3k27me/).
# NOTE(review): the source is .../browsers/browser here, while the BigWig
# preparation step reads from .../browsers/browser_tracks — confirm which is right.
TRACKS_DIR=/mnt/stripe/bio/experiments/configs/Y20O20/browsers/browser
OUTLIERS_DIR=/mnt/stripe/bio/experiments/signal_outliers
mkdir -p "$OUTLIERS_DIR" && cd "$OUTLIERS_DIR"
for MARK in k27ac k27me3 k36me3 k4me1 k4me3; do
    mkdir -p "$OUTLIERS_DIR/h3$MARK"
    cp "$TRACKS_DIR"/*"$MARK"*unique*.bw "$OUTLIERS_DIR/h3$MARK/"
done
```

## Copy weak consensus
```
# Copy the files of both consensus sets into the current directory
# (run this from the signal_outliers directory, where the PCA step looks for *.bed files).
for CONSENSUS in golden_consensus zinbra_consensus; do
    cp /mnt/stripe/bio/raw-data/aging/loci_of_interest/"$CONSENSUS"/* .
done
```


## Launch PCA
```
# For every modification directory in $DIR, run signals_bw.sh on each .bed file
# under $DIR whose name contains the directory name (case-insensitive), and
# mirror the output into log.txt inside $DIR.
export PYTHONPATH="/mnt/stripe/washu:$PYTHONPATH"
DIR=/mnt/stripe/bio/experiments/signal_outliers
# The loop only starts if `cd` succeeds; otherwise `find .` and log.txt would
# operate on whatever directory the shell happens to be in.
cd "$DIR" &&
while IFS= read -r -d '' M <&3; do
    M=${M#./}                       # ./h3k27ac -> h3k27ac
    echo "Processing $DIR/$M"
    while IFS= read -r -d '' F <&4; do
        echo "Processing regions $F"
        N=${F%.bed}                 # strip the extension ...
        N=${N##*/}                  # ... and the directory part
        echo "$N"
        # Skip region sets whose result directory already exists
        # (presumably created by signals_bw.sh on an earlier run — confirm).
        if [ ! -d "$DIR/$M/$N" ]; then
            bash /mnt/stripe/washu/parallel/signals_bw.sh "$DIR/$M" "$F" "$N" /mnt/stripe/bio/genomes/hg19/hg19.chrom.sizes
        fi
    # Paths are read NUL-delimited on descriptors 3 and 4, so names with
    # whitespace survive and stdin stays untouched for signals_bw.sh.
    done 4< <(find "$DIR" -iname "*$M*.bed" -print0)
done 3< <(find . -mindepth 1 -maxdepth 1 -type d -print0) | tee log.txt
```