# Notebook K4me1 10vs10 analysis

In [None]:
%matplotlib inline
%run ~/work/washu/bed/bedtrace.py
import matplotlib.pyplot as plt

# DiffBind data preparation 10vs10
Use the following command to prepare the config for DiffBind:

```
bash ~/work/washu/analysis/diffbind_config.sh k4me1_10vs10_reseq_bams k4me1_10vs10_reseq_bams_macs_broad_0.1 |\
    sed 's#/home/user/nfsexport#/mnt/stripe#g' > k4me1_10vs10_reseq_diff/k4me1_10vs10_diff.csv
```

# Load peaks from DiffBind config

In [None]:
import pandas as pd

def _load_group(db, condition, title):
    """Build and report the ``SampleID -> Bed`` mapping for one condition."""
    subset = db[db['Condition'] == condition]
    tracks = {}
    for sample_id, peaks in zip(subset['SampleID'], subset['Peaks']):
        # The sheet lists '.xls' peak files; the '.broadPeak' file with the
        # otherwise identical path is the one wrapped in Bed.
        tracks[sample_id] = Bed(peaks.replace('.xls', '.broadPeak'))
    print('{}: {}'.format(title, len(tracks)))
    for sample_id, track in tracks.items():
        print('{}: {}'.format(sample_id, track.count()))
    return tracks


def load(path, mark='K4me1'):
    """Load per-sample peak tracks listed in a DiffBind sample sheet.

    The comma-separated file at ``path`` must provide the columns
    ``SampleID``, ``Condition`` and ``Peaks``. Rows whose ``Condition`` is
    'Y' and rows whose ``Condition`` is 'O' are collected separately; rows
    with any other condition are ignored.

    For each group a summary is printed: the number of tracks, then one
    ``SampleID: count()`` line per track.

    Args:
        path: Path to the sample sheet (CSV).
        mark: Label that prefixes the two printed group headers. Earlier
            versions printed a hard-coded 'K27ac' although this notebook
            loads K4me1 data; pass ``mark='K27ac'`` to get that output.

    Returns:
        Tuple ``(YDS, ODS)`` of dicts mapping ``SampleID`` to ``Bed``, for
        the 'Y' and 'O' conditions respectively.
    """
    db = pd.read_csv(path, sep=',')
    YDS = _load_group(db, 'Y', '{} YDS'.format(mark))
    ODS = _load_group(db, 'O', '{} ODS'.format(mark))
    return YDS, ODS

# Directory with the DiffBind sample sheet that lists the K4me1 peak files.
FOLDER = '/mnt/stripe/bio/raw-data/aging/Y10OD10/chipseq/processed/k4me1_10vs10_reseq_bams_macs_broad_0.01_difference'
# Young-donor and old-donor tracks, each keyed by SampleID.
K4me1_YDS, K4me1_ODS = load(FOLDER + '/k4me1_unit809.csv')

# Peaks distribution

In [None]:
import numpy as np

def intersect_peaks(YDS, ODS):
    """Plot how each sample's peak count splits into common/group/individual.

    First draws a ``metapeaks`` figure comparing the intersection of all
    young-donor tracks with the intersection of all old-donor tracks. Then
    draws a stacked bar chart with one bar per sample (young donors first,
    then old donors, each in dict iteration order):

    * 'Common' (green): ``count()`` of the intersection of both group
      intersections; identical for every bar.
    * 'Group' (blue): ``count()`` of the sample's group intersection minus
      the common count.
    * 'Individual' (red): the sample's own ``count()`` minus the ``count()``
      of its group intersection.

    The stacked values are differences of counts, not counts of set
    differences.

    Args:
        YDS: dict mapping sample name to track for young donors.
        ODS: dict mapping sample name to track for old donors.

    Returns:
        None. Both figures are shown via ``plt.show()``.
    """
    # NOTE(review): intersect/metapeaks are presumably provided by bedtrace.py.
    YD_intersection = intersect(*YDS.values())
    OD_intersection = intersect(*ODS.values())
    YD_OD_intersection = intersect(YD_intersection, OD_intersection)
    metapeaks({'Young donors': YD_intersection, 'Old donors': OD_intersection})
    plt.show()

    # The intersection sizes do not depend on the sample being plotted, so
    # they are computed once instead of once per sample.
    yd_count = YD_intersection.count()
    od_count = OD_intersection.count()
    common_count = YD_OD_intersection.count()

    names = list(YDS) + list(ODS)
    N = len(names)
    ind = np.arange(N)

    common_peaks = [common_count] * N
    group_specific = ([yd_count - common_count] * len(YDS) +
                      [od_count - common_count] * len(ODS))
    sample_specific = ([track.count() - yd_count for track in YDS.values()] +
                       [track.count() - od_count for track in ODS.values()])

    plt.figure(figsize=(20, 15))
    width = 0.35
    p1 = plt.bar(ind, common_peaks, width, color='green')
    p2 = plt.bar(ind, group_specific, width, bottom=common_peaks, color='blue')
    # The third layer sits on top of the first two stacked layers.
    p3 = plt.bar(ind, sample_specific, width,
                 bottom=np.add(common_peaks, group_specific), color='red')
    plt.ylabel('Peaks')
    plt.xticks(ind, names)
    plt.legend((p1[0], p2[0], p3[0]), ('Common', 'Group', 'Individual'))
    plt.show()

# K4me1 10vs10

In [None]:
# Peak breakdown over every loaded young-donor and old-donor track.
intersect_peaks(K4me1_YDS, K4me1_ODS)

In [None]:
# Filter out tracks with bad quality: repeat the breakdown without sample YD14.
intersect_peaks(
    {name: track for name, track in K4me1_YDS.items() if name != 'YD14'},
    K4me1_ODS,
)

# VS ENCODE

In [None]:
# CD14 K4me1 broad peaks accessible from IGV
# https://www.encodeproject.org/files/ENCFF001TAJ/@@download/ENCFF001TAJ.bed.gz
K4ME1_PEAKS = '/mnt/stripe/bio/raw-data/aging/peaks_k4me1_analysis/peaks_k4me1_macs_broad_0.1'
ENCODE_1 = Bed(K4ME1_PEAKS + '/Broad_1_ENCFF000CDL_hg19_broad_0.1_peaks.broadPeak')
ENCODE_2 = Bed(K4ME1_PEAKS + '/Broad_2_ENCFF000CDK_hg19_broad_0.1_peaks.broadPeak')
# NOTE(review): the IGV track is read from a user-specific local path, so this
# cell only runs on the machine where that file exists.
metapeaks({
    'ENCODE_1': ENCODE_1,
    'ENCODE_2': ENCODE_2,
    'IGV K4me1': Bed('/Users/oleg/Desktop/peaks_k4me1/peaks_k4me1/ENCFF001TAJ.bed'),
})
plt.show()

# Intersection of the tracks within each group (young donors without YD14),
# each compared against the two ENCODE tracks.
YD_PEAKS = intersect(*(track for name, track in K4me1_YDS.items() if name != 'YD14'))
OD_PEAKS = intersect(*K4me1_ODS.values())
metapeaks({
    'ENCODE_1': ENCODE_1,
    'ENCODE_2': ENCODE_2,
    'YD_intersection': YD_PEAKS,
})
plt.show()
metapeaks({
    'ENCODE_1': ENCODE_1,
    'ENCODE_2': ENCODE_2,
    'OD_intersection': OD_PEAKS,
})
plt.show()

# Union of the tracks within each group (young donors without YD14).
YD_PEAKS_UNION = union(*(track for name, track in K4me1_YDS.items() if name != 'YD14'))
OD_PEAKS_UNION = union(*K4me1_ODS.values())
# Old-donor union against young-donor union.
metapeaks({
    'OD_union': OD_PEAKS_UNION,
    'YD_union': YD_PEAKS_UNION,
})
plt.show()
# Union of both groups against the two ENCODE tracks.
metapeaks({
    'ENCODE_1': ENCODE_1,
    'ENCODE_2': ENCODE_2,
    'YD and OD union': union(YD_PEAKS_UNION, OD_PEAKS_UNION),
})
plt.show()
