In [1]:
import numpy as np
import os
import pandas as pd
import utils

In [2]:
%%bash
# Intersect peak BED files with bedtools and write the pairwise overlap tables
# that the following Python cells read.  `-wo` reports both original records
# plus the number of overlapping base pairs for every overlapping pair.
#
# Stop at the first failing command / unset variable so that a failed
# bedtools or mkdir call ends the cell with an error instead of leaving empty
# or missing overlap files for the downstream cells.
set -euo pipefail

input_path="../2.peaks_in_CMs/peak_files"
core_path="../2.peaks_in_CMs/peak_overlaps"
all_peaks_bed="../0.input_peaks/peak_bed_files/k27ac_k4me1_peaks.bed"

# All peaks against all peaks (the file is intersected with itself, so every
# peak also overlaps itself in the output).
mkdir -p "${core_path}/overlap_all_vs_all_peaks"
bedtools intersect -wo \
    -a "${all_peaks_bed}" \
    -b "${all_peaks_bed}" \
    > "${core_path}/overlap_all_vs_all_peaks/all_peaks_overlap.bed"

dataset="test_data"
methods=( "vcmtools" "clomics" "phm" )
for method in "${methods[@]}"
do
    method_bed="${input_path}/${dataset}_${method}_all_peaks.bed"
    not_method_bed="${input_path}/${dataset}_not_${method}_peaks.bed"

    not_vs_not_dir="${core_path}/overlap_of_not_${method}_and_not_${method}_peaks"
    method_vs_not_dir="${core_path}/overlap_of_${method}_and_not_${method}_peaks"
    method_vs_method_dir="${core_path}/overlap_of_${method}_and_${method}_peaks"
    mkdir -p "${method_vs_not_dir}" "${method_vs_method_dir}" "${not_vs_not_dir}"

    # "not <method>" peaks against themselves.
    bedtools intersect -wo \
        -a "${not_method_bed}" \
        -b "${not_method_bed}" \
        > "${not_vs_not_dir}/${dataset}_not_${method}_not_${method}_peaks_overlap.bed"

    # <method> peaks against "not <method>" peaks.
    bedtools intersect -wo \
        -a "${method_bed}" \
        -b "${not_method_bed}" \
        > "${method_vs_not_dir}/${dataset}_${method}_not_${method}_peaks_overlap.bed"

    # <method> peaks against themselves.
    bedtools intersect -wo \
        -a "${method_bed}" \
        -b "${method_bed}" \
        > "${method_vs_method_dir}/${dataset}_${method}_${method}_peaks_overlap.bed"
done

In [2]:
# Shared settings for the remaining cells.
# NOTE(review): `dataset`, the output directory and the method list repeat the
# values hard-coded in the %%bash cell; keep the two in sync when changing them.
dataset = "test_data"
overlapping_peaks_path = "../2.peaks_in_CMs/peak_overlaps"
# exist_ok=True makes the call a no-op when the directory is already there,
# without the separate exists() check (which can race with another process
# creating the directory in between).
os.makedirs(overlapping_peaks_path, exist_ok=True)
methods = ["vcmtools", "clomics", "phm"]

In [3]:
# Load the all-vs-all overlap table written by the %%bash cell.
# The file is tab-separated and has no header row, so the columns are
# labelled positionally (0, 1, 2, ...).
all_peaks_overlap_file = os.path.join(
    overlapping_peaks_path, "overlap_all_vs_all_peaks", "all_peaks_overlap.bed"
)
all_peaks_overlap_df = pd.read_csv(all_peaks_overlap_file, sep="\t", header=None)
# Preview the first rows as the cell output.
all_peaks_overlap_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,chr22,16192326,16192745,H3K27ac:chr22:16192326:16192745,+,chr22,16192326,16192745,H3K27ac:chr22:16192326:16192745,+,419
1,chr22,16204838,16205246,H3K27ac:chr22:16204838:16205246,+,chr22,16204838,16205246,H3K27ac:chr22:16204838:16205246,+,408
2,chr22,16293852,16294075,H3K27ac:chr22:16293852:16294075,+,chr22,16293852,16294075,H3K27ac:chr22:16293852:16294075,+,223
3,chr22,16682884,16683232,H3K27ac:chr22:16682884:16683232,+,chr22,16682884,16683232,H3K27ac:chr22:16682884:16683232,+,348
4,chr22,16684791,16685090,H3K27ac:chr22:16684791:16685090,+,chr22,16684791,16685090,H3K27ac:chr22:16684791:16685090,+,299


In [4]:
# Build a lookup from an overlapping peak (column 8) to the H3K27ac peak it
# overlaps (column 3) and store it as a pickled .npy file.
# Columns 3 and 8 hold the peak identifiers of the two intersected records.

# Drop the rows in which a peak is paired with itself.
is_distinct_pair = all_peaks_overlap_df[3] != all_peaks_overlap_df[8]
no_self_overlap_df = all_peaks_overlap_df[is_distinct_pair]

# Keep only the pairs whose first record is an H3K27ac peak.
has_k27ac_first = no_self_overlap_df[3].str.startswith("H3K27ac")
k27ac_in_third_column_df = no_self_overlap_df[has_k27ac_first]

# H3K27ac peak -> list of the peaks it overlaps.
k27ac_in_third_col_peak_dict = {}
for anchor_peak, anchor_rows in k27ac_in_third_column_df.groupby(3):
    k27ac_in_third_col_peak_dict[anchor_peak] = list(anchor_rows[8])

# Invert the lookup: overlapping peak -> H3K27ac peak.  When one peak overlaps
# several H3K27ac peaks, the H3K27ac peak visited last replaces earlier ones.
# NOTE(review): column 8 is not filtered by mark, so the keys are H3K4me1 peaks
# only if H3K27ac peaks never overlap each other - confirm against the input.
mapping_dict = {}
for anchor_peak, partner_peaks in k27ac_in_third_col_peak_dict.items():
    for partner_peak in partner_peaks:
        mapping_dict[partner_peak] = anchor_peak

# A dict is not a plain array, hence allow_pickle=True.
mapping_file = os.path.join(
    overlapping_peaks_path,
    "overlap_all_vs_all_peaks",
    "mapping_k4me1_to_k27ac_peaks.npy",
)
np.save(mapping_file, mapping_dict, allow_pickle=True)

In [5]:
# For every method, locate the overlap table of the method's peaks against
# themselves (written by the %%bash cell) and pass it to utils.save_npy_dict.
# NOTE(review): save_npy_dict lives in the project-local `utils` module;
# judging by its name it stores a dict as .npy - confirm there.
for method in methods:
    overlap_dir = f"overlap_of_{method}_and_{method}_peaks"
    overlap_file = f"{dataset}_{method}_{method}_peaks_overlap.bed"
    overlap_path = os.path.join(overlapping_peaks_path, overlap_dir, overlap_file)
    utils.save_npy_dict(overlap_path, overlapping_peaks_path, method, dataset)

In [6]:
# Same as the method-vs-method step, but for the "not <method>" peaks
# intersected with themselves; prefix="not_" is forwarded to
# utils.save_npy_dict (project-local helper, behaviour not visible here).
for method in methods:
    overlap_dir = f"overlap_of_not_{method}_and_not_{method}_peaks"
    overlap_file = f"{dataset}_not_{method}_not_{method}_peaks_overlap.bed"
    overlap_path = os.path.join(overlapping_peaks_path, overlap_dir, overlap_file)
    utils.save_npy_dict(
        overlap_path, overlapping_peaks_path, method, dataset, prefix="not_"
    )