In [1]:
import os
import glob
import subprocess as sp
import pandas as pd
import numpy as np

In [2]:
# Locations used throughout the notebook: ATAC project root, merged BigWig tracks,
# RNA-seq comparison tables (.tsv) and the directory that receives the plots.
root = "/data/manke/group/rabbani/atac_project2078/"
bw_path = os.path.join(root, "atac_pipeline_merged_bam", "BigWigs")
rna_path = "/data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros"
output_path = "/data/akhtar/group2/rabbani/atac_project2078/plots"

In [3]:
# Make BED files of differentially expressed genes (one UP and one DOWN file per
# comparison and time point) plus one BED file with 500 randomly drawn genes.
gtf_file = "/data/manke/group/rabbani/rna_project2129/genes.filtered.gtf"
bed_source_cols = [0, 3, 4, 8, 5, 6]  # positional GTF columns that end up in the BED file, in output order
bed_header = ['#chr', 'start', 'end', 'GeneID', 'score', 'strand']
beds_dir = os.path.join(rna_path, "beds")
random_seed = 42  # fixed so that 500RandomGenes.bed is the same on every re-run

gene_coord = pd.read_csv(gtf_file, sep = "\t", header = None, comment = "#")
# .copy(): the column assignment below must act on an independent frame, not on a slice of the full table
gene_coord = gene_coord.loc[gene_coord[2] == "gene"].copy()
# keep the text between the first and second double quote of column 8
# (presumably the gene_id value of the GTF attribute field -- verify against the GTF)
gene_coord[8] = gene_coord[8].str.split('"', n = 2, expand = True)[1]
gene_coord = gene_coord[bed_source_cols]
# NOTE(review): start coordinates are written exactly as read from the GTF; if the BED
# consumers expect 0-based starts this is off by one base -- confirm before changing.
os.makedirs(beds_dir, exist_ok = True)

# Make UP and DOWN bed files per time point
for time in ['LPS0', 'LPS3', 'LPS12']:
    for cond in ["vector", "K197R", "K197Q"]:
        name = "_".join([cond, time, "vs", "WT", time])
        file = os.path.join(rna_path, name + ".tsv")
        if not os.path.isfile(file):
            raise FileNotFoundError("expected comparison table is missing: " + file)
        df = pd.read_csv(file, sep = "\t")
        bed = df.merge(gene_coord, left_on = "GeneID", right_on = gene_coord[8], how = "inner")
        for status, suffix in (("DOWN", "_down.bed"), ("UP", "_up.bed")):
            subset = bed.loc[bed["Status"] == status]
            # no file is written for an empty gene set; later cells test for file existence
            if len(subset) > 0:
                subset = subset[bed_source_cols]
                subset.columns = bed_header
                subset.to_csv(os.path.join(beds_dir, name + suffix), sep = "\t", index = False)

#  Make 500 random genes bed
random_genes = pd.read_csv(os.path.join(rna_path, "K197R_LPS12_vs_WT_LPS12.tsv"), sep = "\t")
random_genes = random_genes.sample(n = 500, random_state = random_seed)
# inner merge: sampled genes without a matching entry in gene_coord are dropped
random_genes = random_genes.merge(gene_coord, left_on = "GeneID", right_on = gene_coord[8], how = "inner")
print(random_genes.head())
random_genes = random_genes[bed_source_cols]
random_genes.columns = bed_header
random_genes.to_csv(os.path.join(beds_dir, "500RandomGenes.bed"), sep = "\t", index = False)


  exec(code_obj, self.user_global_ns, self.user_ns)


                  GeneID     baseMean  log2FoldChange     lfcSE      stat  \
0   ENSMUSG00000108365.1    11.316024        1.944718  0.871289  2.232002   
1  ENSMUSG00000033632.15   437.496242       -0.001246  0.083726 -0.014887   
2   ENSMUSG00000078314.2     7.868302       -0.033810  0.573842 -0.058918   
3  ENSMUSG00000021668.14  1454.583425        0.047710  0.062908  0.758413   
4  ENSMUSG00000022329.14  2422.361965       -0.024019  0.043861 -0.547622   

     pvalue  padj Status external_gene_name   0         3         4  \
0  0.025615   NaN   None       RP24-174G2.2   7  80036705  80038728   
1  0.988122   NaN   None           AW554918  18  25168999  25467321   
2  0.953017   NaN   None            Gm14762   X  85825892  85834831   
3  0.448204   NaN   None               Polk  13  96480689  96542485   
4  0.583951   NaN   None               Stk3  15  34875496  35155806   

                       8  5  6  
0   ENSMUSG00000108365.1  .  +  
1  ENSMUSG00000033632.15  .  +  
2   ENSMUSG

In [4]:
# Compare the wo_zeros up/down BED files with the vs_wt up/down BED files of the same name.
# This is just to see how much they are in agreement.
# Prints: file name, rows in wo_zeros, rows in vs_wt, rows shared on (#chr, start, end).
vs_wt_beds_dir = "/data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_vs_wt/beds/"
for file in glob.glob(os.path.join(rna_path, "beds", "*.bed")):
    name = os.path.basename(file)
    # chromosome names are read as strings so both tables always merge on the same dtype
    df = pd.read_csv(file, sep = "\t", dtype = {'#chr': str})
    vs_wt_file = os.path.join(vs_wt_beds_dir, name)
    # Only a missing counterpart is reported here; any other problem (unreadable file,
    # unexpected layout) now raises instead of being silently reported as "missing".
    if not os.path.isfile(vs_wt_file):
        print("this is only found after removing zeros!", name, len(df))
        continue
    # the vs_wt files are read without a header line; only the first three columns are compared
    df1 = pd.read_csv(vs_wt_file, sep = "\t", header = None, usecols = [0, 1, 2], dtype = {0: str})
    df1.columns = ['#chr', 'start', 'end']
    merged_df = df.merge(df1, on = ['#chr', 'start', 'end'], how = "inner")
    print(name, len(df), len(df1), len(merged_df))

K197Q_LPS12_vs_WT_LPS12_down.bed 807 764 754
vector_LPS0_vs_WT_LPS0_down.bed 709 702 701
K197Q_LPS0_vs_WT_LPS0_down.bed 756 746 735
K197R_LPS0_vs_WT_LPS0_down.bed 525 524 520
vector_LPS12_vs_WT_LPS12_down.bed 471 459 456
vector_LPS3_vs_WT_LPS3_up.bed 624 612 611
K197Q_LPS12_vs_WT_LPS12_up.bed 1768 1734 1730
K197Q_LPS3_vs_WT_LPS3_down.bed 611 618 607
K197R_LPS0_vs_WT_LPS0_up.bed 375 375 372
K197R_LPS3_vs_WT_LPS3_down.bed 195 194 192
vector_LPS3_vs_WT_LPS3_down.bed 624 611 607
vector_LPS0_vs_WT_LPS0_up.bed 289 269 267
vector_LPS12_vs_WT_LPS12_up.bed 504 480 478
this is only found after removing zeros! 500RandomGenes.bed 500
K197R_LPS3_vs_WT_LPS3_up.bed 199 197 197
K197Q_LPS0_vs_WT_LPS0_up.bed 641 610 606
K197Q_LPS3_vs_WT_LPS3_up.bed 358 356 354


In [5]:
# computematrix on diff tss +-2kb
# plot heatmap
# profile plot
# ultraheatmap

In [6]:
# Per time point: computeMatrix (reference-point, 2000 bp on either side) of the four merged
# BigWigs over the DEG BED files, followed by plotHeatmap and plotProfile on that matrix.
os.makedirs(output_path, exist_ok = True)  # make sure the plot directory exists before the tools write to it
for time in ['LPS0', 'LPS3', 'LPS12']:
    bws = ""
    names = ""
    for cond in ["WT", "Vector", "K197R", "K197Q"]:
        bws += os.path.join(bw_path, "_".join(["merged", cond, time]) + ".bw") + " "
        names += "_".join([cond, time]) + " "

    # Candidate region files in plotting order: down/up per comparison, then the random control.
    candidates = []
    for cond in ["K197R", "K197Q", "vector"]:
        prefix = os.path.join(rna_path, "beds", "_".join([cond, time, "vs", "WT", time]))
        candidates.append(prefix + "_down.bed")
        candidates.append(prefix + "_up.bed")
    ##Add 500 random genes
    if time == 'LPS12':
        candidates.append(os.path.join(rna_path, "beds", "500RandomGenes.bed"))
    # Only files that exist are handed to computeMatrix (a BED is absent when its gene set was empty);
    # the random-genes file is checked the same way as the others.
    rna = "".join(bed + " " for bed in candidates if os.path.isfile(bed))

    if rna != "":
        matrix = os.path.join(output_path, time + ".gz")
        cmd = "module load deeptools/3.5.0;"
        cmd += "computeMatrix reference-point -S " + bws
        cmd += " -R " + rna
        cmd += " -a 2000 -b 2000 "
        cmd += " --samplesLabel " + names
        cmd += " -o " + matrix
        cmd += "; plotHeatmap -m " + matrix
        cmd += " -o " + os.path.join(output_path, time + ".png")
        cmd += "; plotProfile -m " + matrix
        cmd += " --perGroup -o " + os.path.join(output_path, time + "_profile.png")
        cmd += " --outFileNameData " + os.path.join(output_path, time + "_profile.tsv")
        print(cmd)
        sp.check_output(cmd, shell = True)

module load deeptools;computeMatrix reference-point -S /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_WT_LPS0.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_Vector_LPS0.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_K197R_LPS0.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_K197Q_LPS0.bw  -R /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197R_LPS0_vs_WT_LPS0_down.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197R_LPS0_vs_WT_LPS0_up.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197Q_LPS0_vs_WT_LPS0_down.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197Q_LPS0_vs_WT_LPS0_up.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/vector_LPS0_vs_WT_LPS0_down.bed /data/akhtar/


Mean of empty slice




The 'alpha' parameter to Colorbar has no effect because it is overridden by the mappable; it is deprecated since 3.3 and will be removed two minor releases later.


savefig() got unexpected keyword argument "pdd_inches" which is no longer supported as of 3.3 and will become an error two minor releases later



module load deeptools;computeMatrix reference-point -S /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_WT_LPS3.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_Vector_LPS3.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_K197R_LPS3.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_K197Q_LPS3.bw  -R /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197R_LPS3_vs_WT_LPS3_down.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197R_LPS3_vs_WT_LPS3_up.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197Q_LPS3_vs_WT_LPS3_down.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197Q_LPS3_vs_WT_LPS3_up.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/vector_LPS3_vs_WT_LPS3_down.bed /data/akhtar/


Mean of empty slice




The 'alpha' parameter to Colorbar has no effect because it is overridden by the mappable; it is deprecated since 3.3 and will be removed two minor releases later.


savefig() got unexpected keyword argument "pdd_inches" which is no longer supported as of 3.3 and will become an error two minor releases later



module load deeptools;computeMatrix reference-point -S /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_WT_LPS12.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_Vector_LPS12.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_K197R_LPS12.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_K197Q_LPS12.bw  -R /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197Q_LPS12_vs_WT_LPS12_down.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/K197Q_LPS12_vs_WT_LPS12_up.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/vector_LPS12_vs_WT_LPS12_down.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/vector_LPS12_vs_WT_LPS12_up.bed /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/beds/500RandomGenes.bed  -a 2000 -b 


Mean of empty slice




The 'alpha' parameter to Colorbar has no effect because it is overridden by the mappable; it is deprecated since 3.3 and will be removed two minor releases later.


savefig() got unexpected keyword argument "pdd_inches" which is no longer supported as of 3.3 and will become an error two minor releases later



In [7]:
# ultraheatmap wo mapping on the above matrix:
# addFeatureToMatrix appends the 'log2FoldChange' column of the three comparison tables
# (K197Q, K197R, vector vs WT), matched through 'GeneID', to the <time>.gz matrix.
ultraheatmap_dir = os.path.join(output_path, "ultraheatmap")
os.makedirs(ultraheatmap_dir, exist_ok = True)  # nothing else in the notebook creates this sub-directory
for time in ['LPS0', 'LPS3', 'LPS12']:
    tables = " ".join(
        os.path.join(rna_path, cond + "_" + time + "_vs_WT_" + time + ".tsv")
        for cond in ["K197Q", "K197R", "vector"]
    )
    cmd = "module load ultraheatmap/1.3.1; addFeatureToMatrix "
    cmd += "-m " + os.path.join(output_path, time + ".gz")
    cmd += " -o " + os.path.join(ultraheatmap_dir, time + "_w_deg.gz")
    cmd += " -t " + tables
    cmd += " --featureIdColumn 'GeneID' --featureNames 'log2FoldChange' "
    print(cmd)
    sp.check_output(cmd, shell = True)

module load ultraheatmap; addFeatureToMatrix -m /data/akhtar/group2/rabbani/atac_project2078/plots/LPS0.gz -o /data/akhtar/group2/rabbani/atac_project2078/plots/ultraheatmap/LPS0_w_deg.gz -t /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/K197Q_LPS0_vs_WT_LPS0.tsv /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/K197R_LPS0_vs_WT_LPS0.tsv /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/vector_LPS0_vs_WT_LPS0.tsv --featureIdColumn 'GeneID' --featureNames 'log2FoldChange' 
module load ultraheatmap; addFeatureToMatrix -m /data/akhtar/group2/rabbani/atac_project2078/plots/LPS3.gz -o /data/akhtar/group2/rabbani/atac_project2078/plots/ultraheatmap/LPS3_w_deg.gz -t /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/K197Q_LPS3_vs_WT_LPS3.tsv /data/akhtar/group2/rabbani/rna_project2129/pairwise_comparison_wo_zeros/K197R_LPS3_vs_WT_LPS3.tsv /data/akhtar/group2/rabbani/rna_project2129/pairwise_compariso

In [8]:
# plotheatmap of the <time>_w_deg.gz matrices.
# Seven samples are labelled: four ATAC tracks (Greens, 0..1) followed by the three appended
# log2FoldChange tracks (RdBu_r, -2..2); regions are sorted using samples 5, 6 and 7.
for time in ['LPS0', 'LPS3', 'LPS12']:
    matrix = os.path.join(output_path, "ultraheatmap", time + "_w_deg.gz")
    figure = os.path.join(output_path, "ultraheatmap", time + "_w_deg.png")
    cmd = "module load deeptools/3.5.0; plotHeatmap "
    cmd += " -m " + matrix
    cmd += " -o " + figure
    cmd += " --zMin 0 0 0 0 -2 -2 -2 --zMax 1 1 1 1 2 2 2 "
    # full option name (--sortUsingSamples) as in the later heatmap cells, instead of an abbreviated prefix
    cmd += " --colorMap Greens Greens Greens Greens RdBu_r RdBu_r RdBu_r --sortUsingSamples 5 6 7 --samplesLabel wt_atac vector_atac kr_atac kq_atac kq_vs_wt kr_vs_wt vector_vs_wt "
    print(cmd)
    sp.check_output(cmd, shell = True)

module load deeptools; plotHeatmap  -m /data/akhtar/group2/rabbani/atac_project2078/plots/ultraheatmap/LPS0_w_deg.gz -o /data/akhtar/group2/rabbani/atac_project2078/plots/ultraheatmap/LPS0_w_deg.png --zMin 0 0 0 0 -2 -2 -2 --zMax 1 1 1 1 2 2 2  --colorMap Greens Greens Greens Greens RdBu_r RdBu_r RdBu_r --sortUsingSample 5 6 7 --samplesLabel wt_atac vector_atac kr_atac kq_atac kq_vs_wt kr_vs_wt vector_vs_wt 







Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding

module load deeptools; plotHeatmap  -m /data/akhtar/group2/rabbani/atac_project2078/plots/ultraheatmap/LPS3_w_deg.gz -o /data/akhtar/group2/rabbani/atac_project2078/plots/ultraheatmap/LPS3_w_deg.png --zMin 0 0 0 0 -2 -2 -2 --zMax 1 1 1 1 2 2 2  --colorMap Greens Greens Greens Greens RdBu_r RdBu_r RdBu_r --sortUsingSample 5 6 7 --samplesLabel wt_atac vector_atac kr_atac kq_atac kq_vs_wt kr_vs_wt vector_vs_wt 







Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding

module load deeptools; plotHeatmap  -m /data/akhtar/group2/rabbani/atac_project2078/plots/ultraheatmap/LPS12_w_deg.gz -o /data/akhtar/group2/rabbani/atac_project2078/plots/ultraheatmap/LPS12_w_deg.png --zMin 0 0 0 0 -2 -2 -2 --zMax 1 1 1 1 2 2 2  --colorMap Greens Greens Greens Greens RdBu_r RdBu_r RdBu_r --sortUsingSample 5 6 7 --samplesLabel wt_atac vector_atac kr_atac kq_atac kq_vs_wt kr_vs_wt vector_vs_wt 







Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding.


Attempting to set identical left == right == 0 results in singular transformations; automatically expanding

In [9]:
# ultraheatmap
# 1. plot bws on peaks (ocrs)
# 2. add gene expression

In [None]:
# computeMatrix centred on the merged peak sets: per time point, the four merged BigWigs
# are summarised 2000 bp on either side of the centre of each condition's peak BED.
path2peaks = "/data/akhtar/group2/rabbani/atac_project2078/hmmratac/ocrs/merged_peaks"
for time in ['LPS0', 'LPS3', 'LPS12']:
    samples = ["_".join([cond, time]) for cond in ["Vector", "WT", "K197R", "K197Q"]]
    # every entry is followed by one blank so the lists can be pasted straight into the command
    bws = "".join(os.path.join(bw_path, "merged_" + sample + ".bw") + " " for sample in samples)
    names = "".join(sample + " " for sample in samples)
    peaks = "".join(os.path.join(path2peaks, "merged_" + sample + "_.bed") + " " for sample in samples)
    cmd = "".join([
        "module load deeptools/3.5.0;",
        "computeMatrix reference-point -S " + bws,
        " -R " + peaks,
        " -a 2000 -b 2000 --referencePoint center -p 20  --missingDataAsZero  --skipZeros ",
        " --samplesLabel " + names,
        " --blackListFileName /data/repository/organisms/GRCm38_ensembl/akundaje/blacklist.bed ",
        " -o " + os.path.join(output_path, time + "_onpeaks.gz"),
    ])
    print(cmd)
    sp.check_output(cmd, shell = True)

module load deeptools/3.5.0;computeMatrix reference-point -S /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_Vector_LPS0.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_WT_LPS0.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_K197R_LPS0.bw /data/manke/group/rabbani/atac_project2078/atac_pipeline_merged_bam/BigWigs/merged_K197Q_LPS0.bw  -R /data/akhtar/group2/rabbani/atac_project2078/hmmratac/ocrs/merged_peaks/merged_Vector_LPS0_.bed /data/akhtar/group2/rabbani/atac_project2078/hmmratac/ocrs/merged_peaks/merged_WT_LPS0_.bed /data/akhtar/group2/rabbani/atac_project2078/hmmratac/ocrs/merged_peaks/merged_K197R_LPS0_.bed /data/akhtar/group2/rabbani/atac_project2078/hmmratac/ocrs/merged_peaks/merged_K197Q_LPS0_.bed  -a 2000 -b 2000 --referencePoint center -p 20  --missingDataAsZero  --skipZeros  --samplesLabel Vector_LPS0 WT_LPS0 K197R_LPS0 K197Q_LPS0  --blackListFileName /data/repos

In [None]:
# addFeatureToMatrix on the peak-centred matrices: the 'log2FoldChange' column of the three
# comparison tables is added, with the GTF passed via -g and 'gene' / 'TSS' as annotation settings.
gtf_for_mapping = "/data/manke/group/rabbani/rna_project2129/genes.filtered.gtf"
for time in ['LPS0', 'LPS3', 'LPS12']:
    tables = [
        os.path.join(rna_path, prefix + time + "_vs_WT_" + time + ".tsv")
        for prefix in ["K197Q_", "K197R_", "vector_"]
    ]
    cmd = "".join([
        "module load ultraheatmap/1.3.1; addFeatureToMatrix ",
        "-m " + os.path.join(output_path, time + "_onpeaks.gz"),
        " -o " + os.path.join(output_path, time + "_onpeaks_mappedgenes.gz"),
        " -t " + " ".join(tables),
        " -g " + gtf_for_mapping,
        " --annotationFeature 'gene' --featureIdColumn 'GeneID' --featureNames 'log2FoldChange' --referencePoint 'TSS' ",
    ])
    print(cmd)
    sp.check_output(cmd, shell = True)

In [None]:
# plotheatmap of the <time>_onpeaks_mappedgenes.gz matrices.
# Seven samples are labelled: four ATAC tracks (Greens, 0..1) followed by the three appended
# log2FoldChange tracks (RdBu_r, -2..2); regions are sorted using samples 5, 6 and 7.
for time in ['LPS0', 'LPS3', 'LPS12']:
    matrix = os.path.join(output_path, time + "_onpeaks_mappedgenes.gz")
    figure = os.path.join(output_path, time + "_onpeaks_mappedgenes.png")
    cmd = "module load deeptools/3.5.0; plotHeatmap "
    cmd += " -m " + matrix
    cmd += " -o " + figure
    cmd += " --zMin 0 0 0 0 -2 -2 -2 --zMax 1 1 1 1 2 2 2 "
    # full option name (--sortUsingSamples) as in the later heatmap cells, instead of an abbreviated prefix
    cmd += " --colorMap Greens Greens Greens Greens RdBu_r RdBu_r RdBu_r --sortUsingSamples 5 6 7 --samplesLabel vector_atac wt_atac kr_atac kq_atac kq_vs_wt kr_vs_wt vector_vs_wt "
    print(cmd)
    sp.check_output(cmd, shell = True)

In [None]:
# # This is not fully done!
# # similar approach as above, only on diff peaks
# path2diffpeaks = os.path.join("/data/akhtar/group2/rabbani/atac_project2078/hmmratac/pairwise_comparison/wo_hiCov/deseq2")
# for time in ['LPS0', 'LPS3', 'LPS12']:
#     for cond in ["K197R", "K197Q"]:
#         df = pd.read_csv(os.path.join(path2diffpeaks, "_".join([cond,time, "vs", "WT", time])+".tsv"), sep = "\t")
#         up = df.loc[df["Status"] == "UP"][["chr", "start", "end"]]
#         if len(up) != 0:
#             up.to_csv(os.path.join(path2diffpeaks, "diffPeaksBeds", "_".join([cond,time])+"_up.bed"), sep = "\t", header = None, index = False)
#         down = df.loc[df["Status"] == "DOWN"][["chr", "start", "end"]]
#         if len(down) != 0:
#             down.to_csv(os.path.join(path2diffpeaks, "diffPeaksBeds", "_".join([cond,time])+"_down.bed"), sep = "\t", header = None, index = False)
# # This is not fully done!

In [None]:
# Space-separated sequence names handed to `--chromosomesToSkip` in the multiBigwigSummary cell.
blacklist = (
    "MT X Y JH584299.1 GL456233.1 JH584301.1 GL456211.1 GL456350.1 JH584293.1 GL456221.1 JH584297.1 JH584296.1 GL456354.1 JH584294.1 JH584298.1 JH584300.1 GL456219.1 GL456210.1 JH584303.1 JH584302.1 "
    "GL456212.1 JH584304.1 GL456379.1 GL456216.1 GL456393.1 GL456366.1 GL456367.1 GL456239.1 GL456213.1 GL456383.1 GL456385.1 GL456360.1 GL456378.1 GL456389.1 GL456372.1 GL456370.1 GL456381.1 GL456387.1 GL456390.1 "
    "GL456394.1 GL456392.1 GL456382.1 GL456359.1 GL456396.1 GL456368.1 JH584292.1 JH584295.1"
)

In [None]:
# bigwigsummary of atac signal on diff genes:
# for every time point, multiBigwigSummary (BED-file mode) is run over the per-replicate
# filtered BigWigs for each existing up/down BED file, writing an .npz and a raw-count .tsv.
bwsummary_dir = "/data/akhtar/group2/rabbani/atac_project2078/bwsummary"
os.makedirs(bwsummary_dir, exist_ok = True)  # make sure the result directory exists before the tool writes to it
for time in ['LPS0', 'LPS3', 'LPS12']:
    bws = ""
    for cond in ["Vector", "WT", "K197R", "K197Q"]:
        for rep in ['rep1', 'rep2', 'rep3']:
            bws += os.path.join(root, "atac_pipeline", "BigWigs", "_".join([cond, time, rep]) + ".filtered.bw") + " "
    print(bws)
    for cond in ["vector", "K197R", "K197Q"]:
        comparison = "_".join([cond, time, "vs", "WT", time])
        beds = {"down": os.path.join(rna_path, "beds", comparison + "_down.bed"),
                "up": os.path.join(rna_path, "beds", comparison + "_up.bed")}
        for name, bed in beds.items():
            # a BED file is only present when the corresponding gene set was not empty
            if os.path.isfile(bed):
                out_prefix = os.path.join(bwsummary_dir, "_".join([time, cond, name]))
                cmd = "module load deeptools/3.5.0;"
                cmd += "multiBigwigSummary BED-file "
                cmd += " -b " + bws
                cmd += " -o " + out_prefix + ".npz"
                cmd += " --BED " + bed
                cmd += " --chromosomesToSkip " + blacklist
                cmd += " -bl /data/repository/organisms/GRCm38_ensembl/akundaje/blacklist.bed "
                cmd += " -p 15 --outRawCounts " + out_prefix + ".tsv"
                print(cmd)
                sp.check_output(cmd, shell = True)

In [None]:
# ultraheatmap on all active TSS

In [None]:
# Make a list of active genes, those which are available in deseq result

In [None]:
# make the region bed file: coordinates of every gene listed in the comparison table,
# written without a header line to <rna_path>/activegenes.bed
gtf_file = "/data/manke/group/rabbani/rna_project2129/genes.filtered.gtf"
gene_coord = pd.read_csv(gtf_file, sep = "\t", header = None, comment = "#")
# .copy(): the column assignment below must act on an independent frame, not on a slice of the full table
gene_coord = gene_coord.loc[gene_coord[2] == "gene"].copy()
# keep the text between the first and second double quote of column 8
# (presumably the gene_id value of the GTF attribute field -- verify against the GTF)
gene_coord[8] = gene_coord[8].str.split('"', n = 2, expand = True)[1]
gene_coord = gene_coord[[0, 3, 4, 8, 5, 6]]
rna_file = pd.read_csv(os.path.join(rna_path, "K197R_LPS0_vs_WT_LPS0.tsv"), sep = "\t") # any tsv file of this path can be used, they all contain the same list of genes
# inner merge: genes of the table without a matching entry in gene_coord are dropped
regions_to_plot = rna_file.merge(gene_coord, left_on = "GeneID", right_on = gene_coord[8], how = "inner")
regions_to_plot[[0, 3, 4, 8, 5, 6]].to_csv(os.path.join(rna_path, "activegenes.bed"), sep = "\t", header = False, index = False)

In [None]:
# computematrix: per time point, summarise the four merged BigWigs 2000 bp on either side
# of the TSS of every region in activegenes.bed
for time in ['LPS0', 'LPS3', 'LPS12']:
    samples = ["_".join([cond, time]) for cond in ["Vector", "WT", "K197R", "K197Q"]]
    # every entry is followed by one blank so the lists can be pasted straight into the command
    bws = "".join(os.path.join(bw_path, "merged_" + sample + ".bw") + " " for sample in samples)
    names = "".join(sample + " " for sample in samples)
    cmd = "".join([
        "module load deeptools/3.5.0;",
        "computeMatrix reference-point -S " + bws,
        " -R " + os.path.join(rna_path, "activegenes.bed"),
        " -a 2000 -b 2000 --referencePoint TSS -p 20  --missingDataAsZero  --skipZeros ",
        " --samplesLabel " + names,
        " --blackListFileName /data/repository/organisms/GRCm38_ensembl/akundaje/blacklist.bed ",
        " -o " + os.path.join(output_path, time + "_onMouseTSS.gz"),
    ])
    print(cmd)
    sp.check_output(cmd, shell = True)

In [None]:
# ultraheatmap: add the 'log2FoldChange' column of the three comparison tables
# (matched through 'GeneID') to the <time>_onMouseTSS.gz matrix
for time in ['LPS0', 'LPS3', 'LPS12']:
    tables = [
        os.path.join(rna_path, prefix + time + "_vs_WT_" + time + ".tsv")
        for prefix in ["K197Q_", "K197R_", "vector_"]
    ]
    cmd = "".join([
        "module load ultraheatmap/1.3.1; addFeatureToMatrix ",
        "-m " + os.path.join(output_path, time + "_onMouseTSS.gz"),
        " -o " + os.path.join(output_path, time + "_onMouseTSS_deg.gz"),
        " -t " + " ".join(tables),
        " --featureIdColumn 'GeneID' --featureNames 'log2FoldChange' --referencePoint 'TSS' ",
    ])
    print(cmd)
    sp.check_output(cmd, shell = True)

In [None]:
# heatmap of the <time>_onMouseTSS_deg.gz matrices: four ATAC tracks (Greens, 0..1)
# followed by three log2FoldChange tracks (RdBu_r, -1..1), no explicit sorting option
for time in ['LPS0', 'LPS3', 'LPS12']:
    matrix = os.path.join(output_path, time + "_onMouseTSS_deg.gz")
    figure = os.path.join(output_path, time + "_onMouseTSS_deg.png")
    cmd = "".join([
        "module load deeptools/3.5.0; plotHeatmap ",
        " -m " + matrix,
        " -o " + figure,
        " --zMin 0 0 0 0 -1 -1 -1 --zMax 1 1 1 1 1 1 1 ",
        " --colorMap Greens Greens Greens Greens RdBu_r RdBu_r RdBu_r --samplesLabel vector_atac wt_atac kr_atac kq_atac kq_vs_wt kr_vs_wt vector_vs_wt ",
    ])
    print(cmd)
    sp.check_output(cmd, shell = True)

In [None]:
# heatmap with k = 4 clustering: k-means (4 clusters) computed on samples 5, 6 and 7,
# which are also the samples used for sorting
for time in ['LPS0', 'LPS3', 'LPS12']:
    matrix = os.path.join(output_path, time + "_onMouseTSS_deg.gz")
    figure = os.path.join(output_path, time + "_onMouseTSS_deg_k4.png")
    cmd = "".join([
        "module load deeptools/3.5.0; plotHeatmap ",
        " -m " + matrix,
        " -o " + figure,
        " --zMin 0 0 0 0 -1 -1 -1 --zMax 1 1 1 1 1 1 1 --kmeans 4 --clusterUsingSamples 5 6 7 ",
        " --colorMap Greens Greens Greens Greens RdBu_r RdBu_r RdBu_r --sortUsingSamples 5 6 7 --samplesLabel vector_atac wt_atac kr_atac kq_atac kq_vs_wt kr_vs_wt vector_vs_wt ",
    ])
    print(cmd)
    sp.check_output(cmd, shell = True)

In [None]:
# heatmap sort by deg: same matrix and colour settings as the plain TSS heatmap,
# but regions are sorted using samples 5, 6 and 7
for time in ['LPS0', 'LPS3', 'LPS12']:
    matrix = os.path.join(output_path, time + "_onMouseTSS_deg.gz")
    figure = os.path.join(output_path, time + "_onMouseTSS_deg_sortbydeg.png")
    cmd = "".join([
        "module load deeptools/3.5.0; plotHeatmap ",
        " -m " + matrix,
        " -o " + figure,
        " --zMin 0 0 0 0 -1 -1 -1 --zMax 1 1 1 1 1 1 1 ",
        " --colorMap Greens Greens Greens Greens RdBu_r RdBu_r RdBu_r --sortUsingSamples 5 6 7 --samplesLabel vector_atac wt_atac kr_atac kq_atac kq_vs_wt kr_vs_wt vector_vs_wt ",
    ])
    print(cmd)
    sp.check_output(cmd, shell = True)