In [1]:
# Print the kernel's current working directory (IPython shell escape).
!pwd

/data5/galaxy/shell_dir/2018_3_17/jupyter_shell


In [1]:
import os
import shutil
import glob
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
from itertools import permutations
from multiprocessing import Pool

In [6]:
# Tab-separated expression table; get_expre() reads this module-level name.
expre_file = "/data5/galaxy/project/expression/stringtie/Total-RPKM.txt"
# Tissues for which expression, m6A score and methylation are compared.
tissue_list = ["brain", "heart", "kid", "lung"]
df_total_exp = get_expre()
for tissue in tissue_list:
    print("##################################################")
    print(tissue)
    df_m6a = get_m6a(tissue)
    # Keep only the gene key and this tissue's expression column.
    df_expre = df_total_exp[["name", tissue]]
    #
    print("#### coorelation between expression && m6a!")
    # Left-join expression onto the m6A table; rows without a value are dropped.
    df_merge = df_m6a.merge(df_expre, on="name", how="left").dropna()
    print(stats.spearmanr(df_merge[tissue], df_merge["m6a_score"]))
    print(stats.pearsonr(df_merge[tissue], df_merge["m6a_score"]))
    #
    print("#### coorelation between expression && methylation!")
    df_methyl = get_methylation(tissue)
    # NOTE: a stray `class_methyl_run(sub_dir)` call used to sit here. `sub_dir`
    # is not defined in this cell (it only worked through leftover kernel state),
    # the argument never changed across iterations and the result was discarded,
    # so the call was removed to keep the cell runnable on a fresh kernel.
    df = df_merge.merge(df_methyl, on="name", how="left").dropna()
    print(stats.spearmanr(df[tissue], df["methyl"]))
    print(stats.pearsonr(df[tissue], df["methyl"]))
    #
    print("#### coorelation between methylation && m6a!")
    print(stats.spearmanr(df["methyl"], df["m6a_score"]))
    print(stats.pearsonr(df["methyl"], df["m6a_score"]))

##################################################
brain
#### coorelation between expression && m6a!
SpearmanrResult(correlation=-0.2278449261923644, pvalue=4.573013679409501e-199)
(-0.07612578513990023, 2.808425194029496e-23)
#### coorelation between expression && methylation!
SpearmanrResult(correlation=-0.06678740372922698, pvalue=3.106630681142805e-11)
(-0.016034691021318603, 0.11119779972872013)
#### coorelation between methylation && m6a!
SpearmanrResult(correlation=0.03438434878907758, pvalue=0.0006345115152405328)
(0.007331855364669609, 0.46643922372682123)
##################################################
heart
#### coorelation between expression && m6a!
SpearmanrResult(correlation=-0.2249027381571596, pvalue=1.02332899971635e-179)
(-0.03931258605893888, 8.038807159131456e-07)
#### coorelation between expression && methylation!
SpearmanrResult(correlation=-0.046117755035064045, pvalue=5.148588492432149e-06)
(-0.012016413052406217, 0.23514586036349489)
#### coorelation between

In [2]:
def class_m6a_by_diffPeak(work_dir):
    """Load and combine the differential-peak annotation tables in ``work_dir``.

    The process working directory is changed to ``work_dir``. The ``cond1``,
    ``common`` and ``cond2`` annotation files are read as headerless
    two-column tables (``name``, ``m6a_score``), ``m6a_score`` is averaged
    per ``name``, and the three per-name tables are passed through
    ``filter_intersect`` (defined elsewhere in the notebook).

    Args:
        work_dir: Directory containing the three ``diff_peak_c3.0_*_anno.txt``
            files.

    Returns:
        tuple: ``(df_m6a, df_m6a_up, df_m6a_common, df_m6a_down)``. The last
        three are the filtered tables from ``cond1``, ``common`` and ``cond2``
        respectively; ``df_m6a`` is their concatenation with NaN rows and
        duplicate rows removed.
    """
    os.chdir(work_dir)
    anno_files = (
        "diff_peak_c3.0_cond1_anno.txt",   # becomes the "up" table
        "diff_peak_c3.0_common_anno.txt",  # becomes the "common" table
        "diff_peak_c3.0_cond2_anno.txt",   # becomes the "down" table
    )
    raw_tables = [
        pd.read_table(fname, sep="\t", header=None, names=["name", "m6a_score"])
        for fname in anno_files
    ]
    # How the m6a score is computed: mean of all rows sharing the same name.
    up, common, down = (
        tbl.groupby(["name"]).mean().reset_index() for tbl in raw_tables
    )
    # filter_intersect takes and returns the tables in (up, down, common) order.
    up, down, common = filter_intersect(up, down, common)
    combined = pd.concat([up, common, down]).dropna().drop_duplicates()
    return combined, up, common, down

In [3]:
def get_expre(path=None):
    """Load the expression table and add a ``name`` key column.

    Args:
        path: Tab-separated expression file to read. When omitted, the
            module-level ``expre_file`` is used (resolved at call time), so
            existing zero-argument calls behave as before.

    Returns:
        pandas.DataFrame: the table with its columns renamed to ``gene``
        followed by eight tissue labels, plus a ``name`` column holding the
        text of ``gene`` before its first ``"."``.

    Note:
        The file must have exactly nine columns; the column rename fails
        otherwise.
    """
    if path is None:
        # Fall back to the notebook-level setting, as the original did.
        path = expre_file
    df = pd.read_table(path, sep="\t")
    df.columns = ["gene", "brain", "heart", "kid", "liver", "pla", "lung", "mus", "sto"]
    # Merge key used by the other tables: gene identifier up to the first dot.
    df["name"] = df["gene"].str.split(".").str[0]
    return df

In [10]:
def _read_promoter_methylation(accession, value_column):
    """Read ``promoter_filtered_<accession>.bed`` from the current directory.

    The file is read as a headerless six-column table; the fifth column is
    labelled ``value_column``. A ``name`` column is added holding the text of
    column ``n`` before its first ``":"``.
    """
    table = pd.read_table(
        "promoter_filtered_%s.bed" % accession,
        header=None,
        names=["chr", "s", "e", "n", value_column, "strand"],
    )
    table["name"] = table["n"].str.split(":").str[0]
    return table


def foldchange_of_methylation(sub_dir, data_dir="/data5/galaxy/project/methyl_m6a/data/roimethstat/"):
    """Compute the per-name log2 fold change of methylation between two tissues.

    Args:
        sub_dir: ``_``-separated label whose first and last tokens are the two
            tissue names (e.g. ``"brain_heart"``); tokens in between are
            ignored.
        data_dir: Directory holding the ``promoter_filtered_<id>.bed`` files.
            Defaults to the location the function originally had hard-coded.

    Returns:
        pandas.DataFrame: columns ``name`` and ``logFC``, sorted by ``logFC``
        in descending order. ``logFC`` is
        ``log2(tissue_1 + 0.0001) - log2(tissue_2 + 0.0001)``; only names
        present in both tissues' files are kept.

    Raises:
        ValueError: if either tissue has no entry in the tissue-to-file-id
            map. Previously the message was only printed and the function
            then crashed with ``UnboundLocalError``.

    Side effects:
        Changes the process working directory to ``data_dir`` (kept from the
        original implementation).
    """
    # Tissue -> file id used in the BED file names
    # (presumably Roadmap Epigenomics epigenome IDs -- confirm).
    map_dict = {"brain": "E081", "heart": "E083", "kid": "E086", "lung": "E088"}
    tokens = sub_dir.split("_")
    tissue_1, tissue_2 = tokens[0], tokens[-1]
    try:
        num_1, num_2 = map_dict[tissue_1], map_dict[tissue_2]
    except KeyError:
        # Fail fast, before touching the working directory.
        raise ValueError("%s %s didn't have acoording DNA methylation data!" % (tissue_1, tissue_2))

    os.chdir(data_dir)
    df_1 = _read_promoter_methylation(num_1, tissue_1)
    df_2 = _read_promoter_methylation(num_2, tissue_2)

    # Inner join on name; shared column labels get pandas' _x/_y suffixes.
    df_methyl = df_1.merge(df_2, on="name").dropna()
    # The small pseudocount keeps log2 finite when a methylation value is 0.
    df_methyl["logFC"] = (
        np.log2(df_methyl[tissue_1].astype(float) + 0.0001)
        - np.log2(df_methyl[tissue_2].astype(float) + 0.0001)
    )
    df_methyl = df_methyl[["name", "logFC"]].sort_values(["logFC"], ascending=False)
    return df_methyl