# ROS1 endosperm paper python figure generation and some data filtering/dataframe manipulation for publication.

## Last edited: 11/03/2024

# Setup

In [None]:
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
from scipy import stats
from matplotlib import rcParams
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
import textwrap


In [None]:
# Column names assigned to the headerless 3-column BED files when they are read.
bed_columns = ['chr','start','end']
# Field separator passed to pd.read_csv for the tab-delimited inputs.
tab="\t"
# Use Arial for all text drawn by matplotlib.
plt.rcParams["font.family"] = "Arial"

In [None]:
# Root directory; the two directories below are built from it.
path="/lab/solexa_gehring/elizabeth/ros1_endo_code_and_underlying_data/"
# Directory the DMR tables and BED files are read from.
dmrpath=path+"DSS/DMRs/"
# Directory the figures are written to by plt.savefig.
figurepath=path+"python_figures/"


In [None]:
# Mathtext labels that draw the genotype names in italics.
# Raw strings keep the backslash of "\it" literal; in a normal string "\i" is
# an invalid escape sequence, which newer Python versions warn about.
r7it = r"$\it{ros1}$" + "-" + r"$\it{7}$"
r3it = r"$\it{ros1}$" + "-" + r"$\it{3}$"
r1it = r"$\it{ros1}$" + "-" + r"$\it{1}$"

rddit = r"$\it{rdd}$"

# Data (leaf exWs Chr2):

In [None]:
# Leaf DMR tables read from the r3_v_wt_leaf_* and r7_v_wt_leaf_* files, one set
# per sequence context (CG, CHG, CHH).
# Naming: <genotype>l_<context>   = full DMR table (.DMRs.txt, first row is the header)
#         <genotype>l_<context>_o = intervals from the .hypo.bed file
#         <genotype>l_<context>_r = intervals from the .hyper.bed file
#CG DMRs
r3l_CG = pd.read_csv(dmrpath+'r3_v_wt_leaf_CG.DMRs.txt', header =0, sep = tab)
r3l_CG_o = pd.read_csv(dmrpath+'r3_v_wt_leaf_CG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r3l_CG_r = pd.read_csv(dmrpath+'r3_v_wt_leaf_CG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

r7l_CG = pd.read_csv(dmrpath+'r7_v_wt_leaf_CG.DMRs.txt', header =0, sep = tab)
r7l_CG_o = pd.read_csv(dmrpath+'r7_v_wt_leaf_CG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r7l_CG_r = pd.read_csv(dmrpath+'r7_v_wt_leaf_CG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )


#CHG DMRs
r3l_CHG = pd.read_csv(dmrpath+'r3_v_wt_leaf_CHG.DMRs.txt', header =0, sep = tab)
r3l_CHG_o = pd.read_csv(dmrpath+'r3_v_wt_leaf_CHG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r3l_CHG_r = pd.read_csv(dmrpath+'r3_v_wt_leaf_CHG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

r7l_CHG = pd.read_csv(dmrpath+'r7_v_wt_leaf_CHG.DMRs.txt', header =0, sep = tab)
r7l_CHG_o = pd.read_csv(dmrpath+'r7_v_wt_leaf_CHG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r7l_CHG_r = pd.read_csv(dmrpath+'r7_v_wt_leaf_CHG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

#CHH DMRs
r3l_CHH = pd.read_csv(dmrpath+'r3_v_wt_leaf_CHH.DMRs.txt', header =0, sep = tab)
r3l_CHH_o = pd.read_csv(dmrpath+'r3_v_wt_leaf_CHH.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r3l_CHH_r = pd.read_csv(dmrpath+'r3_v_wt_leaf_CHH.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

r7l_CHH = pd.read_csv(dmrpath+'r7_v_wt_leaf_CHH.DMRs.txt', header =0, sep = tab)
r7l_CHH_o = pd.read_csv(dmrpath+'r7_v_wt_leaf_CHH.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r7l_CHH_r = pd.read_csv(dmrpath+'r7_v_wt_leaf_CHH.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )



# Data (endo exWs Chr2):

In [None]:
# DMR tables read from the r3_v_wt_* and r7_v_wt_* files (no "_leaf" tag in the
# file names), one set per sequence context (CG, CHG, CHH).
# Naming: <genotype>_<context>   = full DMR table (.DMRs.txt, first row is the header)
#         <genotype>_<context>_o = intervals from the .hypo.bed file
#         <genotype>_<context>_r = intervals from the .hyper.bed file
#CG DMRs
r3_CG = pd.read_csv(dmrpath+'r3_v_wt_CG.DMRs.txt', header =0, sep = tab)
r3_CG_o = pd.read_csv(dmrpath+'r3_v_wt_CG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r3_CG_r = pd.read_csv(dmrpath+'r3_v_wt_CG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

r7_CG = pd.read_csv(dmrpath+'r7_v_wt_CG.DMRs.txt', header =0, sep = tab)
r7_CG_o = pd.read_csv(dmrpath+'r7_v_wt_CG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r7_CG_r = pd.read_csv(dmrpath+'r7_v_wt_CG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )


#CHG DMRs
r3_CHG = pd.read_csv(dmrpath+'r3_v_wt_CHG.DMRs.txt', header =0, sep = tab)
r3_CHG_o = pd.read_csv(dmrpath+'r3_v_wt_CHG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r3_CHG_r = pd.read_csv(dmrpath+'r3_v_wt_CHG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

r7_CHG = pd.read_csv(dmrpath+'r7_v_wt_CHG.DMRs.txt', header =0, sep = tab)
r7_CHG_o = pd.read_csv(dmrpath+'r7_v_wt_CHG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r7_CHG_r = pd.read_csv(dmrpath+'r7_v_wt_CHG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

#CHH DMRs
r3_CHH = pd.read_csv(dmrpath+'r3_v_wt_CHH.DMRs.txt', header =0, sep = tab)
r3_CHH_o = pd.read_csv(dmrpath+'r3_v_wt_CHH.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r3_CHH_r = pd.read_csv(dmrpath+'r3_v_wt_CHH.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

r7_CHH = pd.read_csv(dmrpath+'r7_v_wt_CHH.DMRs.txt', header =0, sep = tab)
r7_CHH_o = pd.read_csv(dmrpath+'r7_v_wt_CHH.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
r7_CHH_r = pd.read_csv(dmrpath+'r7_v_wt_CHH.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )


# Data (endo exWs Chr2Chr3):

In [None]:
# DMR tables read from the rdd_v_wt_* files, one set per sequence context.
# rdd_<context> is the full table; _o holds the .hypo.bed intervals and _r the
# .hyper.bed intervals.
#CG DMRs
rdd_CG = pd.read_csv(dmrpath+'rdd_v_wt_CG.DMRs.txt', header =0, sep = tab)
rdd_CG_o = pd.read_csv(dmrpath+'rdd_v_wt_CG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
rdd_CG_r = pd.read_csv(dmrpath+'rdd_v_wt_CG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

#CHG DMRs
rdd_CHG = pd.read_csv(dmrpath+'rdd_v_wt_CHG.DMRs.txt', header =0, sep = tab)
rdd_CHG_o = pd.read_csv(dmrpath+'rdd_v_wt_CHG.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
rdd_CHG_r = pd.read_csv(dmrpath+'rdd_v_wt_CHG.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )

#CHH DMRs
rdd_CHH = pd.read_csv(dmrpath+'rdd_v_wt_CHH.DMRs.txt', header =0, sep = tab)
rdd_CHH_o = pd.read_csv(dmrpath+'rdd_v_wt_CHH.DMRs.hypo.bed', header =None, sep = tab, names=bed_columns )
rdd_CHH_r = pd.read_csv(dmrpath+'rdd_v_wt_CHH.DMRs.hyper.bed', header =None, sep = tab, names=bed_columns )


# plotting DMR functions:

In [None]:
#total length of all DMRs in bp
def get_total_length(df):
    """Return the summed length of every interval in df.

    Each row contributes abs(end - start), taken from the 'start' and 'end'
    columns.
    """
    span = df['end'] - df['start']
    total_length = sum(span.abs())
    return total_length


In [None]:
###Final version for barplotting DMRs###
def plot_all_DMRs (CG, CHG, CHH, x_ticks, y_label, limit):
    """Draw a grouped bar chart with one CG, CHG and CHH bar per sample.

    CG, CHG, CHH -- sequences of bar heights, one value per sample
    x_ticks      -- tick labels, one per sample group
    y_label      -- y-axis label text
    limit        -- upper y-axis limit (the axis starts at 0)

    A new 10x10 figure is created and left as the current figure, so a
    following plt.savefig call writes it out. Nothing is returned.
    """
    sns.set_style('white')
    plt.figure(figsize=(10, 10))
    plt.grid(axis='y')

    barWidth = 0.3

    # (heights, colour, legend label) in left-to-right order within a group
    series = [
        (CG, '#0072B2', 'CG'),
        (CHG, '#E69F00', 'CHG'),
        (CHH, '#009E73', 'CHH'),
    ]

    # each context sits one bar width to the right of the previous one
    positions = np.arange(len(CG))
    for heights, colour, name in series:
        plt.bar(positions, heights, width=barWidth, color=colour,
                edgecolor='black', capsize=7, label=name)
        positions = positions + barWidth

    # tick labels go under the middle (CHG) bar of each group
    tick_positions = np.arange(len(CHG)) + barWidth
    plt.xticks(tick_positions, x_ticks, fontsize=30)
    plt.yticks(fontsize=30)
    plt.ylabel(y_label, fontsize=30)
    plt.legend(loc='upper right', fontsize=30)
    plt.ylim(0, limit)
    plt.tight_layout()


In [None]:
#stacked barplot of features near regions of interest
def feature_barplot(genotypes_list, feat_count_dict, x_ticks, limit):
    """Draw a stacked bar chart of region counts per feature category.

    genotypes_list  -- bar labels, one per genotype (any number of bars)
    feat_count_dict -- maps a category label to an array of counts, one per
                       genotype; categories are stacked in dict order
    x_ticks         -- accepted for backward compatibility; not used
    limit           -- upper y-axis limit (the axis starts at 0)

    The four known category labels get fixed colours; any other label falls
    back to matplotlib's default colour cycle. A new figure is created and
    left as the current figure. Nothing is returned.
    """
    sns.set_style('white')

    category_colors = {
        "no feature": "#FF6700",
        "gene only": "#8EA604",
        "TE fragment only": "#C9C9C9",
        "gene and TE fragment": "#531CB3",
    }
    width = 0.6

    fig, ax = plt.subplots()
    ax.grid(axis='y')

    # running top of each stack; sized from the input instead of fixed at two bars
    bottom = np.zeros(len(genotypes_list))
    for label, feat_count in feat_count_dict.items():
        ax.bar(genotypes_list, feat_count, width, label=label, bottom=bottom,
               color=category_colors.get(label))
        bottom = bottom + feat_count

    # general layout
    ax.tick_params(axis='both', labelsize=17)
    ax.set_ylabel("number of regions", fontsize=20)
    ax.legend(loc='upper right', title="Within 1kb of feature:", fontsize=15, title_fontsize=15)
    ax.set_ylim(0, limit)

    plt.tight_layout()


# plotting leaf DMRs

In [None]:
#leaf sample names
samples = [r7it+" vs Col-0 leaf", r3it+" vs Col-0 leaf"]


In [None]:
#number of leaf DMRs (row count of each BED table), listed as [r7, r3] to match `samples`
CG_hyper = [len(r7l_CG_r), len(r3l_CG_r)]
CG_hypo = [len(r7l_CG_o), len(r3l_CG_o)]

CHG_hyper = [len(r7l_CHG_r), len(r3l_CHG_r)]
CHG_hypo = [len(r7l_CHG_o), len(r3l_CHG_o)]

CHH_hyper = [len(r7l_CHH_r), len(r3l_CHH_r)]
CHH_hypo = [len(r7l_CHH_o), len(r3l_CHH_o)]



In [None]:
#leaf DMR length in bp/1000 to get total kb
CG_hyper_lenkb = [get_total_length(r7l_CG_r)/1000, get_total_length(r3l_CG_r)/1000]
CG_hypo_lenkb = [get_total_length(r7l_CG_o)/1000, get_total_length(r3l_CG_o)/1000]

CHG_hyper_lenkb = [get_total_length(r7l_CHG_r)/1000, get_total_length(r3l_CHG_r)/1000]
CHG_hypo_lenkb = [get_total_length(r7l_CHG_o)/1000, get_total_length(r3l_CHG_o)/1000]

CHH_hyper_lenkb = [get_total_length(r7l_CHH_r)/1000, get_total_length(r3l_CHH_r)/1000]
CHH_hypo_lenkb = [get_total_length(r7l_CHH_o)/1000, get_total_length(r3l_CHH_o)/1000]


## Figures: Number of DMRs

In [None]:
plot_all_DMRs (CG_hyper, CHG_hyper, CHH_hyper, samples, 'number of hyper-DMRs', 2000)
plt.savefig(figurepath+'leaf_hyper_DMRs.pdf', dpi=350)


In [None]:
plot_all_DMRs (CG_hypo, CHG_hypo, CHH_hypo, samples, 'number of hypo-DMRs', 2000)
plt.savefig(figurepath+'leaf_hypo_DMRs.pdf', dpi=350)


## Figures: total length in DMRs

In [None]:
plot_all_DMRs (CG_hyper_lenkb, CHG_hyper_lenkb, CHH_hyper_lenkb, samples, 'total length in hyper-DMRs (kb)', 310)
plt.savefig(figurepath+'leaf_hyperlen_DMRs.pdf', dpi=350)

In [None]:
plot_all_DMRs (CG_hypo_lenkb, CHG_hypo_lenkb, CHH_hypo_lenkb, samples, 'total length in hypo-DMRs (kb)', 310)
plt.savefig(figurepath+'leaf_hypolen_DMRs.pdf', dpi=350)


## Figure: fraction of DMRs near gene, TE, both

In [None]:
# features near dmr outputs
# Column names for the 9-column feature tables: six columns for the feature
# (chr ... strand) followed by the coordinates of the DMR it was paired with.
windowcol=['chr','start','end','feature','score','strand','dmr_chr','dmr_start','dmr_end']
# Directory the feature tables are read from.
featurepath=dmrpath+"whatsnearby_outputs/"


In [None]:
# function to count how many regions fall into each category
def get_feat_count(alldmr, genedmr, tefragdmr):
    """Count regions near a gene only, a TE fragment only, both, or neither.

    alldmr    -- table of all regions; only its row count is used
    genedmr   -- gene table carrying dmr_chr/dmr_start/dmr_end columns
    tefragdmr -- TE-fragment table carrying the same three columns

    Returns (neither, geneonly, teonly, both).
    """
    dmr_key = ['dmr_chr', 'dmr_start', 'dmr_end']

    # outer join on the DMR coordinates; '_merge' records which table(s) held each DMR
    joined = genedmr.merge(tefragdmr, how="outer", on=dmr_key,
                           suffixes=['_gene', '_TE'], indicator=True)

    # one row per DMR, then tally the source of each
    source_counts = joined.drop_duplicates(subset=dmr_key)['_merge'].value_counts()
    geneonly = source_counts['left_only']
    teonly = source_counts['right_only']
    both = source_counts['both']

    neither = len(alldmr) - (geneonly + teonly + both)

    return neither, geneonly, teonly, both

In [None]:
#features near leaf DMRs
r3l_allc=pd.read_csv(dmrpath+"r3_v_wt_leaf_allC_hyper_allC.merge.bed", sep=tab, header=None, names=bed_columns)
r3l_genes1kb=pd.read_csv(featurepath+"genes_1kb_r3leaf_hyper.bed", sep=tab, header=None, names=windowcol)
r3l_TEfrag1kb=pd.read_csv(featurepath+"TE_fragments_1kb_r3leaf_hyper.bed", sep=tab, header=None, names=windowcol)
r3l_TEmerge1kb=pd.read_csv(featurepath+"TEmerge_1kb_r3leaf_hyper.bed", sep=tab, header=None, names=windowcol)

r7l_allc=pd.read_csv(dmrpath+"r7_v_wt_leaf_allC_hyper_allC.merge.bed", sep=tab, header=None, names=bed_columns)
r7l_genes1kb=pd.read_csv(featurepath+"genes_1kb_r7leaf_hyper.bed", sep=tab, header=None, names=windowcol)
r7l_TEfrag1kb=pd.read_csv(featurepath+"TE_fragments_1kb_r7leaf_hyper.bed", sep=tab, header=None, names=windowcol)
r7l_TEmerge1kb=pd.read_csv(featurepath+"TEmerge_1kb_r7leaf_hyper.bed", sep=tab, header=None, names=windowcol)


In [None]:
r3l_neither, r3l_geneonly, r3l_teonly, r3l_both=get_feat_count(r3l_allc, r3l_genes1kb, r3l_TEfrag1kb)
r7l_neither, r7l_geneonly, r7l_teonly, r7l_both=get_feat_count(r7l_allc, r7l_genes1kb, r7l_TEfrag1kb)

In [None]:
# Stacked bar of region counts by nearby-feature class, one bar per genotype,
# for the regions loaded from the *_leaf_* hyper files.
# NOTE(review): the third positional argument ("number of DMRs") binds to
# feature_barplot's x_ticks parameter, which that function never uses; the
# y-axis label drawn is the one hard-coded inside feature_barplot.
feature_barplot((r3it+" leaf targets", r7it+" leaf targets"), {
    "no feature": np.array([len(r3l_allc)-(r3l_geneonly+r3l_teonly+r3l_both), len(r7l_allc)-(r7l_geneonly+r7l_teonly+r7l_both)]),
    "gene only": np.array([r3l_geneonly, r7l_geneonly]),
    "TE fragment only": np.array([r3l_teonly, r7l_teonly]),
    "gene and TE fragment": np.array([r3l_both, r7l_both])
}, "number of DMRs", limit=2200)
plt.savefig(figurepath+'leaf_allDMRs_features1kb.pdf', dpi=350)

# plotting endo DMRs

In [None]:
# number of endo DMRs (row count of each BED table), listed as [r7, r3, rdd]
CG_hyper = [len(r7_CG_r), len(r3_CG_r), len(rdd_CG_r)]
CG_hypo = [len(r7_CG_o), len(r3_CG_o), len(rdd_CG_o)]

CHG_hyper = [len(r7_CHG_r), len(r3_CHG_r), len(rdd_CHG_r)]
CHG_hypo = [len(r7_CHG_o), len(r3_CHG_o), len(rdd_CHG_o)]

CHH_hyper = [len(r7_CHH_r), len(r3_CHH_r), len(rdd_CHH_r)]
CHH_hypo = [len(r7_CHH_o), len(r3_CHH_o), len(rdd_CHH_o)]

In [None]:
#endo DMR length in bp/1000 to get total kb
CG_hyper_lenkb = [get_total_length(r7_CG_r)/1000, get_total_length(r3_CG_r)/1000, get_total_length(rdd_CG_r)/1000]
CG_hypo_lenkb = [get_total_length(r7_CG_o)/1000, get_total_length(r3_CG_o)/1000, get_total_length(rdd_CG_o)/1000]

CHG_hyper_lenkb = [get_total_length(r7_CHG_r)/1000, get_total_length(r3_CHG_r)/1000, get_total_length(rdd_CHG_r)/1000]
CHG_hypo_lenkb = [get_total_length(r7_CHG_o)/1000, get_total_length(r3_CHG_o)/1000, get_total_length(rdd_CHG_o)/1000]

CHH_hyper_lenkb = [get_total_length(r7_CHH_r)/1000, get_total_length(r3_CHH_r)/1000, get_total_length(rdd_CHH_r)/1000]
CHH_hypo_lenkb = [get_total_length(r7_CHH_o)/1000, get_total_length(r3_CHH_o)/1000, get_total_length(rdd_CHH_o)/1000]


In [None]:
#endo sample names
samples = [r7it+" vs Col-0 endo", r3it+" vs Col-0 endo", rddit+" vs Col-0 endo"]
samplesmainfig = [r7it+" vs Col-0", r3it+" vs Col-0", rddit+" vs Col-0"]


## Figures: Number of DMRs

In [None]:
# r3-only panel: hyper- and hypo-DMR counts side by side for each context.
# The counts use their own names so the three-genotype CG_hyper / CG_hypo / ...
# lists are not overwritten; the endo_hyper and endo_hypo figures drawn in the
# following cells pair those lists with the three labels in `samples`.
samplesjustr3 = [r3it+" > Col-0", r3it+" < Col-0"]
r3_CG_counts = [len(r3_CG_r), len(r3_CG_o)]
r3_CHG_counts = [len(r3_CHG_r), len(r3_CHG_o)]
r3_CHH_counts = [len(r3_CHH_r), len(r3_CHH_o)]
plot_all_DMRs (r3_CG_counts, r3_CHG_counts, r3_CHH_counts, samplesjustr3, 'number of regions', 1050)
plt.savefig(figurepath+'Just_r3_endo_hyper_DMRs.png', dpi=350)


In [None]:
plot_all_DMRs (CG_hyper, CHG_hyper, CHH_hyper, samples, 'number of hyper-DMRs', 1050)
plt.savefig(figurepath+'endo_hyper_DMRs.pdf', dpi=350)


In [None]:
plot_all_DMRs (CG_hypo, CHG_hypo, CHH_hypo, samples, 'number of hypo-DMRs', 1050)
plt.savefig(figurepath+'endo_hypo_DMRs.pdf', dpi=350)


## Figures: total length in DMRs

In [None]:
plot_all_DMRs (CG_hyper_lenkb, CHG_hyper_lenkb, CHH_hyper_lenkb, samplesmainfig, 'total length in hyper-DMRs (kb)', 145)
plt.savefig(figurepath+'endo_hyperlen_DMRs.pdf', dpi=350)


In [None]:
plot_all_DMRs (CG_hypo_lenkb, CHG_hypo_lenkb, CHH_hypo_lenkb, samplesmainfig, 'total length in hypo-DMRs (kb)', 145)
plt.savefig(figurepath+'endo_hypolen_DMRs.pdf', dpi=350)


In [None]:
#features near endo DMRs
r3_allc=pd.read_csv(dmrpath+"r3_v_wt_allC_hyper_allC.merge.bed", sep=tab, header=None, names=bed_columns)
r3_genes1kb=pd.read_csv(featurepath+"genes_1kb_r3_hyper.bed", sep=tab, header=None, names=windowcol)
r3_TEfrag1kb=pd.read_csv(featurepath+"TE_fragments_1kb_r3_hyper.bed", sep=tab, header=None, names=windowcol)
r3_TEmerge1kb=pd.read_csv(featurepath+"TEmerge_1kb_r3_hyper.bed", sep=tab, header=None, names=windowcol)

r7_allc=pd.read_csv(dmrpath+"r7_v_wt_allC_hyper_allC.merge.bed", sep=tab, header=None, names=bed_columns)
r7_genes1kb=pd.read_csv(featurepath+"genes_1kb_r7_hyper.bed", sep=tab, header=None, names=windowcol)
r7_TEfrag1kb=pd.read_csv(featurepath+"TE_fragments_1kb_r7_hyper.bed", sep=tab, header=None, names=windowcol)
r7_TEmerge1kb=pd.read_csv(featurepath+"TEmerge_1kb_r7_hyper.bed", sep=tab, header=None, names=windowcol)
r3_TEfrag1kb

In [None]:
r3_neither, r3_geneonly, r3_teonly, r3_both=get_feat_count(r3_allc, r3_genes1kb, r3_TEfrag1kb)
r7_neither, r7_geneonly, r7_teonly, r7_both=get_feat_count(r7_allc, r7_genes1kb, r7_TEfrag1kb)


In [None]:
r3_TEfrag1kb.drop_duplicates(["dmr_chr","dmr_start","dmr_end"])

In [None]:
# Stacked bar of region counts by nearby-feature class, one bar per genotype,
# for the regions in r3_allc and r7_allc.
# NOTE(review): the third positional argument ("number of DMRs") binds to
# feature_barplot's x_ticks parameter, which that function never uses; the
# y-axis label drawn is the one hard-coded inside feature_barplot.
feature_barplot((r3it+" targets", r7it+" targets"), {
    "no feature": np.array([len(r3_allc)-(r3_geneonly+r3_teonly+r3_both), len(r7_allc)-(r7_geneonly+r7_teonly+r7_both)]),
    "gene only": np.array([r3_geneonly, r7_geneonly]),
    "TE fragment only": np.array([r3_teonly, r7_teonly]),
    "gene and TE fragment": np.array([r3_both, r7_both])
}, "number of DMRs", limit=1401)
plt.savefig(figurepath+'endo_allDMRs_features1kb.pdf', dpi=350)

# Supp figure: DMR length histograms

In [None]:
def length_histogram(df, title, context):
    """Plot a histogram of interval lengths (end - start) in 50 bp bins.

    df      -- DataFrame with 'start' and 'end' columns; a 'length' column is
               added to it in place (kept as-is in case other cells read it)
    title   -- plot title
    context -- 'CG', 'CHG' or 'CHH'; selects the bar colour

    Returns the Axes object produced by seaborn.histplot.
    Raises ValueError when context is not one of the three known values.
    """
    context_colors = {'CG': '#0072B2', 'CHG': '#E69F00', 'CHH': '#009E73'}
    if context not in context_colors:
        # fail early with a clear message instead of an unbound-variable error
        raise ValueError("context must be 'CG', 'CHG' or 'CHH', got %r" % (context,))
    color = context_colors[context]

    df['length']=df['end']-df['start']

    # named ax (not plt) so the matplotlib.pyplot module is not shadowed here
    ax = sns.histplot(data=df, x="length", binwidth=50, color=color)
    ax.set_title(title, fontname="Arial", fontsize=18)
    ax.tick_params(axis='both', which='major', labelsize=16)
    ax.set_xlabel('length', fontname="Arial", fontsize=16)
    ax.set_ylabel('count', fontname="Arial", fontsize=16)

    return ax

In [None]:

length_histogram(r3l_CG_r, r3it+" leaf CG hyper DMR length", "CG")
plt.savefig(figurepath+'r3leaf_CGr_histplot.png', dpi=350)


In [None]:
length_histogram(r3l_CHG_r, r3it+" leaf CHG hyper DMR length", "CHG")
plt.savefig(figurepath+'r3leaf_CHGr_histplot.png', dpi=350)

In [None]:

length_histogram(r3l_CHH_r, r3it+" leaf CHH hyper DMR length", "CHH")
plt.savefig(figurepath+'r3leaf_CHHr_histplot.png', dpi=350)

In [None]:


length_histogram(r7l_CG_r, r7it+" leaf CG hyper DMR length", "CG")
plt.savefig(figurepath+'r7leaf_CGr_histplot.png', dpi=350)


In [None]:
length_histogram(r7l_CHG_r, r7it+" leaf CHG hyper DMR length", "CHG")
plt.savefig(figurepath+'r7leaf_CHGr_histplot.png', dpi=350)

In [None]:
length_histogram(r7l_CHH_r, r7it+" leaf CHH hyper DMR length", "CHH")
plt.savefig(figurepath+'r7leaf_CHHr_histplot.png', dpi=350)

In [None]:
length_histogram(r3_CG_r, r3it+" endo CG hyper DMR length", "CG")
plt.savefig(figurepath+'r3endo_CGr_histplot.png', dpi=350)

In [None]:
length_histogram(r3_CHG_r, r3it+" endo CHG hyper DMR length", "CHG")
plt.savefig(figurepath+'r3endo_CHGr_histplot.png', dpi=350)

In [None]:
length_histogram(r3_CHH_r, r3it+" endo CHH hyper DMR length", "CHH")
plt.savefig(figurepath+'r3endo_CHHr_histplot.png', dpi=350)


In [None]:
length_histogram(r7_CG_r, r7it+" endo CG hyper DMR length", "CG")
plt.savefig(figurepath+'r7endo_CGr_histplot.png', dpi=350)


In [None]:
length_histogram(r7_CHG_r, r7it+" endo CHG hyper DMR length", "CHG")
plt.savefig(figurepath+'r7endo_CHGr_histplot.png', dpi=350)


In [None]:
length_histogram(r7_CHH_r, r7it+" endo CHH hyper DMR length", "CHH")
plt.savefig(figurepath+'r7endo_CHHr_histplot.png', dpi=350)


# identify 'hypermethylation-limited' DMRs

In [None]:
# Directory the per-sample sumby tables are read from.
sumpath=path+"sumby_features/sumby_dmrs_nonallelic_outputs/"
# Column layout of a single sumby table (readindata builds an identical list of its own).
sumcolumns=['chr','start','end','name','avg_methy','nC']
# Column names for a merged three-replicate wild-type table: coordinates, then
# avg_methy and nC for each replicate.
bigE_columns=['chr','start','end','wt_1_avg_methy','wt_1_nC','wt_2_avg_methy','wt_2_nC','wt_3_avg_methy','wt_3_nC']

In [None]:
def readindata(file):
    """Read one headerless, tab-separated sumby table from sumpath.

    The six columns are named chr, start, end, name, avg_methy and nC; the
    'name' column is dropped before the table is returned.
    """
    column_names = ['chr','start','end','name','avg_methy','nC']
    table = pd.read_csv(sumpath+file, header=None, sep=tab, names=column_names)
    return table.drop(columns='name')

## CG sumby data

In [None]:
# Per-replicate CG sumby tables over the r3_v_wt_CG hyper-DMR intervals.
# Variable names follow <sample>_<replicate>_CG_sum_r3_CG; an "l" after the
# sample name (wtl, r3l, r7l) marks tables read from the *_leaf_* files.
# NOTE(review): only the rdd file names carry a "_min5" tag -- confirm the rdd
# tables are meant to come from a differently filtered input than the others.
wt_1_CG_sum_r3_CG=readindata('wt_1_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
wt_2_CG_sum_r3_CG=readindata('wt_2_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
wt_3_CG_sum_r3_CG=readindata('wt_3_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')

r3_1_CG_sum_r3_CG=readindata('r3_1_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
r3_2_CG_sum_r3_CG=readindata('r3_2_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
r3_3_CG_sum_r3_CG=readindata('r3_3_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')

r7_1_CG_sum_r3_CG=readindata('r7_1_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
r7_2_CG_sum_r3_CG=readindata('r7_2_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
r7_3_CG_sum_r3_CG=readindata('r7_3_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')


rdd_1_CG_sum_r3_CG=readindata('rdd_1_Col_spiked_CG_min5_sumby_r3_v_wt_CG.DMRs.hyper.bed')
rdd_2_CG_sum_r3_CG=readindata('rdd_2_Col_spiked_CG_min5_sumby_r3_v_wt_CG.DMRs.hyper.bed')
rdd_3_CG_sum_r3_CG=readindata('rdd_3_Col_spiked_CG_min5_sumby_r3_v_wt_CG.DMRs.hyper.bed')


wtl_1_CG_sum_r3_CG=readindata('wt_1_leaf_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
wtl_2_CG_sum_r3_CG=readindata('wt_2_leaf_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
wtl_3_CG_sum_r3_CG=readindata('wt_3_leaf_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')

r3l_1_CG_sum_r3_CG=readindata('r3_1_leaf_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
r3l_2_CG_sum_r3_CG=readindata('r3_2_leaf_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
r3l_3_CG_sum_r3_CG=readindata('r3_3_leaf_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')

r7l_1_CG_sum_r3_CG=readindata('r7_1_leaf_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
r7l_2_CG_sum_r3_CG=readindata('r7_2_leaf_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')
r7l_3_CG_sum_r3_CG=readindata('r7_3_leaf_Col_spiked_CG_sumby_r3_v_wt_CG.DMRs.hyper.bed')


In [None]:
# Per-replicate CG sumby tables over the r7_v_wt_CG hyper-DMR intervals.
# Variable names follow <sample>_<replicate>_CG_sum_r7_CG; an "l" after the
# sample name (wtl, r3l, r7l) marks tables read from the *_leaf_* files.
# NOTE(review): only the rdd file names carry a "_min5" tag -- confirm the rdd
# tables are meant to come from a differently filtered input than the others.
wt_1_CG_sum_r7_CG=readindata('wt_1_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
wt_2_CG_sum_r7_CG=readindata('wt_2_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
wt_3_CG_sum_r7_CG=readindata('wt_3_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')

r3_1_CG_sum_r7_CG=readindata('r3_1_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
r3_2_CG_sum_r7_CG=readindata('r3_2_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
r3_3_CG_sum_r7_CG=readindata('r3_3_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')

r7_1_CG_sum_r7_CG=readindata('r7_1_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
r7_2_CG_sum_r7_CG=readindata('r7_2_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
r7_3_CG_sum_r7_CG=readindata('r7_3_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')

rdd_1_CG_sum_r7_CG=readindata('rdd_1_Col_spiked_CG_min5_sumby_r7_v_wt_CG.DMRs.hyper.bed')
rdd_2_CG_sum_r7_CG=readindata('rdd_2_Col_spiked_CG_min5_sumby_r7_v_wt_CG.DMRs.hyper.bed')
rdd_3_CG_sum_r7_CG=readindata('rdd_3_Col_spiked_CG_min5_sumby_r7_v_wt_CG.DMRs.hyper.bed')

wtl_1_CG_sum_r7_CG=readindata('wt_1_leaf_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
wtl_2_CG_sum_r7_CG=readindata('wt_2_leaf_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
wtl_3_CG_sum_r7_CG=readindata('wt_3_leaf_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')

r3l_1_CG_sum_r7_CG=readindata('r3_1_leaf_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
r3l_2_CG_sum_r7_CG=readindata('r3_2_leaf_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
r3l_3_CG_sum_r7_CG=readindata('r3_3_leaf_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')

r7l_1_CG_sum_r7_CG=readindata('r7_1_leaf_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
r7l_2_CG_sum_r7_CG=readindata('r7_2_leaf_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')
r7l_3_CG_sum_r7_CG=readindata('r7_3_leaf_Col_spiked_CG_sumby_r7_v_wt_CG.DMRs.hyper.bed')


## make big dataframe

In [None]:
def _merge_replicates(rep1, rep2, rep3, prefix):
    """Inner-join three replicate tables on chr/start/end and name the value columns with prefix."""
    keys = ['chr', 'start', 'end']
    merged = rep1.merge(rep2, on=keys, how='inner').merge(rep3, on=keys, how='inner')
    # each replicate contributes two columns (avg_methy, nC) after the three keys
    merged.columns = keys + [
        '{}_{}_{}'.format(prefix, rep, field)
        for rep in (1, 2, 3)
        for field in ('avg_methy', 'nC')
    ]
    return merged


def process_endo_leaf_sum (wt1,wt2,wt3,r71,r72,r73,r31,r32,r33,rdd1,rdd2,rdd3,wtl1,wtl2,wtl3,r7l1,r7l2,r7l3,r3l1,r3l2,r3l3, feature_df):
    """Combine 21 per-replicate tables into one wide table with group means.

    Every replicate table must have exactly the columns chr, start, end,
    avg_methy, nC (in that order). The three replicates of each group are
    inner-joined on chr/start/end, the groups are inner-joined with each
    other, and a '<group>_avg_methy' column holding the mean of the three
    replicate avg_methy values is added for each of wt, r3, r7, rdd, wtl,
    r3l and r7l.

    feature_df -- None, or a table with chr/start/end and a 'feature' column.
                  When given, it is left-joined to the combined table (rows
                  without a match get NaN) and its 'feature' column is
                  dropped from the result.

    Returns the combined DataFrame.
    """
    columns = ['chr','start','end']

    # group order fixes the column order of the combined table
    endo_groups = [
        ('wt', (wt1, wt2, wt3)),
        ('r7', (r71, r72, r73)),
        ('r3', (r31, r32, r33)),
        ('rdd', (rdd1, rdd2, rdd3)),
    ]
    leaf_groups = [
        ('wtl', (wtl1, wtl2, wtl3)),
        ('r7l', (r7l1, r7l2, r7l3)),
        ('r3l', (r3l1, r3l2, r3l3)),
    ]

    def combine(groups):
        # merge the replicates of each group, then join the groups left to right
        tables = [_merge_replicates(a, b, c, prefix) for prefix, (a, b, c) in groups]
        combined = tables[0]
        for table in tables[1:]:
            combined = combined.merge(table, on=columns, how='inner')
        return combined

    Eoutdf = combine(endo_groups)
    Loutdf = combine(leaf_groups)
    ELoutdf = Eoutdf.merge(Loutdf, on=columns, how='inner')

    # mean of the three replicates; a NaN in any replicate gives a NaN mean
    for prefix in ('wt', 'r3', 'r7', 'rdd', 'wtl', 'r3l', 'r7l'):
        ELoutdf[prefix + '_avg_methy'] = (
            ELoutdf[prefix + '_1_avg_methy']
            + ELoutdf[prefix + '_2_avg_methy']
            + ELoutdf[prefix + '_3_avg_methy']
        ) / 3

    if feature_df is None:
        return ELoutdf

    finaldf = feature_df.merge(ELoutdf, on=columns, how='left')
    finaldf.drop(labels='feature', axis=1, inplace=True)
    return finaldf


In [None]:
r3_CGr_endo_leaf=process_endo_leaf_sum (wt_1_CG_sum_r3_CG,wt_2_CG_sum_r3_CG,wt_3_CG_sum_r3_CG,
                       r7_1_CG_sum_r3_CG,r7_2_CG_sum_r3_CG,r7_3_CG_sum_r3_CG,
                       r3_1_CG_sum_r3_CG,r3_2_CG_sum_r3_CG,r3_3_CG_sum_r3_CG,
                       rdd_1_CG_sum_r3_CG,rdd_2_CG_sum_r3_CG,rdd_3_CG_sum_r3_CG,
                       wtl_1_CG_sum_r3_CG,wtl_2_CG_sum_r3_CG,wtl_3_CG_sum_r3_CG,
                       r7l_1_CG_sum_r3_CG,r7l_2_CG_sum_r3_CG,r7l_3_CG_sum_r3_CG,
                       r3l_1_CG_sum_r3_CG,r3l_2_CG_sum_r3_CG,r3l_3_CG_sum_r3_CG, feature_df=None)

In [None]:
r7_CGr_endo_leaf=process_endo_leaf_sum (wt_1_CG_sum_r7_CG,wt_2_CG_sum_r7_CG,wt_3_CG_sum_r7_CG,
                       r7_1_CG_sum_r7_CG,r7_2_CG_sum_r7_CG,r7_3_CG_sum_r7_CG,
                       r3_1_CG_sum_r7_CG,r3_2_CG_sum_r7_CG,r3_3_CG_sum_r7_CG,
                       rdd_1_CG_sum_r7_CG,rdd_2_CG_sum_r7_CG,rdd_3_CG_sum_r7_CG,
                       wtl_1_CG_sum_r7_CG,wtl_2_CG_sum_r7_CG,wtl_3_CG_sum_r7_CG,
                       r7l_1_CG_sum_r7_CG,r7l_2_CG_sum_r7_CG,r7l_3_CG_sum_r7_CG,
                       r3l_1_CG_sum_r7_CG,r3l_2_CG_sum_r7_CG,r3l_3_CG_sum_r7_CG, feature_df=None)

## define regions by threshold

In [None]:
#define capped regions: rows where the mean of the non-leaf replicates (r3 or r7)
#is at most 50% of the mean of the matching leaf replicates (r3l or r7l)
#NOTE(review): a leaf mean of 0 gives an inf or NaN ratio, which fails the <= test,
#so such rows count as "not capped" -- confirm that is the intended handling
r3_CGr_cap50=r3_CGr_endo_leaf[(r3_CGr_endo_leaf['r3_avg_methy']/r3_CGr_endo_leaf['r3l_avg_methy']<=(0.50))]

r7_CGr_cap50=r7_CGr_endo_leaf[(r7_CGr_endo_leaf['r7_avg_methy']/r7_CGr_endo_leaf['r7l_avg_methy']<=(0.50))]

In [None]:
#identify not capped regions
#anti-join: outer-merge the full table with its capped subset (no `on`, so all
#shared columns are the key) and keep the rows flagged left_only, i.e. those
#that occur only in the full table; the helper _merge column is then dropped
r3_CGr_endo_leaf_indicate=r3_CGr_endo_leaf.merge(r3_CGr_cap50, how="outer", indicator=True)
r3_CGr_endo_leaf_notcap50=r3_CGr_endo_leaf_indicate[r3_CGr_endo_leaf_indicate["_merge"]=="left_only"].drop(labels="_merge", axis=1)

r7_CGr_endo_leaf_indicate=r7_CGr_endo_leaf.merge(r7_CGr_cap50, how="outer", indicator=True)
r7_CGr_endo_leaf_notcap50=r7_CGr_endo_leaf_indicate[r7_CGr_endo_leaf_indicate["_merge"]=="left_only"].drop(labels="_merge", axis=1)

In [None]:
def make_bed(df, path):
    """Return the chr/start/end columns of df, optionally saving them as BED.

    df   -- DataFrame with 'chr', 'start' and 'end' columns
    path -- output file path, or None to skip writing

    When path is given the three columns are written tab-separated, without
    header or index, and the location is printed. The 3-column DataFrame is
    returned in both cases.
    """
    bed = df.loc[:, ['chr', 'start', 'end']].copy()

    if path is not None:
        bed.to_csv(path, sep="\t", index=None, header=None)
        print('Bed file saved here: '+path)

    return bed

In [None]:
cappath=dmrpath+"hypermethylation_limited/"

In [None]:
r3_CGr_cap50_bed=make_bed(r3_CGr_cap50, cappath+"r3_CGr_limit50.bed")
r3_CGr_endo_leaf_notcap50_bed=make_bed(r3_CGr_endo_leaf_notcap50, cappath+"r3_CGr_notlimit50.bed")

r7_CGr_cap50_bed=make_bed(r7_CGr_cap50, cappath+"r7_CGr_limit50.bed")
r7_CGr_endo_leaf_notcap50_bed=make_bed(r7_CGr_endo_leaf_notcap50, cappath+"r7_CGr_notlimit50.bed")


## gene/TE enrichment in limited CG DMRs

In [None]:
#compcap=cappath+"make_comparisons/"
dmr_bed=['chr','start','end']
r3_CGr_cap50_int_gene=pd.read_csv(featurepath+"r3CGr_int_genes_limit50.bed", header=None, sep=tab, names=dmr_bed)
r3_CGr_notcap50_int_gene=pd.read_csv(featurepath+"r3CGr_int_genes_notlimit50.bed", header=None, sep=tab, names=dmr_bed)

r3_CGr_cap50_int_TEmerge=pd.read_csv(featurepath+"r3CGr_int_TEmerge_limit50.bed", header=None, sep=tab, names=dmr_bed)
r3_CGr_notcap50_int_TEmerge=pd.read_csv(featurepath+"r3CGr_int_TEmerge_notlimit50.bed", header=None, sep=tab, names=dmr_bed)

r7_CGr_cap50_int_gene=pd.read_csv(featurepath+"r7CGr_int_genes_limit50.bed", header=None, sep=tab, names=dmr_bed)
r7_CGr_notcap50_int_gene=pd.read_csv(featurepath+"r7CGr_int_genes_notlimit50.bed", header=None, sep=tab, names=dmr_bed)

r7_CGr_cap50_int_TEmerge=pd.read_csv(featurepath+"r7CGr_int_TEmerge_limit50.bed", header=None, sep=tab, names=dmr_bed)
r7_CGr_notcap50_int_TEmerge=pd.read_csv(featurepath+"r7CGr_int_TEmerge_notlimit50.bed", header=None, sep=tab, names=dmr_bed)


In [None]:
gbm_pie=['gene body', 'intergenic']
temerge_pie=['in TE', 'not in TE']

In [None]:
#pie chart of the share of regions that overlap a feature
def feature_pie(labels, sizes, title="", colors=('white','lightgray')):
    """Draw a pie chart with percentage labels on each wedge.

    labels -- wedge labels
    sizes  -- wedge sizes, one per label
    title  -- plot title (default: empty)
    colors -- wedge fill colours (default: white and light gray); an
              immutable tuple is used so the default cannot be altered
              between calls

    A new figure is created and left as the current figure, so a following
    plt.savefig call writes it out. Nothing is returned.
    """
    #style
    sns.set_style('white')

    fig, ax = plt.subplots()
    ax.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors,
           wedgeprops = {"edgecolor" : "black",
                      'linewidth': 2,
                      'antialiased': True}, textprops={'fontsize': 24})
    ax.set_title(title, fontsize=24)

    plt.tight_layout()


In [None]:
#r3 limit50 set: rows of the gene-intersect table vs the remaining rows of the full BED table
feature_pie(
    labels=gbm_pie,
    sizes=[
        len(r3_CGr_cap50_int_gene),
        len(r3_CGr_cap50_bed) - len(r3_CGr_cap50_int_gene),
    ],
    title="hyper-mCG limited",
)
plt.savefig(figurepath + 'r3_CGr_cap50_GBMpie.pdf', dpi=350)

In [None]:
#r3 notlimit50 set: rows of the gene-intersect table vs the remaining rows of the full BED table
feature_pie(
    labels=gbm_pie,
    sizes=[
        len(r3_CGr_notcap50_int_gene),
        len(r3_CGr_endo_leaf_notcap50_bed) - len(r3_CGr_notcap50_int_gene),
    ],
    title="not hyper-mCG limited",
)
plt.savefig(figurepath + 'r3_CGr_notcap50_GBMpie.pdf', dpi=350)

In [None]:
#r3 limit50 set: rows of the TE-intersect table vs the remaining rows of the full BED table
feature_pie(
    labels=temerge_pie,
    sizes=[
        len(r3_CGr_cap50_int_TEmerge),
        len(r3_CGr_cap50_bed) - len(r3_CGr_cap50_int_TEmerge),
    ],
    title=r3it + " hyper-mCG limited regions",
)
plt.savefig(figurepath + 'r3_CGr_cap50_TEmerge_pie.pdf', dpi=350)

In [None]:
#r3 notlimit50 set: rows of the TE-intersect table vs the remaining rows of the full BED table.
#The title has to say "not" so this figure is distinguishable from r3_CGr_cap50_TEmerge_pie.pdf.
feature_pie(labels=temerge_pie, sizes=[len(r3_CGr_notcap50_int_TEmerge), len(r3_CGr_endo_leaf_notcap50_bed)-len(r3_CGr_notcap50_int_TEmerge)], title=r3it+" not hyper-mCG limited regions")
plt.savefig(figurepath+'r3_CGr_notcap50_TEmerge_pie.pdf', dpi=350)

In [None]:
#r7 limit50 set: rows of the gene-intersect table vs the remaining rows of the full BED table
feature_pie(
    labels=gbm_pie,
    sizes=[
        len(r7_CGr_cap50_int_gene),
        len(r7_CGr_cap50_bed) - len(r7_CGr_cap50_int_gene),
    ],
    title=r7it + " hyper-mCG limited regions",
)
plt.savefig(figurepath + 'r7_CGr_cap50_GBMpie.pdf', dpi=350)

In [None]:
#r7 notlimit50 set: rows of the gene-intersect table vs the remaining rows of the full BED table
feature_pie(
    labels=gbm_pie,
    sizes=[
        len(r7_CGr_notcap50_int_gene),
        len(r7_CGr_endo_leaf_notcap50_bed) - len(r7_CGr_notcap50_int_gene),
    ],
    title=r7it + " not hyper-mCG limited regions",
)
plt.savefig(figurepath + 'r7_CGr_notcap50_GBMpie.pdf', dpi=350)

In [None]:
#r7 limit50 set: rows of the TE-intersect table vs the remaining rows of the full BED table
feature_pie(
    labels=temerge_pie,
    sizes=[
        len(r7_CGr_cap50_int_TEmerge),
        len(r7_CGr_cap50_bed) - len(r7_CGr_cap50_int_TEmerge),
    ],
    title=r7it + " hyper-mCG limited regions",
)
plt.savefig(figurepath + 'r7_CGr_cap50_TEmerge_pie.pdf', dpi=350)

In [None]:
#r7 notlimit50 set: rows of the TE-intersect table vs the remaining rows of the full BED table.
#The title has to say "not" so this figure is distinguishable from r7_CGr_cap50_TEmerge_pie.pdf.
feature_pie(labels=temerge_pie, sizes=[len(r7_CGr_notcap50_int_TEmerge), len(r7_CGr_endo_leaf_notcap50_bed)-len(r7_CGr_notcap50_int_TEmerge)], title=r7it+" not hyper-mCG limited regions")
plt.savefig(figurepath+'r7_CGr_notcap50_TEmerge_pie.pdf', dpi=350)

# mC sumby experiment 1 DMRs plotting

## sperm sumby

In [None]:
# Sperm-cell ("sc") samples: two replicates each from the wt_sc_* and ros1_sc_* files, one table per
# context (CpG / CHG / CHH), each summarized over the r3_v_wt hyper-DMR set of the same context.
# Variable names follow <sample>_sc_<rep>_<context>_sum_r3_<DMR context>; the ros1_sc files are stored under the r3_ prefix.
# readindata is defined in an earlier cell of this notebook.
wt_sc_1_CG_sum_r3_CG=readindata('wt_sc_1_all_CpG_min5_sumby_r3_v_wt_CG.DMRs.hyper.bed')
wt_sc_2_CG_sum_r3_CG=readindata('wt_sc_2_all_CpG_min5_sumby_r3_v_wt_CG.DMRs.hyper.bed')

r3_sc_1_CG_sum_r3_CG=readindata('ros1_sc_1_all_CpG_min5_sumby_r3_v_wt_CG.DMRs.hyper.bed')
r3_sc_2_CG_sum_r3_CG=readindata('ros1_sc_2_all_CpG_min5_sumby_r3_v_wt_CG.DMRs.hyper.bed')

wt_sc_1_CHG_sum_r3_CHG=readindata('wt_sc_1_all_CHG_min5_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
wt_sc_2_CHG_sum_r3_CHG=readindata('wt_sc_2_all_CHG_min5_sumby_r3_v_wt_CHG.DMRs.hyper.bed')

r3_sc_1_CHG_sum_r3_CHG=readindata('ros1_sc_1_all_CHG_min5_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
r3_sc_2_CHG_sum_r3_CHG=readindata('ros1_sc_2_all_CHG_min5_sumby_r3_v_wt_CHG.DMRs.hyper.bed')

wt_sc_1_CHH_sum_r3_CHH=readindata('wt_sc_1_all_CHH_min5_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
wt_sc_2_CHH_sum_r3_CHH=readindata('wt_sc_2_all_CHH_min5_sumby_r3_v_wt_CHH.DMRs.hyper.bed')

r3_sc_1_CHH_sum_r3_CHH=readindata('ros1_sc_1_all_CHH_min5_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
r3_sc_2_CHH_sum_r3_CHH=readindata('ros1_sc_2_all_CHH_min5_sumby_r3_v_wt_CHH.DMRs.hyper.bed')


In [None]:
def process_endo_leaf_sperm_sum (wt1,wt2,wt3,r71,r72,r73,r31,r32,r33,rdd1,rdd2,rdd3,wtl1,wtl2,wtl3,r7l1,r7l2,r7l3,r3l1,r3l2,r3l3,wtsc1,wtsc2,r3sc1,r3sc2):
    """Combine per-replicate summary tables into one wide endosperm + leaf + sperm table.

    Every argument is a DataFrame keyed by 'chr', 'start', 'end'. The column
    labels assigned below give each replicate two value columns
    (<sample>_<rep>_avg_methy and <sample>_<rep>_nC), so each input is expected
    to carry exactly two columns besides the three keys.

    Arguments, in order: endosperm wt, r7, r3, rdd (3 replicates each); leaf
    wt, r7, r3 (3 replicates each); sperm wt, r3 (2 replicates each).

    All joins are inner joins, so only regions present in every replicate of
    every sample are returned. The wild-type endosperm columns are labelled
    from the notebook-level list ``bigE_columns``.
    """
    coords = ['chr','start','end']

    def join_on_coords(tables):
        # chain inner joins on the region coordinates, left to right
        joined = tables[0]
        for nxt in tables[1:]:
            joined = joined.merge(nxt, on=coords, how='inner')
        return joined

    def replicate_labels(tag, n_reps):
        # coords followed by <tag>_<i>_avg_methy, <tag>_<i>_nC for i = 1..n_reps
        labels = list(coords)
        for i in range(1, n_reps + 1):
            labels.append(tag + '_' + str(i) + '_avg_methy')
            labels.append(tag + '_' + str(i) + '_nC')
        return labels

    def sample_table(tables, labels):
        # join the replicates of one sample and relabel every column
        tbl = join_on_coords(tables)
        tbl.columns = labels
        return tbl

    # endosperm
    wt_tbl = sample_table([wt1, wt2, wt3], bigE_columns)
    r7_tbl = sample_table([r71, r72, r73], replicate_labels('r7', 3))
    r3_tbl = sample_table([r31, r32, r33], replicate_labels('r3', 3))
    rdd_tbl = sample_table([rdd1, rdd2, rdd3], replicate_labels('rdd', 3))

    # leaf
    wtl_tbl = sample_table([wtl1, wtl2, wtl3], replicate_labels('wtl', 3))
    r7l_tbl = sample_table([r7l1, r7l2, r7l3], replicate_labels('r7l', 3))
    r3l_tbl = sample_table([r3l1, r3l2, r3l3], replicate_labels('r3l', 3))

    # sperm
    wtsc_tbl = sample_table([wtsc1, wtsc2], replicate_labels('wtsc', 2))
    r3sc_tbl = sample_table([r3sc1, r3sc2], replicate_labels('r3sc', 2))

    endo = join_on_coords([wt_tbl, r7_tbl, r3_tbl, rdd_tbl])
    leaf = join_on_coords([wtl_tbl, r7l_tbl, r3l_tbl])
    sperm = join_on_coords([wtsc_tbl, r3sc_tbl])

    return join_on_coords([endo, leaf, sperm])


In [None]:
# CG methylation over the r3 CG hyper DMRs, all tissues in one table.
# Positional order follows the function signature: endosperm wt, r7, r3, rdd; leaf wt, r7, r3; sperm wt, r3.
r3_CGr_endo_leaf_sperm=process_endo_leaf_sperm_sum (wt_1_CG_sum_r3_CG,wt_2_CG_sum_r3_CG,wt_3_CG_sum_r3_CG,
                       r7_1_CG_sum_r3_CG,r7_2_CG_sum_r3_CG,r7_3_CG_sum_r3_CG,
                       r3_1_CG_sum_r3_CG,r3_2_CG_sum_r3_CG,r3_3_CG_sum_r3_CG,
                       rdd_1_CG_sum_r3_CG,rdd_2_CG_sum_r3_CG,rdd_3_CG_sum_r3_CG,
                       wtl_1_CG_sum_r3_CG,wtl_2_CG_sum_r3_CG,wtl_3_CG_sum_r3_CG,
                       r7l_1_CG_sum_r3_CG,r7l_2_CG_sum_r3_CG,r7l_3_CG_sum_r3_CG,
                       r3l_1_CG_sum_r3_CG,r3l_2_CG_sum_r3_CG,r3l_3_CG_sum_r3_CG,
                                        wt_sc_1_CG_sum_r3_CG,wt_sc_2_CG_sum_r3_CG,
                                        r3_sc_1_CG_sum_r3_CG,r3_sc_2_CG_sum_r3_CG)

## non-CG sumby data

### bulk sumby bulk

In [None]:
# Bulk samples, CHG methylation summarized over the r3_v_wt CHG hyper DMRs.
# Three replicates per sample; names without a suffix (wt, r3, r7, rdd) come from the plain files,
# names ending in "l" (wtl, r3l, r7l) from the *_leaf_* files. Only the rdd file names carry "_min5".
wt_1_CHG_sum_r3_CHG=readindata('wt_1_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
wt_2_CHG_sum_r3_CHG=readindata('wt_2_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
wt_3_CHG_sum_r3_CHG=readindata('wt_3_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')

r3_1_CHG_sum_r3_CHG=readindata('r3_1_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
r3_2_CHG_sum_r3_CHG=readindata('r3_2_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
r3_3_CHG_sum_r3_CHG=readindata('r3_3_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')

r7_1_CHG_sum_r3_CHG=readindata('r7_1_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
r7_2_CHG_sum_r3_CHG=readindata('r7_2_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
r7_3_CHG_sum_r3_CHG=readindata('r7_3_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')

rdd_1_CHG_sum_r3_CHG=readindata('rdd_1_Col_spiked_CHG_min5_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
rdd_2_CHG_sum_r3_CHG=readindata('rdd_2_Col_spiked_CHG_min5_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
rdd_3_CHG_sum_r3_CHG=readindata('rdd_3_Col_spiked_CHG_min5_sumby_r3_v_wt_CHG.DMRs.hyper.bed')

wtl_1_CHG_sum_r3_CHG=readindata('wt_1_leaf_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
wtl_2_CHG_sum_r3_CHG=readindata('wt_2_leaf_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
wtl_3_CHG_sum_r3_CHG=readindata('wt_3_leaf_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')

r3l_1_CHG_sum_r3_CHG=readindata('r3_1_leaf_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
r3l_2_CHG_sum_r3_CHG=readindata('r3_2_leaf_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
r3l_3_CHG_sum_r3_CHG=readindata('r3_3_leaf_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')

r7l_1_CHG_sum_r3_CHG=readindata('r7_1_leaf_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
r7l_2_CHG_sum_r3_CHG=readindata('r7_2_leaf_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')
r7l_3_CHG_sum_r3_CHG=readindata('r7_3_leaf_Col_spiked_CHG_sumby_r3_v_wt_CHG.DMRs.hyper.bed')


In [None]:
# Bulk samples, CHG methylation summarized over the r7_v_wt CHG hyper DMRs.
# Three replicates per sample; names ending in "l" are read from the *_leaf_* files. Only the rdd file names carry "_min5".
wt_1_CHG_sum_r7_CHG=readindata('wt_1_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
wt_2_CHG_sum_r7_CHG=readindata('wt_2_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
wt_3_CHG_sum_r7_CHG=readindata('wt_3_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')

r3_1_CHG_sum_r7_CHG=readindata('r3_1_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
r3_2_CHG_sum_r7_CHG=readindata('r3_2_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
r3_3_CHG_sum_r7_CHG=readindata('r3_3_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')

r7_1_CHG_sum_r7_CHG=readindata('r7_1_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
r7_2_CHG_sum_r7_CHG=readindata('r7_2_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
r7_3_CHG_sum_r7_CHG=readindata('r7_3_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')


rdd_1_CHG_sum_r7_CHG=readindata('rdd_1_Col_spiked_CHG_min5_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
rdd_2_CHG_sum_r7_CHG=readindata('rdd_2_Col_spiked_CHG_min5_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
rdd_3_CHG_sum_r7_CHG=readindata('rdd_3_Col_spiked_CHG_min5_sumby_r7_v_wt_CHG.DMRs.hyper.bed')


wtl_1_CHG_sum_r7_CHG=readindata('wt_1_leaf_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
wtl_2_CHG_sum_r7_CHG=readindata('wt_2_leaf_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
wtl_3_CHG_sum_r7_CHG=readindata('wt_3_leaf_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')

r3l_1_CHG_sum_r7_CHG=readindata('r3_1_leaf_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
r3l_2_CHG_sum_r7_CHG=readindata('r3_2_leaf_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
r3l_3_CHG_sum_r7_CHG=readindata('r3_3_leaf_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')

r7l_1_CHG_sum_r7_CHG=readindata('r7_1_leaf_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
r7l_2_CHG_sum_r7_CHG=readindata('r7_2_leaf_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')
r7l_3_CHG_sum_r7_CHG=readindata('r7_3_leaf_Col_spiked_CHG_sumby_r7_v_wt_CHG.DMRs.hyper.bed')


In [None]:
# Bulk samples, CHH methylation summarized over the r3_v_wt CHH hyper DMRs.
# Three replicates per sample; names ending in "l" are read from the *_leaf_* files. Only the rdd file names carry "_min5".
wt_1_CHH_sum_r3_CHH=readindata('wt_1_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
wt_2_CHH_sum_r3_CHH=readindata('wt_2_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
wt_3_CHH_sum_r3_CHH=readindata('wt_3_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')

r3_1_CHH_sum_r3_CHH=readindata('r3_1_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
r3_2_CHH_sum_r3_CHH=readindata('r3_2_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
r3_3_CHH_sum_r3_CHH=readindata('r3_3_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')

r7_1_CHH_sum_r3_CHH=readindata('r7_1_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
r7_2_CHH_sum_r3_CHH=readindata('r7_2_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
r7_3_CHH_sum_r3_CHH=readindata('r7_3_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')


rdd_1_CHH_sum_r3_CHH=readindata('rdd_1_Col_spiked_CHH_min5_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
rdd_2_CHH_sum_r3_CHH=readindata('rdd_2_Col_spiked_CHH_min5_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
rdd_3_CHH_sum_r3_CHH=readindata('rdd_3_Col_spiked_CHH_min5_sumby_r3_v_wt_CHH.DMRs.hyper.bed')

wtl_1_CHH_sum_r3_CHH=readindata('wt_1_leaf_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
wtl_2_CHH_sum_r3_CHH=readindata('wt_2_leaf_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
wtl_3_CHH_sum_r3_CHH=readindata('wt_3_leaf_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')

r3l_1_CHH_sum_r3_CHH=readindata('r3_1_leaf_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
r3l_2_CHH_sum_r3_CHH=readindata('r3_2_leaf_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
r3l_3_CHH_sum_r3_CHH=readindata('r3_3_leaf_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')

r7l_1_CHH_sum_r3_CHH=readindata('r7_1_leaf_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
r7l_2_CHH_sum_r3_CHH=readindata('r7_2_leaf_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')
r7l_3_CHH_sum_r3_CHH=readindata('r7_3_leaf_Col_spiked_CHH_sumby_r3_v_wt_CHH.DMRs.hyper.bed')


In [None]:
# Bulk samples, CHH methylation summarized over the r7_v_wt CHH hyper DMRs.
# Three replicates per sample; names ending in "l" are read from the *_leaf_* files. Only the rdd file names carry "_min5".
wt_1_CHH_sum_r7_CHH=readindata('wt_1_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
wt_2_CHH_sum_r7_CHH=readindata('wt_2_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
wt_3_CHH_sum_r7_CHH=readindata('wt_3_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')

r3_1_CHH_sum_r7_CHH=readindata('r3_1_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
r3_2_CHH_sum_r7_CHH=readindata('r3_2_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
r3_3_CHH_sum_r7_CHH=readindata('r3_3_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')

r7_1_CHH_sum_r7_CHH=readindata('r7_1_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
r7_2_CHH_sum_r7_CHH=readindata('r7_2_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
r7_3_CHH_sum_r7_CHH=readindata('r7_3_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')

rdd_1_CHH_sum_r7_CHH=readindata('rdd_1_Col_spiked_CHH_min5_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
rdd_2_CHH_sum_r7_CHH=readindata('rdd_2_Col_spiked_CHH_min5_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
rdd_3_CHH_sum_r7_CHH=readindata('rdd_3_Col_spiked_CHH_min5_sumby_r7_v_wt_CHH.DMRs.hyper.bed')

wtl_1_CHH_sum_r7_CHH=readindata('wt_1_leaf_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
wtl_2_CHH_sum_r7_CHH=readindata('wt_2_leaf_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
wtl_3_CHH_sum_r7_CHH=readindata('wt_3_leaf_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')

r3l_1_CHH_sum_r7_CHH=readindata('r3_1_leaf_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
r3l_2_CHH_sum_r7_CHH=readindata('r3_2_leaf_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
r3l_3_CHH_sum_r7_CHH=readindata('r3_3_leaf_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')

r7l_1_CHH_sum_r7_CHH=readindata('r7_1_leaf_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
r7l_2_CHH_sum_r7_CHH=readindata('r7_2_leaf_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')
r7l_3_CHH_sum_r7_CHH=readindata('r7_3_leaf_Col_spiked_CHH_sumby_r7_v_wt_CHH.DMRs.hyper.bed')


## nonCG big dataframes

In [None]:
# CHG methylation over the r3 CHG hyper DMRs, endosperm + leaf + sperm in one table.
# Positional order: endosperm wt, r7, r3, rdd; leaf wt, r7, r3; sperm wt, r3.
r3_CHGr_endo_leaf_sperm=process_endo_leaf_sperm_sum (wt_1_CHG_sum_r3_CHG,wt_2_CHG_sum_r3_CHG,wt_3_CHG_sum_r3_CHG,
                       r7_1_CHG_sum_r3_CHG,r7_2_CHG_sum_r3_CHG,r7_3_CHG_sum_r3_CHG,
                       r3_1_CHG_sum_r3_CHG,r3_2_CHG_sum_r3_CHG,r3_3_CHG_sum_r3_CHG,
                       rdd_1_CHG_sum_r3_CHG,rdd_2_CHG_sum_r3_CHG,rdd_3_CHG_sum_r3_CHG, 
                       wtl_1_CHG_sum_r3_CHG,wtl_2_CHG_sum_r3_CHG,wtl_3_CHG_sum_r3_CHG,
                       r7l_1_CHG_sum_r3_CHG,r7l_2_CHG_sum_r3_CHG,r7l_3_CHG_sum_r3_CHG,
                       r3l_1_CHG_sum_r3_CHG,r3l_2_CHG_sum_r3_CHG,r3l_3_CHG_sum_r3_CHG,
                                        wt_sc_1_CHG_sum_r3_CHG,wt_sc_2_CHG_sum_r3_CHG,
                                        r3_sc_1_CHG_sum_r3_CHG,r3_sc_2_CHG_sum_r3_CHG)

In [None]:
# CHH methylation over the r3 CHH hyper DMRs, endosperm + leaf + sperm in one table.
# Positional order: endosperm wt, r7, r3, rdd; leaf wt, r7, r3; sperm wt, r3.
r3_CHHr_endo_leaf_sperm=process_endo_leaf_sperm_sum (wt_1_CHH_sum_r3_CHH,wt_2_CHH_sum_r3_CHH,wt_3_CHH_sum_r3_CHH,
                       r7_1_CHH_sum_r3_CHH,r7_2_CHH_sum_r3_CHH,r7_3_CHH_sum_r3_CHH,
                       r3_1_CHH_sum_r3_CHH,r3_2_CHH_sum_r3_CHH,r3_3_CHH_sum_r3_CHH,
                       rdd_1_CHH_sum_r3_CHH,rdd_2_CHH_sum_r3_CHH,rdd_3_CHH_sum_r3_CHH,
                       wtl_1_CHH_sum_r3_CHH,wtl_2_CHH_sum_r3_CHH,wtl_3_CHH_sum_r3_CHH,
                       r7l_1_CHH_sum_r3_CHH,r7l_2_CHH_sum_r3_CHH,r7l_3_CHH_sum_r3_CHH,
                       r3l_1_CHH_sum_r3_CHH,r3l_2_CHH_sum_r3_CHH,r3l_3_CHH_sum_r3_CHH,
                                        wt_sc_1_CHH_sum_r3_CHH,wt_sc_2_CHH_sum_r3_CHH,
                                        r3_sc_1_CHH_sum_r3_CHH,r3_sc_2_CHH_sum_r3_CHH)

In [None]:
# CHG methylation over the r3 CHG hyper DMRs, endosperm + leaf only (no sperm tables), no feature table attached.
# NOTE(review): process_endo_leaf_sum is defined earlier in the notebook; arguments here are passed as
# endosperm wt, r7, r3, rdd then leaf wt, r7, r3 — confirm this matches its signature.
r3_CHGr_endo_leaf=process_endo_leaf_sum (wt_1_CHG_sum_r3_CHG,wt_2_CHG_sum_r3_CHG,wt_3_CHG_sum_r3_CHG,
                       r7_1_CHG_sum_r3_CHG,r7_2_CHG_sum_r3_CHG,r7_3_CHG_sum_r3_CHG,
                       r3_1_CHG_sum_r3_CHG,r3_2_CHG_sum_r3_CHG,r3_3_CHG_sum_r3_CHG,
                       rdd_1_CHG_sum_r3_CHG,rdd_2_CHG_sum_r3_CHG,rdd_3_CHG_sum_r3_CHG, 
                       wtl_1_CHG_sum_r3_CHG,wtl_2_CHG_sum_r3_CHG,wtl_3_CHG_sum_r3_CHG,
                       r7l_1_CHG_sum_r3_CHG,r7l_2_CHG_sum_r3_CHG,r7l_3_CHG_sum_r3_CHG,
                       r3l_1_CHG_sum_r3_CHG,r3l_2_CHG_sum_r3_CHG,r3l_3_CHG_sum_r3_CHG, feature_df=None)

In [None]:
# CHH methylation over the r3 CHH hyper DMRs, endosperm + leaf only (no sperm tables), no feature table attached.
# NOTE(review): arguments are passed as endosperm wt, r7, r3, rdd then leaf wt, r7, r3 — confirm against process_endo_leaf_sum.
r3_CHHr_endo_leaf=process_endo_leaf_sum (wt_1_CHH_sum_r3_CHH,wt_2_CHH_sum_r3_CHH,wt_3_CHH_sum_r3_CHH,
                       r7_1_CHH_sum_r3_CHH,r7_2_CHH_sum_r3_CHH,r7_3_CHH_sum_r3_CHH,
                       r3_1_CHH_sum_r3_CHH,r3_2_CHH_sum_r3_CHH,r3_3_CHH_sum_r3_CHH,
                       rdd_1_CHH_sum_r3_CHH,rdd_2_CHH_sum_r3_CHH,rdd_3_CHH_sum_r3_CHH,
                       wtl_1_CHH_sum_r3_CHH,wtl_2_CHH_sum_r3_CHH,wtl_3_CHH_sum_r3_CHH,
                       r7l_1_CHH_sum_r3_CHH,r7l_2_CHH_sum_r3_CHH,r7l_3_CHH_sum_r3_CHH,
                       r3l_1_CHH_sum_r3_CHH,r3l_2_CHH_sum_r3_CHH,r3l_3_CHH_sum_r3_CHH, feature_df=None)

In [None]:
# CHG methylation over the r7 CHG hyper DMRs, endosperm + leaf only, no feature table attached.
# NOTE(review): arguments are passed as endosperm wt, r7, r3, rdd then leaf wt, r7, r3 — confirm against process_endo_leaf_sum.
r7_CHGr_endo_leaf=process_endo_leaf_sum (wt_1_CHG_sum_r7_CHG,wt_2_CHG_sum_r7_CHG,wt_3_CHG_sum_r7_CHG,
                       r7_1_CHG_sum_r7_CHG,r7_2_CHG_sum_r7_CHG,r7_3_CHG_sum_r7_CHG,
                       r3_1_CHG_sum_r7_CHG,r3_2_CHG_sum_r7_CHG,r3_3_CHG_sum_r7_CHG,
                       rdd_1_CHG_sum_r7_CHG,rdd_2_CHG_sum_r7_CHG,rdd_3_CHG_sum_r7_CHG, 
                       wtl_1_CHG_sum_r7_CHG,wtl_2_CHG_sum_r7_CHG,wtl_3_CHG_sum_r7_CHG,
                       r7l_1_CHG_sum_r7_CHG,r7l_2_CHG_sum_r7_CHG,r7l_3_CHG_sum_r7_CHG,
                       r3l_1_CHG_sum_r7_CHG,r3l_2_CHG_sum_r7_CHG,r3l_3_CHG_sum_r7_CHG, feature_df=None)

In [None]:
# CHH methylation over the r7 CHH hyper DMRs, endosperm + leaf only, no feature table attached.
# NOTE(review): arguments are passed as endosperm wt, r7, r3, rdd then leaf wt, r7, r3 — confirm against process_endo_leaf_sum.
r7_CHHr_endo_leaf=process_endo_leaf_sum (wt_1_CHH_sum_r7_CHH,wt_2_CHH_sum_r7_CHH,wt_3_CHH_sum_r7_CHH,
                       r7_1_CHH_sum_r7_CHH,r7_2_CHH_sum_r7_CHH,r7_3_CHH_sum_r7_CHH,
                       r3_1_CHH_sum_r7_CHH,r3_2_CHH_sum_r7_CHH,r3_3_CHH_sum_r7_CHH,
                       rdd_1_CHH_sum_r7_CHH,rdd_2_CHH_sum_r7_CHH,rdd_3_CHH_sum_r7_CHH, 
                       wtl_1_CHH_sum_r7_CHH,wtl_2_CHH_sum_r7_CHH,wtl_3_CHH_sum_r7_CHH,
                       r7l_1_CHH_sum_r7_CHH,r7l_2_CHH_sum_r7_CHH,r7l_3_CHH_sum_r7_CHH,
                       r3l_1_CHH_sum_r7_CHH,r3l_2_CHH_sum_r7_CHH,r3l_3_CHH_sum_r7_CHH, feature_df=None)

## allelic sumby bulk DMRs

In [None]:
sumpath=path+"sumby_features/allelic_data_sumby_dmrs_outputs/"
# Allelic CG methylation summarized over the r3 CG hyper DMRs, three replicates per cross.
# Variable names: <allele>_<mat|pat>_<rep>_<context>_sum_r3_<DMR context>.
# File-to-variable mapping used throughout the allelic cells:
#   ColxC24: Col_spiked -> Col_mat, C24_pseudo -> C24_pat      C24xCol: C24_pseudo -> C24_mat, Col_spiked -> Col_pat
#   r3xr1:   Col_spiked -> r3_mat,  C24_pseudo -> r1_pat       r1xr3:   C24_pseudo -> r1_mat,  Col_spiked -> r3_pat
# NOTE(review): i.e. the first-named parent of a cross is treated as maternal — confirm.
# NOTE(review): sumpath is re-pointed just above; presumably readindata reads from the global sumpath — confirm.
Col_mat_1_CG_sum_r3_CG=readindata('ColxC24_1_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')
Col_mat_2_CG_sum_r3_CG=readindata('ColxC24_2_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')
Col_mat_3_CG_sum_r3_CG=readindata('ColxC24_3_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')

Col_pat_1_CG_sum_r3_CG=readindata('C24xCol_1_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')
Col_pat_2_CG_sum_r3_CG=readindata('C24xCol_2_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')
Col_pat_3_CG_sum_r3_CG=readindata('C24xCol_3_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')

C24_mat_1_CG_sum_r3_CG=readindata('C24xCol_1_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')
C24_mat_2_CG_sum_r3_CG=readindata('C24xCol_2_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')
C24_mat_3_CG_sum_r3_CG=readindata('C24xCol_3_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')

C24_pat_1_CG_sum_r3_CG=readindata('ColxC24_1_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')
C24_pat_2_CG_sum_r3_CG=readindata('ColxC24_2_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')
C24_pat_3_CG_sum_r3_CG=readindata('ColxC24_3_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')

r3_mat_1_CG_sum_r3_CG=readindata('r3xr1_1_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')
r3_mat_2_CG_sum_r3_CG=readindata('r3xr1_2_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')
r3_mat_3_CG_sum_r3_CG=readindata('r3xr1_3_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')

r3_pat_1_CG_sum_r3_CG=readindata('r1xr3_1_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')
r3_pat_2_CG_sum_r3_CG=readindata('r1xr3_2_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')
r3_pat_3_CG_sum_r3_CG=readindata('r1xr3_3_Col_spiked_CG_sumby_r3_v_wt_CG_hyper.bed')

r1_mat_1_CG_sum_r3_CG=readindata('r1xr3_1_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')
r1_mat_2_CG_sum_r3_CG=readindata('r1xr3_2_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')
r1_mat_3_CG_sum_r3_CG=readindata('r1xr3_3_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')

r1_pat_1_CG_sum_r3_CG=readindata('r3xr1_1_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')
r1_pat_2_CG_sum_r3_CG=readindata('r3xr1_2_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')
r1_pat_3_CG_sum_r3_CG=readindata('r3xr1_3_C24_pseudo_CG_sumby_r3_v_wt_CG_hyper.bed')


In [None]:
# Allelic CG methylation summarized over the r3 CHG hyper DMRs, three replicates per cross.
# Col_spiked files feed the Col_* / r3_* variables, C24_pseudo files the C24_* / r1_* variables;
# the mat/pat label follows the cross name in the file (first-named parent -> mat).
Col_mat_1_CG_sum_r3_CHG=readindata('ColxC24_1_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')
Col_mat_2_CG_sum_r3_CHG=readindata('ColxC24_2_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')
Col_mat_3_CG_sum_r3_CHG=readindata('ColxC24_3_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')

Col_pat_1_CG_sum_r3_CHG=readindata('C24xCol_1_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')
Col_pat_2_CG_sum_r3_CHG=readindata('C24xCol_2_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')
Col_pat_3_CG_sum_r3_CHG=readindata('C24xCol_3_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')

C24_mat_1_CG_sum_r3_CHG=readindata('C24xCol_1_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')
C24_mat_2_CG_sum_r3_CHG=readindata('C24xCol_2_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')
C24_mat_3_CG_sum_r3_CHG=readindata('C24xCol_3_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')

C24_pat_1_CG_sum_r3_CHG=readindata('ColxC24_1_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')
C24_pat_2_CG_sum_r3_CHG=readindata('ColxC24_2_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')
C24_pat_3_CG_sum_r3_CHG=readindata('ColxC24_3_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')

r3_mat_1_CG_sum_r3_CHG=readindata('r3xr1_1_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')
r3_mat_2_CG_sum_r3_CHG=readindata('r3xr1_2_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')
r3_mat_3_CG_sum_r3_CHG=readindata('r3xr1_3_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')

r3_pat_1_CG_sum_r3_CHG=readindata('r1xr3_1_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')
r3_pat_2_CG_sum_r3_CHG=readindata('r1xr3_2_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')
r3_pat_3_CG_sum_r3_CHG=readindata('r1xr3_3_Col_spiked_CG_sumby_r3_v_wt_CHG_hyper.bed')

r1_mat_1_CG_sum_r3_CHG=readindata('r1xr3_1_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')
r1_mat_2_CG_sum_r3_CHG=readindata('r1xr3_2_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')
r1_mat_3_CG_sum_r3_CHG=readindata('r1xr3_3_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')

r1_pat_1_CG_sum_r3_CHG=readindata('r3xr1_1_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')
r1_pat_2_CG_sum_r3_CHG=readindata('r3xr1_2_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')
r1_pat_3_CG_sum_r3_CHG=readindata('r3xr1_3_C24_pseudo_CG_sumby_r3_v_wt_CHG_hyper.bed')


In [None]:
# Allelic CG methylation summarized over the r3 CHH hyper DMRs, three replicates per cross.
# Col_spiked files feed the Col_* / r3_* variables, C24_pseudo files the C24_* / r1_* variables;
# the mat/pat label follows the cross name in the file (first-named parent -> mat).
Col_mat_1_CG_sum_r3_CHH=readindata('ColxC24_1_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')
Col_mat_2_CG_sum_r3_CHH=readindata('ColxC24_2_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')
Col_mat_3_CG_sum_r3_CHH=readindata('ColxC24_3_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')

Col_pat_1_CG_sum_r3_CHH=readindata('C24xCol_1_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')
Col_pat_2_CG_sum_r3_CHH=readindata('C24xCol_2_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')
Col_pat_3_CG_sum_r3_CHH=readindata('C24xCol_3_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')

C24_mat_1_CG_sum_r3_CHH=readindata('C24xCol_1_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')
C24_mat_2_CG_sum_r3_CHH=readindata('C24xCol_2_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')
C24_mat_3_CG_sum_r3_CHH=readindata('C24xCol_3_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')

C24_pat_1_CG_sum_r3_CHH=readindata('ColxC24_1_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')
C24_pat_2_CG_sum_r3_CHH=readindata('ColxC24_2_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')
C24_pat_3_CG_sum_r3_CHH=readindata('ColxC24_3_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')

r3_mat_1_CG_sum_r3_CHH=readindata('r3xr1_1_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')
r3_mat_2_CG_sum_r3_CHH=readindata('r3xr1_2_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')
r3_mat_3_CG_sum_r3_CHH=readindata('r3xr1_3_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')

r3_pat_1_CG_sum_r3_CHH=readindata('r1xr3_1_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')
r3_pat_2_CG_sum_r3_CHH=readindata('r1xr3_2_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')
r3_pat_3_CG_sum_r3_CHH=readindata('r1xr3_3_Col_spiked_CG_sumby_r3_v_wt_CHH_hyper.bed')

r1_mat_1_CG_sum_r3_CHH=readindata('r1xr3_1_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')
r1_mat_2_CG_sum_r3_CHH=readindata('r1xr3_2_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')
r1_mat_3_CG_sum_r3_CHH=readindata('r1xr3_3_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')

r1_pat_1_CG_sum_r3_CHH=readindata('r3xr1_1_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')
r1_pat_2_CG_sum_r3_CHH=readindata('r3xr1_2_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')
r1_pat_3_CG_sum_r3_CHH=readindata('r3xr1_3_C24_pseudo_CG_sumby_r3_v_wt_CHH_hyper.bed')


In [None]:
# Allelic CHG methylation summarized over the r3 CHG hyper DMRs, three replicates per cross.
# Col_spiked files feed the Col_* / r3_* variables, C24_pseudo files the C24_* / r1_* variables;
# the mat/pat label follows the cross name in the file (first-named parent -> mat).
Col_mat_1_CHG_sum_r3_CHG=readindata('ColxC24_1_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')
Col_mat_2_CHG_sum_r3_CHG=readindata('ColxC24_2_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')
Col_mat_3_CHG_sum_r3_CHG=readindata('ColxC24_3_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')

Col_pat_1_CHG_sum_r3_CHG=readindata('C24xCol_1_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')
Col_pat_2_CHG_sum_r3_CHG=readindata('C24xCol_2_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')
Col_pat_3_CHG_sum_r3_CHG=readindata('C24xCol_3_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')

C24_mat_1_CHG_sum_r3_CHG=readindata('C24xCol_1_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')
C24_mat_2_CHG_sum_r3_CHG=readindata('C24xCol_2_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')
C24_mat_3_CHG_sum_r3_CHG=readindata('C24xCol_3_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')

C24_pat_1_CHG_sum_r3_CHG=readindata('ColxC24_1_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')
C24_pat_2_CHG_sum_r3_CHG=readindata('ColxC24_2_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')
C24_pat_3_CHG_sum_r3_CHG=readindata('ColxC24_3_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')

r3_mat_1_CHG_sum_r3_CHG=readindata('r3xr1_1_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')
r3_mat_2_CHG_sum_r3_CHG=readindata('r3xr1_2_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')
r3_mat_3_CHG_sum_r3_CHG=readindata('r3xr1_3_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')

r3_pat_1_CHG_sum_r3_CHG=readindata('r1xr3_1_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')
r3_pat_2_CHG_sum_r3_CHG=readindata('r1xr3_2_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')
r3_pat_3_CHG_sum_r3_CHG=readindata('r1xr3_3_Col_spiked_CHG_sumby_r3_v_wt_CHG_hyper.bed')

r1_mat_1_CHG_sum_r3_CHG=readindata('r1xr3_1_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')
r1_mat_2_CHG_sum_r3_CHG=readindata('r1xr3_2_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')
r1_mat_3_CHG_sum_r3_CHG=readindata('r1xr3_3_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')

r1_pat_1_CHG_sum_r3_CHG=readindata('r3xr1_1_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')
r1_pat_2_CHG_sum_r3_CHG=readindata('r3xr1_2_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')
r1_pat_3_CHG_sum_r3_CHG=readindata('r3xr1_3_C24_pseudo_CHG_sumby_r3_v_wt_CHG_hyper.bed')


In [None]:
# Allelic CHG methylation summarized over the r3 CG hyper DMRs, three replicates per cross.
# Col_spiked files feed the Col_* / r3_* variables, C24_pseudo files the C24_* / r1_* variables;
# the mat/pat label follows the cross name in the file (first-named parent -> mat).
Col_mat_1_CHG_sum_r3_CG=readindata('ColxC24_1_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')
Col_mat_2_CHG_sum_r3_CG=readindata('ColxC24_2_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')
Col_mat_3_CHG_sum_r3_CG=readindata('ColxC24_3_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')

Col_pat_1_CHG_sum_r3_CG=readindata('C24xCol_1_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')
Col_pat_2_CHG_sum_r3_CG=readindata('C24xCol_2_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')
Col_pat_3_CHG_sum_r3_CG=readindata('C24xCol_3_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')

C24_mat_1_CHG_sum_r3_CG=readindata('C24xCol_1_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')
C24_mat_2_CHG_sum_r3_CG=readindata('C24xCol_2_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')
C24_mat_3_CHG_sum_r3_CG=readindata('C24xCol_3_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')

C24_pat_1_CHG_sum_r3_CG=readindata('ColxC24_1_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')
C24_pat_2_CHG_sum_r3_CG=readindata('ColxC24_2_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')
C24_pat_3_CHG_sum_r3_CG=readindata('ColxC24_3_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')

r3_mat_1_CHG_sum_r3_CG=readindata('r3xr1_1_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')
r3_mat_2_CHG_sum_r3_CG=readindata('r3xr1_2_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')
r3_mat_3_CHG_sum_r3_CG=readindata('r3xr1_3_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')

r3_pat_1_CHG_sum_r3_CG=readindata('r1xr3_1_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')
r3_pat_2_CHG_sum_r3_CG=readindata('r1xr3_2_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')
r3_pat_3_CHG_sum_r3_CG=readindata('r1xr3_3_Col_spiked_CHG_sumby_r3_v_wt_CG_hyper.bed')

r1_mat_1_CHG_sum_r3_CG=readindata('r1xr3_1_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')
r1_mat_2_CHG_sum_r3_CG=readindata('r1xr3_2_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')
r1_mat_3_CHG_sum_r3_CG=readindata('r1xr3_3_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')

r1_pat_1_CHG_sum_r3_CG=readindata('r3xr1_1_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')
r1_pat_2_CHG_sum_r3_CG=readindata('r3xr1_2_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')
r1_pat_3_CHG_sum_r3_CG=readindata('r3xr1_3_C24_pseudo_CHG_sumby_r3_v_wt_CG_hyper.bed')


In [None]:
# Allelic CHH methylation summarized over the r3 CHH hyper DMRs, three replicates per cross.
# Col_spiked files feed the Col_* / r3_* variables, C24_pseudo files the C24_* / r1_* variables;
# the mat/pat label follows the cross name in the file (first-named parent -> mat).
Col_mat_1_CHH_sum_r3_CHH=readindata('ColxC24_1_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')
Col_mat_2_CHH_sum_r3_CHH=readindata('ColxC24_2_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')
Col_mat_3_CHH_sum_r3_CHH=readindata('ColxC24_3_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')

Col_pat_1_CHH_sum_r3_CHH=readindata('C24xCol_1_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')
Col_pat_2_CHH_sum_r3_CHH=readindata('C24xCol_2_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')
Col_pat_3_CHH_sum_r3_CHH=readindata('C24xCol_3_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')

C24_mat_1_CHH_sum_r3_CHH=readindata('C24xCol_1_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')
C24_mat_2_CHH_sum_r3_CHH=readindata('C24xCol_2_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')
C24_mat_3_CHH_sum_r3_CHH=readindata('C24xCol_3_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')

C24_pat_1_CHH_sum_r3_CHH=readindata('ColxC24_1_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')
C24_pat_2_CHH_sum_r3_CHH=readindata('ColxC24_2_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')
C24_pat_3_CHH_sum_r3_CHH=readindata('ColxC24_3_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')

r3_mat_1_CHH_sum_r3_CHH=readindata('r3xr1_1_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')
r3_mat_2_CHH_sum_r3_CHH=readindata('r3xr1_2_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')
r3_mat_3_CHH_sum_r3_CHH=readindata('r3xr1_3_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')

r3_pat_1_CHH_sum_r3_CHH=readindata('r1xr3_1_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')
r3_pat_2_CHH_sum_r3_CHH=readindata('r1xr3_2_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')
r3_pat_3_CHH_sum_r3_CHH=readindata('r1xr3_3_Col_spiked_CHH_sumby_r3_v_wt_CHH_hyper.bed')

r1_mat_1_CHH_sum_r3_CHH=readindata('r1xr3_1_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')
r1_mat_2_CHH_sum_r3_CHH=readindata('r1xr3_2_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')
r1_mat_3_CHH_sum_r3_CHH=readindata('r1xr3_3_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')

r1_pat_1_CHH_sum_r3_CHH=readindata('r3xr1_1_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')
r1_pat_2_CHH_sum_r3_CHH=readindata('r3xr1_2_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')
r1_pat_3_CHH_sum_r3_CHH=readindata('r3xr1_3_C24_pseudo_CHH_sumby_r3_v_wt_CHH_hyper.bed')


In [None]:
# Allelic CHH methylation summarized over the r3 CG hyper DMRs, three replicates per cross.
# Col_spiked files feed the Col_* / r3_* variables, C24_pseudo files the C24_* / r1_* variables;
# the mat/pat label follows the cross name in the file (first-named parent -> mat).
Col_mat_1_CHH_sum_r3_CG=readindata('ColxC24_1_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')
Col_mat_2_CHH_sum_r3_CG=readindata('ColxC24_2_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')
Col_mat_3_CHH_sum_r3_CG=readindata('ColxC24_3_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')

Col_pat_1_CHH_sum_r3_CG=readindata('C24xCol_1_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')
Col_pat_2_CHH_sum_r3_CG=readindata('C24xCol_2_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')
Col_pat_3_CHH_sum_r3_CG=readindata('C24xCol_3_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')

C24_mat_1_CHH_sum_r3_CG=readindata('C24xCol_1_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')
C24_mat_2_CHH_sum_r3_CG=readindata('C24xCol_2_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')
C24_mat_3_CHH_sum_r3_CG=readindata('C24xCol_3_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')

C24_pat_1_CHH_sum_r3_CG=readindata('ColxC24_1_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')
C24_pat_2_CHH_sum_r3_CG=readindata('ColxC24_2_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')
C24_pat_3_CHH_sum_r3_CG=readindata('ColxC24_3_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')

r3_mat_1_CHH_sum_r3_CG=readindata('r3xr1_1_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')
r3_mat_2_CHH_sum_r3_CG=readindata('r3xr1_2_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')
r3_mat_3_CHH_sum_r3_CG=readindata('r3xr1_3_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')

r3_pat_1_CHH_sum_r3_CG=readindata('r1xr3_1_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')
r3_pat_2_CHH_sum_r3_CG=readindata('r1xr3_2_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')
r3_pat_3_CHH_sum_r3_CG=readindata('r1xr3_3_Col_spiked_CHH_sumby_r3_v_wt_CG_hyper.bed')

r1_mat_1_CHH_sum_r3_CG=readindata('r1xr3_1_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')
r1_mat_2_CHH_sum_r3_CG=readindata('r1xr3_2_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')
r1_mat_3_CHH_sum_r3_CG=readindata('r1xr3_3_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')

r1_pat_1_CHH_sum_r3_CG=readindata('r3xr1_1_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')
r1_pat_2_CHH_sum_r3_CG=readindata('r3xr1_2_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')
r1_pat_3_CHH_sum_r3_CG=readindata('r3xr1_3_C24_pseudo_CHH_sumby_r3_v_wt_CG_hyper.bed')


In [None]:
def _merge_replicates(rep1, rep2, rep3, cross, keys):
    """Outer-merge the three replicate tables of one cross on `keys`.

    Value columns of replicates 1 and 2 receive the suffixes `_<cross>_1` and
    `_<cross>_2` from the merge itself. Replicate 3's `avg_methy`/`nC` columns
    no longer clash with anything and so arrive unsuffixed; they are renamed
    explicitly to `avg_methy_<cross>_3` / `nC_<cross>_3`.
    """
    merged=rep1.merge(rep2, on=keys, how="outer", suffixes=["_"+cross+"_1", "_"+cross+"_2"])
    merged=merged.merge(rep3, on=keys, how="outer", suffixes=["","_"+cross+"_3"])
    return merged.rename(columns={"avg_methy": "avg_methy_"+cross+"_3", "nC": "nC_"+cross+"_3"})


def makedatadf(C2xC_1,C2xC_2,C2xC_3,CxC2_1,CxC2_2,CxC2_3,r1xr3_1,r1xr3_2,r1xr3_3,r3xr1_1,r3xr1_2,r3xr1_3):
    """Combine twelve per-replicate tables into one wide table keyed by region.

    Each input is a DataFrame with columns `chr`, `start`, `end`, `avg_methy`
    and `nC`. The arguments are three replicates for each of four groups,
    labelled in the output as C24xCol, ColxC24, r1xr3 and r3xr1 (in that order).

    Returns a DataFrame with `chr`, `start`, `end` followed by
    `avg_methy_<group>_<rep>` / `nC_<group>_<rep>` for every group and
    replicate. All merges are outer merges, and the final `dropna()` keeps only
    regions that have a value in every one of the twelve inputs.

    Note: the nC columns are returned as well; an earlier draft had code after
    the `return` that would have kept only the avg_methy columns, but it was
    never executed and has been removed.
    """
    bed=['chr','start','end']

    C2xCo=_merge_replicates(C2xC_1, C2xC_2, C2xC_3, "C24xCol", bed)
    CoxC2=_merge_replicates(CxC2_1, CxC2_2, CxC2_3, "ColxC24", bed)
    r1xr3=_merge_replicates(r1xr3_1, r1xr3_2, r1xr3_3, "r1xr3", bed)
    r3xr1=_merge_replicates(r3xr1_1, r3xr1_2, r3xr1_3, "r3xr1", bed)

    # The first group contributes exactly the key columns plus its six value columns.
    first_cols=bed+[measure+"_C24xCol_"+str(rep) for rep in (1,2,3) for measure in ("avg_methy","nC")]
    out=C2xCo[first_cols]

    for cross_df in (CoxC2, r1xr3, r3xr1):
        out=out.merge(cross_df, on=bed, how="outer")

    # Regions missing from any replicate carry NaN after the outer merges; drop them.
    return out.dropna()

def makeavgdf(datadf):
    """Average the three replicate `avg_methy` columns of each cross.

    `datadf` must carry `chr`, `start`, `end` and the columns
    `avg_methy_<cross>_<1|2|3>` for C24xCol, ColxC24, r1xr3 and r3xr1.
    Returns a new DataFrame with the three coordinate columns plus one
    mean column per cross, named after the cross.
    """
    avgdf = datadf[['chr','start','end']].copy()
    for cross in ('C24xCol', 'ColxC24', 'r1xr3', 'r3xr1'):
        rep1 = datadf['avg_methy_'+cross+'_1']
        rep2 = datadf['avg_methy_'+cross+'_2']
        rep3 = datadf['avg_methy_'+cross+'_3']
        # plain sum / 3, so a NaN in any replicate propagates to the mean
        avgdf[cross] = (rep1+rep2+rep3)/3
    return avgdf

def makediffdf(matpatavg):
    """Compute per-region differences between selected averaged columns.

    `matpatavg` is the merged maternal/paternal average table, whose cross
    columns carry `_mat` / `_pat` suffixes. Each output column is
    `matpatavg[minuend] - matpatavg[subtrahend]` for the pairs listed below.
    Returns the coordinate columns plus the four difference columns.
    """
    # output column -> (minuend, subtrahend)
    contrasts = {
        'r3_avg_mat_contribution': ('r3xr1_mat', 'ColxC24_mat'),
        'r3_avg_pat_contribution': ('r1xr3_pat', 'C24xCol_pat'),
        'r1_avg_mat_contribution': ('r1xr3_mat', 'C24xCol_mat'),
        'r1_avg_pat_contribution': ('r3xr1_pat', 'ColxC24_pat'),
    }

    diffdf = matpatavg[['chr','start','end']].copy()
    for out_col, (minuend, subtrahend) in contrasts.items():
        diffdf[out_col] = matpatavg[minuend]-matpatavg[subtrahend]

    return diffdf



In [None]:
# Label every row of r3_CGr_cap50_bed as 'limited'. Later cells left-merge this
# table onto the allele tables and fill unmatched rows with "not limited".
r3_CGr_cap50_bed['DMR class']='limited'

In [None]:
# Paternal- and maternal-allele tables, CG context (presumably summed over r3 CG hyper-DMRs, per the *_sum_r3_CG names).
# makedatadf labels its four argument groups, in order, as C24xCol, ColxC24, r1xr3 and r3xr1 (replicates 1-3 each).
r3_CGr_pat_mCG=makedatadf(Col_pat_1_CG_sum_r3_CG, Col_pat_2_CG_sum_r3_CG, Col_pat_3_CG_sum_r3_CG, C24_pat_1_CG_sum_r3_CG, C24_pat_2_CG_sum_r3_CG, C24_pat_3_CG_sum_r3_CG,
 r3_pat_1_CG_sum_r3_CG, r3_pat_2_CG_sum_r3_CG, r3_pat_3_CG_sum_r3_CG, r1_pat_1_CG_sum_r3_CG, r1_pat_2_CG_sum_r3_CG, r1_pat_3_CG_sum_r3_CG)
r3_CGr_mat_mCG=makedatadf(C24_mat_1_CG_sum_r3_CG, C24_mat_2_CG_sum_r3_CG, C24_mat_3_CG_sum_r3_CG, Col_mat_1_CG_sum_r3_CG, Col_mat_2_CG_sum_r3_CG, Col_mat_3_CG_sum_r3_CG,
 r1_mat_1_CG_sum_r3_CG, r1_mat_2_CG_sum_r3_CG, r1_mat_3_CG_sum_r3_CG, r3_mat_1_CG_sum_r3_CG, r3_mat_2_CG_sum_r3_CG, r3_mat_3_CG_sum_r3_CG)

# replicate means per cross (one column per cross)
r3_CGr_pat_mCG_avg=makeavgdf(r3_CGr_pat_mCG)
r3_CGr_mat_mCG_avg=makeavgdf(r3_CGr_mat_mCG)

# merge mat and pat dfs on region coordinates; clashing column names get _mat/_pat suffixes.
# The outer merge followed by dropna() keeps only regions present in both tables.
r3_CGr_matpat_mCG=r3_CGr_mat_mCG.merge(r3_CGr_pat_mCG, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r3_CGr_matpat_avg_mCG=r3_CGr_mat_mCG_avg.merge(r3_CGr_pat_mCG_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()

# difference df (see makediffdf), so maternal and paternal contrasts can share one plot
r3_CGr_matpat_avgdiff_mCG=makediffdf(r3_CGr_matpat_avg_mCG)

# add cap DMR indicator: regions found in r3_CGr_cap50_bed pick up its columns (including 'DMR class');
# fillna() then replaces every remaining NaN in the frame with "not limited".
# NOTE(review): fillna is frame-wide, not restricted to 'DMR class' - fine only if r3_CGr_cap50_bed adds no other columns; confirm.
r3_CGr_matpat_mCG=r3_CGr_matpat_mCG.merge(r3_CGr_cap50_bed, on=['chr','start','end'], how="left")
r3_CGr_matpat_mCG=r3_CGr_matpat_mCG.fillna("not limited")

r3_CGr_matpat_avg_mCG=r3_CGr_matpat_avg_mCG.merge(r3_CGr_cap50_bed, on=['chr','start','end'], how="left")
r3_CGr_matpat_avg_mCG=r3_CGr_matpat_avg_mCG.fillna("not limited")

r3_CGr_matpat_avgdiff_mCG=r3_CGr_matpat_avgdiff_mCG.merge(r3_CGr_cap50_bed, on=['chr','start','end'], how="left")
r3_CGr_matpat_avgdiff_mCG=r3_CGr_matpat_avgdiff_mCG.fillna("not limited")

In [None]:
# Paternal- and maternal-allele tables, CHG context (presumably summed over r3 CG hyper-DMRs, per the *_CHG_sum_r3_CG names).
# makedatadf labels its four argument groups, in order, as C24xCol, ColxC24, r1xr3 and r3xr1 (replicates 1-3 each).
r3_CGr_pat_mCHG=makedatadf(Col_pat_1_CHG_sum_r3_CG, Col_pat_2_CHG_sum_r3_CG, Col_pat_3_CHG_sum_r3_CG, C24_pat_1_CHG_sum_r3_CG, C24_pat_2_CHG_sum_r3_CG, C24_pat_3_CHG_sum_r3_CG,
 r3_pat_1_CHG_sum_r3_CG, r3_pat_2_CHG_sum_r3_CG, r3_pat_3_CHG_sum_r3_CG, r1_pat_1_CHG_sum_r3_CG, r1_pat_2_CHG_sum_r3_CG, r1_pat_3_CHG_sum_r3_CG)
r3_CGr_mat_mCHG=makedatadf(C24_mat_1_CHG_sum_r3_CG, C24_mat_2_CHG_sum_r3_CG, C24_mat_3_CHG_sum_r3_CG, Col_mat_1_CHG_sum_r3_CG, Col_mat_2_CHG_sum_r3_CG, Col_mat_3_CHG_sum_r3_CG,
 r1_mat_1_CHG_sum_r3_CG, r1_mat_2_CHG_sum_r3_CG, r1_mat_3_CHG_sum_r3_CG, r3_mat_1_CHG_sum_r3_CG, r3_mat_2_CHG_sum_r3_CG, r3_mat_3_CHG_sum_r3_CG)

# replicate means per cross (one column per cross)
r3_CGr_pat_mCHG_avg=makeavgdf(r3_CGr_pat_mCHG)
r3_CGr_mat_mCHG_avg=makeavgdf(r3_CGr_mat_mCHG)

# merge mat and pat dfs on region coordinates; clashing column names get _mat/_pat suffixes.
# The outer merge followed by dropna() keeps only regions present in both tables.
r3_CGr_matpat_mCHG=r3_CGr_mat_mCHG.merge(r3_CGr_pat_mCHG, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r3_CGr_matpat_avg_mCHG=r3_CGr_mat_mCHG_avg.merge(r3_CGr_pat_mCHG_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()

# difference df (see makediffdf), so maternal and paternal contrasts can share one plot
r3_CGr_matpat_avgdiff_mCHG=makediffdf(r3_CGr_matpat_avg_mCHG)

# add cap DMR indicator: regions found in r3_CGr_cap50_bed pick up its columns (including 'DMR class');
# fillna() then replaces every remaining NaN in the frame with "not limited".
# NOTE(review): fillna is frame-wide, not restricted to 'DMR class' - fine only if r3_CGr_cap50_bed adds no other columns; confirm.
r3_CGr_matpat_mCHG=r3_CGr_matpat_mCHG.merge(r3_CGr_cap50_bed, on=['chr','start','end'], how="left")
r3_CGr_matpat_mCHG=r3_CGr_matpat_mCHG.fillna("not limited")

r3_CGr_matpat_avg_mCHG=r3_CGr_matpat_avg_mCHG.merge(r3_CGr_cap50_bed, on=['chr','start','end'], how="left")
r3_CGr_matpat_avg_mCHG=r3_CGr_matpat_avg_mCHG.fillna("not limited")

r3_CGr_matpat_avgdiff_mCHG=r3_CGr_matpat_avgdiff_mCHG.merge(r3_CGr_cap50_bed, on=['chr','start','end'], how="left")
r3_CGr_matpat_avgdiff_mCHG=r3_CGr_matpat_avgdiff_mCHG.fillna("not limited")

In [None]:
# Paternal- and maternal-allele tables, CHH context, read from the *_CHH_sumby_r3_v_wt_CG_hyper.bed files.
# makedatadf labels its four argument groups, in order, as C24xCol, ColxC24, r1xr3 and r3xr1 (replicates 1-3 each).
r3_CGr_pat_mCHH=makedatadf(Col_pat_1_CHH_sum_r3_CG, Col_pat_2_CHH_sum_r3_CG, Col_pat_3_CHH_sum_r3_CG, C24_pat_1_CHH_sum_r3_CG, C24_pat_2_CHH_sum_r3_CG, C24_pat_3_CHH_sum_r3_CG,
 r3_pat_1_CHH_sum_r3_CG, r3_pat_2_CHH_sum_r3_CG, r3_pat_3_CHH_sum_r3_CG, r1_pat_1_CHH_sum_r3_CG, r1_pat_2_CHH_sum_r3_CG, r1_pat_3_CHH_sum_r3_CG)
r3_CGr_mat_mCHH=makedatadf(C24_mat_1_CHH_sum_r3_CG, C24_mat_2_CHH_sum_r3_CG, C24_mat_3_CHH_sum_r3_CG, Col_mat_1_CHH_sum_r3_CG, Col_mat_2_CHH_sum_r3_CG, Col_mat_3_CHH_sum_r3_CG,
 r1_mat_1_CHH_sum_r3_CG, r1_mat_2_CHH_sum_r3_CG, r1_mat_3_CHH_sum_r3_CG, r3_mat_1_CHH_sum_r3_CG, r3_mat_2_CHH_sum_r3_CG, r3_mat_3_CHH_sum_r3_CG)

# replicate means per cross (one column per cross)
r3_CGr_pat_mCHH_avg=makeavgdf(r3_CGr_pat_mCHH)
r3_CGr_mat_mCHH_avg=makeavgdf(r3_CGr_mat_mCHH)

# merge mat and pat dfs on region coordinates; clashing column names get _mat/_pat suffixes.
# The outer merge followed by dropna() keeps only regions present in both tables.
r3_CGr_matpat_mCHH=r3_CGr_mat_mCHH.merge(r3_CGr_pat_mCHH, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r3_CGr_matpat_avg_mCHH=r3_CGr_mat_mCHH_avg.merge(r3_CGr_pat_mCHH_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()

# difference df (see makediffdf), so maternal and paternal contrasts can share one plot
r3_CGr_matpat_avgdiff_mCHH=makediffdf(r3_CGr_matpat_avg_mCHH)

# add cap DMR indicator: regions found in r3_CGr_cap50_bed pick up its columns (including 'DMR class');
# fillna() then replaces every remaining NaN in the frame with "not limited".
# NOTE(review): fillna is frame-wide, not restricted to 'DMR class' - fine only if r3_CGr_cap50_bed adds no other columns; confirm.
r3_CGr_matpat_mCHH=r3_CGr_matpat_mCHH.merge(r3_CGr_cap50_bed, on=['chr','start','end'], how="left")
r3_CGr_matpat_mCHH=r3_CGr_matpat_mCHH.fillna("not limited")

r3_CGr_matpat_avg_mCHH=r3_CGr_matpat_avg_mCHH.merge(r3_CGr_cap50_bed, on=['chr','start','end'], how="left")
r3_CGr_matpat_avg_mCHH=r3_CGr_matpat_avg_mCHH.fillna("not limited")

r3_CGr_matpat_avgdiff_mCHH=r3_CGr_matpat_avgdiff_mCHH.merge(r3_CGr_cap50_bed, on=['chr','start','end'], how="left")
r3_CGr_matpat_avgdiff_mCHH=r3_CGr_matpat_avgdiff_mCHH.fillna("not limited")

In [None]:
# Paternal- and maternal-allele tables, CG context (presumably summed over r3 CHG hyper-DMRs, per the *_CG_sum_r3_CHG names).
# makedatadf labels its four argument groups, in order, as C24xCol, ColxC24, r1xr3 and r3xr1 (replicates 1-3 each).
r3_CHGr_pat_mCG=makedatadf(Col_pat_1_CG_sum_r3_CHG, Col_pat_2_CG_sum_r3_CHG, Col_pat_3_CG_sum_r3_CHG, C24_pat_1_CG_sum_r3_CHG, C24_pat_2_CG_sum_r3_CHG, C24_pat_3_CG_sum_r3_CHG,
 r3_pat_1_CG_sum_r3_CHG, r3_pat_2_CG_sum_r3_CHG, r3_pat_3_CG_sum_r3_CHG, r1_pat_1_CG_sum_r3_CHG, r1_pat_2_CG_sum_r3_CHG, r1_pat_3_CG_sum_r3_CHG)
r3_CHGr_mat_mCG=makedatadf(C24_mat_1_CG_sum_r3_CHG, C24_mat_2_CG_sum_r3_CHG, C24_mat_3_CG_sum_r3_CHG, Col_mat_1_CG_sum_r3_CHG, Col_mat_2_CG_sum_r3_CHG, Col_mat_3_CG_sum_r3_CHG,
 r1_mat_1_CG_sum_r3_CHG, r1_mat_2_CG_sum_r3_CHG, r1_mat_3_CG_sum_r3_CHG, r3_mat_1_CG_sum_r3_CHG, r3_mat_2_CG_sum_r3_CHG, r3_mat_3_CG_sum_r3_CHG)

# replicate means per cross (one column per cross)
r3_CHGr_pat_mCG_avg=makeavgdf(r3_CHGr_pat_mCG)
r3_CHGr_mat_mCG_avg=makeavgdf(r3_CHGr_mat_mCG)

# merge mat and pat dfs on region coordinates; clashing column names get _mat/_pat suffixes.
# The outer merge followed by dropna() keeps only regions present in both tables.
r3_CHGr_matpat_mCG=r3_CHGr_mat_mCG.merge(r3_CHGr_pat_mCG, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r3_CHGr_matpat_avg_mCG=r3_CHGr_mat_mCG_avg.merge(r3_CHGr_pat_mCG_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()

# difference df (see makediffdf), so maternal and paternal contrasts can share one plot
r3_CHGr_matpat_avgdiff_mCG=makediffdf(r3_CHGr_matpat_avg_mCG)

In [None]:
# Paternal- and maternal-allele tables, CG context (presumably summed over r3 CHH hyper-DMRs, per the *_CG_sum_r3_CHH names).
# makedatadf labels its four argument groups, in order, as C24xCol, ColxC24, r1xr3 and r3xr1 (replicates 1-3 each).
r3_CHHr_pat_mCG=makedatadf(Col_pat_1_CG_sum_r3_CHH, Col_pat_2_CG_sum_r3_CHH, Col_pat_3_CG_sum_r3_CHH, C24_pat_1_CG_sum_r3_CHH, C24_pat_2_CG_sum_r3_CHH, C24_pat_3_CG_sum_r3_CHH,
 r3_pat_1_CG_sum_r3_CHH, r3_pat_2_CG_sum_r3_CHH, r3_pat_3_CG_sum_r3_CHH, r1_pat_1_CG_sum_r3_CHH, r1_pat_2_CG_sum_r3_CHH, r1_pat_3_CG_sum_r3_CHH)
r3_CHHr_mat_mCG=makedatadf(C24_mat_1_CG_sum_r3_CHH, C24_mat_2_CG_sum_r3_CHH, C24_mat_3_CG_sum_r3_CHH, Col_mat_1_CG_sum_r3_CHH, Col_mat_2_CG_sum_r3_CHH, Col_mat_3_CG_sum_r3_CHH,
 r1_mat_1_CG_sum_r3_CHH, r1_mat_2_CG_sum_r3_CHH, r1_mat_3_CG_sum_r3_CHH, r3_mat_1_CG_sum_r3_CHH, r3_mat_2_CG_sum_r3_CHH, r3_mat_3_CG_sum_r3_CHH)

# replicate means per cross (one column per cross)
r3_CHHr_pat_mCG_avg=makeavgdf(r3_CHHr_pat_mCG)
r3_CHHr_mat_mCG_avg=makeavgdf(r3_CHHr_mat_mCG)

# merge mat and pat dfs on region coordinates; clashing column names get _mat/_pat suffixes.
# The outer merge followed by dropna() keeps only regions present in both tables.
r3_CHHr_matpat_mCG=r3_CHHr_mat_mCG.merge(r3_CHHr_pat_mCG, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r3_CHHr_matpat_avg_mCG=r3_CHHr_mat_mCG_avg.merge(r3_CHHr_pat_mCG_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()

# difference df (see makediffdf), so maternal and paternal contrasts can share one plot
r3_CHHr_matpat_avgdiff_mCG=makediffdf(r3_CHHr_matpat_avg_mCG)

In [None]:
# Paternal- and maternal-allele tables, CHG context (presumably summed over r3 CHG hyper-DMRs, per the *_CHG_sum_r3_CHG names).
# makedatadf labels its four argument groups, in order, as C24xCol, ColxC24, r1xr3 and r3xr1 (replicates 1-3 each).
r3_CHGr_pat_mCHG=makedatadf(Col_pat_1_CHG_sum_r3_CHG, Col_pat_2_CHG_sum_r3_CHG, Col_pat_3_CHG_sum_r3_CHG, C24_pat_1_CHG_sum_r3_CHG, C24_pat_2_CHG_sum_r3_CHG, C24_pat_3_CHG_sum_r3_CHG,
 r3_pat_1_CHG_sum_r3_CHG, r3_pat_2_CHG_sum_r3_CHG, r3_pat_3_CHG_sum_r3_CHG, r1_pat_1_CHG_sum_r3_CHG, r1_pat_2_CHG_sum_r3_CHG, r1_pat_3_CHG_sum_r3_CHG)
r3_CHGr_mat_mCHG=makedatadf(C24_mat_1_CHG_sum_r3_CHG, C24_mat_2_CHG_sum_r3_CHG, C24_mat_3_CHG_sum_r3_CHG, Col_mat_1_CHG_sum_r3_CHG, Col_mat_2_CHG_sum_r3_CHG, Col_mat_3_CHG_sum_r3_CHG,
 r1_mat_1_CHG_sum_r3_CHG, r1_mat_2_CHG_sum_r3_CHG, r1_mat_3_CHG_sum_r3_CHG, r3_mat_1_CHG_sum_r3_CHG, r3_mat_2_CHG_sum_r3_CHG, r3_mat_3_CHG_sum_r3_CHG)

# replicate means per cross (one column per cross)
r3_CHGr_pat_mCHG_avg=makeavgdf(r3_CHGr_pat_mCHG)
r3_CHGr_mat_mCHG_avg=makeavgdf(r3_CHGr_mat_mCHG)

# merge mat and pat dfs on region coordinates; clashing column names get _mat/_pat suffixes.
# The outer merge followed by dropna() keeps only regions present in both tables.
r3_CHGr_matpat_mCHG=r3_CHGr_mat_mCHG.merge(r3_CHGr_pat_mCHG, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r3_CHGr_matpat_avg_mCHG=r3_CHGr_mat_mCHG_avg.merge(r3_CHGr_pat_mCHG_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()

# difference df (see makediffdf), so maternal and paternal contrasts can share one plot
r3_CHGr_matpat_avgdiff_mCHG=makediffdf(r3_CHGr_matpat_avg_mCHG)

In [None]:
# Paternal- and maternal-allele tables, CHH context (presumably summed over r3 CHH hyper-DMRs, per the *_CHH_sum_r3_CHH names).
# makedatadf labels its four argument groups, in order, as C24xCol, ColxC24, r1xr3 and r3xr1 (replicates 1-3 each).
r3_CHHr_pat_mCHH=makedatadf(Col_pat_1_CHH_sum_r3_CHH, Col_pat_2_CHH_sum_r3_CHH, Col_pat_3_CHH_sum_r3_CHH, C24_pat_1_CHH_sum_r3_CHH, C24_pat_2_CHH_sum_r3_CHH, C24_pat_3_CHH_sum_r3_CHH,
 r3_pat_1_CHH_sum_r3_CHH, r3_pat_2_CHH_sum_r3_CHH, r3_pat_3_CHH_sum_r3_CHH, r1_pat_1_CHH_sum_r3_CHH, r1_pat_2_CHH_sum_r3_CHH, r1_pat_3_CHH_sum_r3_CHH)
r3_CHHr_mat_mCHH=makedatadf(C24_mat_1_CHH_sum_r3_CHH, C24_mat_2_CHH_sum_r3_CHH, C24_mat_3_CHH_sum_r3_CHH, Col_mat_1_CHH_sum_r3_CHH, Col_mat_2_CHH_sum_r3_CHH, Col_mat_3_CHH_sum_r3_CHH,
 r1_mat_1_CHH_sum_r3_CHH, r1_mat_2_CHH_sum_r3_CHH, r1_mat_3_CHH_sum_r3_CHH, r3_mat_1_CHH_sum_r3_CHH, r3_mat_2_CHH_sum_r3_CHH, r3_mat_3_CHH_sum_r3_CHH)

# replicate means per cross (one column per cross)
r3_CHHr_pat_mCHH_avg=makeavgdf(r3_CHHr_pat_mCHH)
r3_CHHr_mat_mCHH_avg=makeavgdf(r3_CHHr_mat_mCHH)

# merge mat and pat dfs on region coordinates; clashing column names get _mat/_pat suffixes.
# The outer merge followed by dropna() keeps only regions present in both tables.
r3_CHHr_matpat_mCHH=r3_CHHr_mat_mCHH.merge(r3_CHHr_pat_mCHH, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r3_CHHr_matpat_avg_mCHH=r3_CHHr_mat_mCHH_avg.merge(r3_CHHr_pat_mCHH_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()

# difference df (see makediffdf), so maternal and paternal contrasts can share one plot
r3_CHHr_matpat_avgdiff_mCHH=makediffdf(r3_CHHr_matpat_avg_mCHH)

In [None]:
# allelic data sumby bulk r1 vs C24 DMRs
# Variable naming: <allele>_<mat|pat>_<replicate>_<context>_sum_r1_<DMR context>.
# Judging by the file names, crosses are written mother x father: e.g. Col_mat_* comes from the
# Col_spiked file of ColxC24 and Col_pat_* from the Col_spiked file of C24xCol - confirm.
# NOTE(review): sumpath is rebound here; readindata is defined elsewhere and presumably reads from sumpath - confirm.
sumpath="/lab/solexa_gehring/elizabeth/allelic_emseq/sumby_2/C24_dmr_sumby/"

# --- CG context over r1_v_C24 CG hyper-DMR files ---
Col_mat_1_CG_sum_r1_CG=readindata('ColxC24_1_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')
Col_mat_2_CG_sum_r1_CG=readindata('ColxC24_2_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')
Col_mat_3_CG_sum_r1_CG=readindata('ColxC24_3_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')

Col_pat_1_CG_sum_r1_CG=readindata('C24xCol_1_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')
Col_pat_2_CG_sum_r1_CG=readindata('C24xCol_2_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')
Col_pat_3_CG_sum_r1_CG=readindata('C24xCol_3_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')

C24_mat_1_CG_sum_r1_CG=readindata('C24xCol_1_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')
C24_mat_2_CG_sum_r1_CG=readindata('C24xCol_2_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')
C24_mat_3_CG_sum_r1_CG=readindata('C24xCol_3_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')

C24_pat_1_CG_sum_r1_CG=readindata('ColxC24_1_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')
C24_pat_2_CG_sum_r1_CG=readindata('ColxC24_2_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')
C24_pat_3_CG_sum_r1_CG=readindata('ColxC24_3_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')

r3_mat_1_CG_sum_r1_CG=readindata('r3xr1_1_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')
r3_mat_2_CG_sum_r1_CG=readindata('r3xr1_2_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')
r3_mat_3_CG_sum_r1_CG=readindata('r3xr1_3_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')

r3_pat_1_CG_sum_r1_CG=readindata('r1xr3_1_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')
r3_pat_2_CG_sum_r1_CG=readindata('r1xr3_2_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')
r3_pat_3_CG_sum_r1_CG=readindata('r1xr3_3_Col_spiked_CG_sumby_r1_v_C24_CG_hyper.bed')

r1_mat_1_CG_sum_r1_CG=readindata('r1xr3_1_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')
r1_mat_2_CG_sum_r1_CG=readindata('r1xr3_2_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')
r1_mat_3_CG_sum_r1_CG=readindata('r1xr3_3_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')

r1_pat_1_CG_sum_r1_CG=readindata('r3xr1_1_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')
r1_pat_2_CG_sum_r1_CG=readindata('r3xr1_2_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')
r1_pat_3_CG_sum_r1_CG=readindata('r3xr1_3_C24_pseudo_CG_sumby_r1_v_C24_CG_hyper.bed')



# makedatadf labels its four argument groups, in order, as C24xCol, ColxC24, r1xr3 and r3xr1 (replicates 1-3 each).
r1_CGr_pat_mCG=makedatadf(Col_pat_1_CG_sum_r1_CG, Col_pat_2_CG_sum_r1_CG, Col_pat_3_CG_sum_r1_CG, C24_pat_1_CG_sum_r1_CG, C24_pat_2_CG_sum_r1_CG, C24_pat_3_CG_sum_r1_CG,
 r3_pat_1_CG_sum_r1_CG, r3_pat_2_CG_sum_r1_CG, r3_pat_3_CG_sum_r1_CG, r1_pat_1_CG_sum_r1_CG, r1_pat_2_CG_sum_r1_CG, r1_pat_3_CG_sum_r1_CG)
r1_CGr_mat_mCG=makedatadf(C24_mat_1_CG_sum_r1_CG, C24_mat_2_CG_sum_r1_CG, C24_mat_3_CG_sum_r1_CG, Col_mat_1_CG_sum_r1_CG, Col_mat_2_CG_sum_r1_CG, Col_mat_3_CG_sum_r1_CG,
 r1_mat_1_CG_sum_r1_CG, r1_mat_2_CG_sum_r1_CG, r1_mat_3_CG_sum_r1_CG, r3_mat_1_CG_sum_r1_CG, r3_mat_2_CG_sum_r1_CG, r3_mat_3_CG_sum_r1_CG)

r1_CGr_pat_mCG_avg=makeavgdf(r1_CGr_pat_mCG)
r1_CGr_mat_mCG_avg=makeavgdf(r1_CGr_mat_mCG)

# merge mat and pat dfs; outer merge + dropna() keeps only regions present in both tables
r1_CGr_matpat_mCG=r1_CGr_mat_mCG.merge(r1_CGr_pat_mCG, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r1_CGr_matpat_avg_mCG=r1_CGr_mat_mCG_avg.merge(r1_CGr_pat_mCG_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()



# --- CHG context over r1_v_C24 CHG hyper-DMR files ---
Col_mat_1_CHG_sum_r1_CHG=readindata('ColxC24_1_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')
Col_mat_2_CHG_sum_r1_CHG=readindata('ColxC24_2_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')
Col_mat_3_CHG_sum_r1_CHG=readindata('ColxC24_3_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')

Col_pat_1_CHG_sum_r1_CHG=readindata('C24xCol_1_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')
Col_pat_2_CHG_sum_r1_CHG=readindata('C24xCol_2_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')
Col_pat_3_CHG_sum_r1_CHG=readindata('C24xCol_3_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')

C24_mat_1_CHG_sum_r1_CHG=readindata('C24xCol_1_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')
C24_mat_2_CHG_sum_r1_CHG=readindata('C24xCol_2_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')
C24_mat_3_CHG_sum_r1_CHG=readindata('C24xCol_3_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')

C24_pat_1_CHG_sum_r1_CHG=readindata('ColxC24_1_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')
C24_pat_2_CHG_sum_r1_CHG=readindata('ColxC24_2_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')
C24_pat_3_CHG_sum_r1_CHG=readindata('ColxC24_3_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')

r3_mat_1_CHG_sum_r1_CHG=readindata('r3xr1_1_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')
r3_mat_2_CHG_sum_r1_CHG=readindata('r3xr1_2_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')
r3_mat_3_CHG_sum_r1_CHG=readindata('r3xr1_3_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')

r3_pat_1_CHG_sum_r1_CHG=readindata('r1xr3_1_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')
r3_pat_2_CHG_sum_r1_CHG=readindata('r1xr3_2_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')
r3_pat_3_CHG_sum_r1_CHG=readindata('r1xr3_3_Col_spiked_CHG_sumby_r1_v_C24_CHG_hyper.bed')

r1_mat_1_CHG_sum_r1_CHG=readindata('r1xr3_1_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')
r1_mat_2_CHG_sum_r1_CHG=readindata('r1xr3_2_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')
r1_mat_3_CHG_sum_r1_CHG=readindata('r1xr3_3_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')

r1_pat_1_CHG_sum_r1_CHG=readindata('r3xr1_1_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')
r1_pat_2_CHG_sum_r1_CHG=readindata('r3xr1_2_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')
r1_pat_3_CHG_sum_r1_CHG=readindata('r3xr1_3_C24_pseudo_CHG_sumby_r1_v_C24_CHG_hyper.bed')



# same argument order as for the CG tables: C24xCol, ColxC24, r1xr3, r3xr1 replicate groups
r1_CHGr_pat_mCHG=makedatadf(Col_pat_1_CHG_sum_r1_CHG, Col_pat_2_CHG_sum_r1_CHG, Col_pat_3_CHG_sum_r1_CHG, C24_pat_1_CHG_sum_r1_CHG, C24_pat_2_CHG_sum_r1_CHG, C24_pat_3_CHG_sum_r1_CHG,
 r3_pat_1_CHG_sum_r1_CHG, r3_pat_2_CHG_sum_r1_CHG, r3_pat_3_CHG_sum_r1_CHG, r1_pat_1_CHG_sum_r1_CHG, r1_pat_2_CHG_sum_r1_CHG, r1_pat_3_CHG_sum_r1_CHG)
r1_CHGr_mat_mCHG=makedatadf(C24_mat_1_CHG_sum_r1_CHG, C24_mat_2_CHG_sum_r1_CHG, C24_mat_3_CHG_sum_r1_CHG, Col_mat_1_CHG_sum_r1_CHG, Col_mat_2_CHG_sum_r1_CHG, Col_mat_3_CHG_sum_r1_CHG,
 r1_mat_1_CHG_sum_r1_CHG, r1_mat_2_CHG_sum_r1_CHG, r1_mat_3_CHG_sum_r1_CHG, r3_mat_1_CHG_sum_r1_CHG, r3_mat_2_CHG_sum_r1_CHG, r3_mat_3_CHG_sum_r1_CHG)

r1_CHGr_pat_mCHG_avg=makeavgdf(r1_CHGr_pat_mCHG)
r1_CHGr_mat_mCHG_avg=makeavgdf(r1_CHGr_mat_mCHG)

# merge mat and pat dfs; outer merge + dropna() keeps only regions present in both tables
r1_CHGr_matpat_mCHG=r1_CHGr_mat_mCHG.merge(r1_CHGr_pat_mCHG, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r1_CHGr_matpat_avg_mCHG=r1_CHGr_mat_mCHG_avg.merge(r1_CHGr_pat_mCHG_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()



# --- CHH context over r1_v_C24 CHH hyper-DMR files ---
Col_mat_1_CHH_sum_r1_CHH=readindata('ColxC24_1_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')
Col_mat_2_CHH_sum_r1_CHH=readindata('ColxC24_2_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')
Col_mat_3_CHH_sum_r1_CHH=readindata('ColxC24_3_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')

Col_pat_1_CHH_sum_r1_CHH=readindata('C24xCol_1_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')
Col_pat_2_CHH_sum_r1_CHH=readindata('C24xCol_2_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')
Col_pat_3_CHH_sum_r1_CHH=readindata('C24xCol_3_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')

C24_mat_1_CHH_sum_r1_CHH=readindata('C24xCol_1_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')
C24_mat_2_CHH_sum_r1_CHH=readindata('C24xCol_2_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')
C24_mat_3_CHH_sum_r1_CHH=readindata('C24xCol_3_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')

C24_pat_1_CHH_sum_r1_CHH=readindata('ColxC24_1_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')
C24_pat_2_CHH_sum_r1_CHH=readindata('ColxC24_2_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')
C24_pat_3_CHH_sum_r1_CHH=readindata('ColxC24_3_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')

r3_mat_1_CHH_sum_r1_CHH=readindata('r3xr1_1_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')
r3_mat_2_CHH_sum_r1_CHH=readindata('r3xr1_2_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')
r3_mat_3_CHH_sum_r1_CHH=readindata('r3xr1_3_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')

r3_pat_1_CHH_sum_r1_CHH=readindata('r1xr3_1_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')
r3_pat_2_CHH_sum_r1_CHH=readindata('r1xr3_2_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')
r3_pat_3_CHH_sum_r1_CHH=readindata('r1xr3_3_Col_spiked_CHH_sumby_r1_v_C24_CHH_hyper.bed')

r1_mat_1_CHH_sum_r1_CHH=readindata('r1xr3_1_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')
r1_mat_2_CHH_sum_r1_CHH=readindata('r1xr3_2_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')
r1_mat_3_CHH_sum_r1_CHH=readindata('r1xr3_3_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')

r1_pat_1_CHH_sum_r1_CHH=readindata('r3xr1_1_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')
r1_pat_2_CHH_sum_r1_CHH=readindata('r3xr1_2_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')
r1_pat_3_CHH_sum_r1_CHH=readindata('r3xr1_3_C24_pseudo_CHH_sumby_r1_v_C24_CHH_hyper.bed')



# same argument order as for the CG tables: C24xCol, ColxC24, r1xr3, r3xr1 replicate groups
r1_CHHr_pat_mCHH=makedatadf(Col_pat_1_CHH_sum_r1_CHH, Col_pat_2_CHH_sum_r1_CHH, Col_pat_3_CHH_sum_r1_CHH, C24_pat_1_CHH_sum_r1_CHH, C24_pat_2_CHH_sum_r1_CHH, C24_pat_3_CHH_sum_r1_CHH,
 r3_pat_1_CHH_sum_r1_CHH, r3_pat_2_CHH_sum_r1_CHH, r3_pat_3_CHH_sum_r1_CHH, r1_pat_1_CHH_sum_r1_CHH, r1_pat_2_CHH_sum_r1_CHH, r1_pat_3_CHH_sum_r1_CHH)
r1_CHHr_mat_mCHH=makedatadf(C24_mat_1_CHH_sum_r1_CHH, C24_mat_2_CHH_sum_r1_CHH, C24_mat_3_CHH_sum_r1_CHH, Col_mat_1_CHH_sum_r1_CHH, Col_mat_2_CHH_sum_r1_CHH, Col_mat_3_CHH_sum_r1_CHH,
 r1_mat_1_CHH_sum_r1_CHH, r1_mat_2_CHH_sum_r1_CHH, r1_mat_3_CHH_sum_r1_CHH, r3_mat_1_CHH_sum_r1_CHH, r3_mat_2_CHH_sum_r1_CHH, r3_mat_3_CHH_sum_r1_CHH)

r1_CHHr_pat_mCHH_avg=makeavgdf(r1_CHHr_pat_mCHH)
r1_CHHr_mat_mCHH_avg=makeavgdf(r1_CHHr_mat_mCHH)

# merge mat and pat dfs; outer merge + dropna() keeps only regions present in both tables
r1_CHHr_matpat_mCHH=r1_CHHr_mat_mCHH.merge(r1_CHHr_pat_mCHH, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()
r1_CHHr_matpat_avg_mCHH=r1_CHHr_mat_mCHH_avg.merge(r1_CHHr_pat_mCHH_avg, on=['chr','start','end'], how="outer", suffixes=['_mat','_pat']).dropna()


## Setup

In [None]:
#dictionaries for which replicates/samples to plot
# Each dict maps a display label (used as the x-axis category by sumby_boxplot) to the
# source column it is read from. A 'DMR':'DMR' entry carries the DMR-set column through
# so it can be used as the hue.
wt_r3_r7_1rep={'WT endo':'wt_1_avg_methy',r7it+' endo':'r7_1_avg_methy',r3it+' endo':'r3_1_avg_methy', 'DMR':'DMR'}
wt_r3_endoleaf_1rep={'WT endo':'wt_1_avg_methy',r3it+' endo':'r3_1_avg_methy', 'WT leaf':'wtl_2_avg_methy',r3it+' leaf':'r3l_1_avg_methy'}
wt_r3_endoleafsperm_1rep={'WT endo':'wt_1_avg_methy',r3it+' endo':'r3_1_avg_methy', 'WT leaf':'wtl_2_avg_methy',r3it+' leaf':'r3l_1_avg_methy',
                         'WT sperm':'wtsc_1_avg_methy',r3it+' sperm':'r3sc_1_avg_methy'}

# 'WT sperm 2' used to point at 'wtsc_1_avg_methy', which plotted replicate 1 twice.
# NOTE(review): assumes a 'wtsc_2_avg_methy' column exists, mirroring 'r3sc_2_avg_methy' - confirm.
wt_r3_endoleafsperm_allrep={'WT endo 1':'wt_1_avg_methy','WT endo 2':'wt_2_avg_methy','WT endo 3':'wt_3_avg_methy',
                 r7it+' endo 1':'r7_1_avg_methy',r7it+' endo 2':'r7_2_avg_methy',r7it+' endo 3':'r7_3_avg_methy',
                 r3it+' endo 1':'r3_1_avg_methy', r3it+' endo 2':'r3_2_avg_methy', r3it+' endo 3':'r3_3_avg_methy',
                 'WT leaf 1':'wtl_1_avg_methy','WT leaf 2':'wtl_2_avg_methy','WT leaf 3':'wtl_3_avg_methy',
                 r7it+' leaf 1':'r7l_1_avg_methy',r7it+' leaf 2':'r7l_2_avg_methy',r7it+' leaf 3':'r7l_3_avg_methy',
                 r3it+' leaf 1':'r3l_1_avg_methy', r3it+' leaf 2':'r3l_2_avg_methy', r3it+' leaf 3':'r3l_3_avg_methy',
                 'WT sperm 1':'wtsc_1_avg_methy','WT sperm 2':'wtsc_2_avg_methy',r3it+' sperm 1':'r3sc_1_avg_methy',r3it+' sperm 2':'r3sc_2_avg_methy'}

wt_r3_r7_allrep={'WT endo 1':'wt_1_avg_methy','WT endo 2':'wt_2_avg_methy','WT endo 3':'wt_3_avg_methy',
                 r7it+' endo 1':'r7_1_avg_methy',r7it+' endo 2':'r7_2_avg_methy',r7it+' endo 3':'r7_3_avg_methy',
                 r3it+' endo 1':'r3_1_avg_methy', r3it+' endo 2':'r3_2_avg_methy', r3it+' endo 3':'r3_3_avg_methy',
                 'DMR':'DMR'}

wt_r3_r7_rdd_endoleaf_allrep={'WT endo 1':'wt_1_avg_methy','WT endo 2':'wt_2_avg_methy','WT endo 3':'wt_3_avg_methy',
                 r7it+' endo 1':'r7_1_avg_methy',r7it+' endo 2':'r7_2_avg_methy',r7it+' endo 3':'r7_3_avg_methy',
                 r3it+' endo 1':'r3_1_avg_methy', r3it+' endo 2':'r3_2_avg_methy', r3it+' endo 3':'r3_3_avg_methy',
                 rddit+' endo 1':'rdd_1_avg_methy', rddit+' endo 2':'rdd_2_avg_methy', rddit+' endo 3':'rdd_3_avg_methy',
                 'WT leaf 1':'wtl_1_avg_methy','WT leaf 2':'wtl_2_avg_methy','WT leaf 3':'wtl_3_avg_methy',
                 r7it+' leaf 1':'r7l_1_avg_methy',r7it+' leaf 2':'r7l_2_avg_methy',r7it+' leaf 3':'r7l_3_avg_methy',
                 r3it+' leaf 1':'r3l_1_avg_methy', r3it+' leaf 2':'r3l_2_avg_methy', r3it+' leaf 3':'r3l_3_avg_methy',
                 'DMR':'DMR'}

In [None]:
# Tag each table with the (italicised) mutant label in a 'DMR' column, so the r3 and r7
# tables can be told apart after they are concatenated and used as a plotting hue.
r3_CGr_endo_leaf['DMR']=r3it
r7_CGr_endo_leaf['DMR']=r7it

r3_CHGr_endo_leaf['DMR']=r3it
r7_CHGr_endo_leaf['DMR']=r7it

r3_CHHr_endo_leaf['DMR']=r3it
r7_CHHr_endo_leaf['DMR']=r7it

In [None]:
# Stack the r3 and r7 tables row-wise, one combined table per context.
# The original row indices are kept (no ignore_index), so index values repeat across the two halves.
r3_r7_endoCGr=pd.concat([r3_CGr_endo_leaf, r7_CGr_endo_leaf], axis=0)
r3_r7_endoCHGr=pd.concat([r3_CHGr_endo_leaf, r7_CHGr_endo_leaf], axis=0)
r3_r7_endoCHHr=pd.concat([r3_CHHr_endo_leaf, r7_CHHr_endo_leaf], axis=0)

## Figure: sumby boxplots in endosperm DMRs


In [None]:
def sumby_boxplot(df, samplesdict, mCcontext, hue=None, color=None):
    """Draw one box per sample for the selected columns of `df`.

    Parameters
    ----------
    df : DataFrame holding the per-region values to plot.
    samplesdict : dict mapping display label -> column name in `df`. Labels
        become the x-axis categories, in dict order.
    mCcontext : str such as "CG"; used in the axis label and legend title.
    hue : optional column name used to split each box by group. If
        `samplesdict` does not already carry that column through (e.g. via
        `'DMR': 'DMR'`), it is copied from `df`.
    color : optional single box color for the un-grouped plot; defaults to
        "#a1c9f4".

    Returns
    -------
    (fig, ax) of the new 6x6 inch figure, so callers can restyle and save it.
    """
    # Keep only the requested columns, renamed to their display labels.
    df2plot=pd.DataFrame()
    for key, value in samplesdict.items():
        df2plot[key]=df[value]

    fig, ax = plt.subplots(figsize=(6,6))
    value_name='summed m'+mCcontext

    if hue is not None:
        # Melt on the requested hue column (previously hard-coded to 'DMR',
        # which broke any other hue), making sure it is present first.
        if hue not in df2plot.columns:
            df2plot[hue]=df[hue]
        df2plot=df2plot.melt(id_vars=[hue], var_name='sample', value_name=value_name)
        sns.boxplot(data=df2plot, x="sample", y=value_name, hue=hue, palette="pastel", ax=ax)
        ax.legend(loc="best", fontsize=14, title=mCcontext+" hyperDMRs",title_fontsize=14)

    else:
        c=color if color is not None else "#a1c9f4"

        df2plot=df2plot.melt(var_name='sample', value_name=value_name)
        sns.boxplot(data=df2plot, x="sample", y=value_name, ax=ax, color=c)

    # general layout
    ax.set_xlabel("sample", fontsize=18)
    ax.tick_params(axis='both', labelsize=15)
    ax.set_ylabel("summed fraction m"+mCcontext, fontsize=20)

    # tight_layout is left to the caller, after any resizing or label rotation
    return fig, ax

# Fixed hex colors for the r3 and r7 DMR sets. r3color equals sumby_boxplot's default box color.
# NOTE(review): these look like the first two colors of seaborn's "pastel" palette - confirm.
r3color="#a1c9f4"
r7color="#ffb482"

In [None]:
# CG boxplot for the one-replicate WT / r7 / r3 endosperm samples, boxes split by the 'DMR' column; saved as PDF.
sumby_boxplot(r3_r7_endoCGr, wt_r3_r7_1rep, "CG", hue='DMR')
plt.tight_layout()
plt.savefig(figurepath+'endo_ros1_CGr_DMRs_sumbymCG.pdf', dpi=350)


In [None]:
# CG boxplot of the r3 table only: one-replicate endosperm and leaf samples, single color (no hue).
# NOTE(review): unlike the sibling cells there is no plt.tight_layout() before saving - confirm this is intended.
sumby_boxplot(r3_CGr_endo_leaf, wt_r3_endoleaf_1rep, "CG")
plt.savefig(figurepath+'endo_leaf_r3_CGr_DMRs_sumbymCG.pdf', dpi=350)


In [None]:
# CG boxplot for all endosperm replicates, split by the 'DMR' column.
fig, ax=sumby_boxplot(r3_r7_endoCGr, wt_r3_r7_allrep, "CG", hue='DMR')
# rotate the x tick labels and widen the figure to fit the nine sample names
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(10,6)
plt.tight_layout()
plt.savefig(figurepath+'endo_ros1_CGr_DMRs_sumbymCG_allreps.pdf', dpi=350)


In [None]:
# CG boxplot for all endosperm (incl. rdd) and leaf replicates, split by the 'DMR' column.
fig, ax=sumby_boxplot(r3_r7_endoCGr, wt_r3_r7_rdd_endoleaf_allrep, "CG", hue='DMR')
# rotate the x tick labels and widen the figure to fit all sample names
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(15,6)
plt.tight_layout()

plt.savefig(figurepath+'endo_ros1_CGr_DMRs_sumbymCG_endoleaf_allreps.pdf', dpi=350)


In [None]:
# CHG boxplot for the one-replicate WT / r7 / r3 endosperm samples, boxes split by the 'DMR' column; saved as PDF.
sumby_boxplot(r3_r7_endoCHGr, wt_r3_r7_1rep, "CHG", hue='DMR')
plt.tight_layout()
plt.savefig(figurepath+'endo_ros1_CHGr_DMRs_sumbymCHG.pdf', dpi=350)

In [None]:
# CHG boxplot for all endosperm (incl. rdd) and leaf replicates, split by the 'DMR' column.
fig, ax=sumby_boxplot(r3_r7_endoCHGr, wt_r3_r7_rdd_endoleaf_allrep, "CHG", hue='DMR')
# rotate the x tick labels and widen the figure to fit all sample names
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(15,6)
plt.tight_layout()

plt.savefig(figurepath+'endo_ros1_CHGr_DMRs_sumbymCHG_endoleaf_allreps.pdf', dpi=350)


In [None]:
# CHH boxplot for the one-replicate WT / r7 / r3 endosperm samples, boxes split by the 'DMR' column; saved as PDF.
sumby_boxplot(r3_r7_endoCHHr, wt_r3_r7_1rep, "CHH", hue='DMR')
plt.tight_layout()
plt.savefig(figurepath+'endo_ros1_CHHr_DMRs_sumbymCHH.pdf', dpi=350)

In [None]:
# CHH boxplot for all endosperm (incl. rdd) and leaf replicates, split by the 'DMR' column.
fig, ax=sumby_boxplot(r3_r7_endoCHHr, wt_r3_r7_rdd_endoleaf_allrep, "CHH", hue='DMR')
# rotate the x tick labels and widen the figure to fit all sample names
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(15,6)
plt.tight_layout()

plt.savefig(figurepath+'endo_ros1_CHHr_DMRs_sumbymCHH_endoleaf_allreps.pdf', dpi=350)


In [None]:
# 
# CG boxplot of one replicate each of endosperm, leaf and sperm samples (WT and r3), single color.
fig, ax=sumby_boxplot(r3_CGr_endo_leaf_sperm, wt_r3_endoleafsperm_1rep, "CG")
ax.tick_params(axis='x', labelrotation=35)
plt.tight_layout()
plt.savefig(figurepath+'endo_leaf_sperm_1rep_r3_CGr_DMRs_sumbymCG.pdf', dpi=350)


In [None]:
# CG boxplot of every endosperm, leaf and sperm replicate listed in wt_r3_endoleafsperm_allrep, single color.
fig, ax=sumby_boxplot(r3_CGr_endo_leaf_sperm, wt_r3_endoleafsperm_allrep, "CG")
# rotate the x tick labels and widen the figure to fit all sample names
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(15,6)
plt.tight_layout()

plt.savefig(figurepath+'endo_leaf_sperm_allrep_r3_CGr_DMRs_sumbymCG.pdf', dpi=350)


In [None]:
# CHG boxplot of every endosperm, leaf and sperm replicate listed in wt_r3_endoleafsperm_allrep, single color.
fig, ax=sumby_boxplot(r3_CHGr_endo_leaf_sperm, wt_r3_endoleafsperm_allrep, "CHG")
# rotate the x tick labels and widen the figure to fit all sample names
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(15,6)
plt.tight_layout()

plt.savefig(figurepath+'endo_leaf_sperm_allrep_r3_CHGr_DMRs_sumbymCHG.pdf', dpi=350)


In [None]:
# CHH boxplot of every endosperm, leaf and sperm replicate listed in wt_r3_endoleafsperm_allrep, single color.
fig, ax=sumby_boxplot(r3_CHHr_endo_leaf_sperm, wt_r3_endoleafsperm_allrep, "CHH")
# rotate the x tick labels and widen the figure to fit all sample names
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(15,6)
plt.tight_layout()

plt.savefig(figurepath+'endo_leaf_sperm_allrep_r3_CHHr_DMRs_sumbymCHH.pdf', dpi=350)


## Figure: mat/pat mC sumby bulk endosperm-defined DMRs in scatterplot

### functions - matpat sumby DMR scatterplot and correlation

In [None]:
import scipy as sp

def get_correlation (df, col1, col2):
    """Return (Pearson's r, p-value) for columns `col1` and `col2` of `df`."""
    result = sp.stats.pearsonr(df[col1], df[col2])
    # unpack explicitly so callers always receive a plain two-item tuple
    coefficient, p_value = result
    return coefficient, p_value

def scatter_dmrmatpat(df, x, y, xlabel, ylabel, hue='DMR class', xlim=(-0.02,1.02), ylim=(-0.02, 1.02),correlate=False, left_text=True, saveas=False):
    """Scatter two columns of ``df`` against each other, optionally annotated with Pearson's r.

    Parameters
    ----------
    df : dataframe holding the plotted columns.
    x, y : column names for the x and y axes.
    xlabel, ylabel : axis label text.
    hue : column name used to colour points, or None for a single colour.
        For the special values "_lefton_TE" and "_lefton_gene" a two-entry
        custom legend is drawn; for any other hue no legend is drawn.
    xlim, ylim : axis limits (two-element sequences).
    correlate : if true, compute Pearson's r / p with ``get_correlation``,
        write r on the axes and print both values.
    left_text : place the r annotation at the upper left (true) or at the
        lower right (false) of the axes.
    saveas : output path; when falsy the figure is not written to disk.

    Returns
    -------
    ``(fig, ax)`` when the plot is not saved; ``None`` after saving (same
    contract as before, so notebook cells that save do not echo a tuple).
    """
    fig, ax = plt.subplots()

    if hue is None:
        sns.scatterplot(data=df, x=x, y=y, ax=ax)
    else:
        sns.scatterplot(data=df, x=x, y=y, hue=hue, ax=ax, legend=False)
        # Hand-made legends for the two feature-proximity hue columns.
        # NOTE(review): the "_lefton_TE" hue is labelled in terms of genes and vice versa,
        # and the marker colours are hard-coded to 'b'/'g' rather than read from the
        # scatter's palette -- confirm both against the upstream dataframes.
        proximity_labels = {
            "_lefton_TE": ['no gene within 1kb', 'gene within 1kb or intersecting'],
            "_lefton_gene": ['no TE within 1kb', 'TE within 1kb or intersecting'],
        }
        if isinstance(hue, str) and hue in proximity_labels:
            legend_elements = [
                Line2D([0], [0], marker='o', color='w', label='Scatter', markerfacecolor=face, markersize=7)
                for face in ('b', 'g')
            ]
            ax.legend(handles=legend_elements, labels=proximity_labels[hue])

    if correlate:
        r, p = get_correlation(df, x, y)
        # Axes-fraction coordinates: upper left, or lower right when left_text is false.
        text_x, text_y = (.05, .8) if left_text else (.65, .4)
        ax.text(text_x, text_y, "Pearson's r ={:.2f}".format(r), transform=ax.transAxes)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_xlabel(xlabel, fontsize=15)
    ax.set_ylabel(ylabel, fontsize=15)
    ax.tick_params(labelsize=12)

    # r and p only exist when a correlation was requested; printing them
    # unconditionally raised NameError for correlate=False.
    if correlate:
        print('r value ='+ str(r))

        print('p value ='+ str(p))

    if saveas:
        print('plot saved')
        fig.savefig(saveas, dpi=350)
        return None

    print('plot not saved')
    return fig, ax
            

### CG plots

In [None]:
# r3_CGr_matpat_avg_mCG: ColxC24_mat (x) vs C24xCol_pat (y), default hue, Pearson's r written at the upper left.
scatter_dmrmatpat(r3_CGr_matpat_avg_mCG, x="ColxC24_mat", y="C24xCol_pat", xlabel="Col-0 maternal allele (avg fraction mCG)", ylabel="Col-0 paternal allele (avg fraction mCG)", correlate=True, saveas=figurepath+"r3_CGr_sumby_Colmatpat_scatter.png")

In [None]:
# r3_CGr_matpat_avg_mCG: r3xr1_mat (x) vs r1xr3_pat (y); r annotation moved to the lower right (left_text=False).
scatter_dmrmatpat(r3_CGr_matpat_avg_mCG, x="r3xr1_mat", y="r1xr3_pat", xlabel=r3it+" maternal allele (avg fraction mCG)", ylabel=r3it+" paternal allele (avg fraction mCG)", correlate=True, left_text=False, saveas=figurepath+"r3_CGr_sumby_r3matpat_scatter.png")

In [None]:
# r3_CGr_matpat_avg_mCG: C24xCol_mat (x) vs ColxC24_pat (y); r annotation at the lower right.
scatter_dmrmatpat(r3_CGr_matpat_avg_mCG, x="C24xCol_mat", y="ColxC24_pat",  xlabel="C24 maternal allele (avg fraction mCG)", ylabel="C24 paternal allele (avg fraction mCG)", correlate=True, left_text=False, saveas=figurepath+"r3_CGr_sumby_C24matpat_scatter.png")

In [None]:
# r3_CGr_matpat_avg_mCG: r1xr3_mat (x) vs r3xr1_pat (y); r annotation at the lower right.
scatter_dmrmatpat(r3_CGr_matpat_avg_mCG, x="r1xr3_mat", y="r3xr1_pat", xlabel=r1it+" maternal allele (avg fraction mCG)", ylabel=r1it+" paternal allele (avg fraction mCG)", correlate=True, left_text=False, saveas=figurepath+"r3_CGr_sumby_r1matpat_scatter.png")

### nonCG plots

In [None]:
# r3_CHGr_matpat_avg_mCHG: ColxC24_mat (x) vs C24xCol_pat (y); hue=None so all points share one colour.
scatter_dmrmatpat(r3_CHGr_matpat_avg_mCHG, x="ColxC24_mat", y="C24xCol_pat", xlabel="Col-0 maternal allele (avg fraction mCHG)", 
                  ylabel="Col-0 paternal allele (avg fraction mCHG)", hue=None, correlate=True, saveas=figurepath+"r3_CHGr_sumby_Colmatpat_scatter.png")



In [None]:
# r3_CHGr_matpat_avg_mCHG: r3xr1_mat (x) vs r1xr3_pat (y); hue=None, r annotation at the default upper left.
scatter_dmrmatpat(r3_CHGr_matpat_avg_mCHG, x="r3xr1_mat", y="r1xr3_pat", xlabel=r3it+" mat allele (avg fraction mCHG)", 
                  ylabel=r3it+ " pat allele (avg fraction mCHG)", hue=None, correlate=True, saveas=figurepath+"r3_CHGr_sumby_r3matpat_scatter.png")


In [None]:
# r3_CHHr_matpat_avg_mCHH: ColxC24_mat (x) vs C24xCol_pat (y); hue=None so all points share one colour.
scatter_dmrmatpat(r3_CHHr_matpat_avg_mCHH, x="ColxC24_mat", y="C24xCol_pat", xlabel="Col-0 maternal allele (avg fraction mCHH)", 
                  ylabel="Col-0 paternal allele (avg fraction mCHH)", hue=None, correlate=True, saveas=figurepath+"r3_CHHr_sumby_Colmatpat_scatter.png")



In [None]:
# r3_CHHr_matpat_avg_mCHH: r3xr1_mat (x) vs r1xr3_pat (y); hue=None, r annotation at the default upper left.
scatter_dmrmatpat(r3_CHHr_matpat_avg_mCHH, x="r3xr1_mat", y="r1xr3_pat", xlabel=r3it+" mat allele (avg fraction mCHH)", 
                  ylabel=r3it+" pat allele (avg fraction mCHH)", hue=None, correlate=True, saveas=figurepath+"r3_CHHr_sumby_r3matpat_scatter.png")


In [None]:
# Four maternal-vs-paternal scatters from r1_CGr_matpat_avg_mCG (uncoloured, r at the lower right),
# one per parental genotype: r3, r1, C24, Col.
scatter_dmrmatpat(r1_CGr_matpat_avg_mCG, x="r3xr1_mat", y="r1xr3_pat", xlabel=r3it+" maternal allele (avg fraction mCG)", ylabel=r3it+" paternal allele (avg fraction mCG)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CGr_sumby_r3matpat_scatter.png")


scatter_dmrmatpat(r1_CGr_matpat_avg_mCG, x="r1xr3_mat", y="r3xr1_pat", xlabel=r1it+" maternal allele (avg fraction mCG)", ylabel=r1it+" paternal allele (avg fraction mCG)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CGr_sumby_r1matpat_scatter.png")


scatter_dmrmatpat(r1_CGr_matpat_avg_mCG, x="C24xCol_mat", y="ColxC24_pat", xlabel="C24 maternal allele (avg fraction mCG)", ylabel="C24 paternal allele (avg fraction mCG)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CGr_sumby_C24matpat_scatter.png")


scatter_dmrmatpat(r1_CGr_matpat_avg_mCG, x="ColxC24_mat", y="C24xCol_pat", xlabel="Col maternal allele (avg fraction mCG)", ylabel="Col paternal allele (avg fraction mCG)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CGr_sumby_Colmatpat_scatter.png")


In [None]:
# Four maternal-vs-paternal scatters from r1_CHGr_matpat_avg_mCHG (uncoloured, r at the lower right),
# one per parental genotype: r3, r1, C24, Col.
scatter_dmrmatpat(r1_CHGr_matpat_avg_mCHG, x="r3xr1_mat", y="r1xr3_pat", xlabel=r3it+" maternal allele (avg fraction mCHG)", ylabel=r3it+" paternal allele (avg fraction mCHG)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CHGr_sumby_r3matpat_scatter.png")


scatter_dmrmatpat(r1_CHGr_matpat_avg_mCHG, x="r1xr3_mat", y="r3xr1_pat", xlabel=r1it+" maternal allele (avg fraction mCHG)", ylabel=r1it+" paternal allele (avg fraction mCHG)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CHGr_sumby_r1matpat_scatter.png")


scatter_dmrmatpat(r1_CHGr_matpat_avg_mCHG, x="C24xCol_mat", y="ColxC24_pat", xlabel="C24 maternal allele (avg fraction mCHG)", ylabel="C24 paternal allele (avg fraction mCHG)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CHGr_sumby_C24matpat_scatter.png")


scatter_dmrmatpat(r1_CHGr_matpat_avg_mCHG, x="ColxC24_mat", y="C24xCol_pat", xlabel="Col maternal allele (avg fraction mCHG)", ylabel="Col paternal allele (avg fraction mCHG)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CHGr_sumby_Colmatpat_scatter.png")


In [None]:
# Four maternal-vs-paternal scatters from r1_CHHr_matpat_avg_mCHH (uncoloured, r at the lower right),
# one per parental genotype: r3, r1, C24, Col.
scatter_dmrmatpat(r1_CHHr_matpat_avg_mCHH, x="r3xr1_mat", y="r1xr3_pat", xlabel=r3it+" maternal allele (avg fraction mCHH)", ylabel=r3it+" paternal allele (avg fraction mCHH)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CHHr_sumby_r3matpat_scatter.png")


scatter_dmrmatpat(r1_CHHr_matpat_avg_mCHH, x="r1xr3_mat", y="r3xr1_pat", xlabel=r1it+" maternal allele (avg fraction mCHH)", ylabel=r1it+" paternal allele (avg fraction mCHH)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CHHr_sumby_r1matpat_scatter.png")


scatter_dmrmatpat(r1_CHHr_matpat_avg_mCHH, x="C24xCol_mat", y="ColxC24_pat", xlabel="C24 maternal allele (avg fraction mCHH)", ylabel="C24 paternal allele (avg fraction mCHH)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CHHr_sumby_C24matpat_scatter.png")


scatter_dmrmatpat(r1_CHHr_matpat_avg_mCHH, x="ColxC24_mat", y="C24xCol_pat", xlabel="Col maternal allele (avg fraction mCHH)", ylabel="Col paternal allele (avg fraction mCHH)", hue=None,correlate=True, left_text=False, saveas=figurepath+"r1_CHHr_sumby_Colmatpat_scatter.png")


# Allelic DMRs - expt 3

## setup and dfs

In [None]:
# Column names for the headerless three-column BED files read below
# (same fields as `bed_columns` from the setup cell).
bed_columns1 = ["chr", "start", "end"]

In [None]:
# Allelic DMR sets, each a headerless tab-separated BED file (chr, start, end) under dmrpath.
# Maternal-vs-paternal hypo sets: called in both WT and ros1, called in ros1 only, and the
# ros1-only subset from the "NEITHERcheck" file.
DMEdom=pd.read_csv(dmrpath+"WT_and_ros1_matVpat_hypo_allC.merge.bed", header=None, names=bed_columns1, sep=tab)
ROS1dom=pd.read_csv(dmrpath+"ros1_only_matVpat_hypo_allC.merge.bed", header=None, names=bed_columns1, sep=tab)
ROS1only=pd.read_csv(dmrpath+"ros1_only_NEITHERcheck_matVpat_hypo_allC.bed", header=None, names=bed_columns1, sep=tab)
# ros1-vs-WT hyper sets: both alleles ("PatMat"), maternal only, paternal only.
ROS1bi=pd.read_csv(dmrpath+"ros1_v_wt_PatMat_hyper_allC_C.merge.bed", header=None, names=bed_columns1, sep=tab)
ROS1mat=pd.read_csv(dmrpath+"ros1_v_wt_matonly_hyper_allC.bed", header=None, names=bed_columns1, sep=tab)
ROS1pat=pd.read_csv(dmrpath+"ros1_v_wt_patonly_hyper_allC.bed", header=None, names=bed_columns1, sep=tab)


In [None]:
# Left-join ROS1dom against ROS1only on the region coordinates; the `_merge` indicator
# column records whether each ROS1dom region also occurs in ROS1only.
allR1=ROS1dom.merge(ROS1only, on=['chr','start','end'], how='left', indicator=True)
# Keep the ROS1dom regions that are absent from ROS1only. The explicit copy makes this an
# independent dataframe, so adding columns to it later does not raise SettingWithCopyWarning
# (or silently write to a temporary).
justR1dom=allR1[allR1['_merge']=='left_only'].copy()

In [None]:
# Region length in bp for every allelic DMR set (absolute end - start difference).
DMEdom['length'] = (DMEdom['end'] - DMEdom['start']).abs()
ROS1only['length'] = (ROS1only['end'] - ROS1only['start']).abs()
justR1dom['length'] = (justR1dom['end'] - justR1dom['start']).abs()

ROS1mat['length'] = (ROS1mat['end'] - ROS1mat['start']).abs()
ROS1pat['length'] = (ROS1pat['end'] - ROS1pat['start']).abs()
ROS1bi['length'] = (ROS1bi['end'] - ROS1bi['start']).abs()


## numbers of interest

In [None]:
# Share of total ros1 hypermethylated length (biallelic + maternal + paternal sets)
# that is biallelic-or-maternal, and the share that is paternal.
print(
    "Fraction of ros1 biallelic or maternal hypermethylation over total hypermethylation: "
    + str(
        (ROS1bi['length'].sum() + ROS1mat['length'].sum())
        / (ROS1mat['length'].sum() + ROS1bi['length'].sum() + ROS1pat['length'].sum())
    )
)
print(
    "Fraction of ros1 paternal hypermethylation over total hypermethylation: "
    + str(
        (ROS1pat['length'].sum())
        / (ROS1mat['length'].sum() + ROS1bi['length'].sum() + ROS1pat['length'].sum())
    )
)

In [None]:
# Summed length (reported in kb) and count of the DMEdom regions.
print(
    "Total length within a DME-dominant region (kb): "
    + str(DMEdom['length'].sum() / 1000)
)
print(
    "Total number of DME-dominant regions called by DSS: "
    + str(DMEdom.shape[0])
)

In [None]:
# Summed length (reported in kb) and count of the ROS1only regions.
print(
    "Total length within a confident ROS1-antagonized region (kb): "
    + str(ROS1only['length'].sum() / 1000)
)
print(
    "Total number of confident ROS1-antagonized regions called by DSS: "
    + str(ROS1only.shape[0])
)

## histogram of DMR length

In [None]:
# Histogram of all DMEdom region lengths (no upper cut-off).
# NOTE: sns.histplot returns a matplotlib Axes, so `fig` here is an Axes, not a Figure.
fig=sns.histplot(data=DMEdom, x="length")
plt.savefig(figurepath+"DMEdom_length_histplot_nomax.png", dpi=350)


In [None]:
# Histogram of DMEdom lengths restricted to regions of at most 2000 bp.
# NOTE: sns.histplot returns a matplotlib Axes, so `fig` here is an Axes, not a Figure.
fig = sns.histplot(
    data=DMEdom.loc[DMEdom['length'].le(2000)],
    x="length",
    binrange=[0,2000],
)
plt.savefig(figurepath+"DMEdom_length_histplot_max2000.png", dpi=350)


In [None]:
# Histogram of ROS1only lengths restricted to regions of at most 2000 bp.
sns.histplot(
    data=ROS1only.loc[ROS1only['length'].le(2000)],
    x="length",
    binrange=[0,2000],
)
plt.savefig(figurepath+"ros1needed_length_histplot_max2000.png", dpi=350)


# Ends analysis plotting

## ends plotting function

In [None]:
def _ends_replicates(prefix, suffix, count=3):
    """Return the sample names ``<prefix>_<i>_<suffix>`` for replicates 1..count."""
    return [prefix+'_'+str(rep)+'_'+suffix for rep in range(1, count+1)]


def methylends_format (df, plotselect, title, legend, ticklist, ticklabels, vlines, vlinescenter, ymin, ymax, yint, context='CG', sRNA=False):
    """Draw an ends-analysis line plot for one named group of samples.

    Parameters
    ----------
    df : dataframe with 'window', 'mC' and 'sample' columns.
    plotselect : key naming the sample group to draw (see ``plotdict`` below).
    title : currently unused (the title call was disabled); kept for callers.
    legend : dict mapping legend label -> line colour; drives the legend only.
    ticklist, ticklabels : x tick positions and their labels.
    vlines, vlinescenter : x positions of the dotted green boundary lines and
        of the solid black centre line(s).
    ymin, ymax : vertical extent of the boundary lines and range of the y ticks.
    yint : y tick spacing; when None (or 0) the automatic y ticks are kept.
    context : methylation context used to build the sample names.
    sRNA : when true, label the y axis for 24nt sRNA instead of methylation.

    Returns
    -------
    (fig, ax) : the figure created here and the axes holding the line plot.

    Raises
    ------
    ValueError : if ``plotselect`` is not a known sample group.
    """
    # single-cell / Hsieh sample names spell the CG context as 'CpG'
    contextalt = 'CpG' if context=='CG' else context
    print(context)

    # name fragments shared by the sample lists
    col_suffix = 'Col_spiked_'+context+'_min5'
    c24_suffix = 'C24_pseudo_'+context+'_min5'
    leaf_suffix = 'leaf_Col_spiked_'+context+'_min5'
    sc_suffix = 'all_'+contextalt+'_min5_'+context+'con_pass_fixed'
    sc_dmeros1_suffix = 'all_'+contextalt+'_min5'

    # base sample lists; positions matter because the selections below index into them
    matplot = (_ends_replicates('C24xCol', c24_suffix) + _ends_replicates('ColxC24', col_suffix)
               + _ends_replicates('r1xr3', c24_suffix) + _ends_replicates('r3xr1', col_suffix))
    patplot = (_ends_replicates('C24xCol', col_suffix) + _ends_replicates('ColxC24', c24_suffix)
               + _ends_replicates('r1xr3', col_suffix) + _ends_replicates('r3xr1', c24_suffix))
    scplot = (_ends_replicates('wt_sc', sc_suffix, 2) + _ends_replicates('ros1_sc', sc_suffix, 2)
              + _ends_replicates('dme_sc', sc_suffix, 2) + _ends_replicates('dme_ros1_sc', sc_dmeros1_suffix, 2))
    hsiehplot = ['wt_endo_hs_all_'+contextalt, 'dme_endo_hs_all_'+contextalt]
    endoplot = (_ends_replicates('wt', col_suffix) + _ends_replicates('r7', col_suffix)
                + _ends_replicates('r3', col_suffix))
    leafplot = (_ends_replicates('wt', leaf_suffix) + _ends_replicates('r7', leaf_suffix)
                + _ends_replicates('r3', leaf_suffix))
    sRNAplot = ['embryo24nt','endo24nt']

    # derived selections
    matpat_c24 = matplot[0:3] + patplot[3:6] + matplot[6:9] + patplot[9:12]
    matpat_c24_1rep = [matplot[0],patplot[3],matplot[6],patplot[9]]
    matpat_col = matplot[3:6] + patplot[0:3] + matplot[9:12] + patplot[6:9]
    matpat_col_1rep = [matplot[3],patplot[0],matplot[9],patplot[6]]

    endo1rep = [endoplot[0], endoplot[3], endoplot[6]]
    bulkplot = endoplot+leafplot
    # NOTE(review): these hand-picked indices mix replicate numbers (e.g. index 11 is
    # wt leaf rep 3, index 16 is r3 leaf rep 2) -- presumably deliberate; confirm.
    bulk_r3 = [bulkplot[0], bulkplot[6], bulkplot[11], bulkplot[16]]
    bulkplot1rep = [bulkplot[0], bulkplot[4], bulkplot[6], bulkplot[11], bulkplot[13], bulkplot[16]]
    matpatplot = matplot+patplot
    matpat1rep = [matplot[0],matplot[6],patplot[0],patplot[6]]
    matpatscplot = matpatplot+scplot
    # flat concatenation: the previous version nested matpat1rep as a single list
    # element, which Series.isin cannot match against sample names
    matpatsc1rep = matpat1rep + [scplot[0], scplot[2], scplot[4], scplot[6]]
    matpatschsiehplot = matpatscplot+hsiehplot
    matpatschsieh1rep = matpatsc1rep+hsiehplot
    allplot = matpatschsiehplot+bulkplot
    matpatdme = matpatplot+hsiehplot
    matpatdme1rep = matpat1rep+hsiehplot
    bulkdme = bulkplot+hsiehplot
    patscplot = patplot+scplot
    patsc1rep = [patplot[0], patplot[6], scplot[0], scplot[2], scplot[4], scplot[6]]
    matpatbulkplot = matpatplot+bulkplot
    matdmeplot = matplot+hsiehplot

    plotdict={'dme':hsiehplot,'matpat':matpatplot, 'matpat1rep':matpat1rep, 'matpatsc':matpatscplot, 'matpatsc1rep':matpatsc1rep,
             'matpatscdme':matpatschsiehplot, 'matpatdme':matpatdme, 'matpatdme1rep': matpatdme1rep, 'patsc':patscplot, 'patsc1rep':patsc1rep, 'sRNA':sRNAplot,
             'bulk':bulkplot, 'bulk1rep':bulkplot1rep, 'endo':endoplot, 'endo1rep':endo1rep, 'bulk_r3':bulk_r3, 'matpatcol':matpat_col, 'matpatc24':matpat_c24,
             'col1rep':matpat_col_1rep, 'c241rep':matpat_c24_1rep,
             # groups that were built before but had no key, so could never be selected
             'matpatscdme1rep':matpatschsieh1rep, 'all':allplot, 'bulkdme':bulkdme,
             'matpatbulk':matpatbulkplot, 'matdme':matdmeplot}
    if plotselect not in plotdict:
        raise ValueError("unknown plotselect "+repr(plotselect)+"; expected one of "+", ".join(sorted(plotdict)))
    plotlist=plotdict[plotselect]

    # line colour for every known sample name (one colour per group of replicates)
    colour_groups = [
        (matplot[0:6], '#241023'),
        (matplot[6:12], '#2B9EB3'),
        (patplot[0:6], '#ccb301'),
        (patplot[6:12], '#6aba58'),
        (scplot[0:2], '#F8333C'),
        (scplot[2:4], '#ADD9F4'),
        (scplot[4:6], '#F7B2BD'),
        (scplot[6:8], '#3F826D'),
        (hsiehplot[0:1], '#ffa600'),
        (hsiehplot[1:2], '#FFFC31'),
        (endoplot[0:3], '#66c2a5'),
        (endoplot[3:6], '#fc8d62'),
        (endoplot[6:9], '#8da0cb'),
        (_ends_replicates('rdd', col_suffix), '#60A561'),
        (sRNAplot[0:1], '#022B3A'),
        (sRNAplot[1:2], '#DA3548'),
        (leafplot[0:3], '#e78ac3'),
        (leafplot[3:6], '#a6d854'),
        (leafplot[6:9], '#ffd92f'),
    ]
    endscooler = {sample: colour for samples, colour in colour_groups for sample in samples}

    # restrict the dataframe to the samples in the selected group
    dfplot=df[df['sample'].isin(plotlist)]

    #make seaborn lineplot
    sns.set(style="ticks",font="Arial")
    # keep a handle on the new figure so the function returns the figure it drew on
    fig = plt.figure(figsize=(12,10))

    ax= sns.lineplot(x=dfplot['window'], y=dfplot['mC'], hue=dfplot['sample'], palette=endscooler, linewidth=2)

    ax.set_xlabel(xlabel="")
    if sRNA:
        ax.set_ylabel(ylabel="average single-base comp 24nt sRNA", fontsize=30)
    else:
        ax.set_ylabel(ylabel="average "+context+" methylation", fontsize=30)

    #custom xticks for boundaries
    plt.xticks(ticks=ticklist, labels=ticklabels, fontsize=28)
    plt.xticks(rotation=50)
    plt.yticks(fontsize=28)

    # a missing/zero interval used to become np.arange(..., step=0), which raises;
    # in that case keep matplotlib's automatic y ticks instead
    if yint:
        ax.set_yticks(ticks=np.arange(ymin, ymax, yint, dtype=float))

    #custom lines to denote boundaries clearly
    plt.vlines(x=vlines, ymin=ymin, ymax=ymax, colors='green', ls=':', lw=2)
    plt.vlines(x=vlinescenter, ymin=ymin, ymax=ymax, colors='black', lw=2)

    #legend: one proxy line per (label, colour) entry of `legend`
    handle_list = [
        mlines.Line2D([], [], color=value, markersize=12, label=key, linewidth=2)
        for key, value in legend.items()
    ]
    ax.legend(handles=handle_list, fontsize=18, loc='upper left')

    plt.tight_layout()

    return fig,ax


## lists and dicts

In [None]:
# Tick positions, tick labels and vertical-line positions for the ends plots.
# Every layout has five ticks (outer edge, 5' boundary, centre, 3' boundary, outer edge);
# the dotted boundary lines sit on the four non-centre ticks and the solid centre line
# on the middle tick.

# in 500bp / out 3kb, 50bp windows
in500_out3000_50bp_ticks = [1, 60, 70.5, 80, 140]
in500_out3000_50bp_ticks_labels = ['-3kb', "5' DMR", '+/- 500bp', "3' DMR", '+3kb']
in500_out3000_50bp_vlines = in500_out3000_50bp_ticks[:2] + in500_out3000_50bp_ticks[3:]
in500_out3000_50bp_vlines_center = in500_out3000_50bp_ticks[2:3]

# same layout with a tick every 10 windows (0.5kb)
in500_out3000_50bp_moreticks = [1] + list(range(10, 61, 10)) + [70.5] + list(range(80, 141, 10))
in500_out3000_50bp_ticks_morelabels = [
    '-3kb', '-2.5kb', '-2kb', '-1.5kb', '-1kb', '-0.5kb',
    "5' TEmerge", '+/- 500bp', "3' TEmerge",
    '+0.5kb', '+1kb', '+1.5kb', '+2kb', '+2.5kb', '+3kb',
]

# in 200bp / out 1kb, 50bp windows (smaller x axis from bigger ends data)
in200_out1000_50bp_ticks = [1, 20, 24.5, 29, 48]
in200_out1000_50bp_ticks_DMR_labels = ['-1kb', "5' DMR", '+/- 200bp', "3' DMR", '+1kb']
in200_out1000_50bp_ticks_DSR_labels = ['-1kb', "5' DSR", '+/- 200bp', "3' DSR", '+1kb']
in200_out1000_50bp_vlines = in200_out1000_50bp_ticks[:2] + in200_out1000_50bp_ticks[3:]
in200_out1000_50bp_vlines_center = in200_out1000_50bp_ticks[2:3]

# in 400bp / out 1kb, 50bp windows (smaller x axis from bigger ends data)
in400_out1000_50bp_ticks = [1, 20, 28, 36, 56]
in400_out1000_50bp_ticks_labels = ['-1kb', "5' DMR", '+/- 400bp', "3' DMR", '+1kb']
in400_out1000_50bp_vlines = in400_out1000_50bp_ticks[:2] + in400_out1000_50bp_ticks[3:]
in400_out1000_50bp_vlines_center = in400_out1000_50bp_ticks[2:3]

# in 1kb / out 2kb, 100bp windows (gene and TE labels share the tick positions)
in1000_out2000_100bp_ticks = [1, 20, 30, 40, 60]
in1000_out2000_100bp_tick_labels_gene = ['-2kb', "5' gene", '+/- 1kb', "3' gene", '+2kb']
in1000_out2000_100bp_tick_labels_TE = ['-2kb', "TE start", '+/- 1kb', "TE end", '+2kb']
in1000_out2000_100bp_vlines = in1000_out2000_100bp_ticks[:2] + in1000_out2000_100bp_ticks[3:]
in1000_out2000_100bp_vlines_center = in1000_out2000_100bp_ticks[2:3]

# in 2kb / out 2kb, 100bp windows (gene and TE labels share the tick positions)
in2000_out2000_100bp_ticks = [1, 21, 41, 60, 80]
in2000_out2000_100bp_tick_labels_gene = ['-2kb', "5' gene", '+/- 2kb', "3' gene", '+2kb']
in2000_out2000_100bp_tick_labels_TE = ['-2kb', "TE start", '+/- 2kb', "TE end", '+2kb']
in2000_out2000_100bp_vlines = in2000_out2000_100bp_ticks[:2] + in2000_out2000_100bp_ticks[3:]
in2000_out2000_100bp_vlines_center = in2000_out2000_100bp_ticks[2:3]

# in 100bp / out 1kb, 50bp windows (smaller x axis from bigger ends data)
in100_out1000_50bp_ticks = [1, 20, 22, 24, 44]
in100_out1000_50bp_ticks_DMR_labels = ['-1kb', "5' DMR", '+/- 100bp', "3' DMR", '+1kb']
in100_out1000_50bp_ticks_DSR_labels = ['-1kb', "5' DSR", '+/- 100bp', "3' DSR", '+1kb']
in100_out1000_50bp_vlines = in100_out1000_50bp_ticks[:2] + in100_out1000_50bp_ticks[3:]
in100_out1000_50bp_vlines_center = in100_out1000_50bp_ticks[2:3]




In [None]:
# ends legend dictionaries
# Each dict maps a legend label to a line colour; the colours repeat the per-sample palette
# built inside methylends_format, and dict order is the legend order.
matpatlegend={'C24xCol maternal': '#241023',
              'C24xCol paternal': '#ccb301',
              'r1xr3 maternal': '#2B9EB3',
              'r1xr3 paternal': '#6aba58'}

# same entries as matpatlegend (independent copy)
matpatlegend1rep=dict(matpatlegend)

matpatschsiehlegend={'C24xCol recip maternal': '#241023',
              'C24xCol recip paternal': '#ccb301',
              'r1xr3 recip maternal': '#2B9EB3',
              'r1xr3 recip paternal': '#6aba58',
              'Col-0 sperm': '#F8333C',
              'ros1 sperm': '#ADD9F4',
              'dme sperm': '#F7B2BD',
              'dme;ros1 sperm': '#3F826D',
              'WT endosperm Hsieh et al': '#ffa600',
              'dme endosperm Hsieh et al': '#FFFC31'}


patsclegend={'C24xCol recip paternal': '#ccb301',
             'r1xr3 recip paternal': '#6aba58',
             'Col-0 sperm': '#F8333C',
             'ros1 sperm': '#ADD9F4',
             'dme/+ sperm': '#F7B2BD',
             'dme/+;ros1 sperm': '#3F826D'}

matpatdmelegend={'C24xCol recip maternal': '#241023',
                 'C24xCol recip paternal': '#ccb301',
                 'r1xr3 recip maternal': '#2B9EB3',
                 'r1xr3 recip paternal': '#6aba58',
                 'WT endosperm*': '#ffa600','dme endosperm*': '#FFFC31'}

matdmelegend={'C24xCol recip maternal': '#241023',
              'r1xr3 recip maternal': '#2B9EB3',
              'WT endosperm*': '#ffa600','dme endosperm*': '#FFFC31'}

patsclegend1rep={'C24xCol paternal': '#ccb301',
             'r1xr3 paternal': '#6aba58',
             'Col-0 sperm': '#F8333C',
             'ros1 sperm': '#ADD9F4',
             'dme/+ sperm': '#F7B2BD',
             'dme/+;ros1 sperm': '#3F826D'}

matpatdmelegend1rep={'C24xCol maternal': '#241023',
                 'C24xCol paternal': '#ccb301',
                 'r1xr3 maternal': '#2B9EB3',
                 'r1xr3 paternal': '#6aba58',
                 'WT endosperm*': '#ffa600','dme endosperm*': '#FFFC31'}
# Raw string: "\i" is not a valid escape sequence, so the non-raw literal triggers an
# invalid-escape warning on recent Python versions. The runtime value is unchanged.
dmeit=r"$\it{dme}$"
dmelegend={'WT endosperm': '#ffa600', dmeit+' endosperm': '#FFFC31'}

srnalegend={'embryo 24nt':'#022B3A','endo 24nt':'#DA3548'}


bulklegend={'Col-0 endosperm': '#66c2a5',
              r7it+' endosperm': '#fc8d62',
              r3it+' endosperm': '#8da0cb',
              'Col-0 leaf': '#e78ac3',
              r7it+' leaf': '#a6d854',
              r3it+' leaf': '#ffd92f'}

# NOTE(review): these labels use plain 'ros1-7'/'ros1-3' text while bulklegend and
# bulkr3legend use the italic r7it/r3it strings -- confirm whether that is intended.
endolegend={'Col-0 endosperm': '#66c2a5',
              'ros1-7 endosperm': '#fc8d62',
              'ros1-3 endosperm': '#8da0cb'}

bulkr3legend={'Col-0 endosperm': '#66c2a5',
              r3it+' endosperm': '#8da0cb',
              'Col-0 leaf': '#e78ac3',
              r3it+' leaf': '#ffd92f'}

matpat_collegend={'Col maternal': '#241023',
                 'Col paternal': '#ccb301',
                 r3it+" maternal": '#2B9EB3',
                 r3it+" paternal": '#6aba58'}
matpat_c24legend={'C24 maternal': '#241023',
                 'C24 paternal': '#ccb301',
                 r1it+" maternal": '#2B9EB3',
                 r1it+" paternal": '#6aba58'}


## read in data

In [None]:
# Column names applied to the headerless ends-analysis tables by read_ends_data.
endscol=['window','mC','sample']

#ends paths:
# Directories under `path` holding the summed methylation (mC) and sRNA ends results.
mCends=path+"ends_analysis/results_underlying_figs_published/mC_summed/"
sRNAends=path+"ends_analysis/results_underlying_figs_published/sRNAs_summed/"

def read_ends_data(filepath, names=None, sep="\t"):
    """Read a headerless ends-analysis table and clean its sample names.

    Parameters
    ----------
    filepath : path or file-like object accepted by ``pandas.read_csv``.
    names : column names for the table; defaults to the notebook-level
        ``endscol`` list. Must include a 'sample' column.
    sep : field separator (tab by default).

    Returns
    -------
    DataFrame with the literal text '.bed' removed from the 'sample' column.
    """
    if names is None:
        names = endscol
    df=pd.read_csv(filepath, header=None, sep=sep, names=names)
    # regex=False: on pandas < 2.0 the pattern was treated as a regular expression, so the
    # leading '.' matched any character and names containing e.g. '_bed' were also truncated.
    df['sample'] = df['sample'].str.replace('.bed', '', regex=False)

    return df


In [None]:
# DMR ends
# allelic DMRs
# dme_dom tables come from the in400out1kb files, ros1needed tables from the in200out1kb files;
# one table per methylation context (CG / CHG / CHH).
dmedom_CG=read_ends_data(mCends+'dme_dom_CG_in400out1kb_average_counts_filt.txt')
dmedom_CHG=read_ends_data(mCends+'dme_dom_CHG_in400out1kb_average_counts_filt.txt')
dmedom_CHH=read_ends_data(mCends+'dme_dom_CHH_in400out1kb_average_counts_filt.txt')

ros1ant_CG=read_ends_data(mCends+'ros1needed_CG_in200out1kb_average_counts_filt.txt')
ros1ant_CHG=read_ends_data(mCends+'ros1needed_CHG_in200out1kb_average_counts_filt.txt')
ros1ant_CHH=read_ends_data(mCends+'ros1needed_CHH_in200out1kb_average_counts_filt.txt')

#bulk DMRs
# r3-vs-wt and r7-vs-wt "allC hyper" region sets, one table per methylation context.
r3_allCr_mCG_ends=read_ends_data(mCends+"r3_v_wt_allC_hyper_allC_allCreg_CG_average_counts_filt.txt")
r7_allCr_mCG_ends=read_ends_data(mCends+"r7_v_wt_allC_hyper_allC_allCreg_CG_average_counts_filt.txt")

r3_allCr_mCHG_ends=read_ends_data(mCends+"r3_v_wt_allC_hyper_allC_allCreg_CHG_average_counts_filt.txt")
r7_allCr_mCHG_ends=read_ends_data(mCends+"r7_v_wt_allC_hyper_allC_allCreg_CHG_average_counts_filt.txt")

r3_allCr_mCHH_ends=read_ends_data(mCends+"r3_v_wt_allC_hyper_allC_allCreg_CHH_average_counts_filt.txt")
r7_allCr_mCHH_ends=read_ends_data(mCends+"r7_v_wt_allC_hyper_allC_allCreg_CHH_average_counts_filt.txt")


### features nearby

In [None]:
# genes near DMRs - in 1kb, out 2kb, window 100
# Tables for all genes, one per methylation context.
all_genes_mCG_ends=read_ends_data(mCends+"genes_gene_CG_average_counts_filt.txt")
all_genes_mCHG_ends=read_ends_data(mCends+"genes_gene_CHG_average_counts_filt.txt")
all_genes_mCHH_ends=read_ends_data(mCends+"genes_gene_CHH_average_counts_filt.txt")

# allelic DMRs
# (no allelic-DMR gene tables are loaded in this cell)

#bulk DMRs
# "genes_1kb_<r3|r7>_hyper" files, one per methylation context.
r3_genes_mCG_ends=read_ends_data(mCends+"genes_1kb_r3_hyper_gene_CG_average_counts_filt.txt")
r7_genes_mCG_ends=read_ends_data(mCends+"genes_1kb_r7_hyper_gene_CG_average_counts_filt.txt")

r3_genes_mCHG_ends=read_ends_data(mCends+"genes_1kb_r3_hyper_gene_CHG_average_counts_filt.txt")
r3_genes_mCHH_ends=read_ends_data(mCends+"genes_1kb_r3_hyper_gene_CHH_average_counts_filt.txt")

r7_genes_mCHG_ends=read_ends_data(mCends+"genes_1kb_r7_hyper_gene_CHG_average_counts_filt.txt")
r7_genes_mCHH_ends=read_ends_data(mCends+"genes_1kb_r7_hyper_gene_CHH_average_counts_filt.txt")

# Complementary "genes_NOT_1kb_<r3|r7>_hyper" files, one per methylation context.
NOTr3_genes_mCG_ends=read_ends_data(mCends+"genes_NOT_1kb_r3_hyper_gene_CG_average_counts_filt.txt")
NOTr7_genes_mCG_ends=read_ends_data(mCends+"genes_NOT_1kb_r7_hyper_gene_CG_average_counts_filt.txt")

NOTr3_genes_mCHG_ends=read_ends_data(mCends+"genes_NOT_1kb_r3_hyper_gene_CHG_average_counts_filt.txt")
NOTr3_genes_mCHH_ends=read_ends_data(mCends+"genes_NOT_1kb_r3_hyper_gene_CHH_average_counts_filt.txt")

NOTr7_genes_mCHG_ends=read_ends_data(mCends+"genes_NOT_1kb_r7_hyper_gene_CHG_average_counts_filt.txt")
NOTr7_genes_mCHH_ends=read_ends_data(mCends+"genes_NOT_1kb_r7_hyper_gene_CHH_average_counts_filt.txt")

In [None]:
# TEs near DMRs
# Each group below unpacks the CG, CHG and CHH tables (in that order) for one TE set.
r3_TEs_mCG_ends, r3_TEs_mCHG_ends, r3_TEs_mCHH_ends = map(read_ends_data, [
    mCends+"TEmerge_1kb_r3_hyper_TE_CG_average_counts_filt.txt",
    mCends+"TEmerge_1kb_r3_hyper_TE_CHG_average_counts_filt.txt",
    mCends+"TEmerge_1kb_r3_hyper_TE_CHH_average_counts_filt.txt",
])
r7_TEs_mCG_ends, r7_TEs_mCHG_ends, r7_TEs_mCHH_ends = map(read_ends_data, [
    mCends+"TEmerge_1kb_r7_hyper_TE_CG_average_counts_filt.txt",
    mCends+"TEmerge_1kb_r7_hyper_TE_CHG_average_counts_filt.txt",
    mCends+"TEmerge_1kb_r7_hyper_TE_CHH_average_counts_filt.txt",
])

# complementary 'TEmerge_NOT_1kb_*_hyper' tables
NOTr3_TEs_mCG_ends, NOTr3_TEs_mCHG_ends, NOTr3_TEs_mCHH_ends = map(read_ends_data, [
    mCends+"TEmerge_NOT_1kb_r3_hyper_TE_CG_average_counts_filt.txt",
    mCends+"TEmerge_NOT_1kb_r3_hyper_TE_CHG_average_counts_filt.txt",
    mCends+"TEmerge_NOT_1kb_r3_hyper_TE_CHH_average_counts_filt.txt",
])
NOTr7_TEs_mCG_ends, NOTr7_TEs_mCHG_ends, NOTr7_TEs_mCHH_ends = map(read_ends_data, [
    mCends+"TEmerge_NOT_1kb_r7_hyper_TE_CG_average_counts_filt.txt",
    mCends+"TEmerge_NOT_1kb_r7_hyper_TE_CHG_average_counts_filt.txt",
    mCends+"TEmerge_NOT_1kb_r7_hyper_TE_CHH_average_counts_filt.txt",
])

### sRNAs

In [None]:
# sRNA ends
# 24nt sRNA tables, all read from the sRNAends directory.
r3_TEs_24nt_ends, r7_TEs_24nt_ends = map(read_ends_data, [
    sRNAends+"TEmerge_1kb_r3_hyper_TE_24ntsRNA_average_counts_filt.txt",
    sRNAends+"TEmerge_1kb_r7_hyper_TE_24ntsRNA_average_counts_filt.txt",
])

NOTr3_TEs_24nt_ends, NOTr7_TEs_24nt_ends = map(read_ends_data, [
    sRNAends+"TEmerge_NOT_1kb_r3_hyper_TE_24ntsRNA_average_counts_filt.txt",
    sRNAends+"TEmerge_NOT_1kb_r7_hyper_TE_24ntsRNA_average_counts_filt.txt",
])

endoDSR_24nt_ends, embryoDSR_24nt_ends = map(read_ends_data, [
    sRNAends+"endosperm_dsr_erdmann_DSR_24ntsRNA_average_counts_filt.txt",
    sRNAends+"embryo_dsr_erdmann_DSR_24ntsRNA_average_counts_filt.txt",
])


In [None]:
# DSRs
# Methylation tables over the endosperm and embryo DSR sets; each group unpacks
# the CG, CHG and CHH tables in that order.
endoDSR_mCG_ends, endoDSR_mCHG_ends, endoDSR_mCHH_ends = map(read_ends_data, [
    mCends+"endosperm_dsr_erdmann_DSR_CG_average_counts_filt.txt",
    mCends+"endosperm_dsr_erdmann_DSR_CHG_average_counts_filt.txt",
    mCends+"endosperm_dsr_erdmann_DSR_CHH_average_counts_filt.txt",
])

embryoDSR_mCG_ends, embryoDSR_mCHG_ends, embryoDSR_mCHH_ends = map(read_ends_data, [
    mCends+"embryo_dsr_erdmann_DSR_CG_average_counts_filt.txt",
    mCends+"embryo_dsr_erdmann_DSR_CHG_average_counts_filt.txt",
    mCends+"embryo_dsr_erdmann_DSR_CHH_average_counts_filt.txt",
])



## Figure: plotting ends analysis over TEs and genes

#### all genes

In [None]:
methylends_format(all_genes_mCG_ends, 'bulk1rep', 'all genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'allgenes_mCG_ends_bulk1rep.pdf', dpi=350)



In [None]:
methylends_format(all_genes_mCG_ends, 'bulk', 'all genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'allgenes_mCG_ends_bulk.pdf', dpi=350)


In [None]:
methylends_format(all_genes_mCHG_ends, 'bulk', 'all genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 6, 1, context='CHG', sRNA=False)
plt.savefig(figurepath+'allgenes_mCHG_ends_bulk.pdf', dpi=350)


In [None]:
methylends_format(all_genes_mCHH_ends, 'bulk', 'all genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 6, 1, context='CHH', sRNA=False)
plt.savefig(figurepath+'allgenes_mCHH_ends_bulk.pdf', dpi=350)


#### ros1 genes

In [None]:
methylends_format(r3_genes_mCG_ends, 'bulk1rep', 'r3 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_genes_mCG_ends_bulk1rep.pdf', dpi=350)



In [None]:
methylends_format(r3_genes_mCG_ends, 'bulk', 'r3 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_genes_mCG_ends_bulk.pdf', dpi=350)



In [None]:
methylends_format(r3_genes_mCHG_ends, 'bulk', 'r3 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 15, 2, context='CHG', sRNA=False)
plt.savefig(figurepath+'r3_genes_mCHG_ends_bulk.pdf', dpi=350)



In [None]:
methylends_format(r3_genes_mCHH_ends, 'bulk', 'r3 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 6.5, 1, context='CHH', sRNA=False)
plt.savefig(figurepath+'r3_genes_mCHH_ends_bulk.pdf', dpi=350)



In [None]:
methylends_format(r7_genes_mCG_ends, 'bulk1rep', 'r7 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r7_genes_mCG_ends_bulk1rep.pdf', dpi=350)




In [None]:
methylends_format(r7_genes_mCG_ends, 'bulk', 'r7 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r7_genes_mCG_ends_bulk.pdf', dpi=350)




In [None]:
methylends_format(r7_genes_mCHG_ends, 'bulk', 'r7 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 15, 2, context='CHG', sRNA=False)
plt.savefig(figurepath+'r7_genes_mCHG_ends_bulk.pdf', dpi=350)



In [None]:
methylends_format(r7_genes_mCHH_ends, 'bulk', 'r7 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 6.5, 1, context='CHH', sRNA=False)
plt.savefig(figurepath+'r7_genes_mCHH_ends_bulk.pdf', dpi=350)



#### notros1 genes

In [None]:
# This cell plots the NOT-r3 gene table (NOTr3_genes_mCG_ends), so it is labelled
# 'not r3 genes' rather than 'r3 genes', matching the 'not r3 TEs' labels of the NOT TE plots.
methylends_format(NOTr3_genes_mCG_ends, 'bulk1rep', 'not r3 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'NOT_r3_genes_mCG_ends_bulk1rep.pdf', dpi=350)



In [None]:
# This cell plots the NOT-r3 gene table (NOTr3_genes_mCG_ends), so it is labelled
# 'not r3 genes' rather than 'r3 genes', matching the 'not r3 TEs' labels of the NOT TE plots.
methylends_format(NOTr3_genes_mCG_ends, 'bulk', 'not r3 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'NOT_r3_genes_mCG_ends_bulk.pdf', dpi=350)



In [None]:
# This cell plots the NOT-r3 gene table (NOTr3_genes_mCHG_ends), so it is labelled
# 'not r3 genes' rather than 'r3 genes', matching the 'not r3 TEs' labels of the NOT TE plots.
methylends_format(NOTr3_genes_mCHG_ends, 'bulk', 'not r3 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 15, 2, context='CHG', sRNA=False)
plt.savefig(figurepath+'NOT_r3_genes_mCHG_ends_bulk.pdf', dpi=350)



In [None]:
# This cell plots the NOT-r3 gene table (NOTr3_genes_mCHH_ends), so it is labelled
# 'not r3 genes' rather than 'r3 genes', matching the 'not r3 TEs' labels of the NOT TE plots.
methylends_format(NOTr3_genes_mCHH_ends, 'bulk', 'not r3 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 6.5, 1, context='CHH', sRNA=False)
plt.savefig(figurepath+'NOT_r3_genes_mCHH_ends_bulk.pdf', dpi=350)



In [None]:
# This cell plots the NOT-r7 gene table (NOTr7_genes_mCG_ends), so it is labelled
# 'not r7 genes' rather than 'r7 genes', matching the 'not r7 TEs' labels of the NOT TE plots.
methylends_format(NOTr7_genes_mCG_ends, 'bulk1rep', 'not r7 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'NOT_r7_genes_mCG_ends_bulk1rep.pdf', dpi=350)




In [None]:
# This cell plots the NOT-r7 gene table (NOTr7_genes_mCG_ends), so it is labelled
# 'not r7 genes' rather than 'r7 genes', matching the 'not r7 TEs' labels of the NOT TE plots.
methylends_format(NOTr7_genes_mCG_ends, 'bulk', 'not r7 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 51, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'NOT_r7_genes_mCG_ends_bulk.pdf', dpi=350)




In [None]:
# This cell plots the NOT-r7 gene table (NOTr7_genes_mCHG_ends), so it is labelled
# 'not r7 genes' rather than 'r7 genes', matching the 'not r7 TEs' labels of the NOT TE plots.
methylends_format(NOTr7_genes_mCHG_ends, 'bulk', 'not r7 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 15, 2, context='CHG', sRNA=False)
plt.savefig(figurepath+'NOT_r7_genes_mCHG_ends_bulk.pdf', dpi=350)



In [None]:
# This cell plots the NOT-r7 gene table (NOTr7_genes_mCHH_ends), so it is labelled
# 'not r7 genes' rather than 'r7 genes', matching the 'not r7 TEs' labels of the NOT TE plots.
methylends_format(NOTr7_genes_mCHH_ends, 'bulk', 'not r7 genes', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_gene,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 6.5, 1, context='CHH', sRNA=False)
plt.savefig(figurepath+'NOT_r7_genes_mCHH_ends_bulk.pdf', dpi=350)



#### r3 TEs

##### mC

In [None]:
# NOTE(review): from this cell on, figurepath points at the 'test_endsout' directory instead of
# path+"python_figures/" (set in the setup cell), so every later plt.savefig() writes there.
# Confirm this is intended for the published figures, or restore the original figurepath.
figurepath="/lab/solexa_gehring/elizabeth/ros1_endo_code_and_underlying_data/test_endsout/"

In [None]:
fig,ax=methylends_format(r3_TEs_mCG_ends, 'bulk', 'r3 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'r3_TEfrag_mCG_ends_bulk.pdf', dpi=350)



In [None]:
methylends_format(r3_TEs_mCG_ends, 'bulk1rep', 'r3 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_TEfrag_mCG_ends_bulk1rep.pdf', dpi=350)



In [None]:
methylends_format(r3_TEs_mCG_ends, 'bulk_r3', 'r3 TEs', bulkr3legend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_TEfrag_mCG_ends_bulkr3_1rep.pdf', dpi=350)



In [None]:
methylends_format(r3_TEs_mCG_ends, 'endo', 'r3 TEs', endolegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_TEfrag_mCG_ends_endo_bulk.pdf', dpi=350)



In [None]:
methylends_format(r3_TEs_mCG_ends, 'endo1rep', 'r3 TEs', endolegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_TEfrag_mCG_ends_bulkendo1rep.pdf', dpi=350)



In [None]:
methylends_format(NOTr3_TEs_mCG_ends, 'endo1rep', 'not r3 TEs', endolegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'NOT_r3_TEfrag_mCG_ends_bulkendo1rep.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(NOTr3_TEs_mCG_ends, 'bulk', 'not r3 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'NOT_r3_TEfrag_mCG_ends_bulk.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(r3_TEs_mCHG_ends, 'endo', 'r3 TEs', endolegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 61, 10, context='CHG', sRNA=False)
plt.savefig(figurepath+'r3_TEfrag_mCHG_ends_endo_bulk.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(r3_TEs_mCHG_ends, 'bulk', 'r3 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 61, 10, context='CHG', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'r3_TEfrag_mCHG_ends_bulk.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(NOTr3_TEs_mCHG_ends, 'bulk', 'not r3 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 61, 10, context='CHG', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'NOT_r3_TEfrag_mCHG_ends_bulk.pdf', dpi=350)



In [None]:
methylends_format(r3_TEs_mCHH_ends, 'endo', 'r3 TEs', endolegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 21, 2, context='CHH', sRNA=False)
plt.savefig(figurepath+'r3_TEfrag_mCHH_ends_endo_bulk.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(r3_TEs_mCHH_ends, 'bulk', 'r3 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 21, 2, context='CHH', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'r3_TEfrag_mCHH_ends_bulk.pdf', dpi=350)



In [None]:
methylends_format(NOTr3_TEs_mCHH_ends, 'bulk', 'not r3 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 21, 2, context='CHH', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'NOT_r3_TEfrag_mCHH_ends_bulk.pdf', dpi=350)



##### sRNA

In [None]:
methylends_format(r3_TEs_24nt_ends, 'sRNA', 'r3 TEs', srnalegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, .026, .005, context='CG', sRNA=True)
plt.savefig(figurepath+'r3_TEfrag_sRNA_ends_bulk.pdf', dpi=350)



In [None]:
methylends_format(r7_TEs_24nt_ends, 'sRNA', 'r7 TEs', srnalegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, .036, .005, context='CG', sRNA=True)
plt.savefig(figurepath+'r7_TEfrag_sRNA_ends_bulk.pdf', dpi=350)



In [None]:
# This cell plots the NOT-r3 TE sRNA table (NOTr3_TEs_24nt_ends), so it is labelled
# 'not r3 TEs' rather than 'r3 TEs', matching the NOT TE methylation plots.
methylends_format(NOTr3_TEs_24nt_ends, 'sRNA', 'not r3 TEs', srnalegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, .026, .005, context='CG', sRNA=True)
plt.savefig(figurepath+'NOT_r3_TEfrag_sRNA_ends_bulk.pdf', dpi=350)



In [None]:
# This cell plots the NOT-r7 TE sRNA table (NOTr7_TEs_24nt_ends), so it is labelled
# 'not r7 TEs' rather than 'r7 TEs', matching the NOT TE methylation plots.
methylends_format(NOTr7_TEs_24nt_ends, 'sRNA', 'not r7 TEs', srnalegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, .026, .005, context='CG', sRNA=True)
plt.savefig(figurepath+'NOT_r7_TEfrag_sRNA_ends_bulk.pdf', dpi=350)



#### r7 TEs

In [None]:
fig,ax=methylends_format(r7_TEs_mCG_ends, 'bulk1rep', 'r7 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r7_TEfrag_mCG_ends_bulk1rep.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(r7_TEs_mCG_ends, 'bulk', 'r7 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'r7_TEfrag_mCG_ends_bulk.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(NOTr7_TEs_mCG_ends, 'bulk', 'not r7 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'NOT_r7_TEfrag_mCG_ends_bulk.pdf', dpi=350)



In [None]:
methylends_format(r7_TEs_mCHG_ends, 'bulk', 'r7 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 61, 10, context='CHG', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'r7_TEfrag_mCHG_ends_bulk.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(NOTr7_TEs_mCHG_ends, 'bulk', 'not r7 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 61, 10, context='CHG', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'NOT_r7_TEfrag_mCHG_ends_bulk.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(r7_TEs_mCHH_ends, 'bulk', 'r7 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 21, 2, context='CHH', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'r7_TEfrag_mCHH_ends_bulk.pdf', dpi=350)



In [None]:
fig,ax=methylends_format(NOTr7_TEs_mCHH_ends, 'bulk', 'not r7 TEs', bulklegend, in2000_out2000_100bp_ticks, in2000_out2000_100bp_tick_labels_TE,
                  in2000_out2000_100bp_vlines, in2000_out2000_100bp_vlines_center, 0, 21, 2, context='CHH', sRNA=False)
plt.tight_layout()
plt.savefig(figurepath+'NOT_r7_TEfrag_mCHH_ends_bulk.pdf', dpi=350)



#### DSR ends

In [None]:
methylends_format(endoDSR_24nt_ends, 'sRNA', '', srnalegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 0.015, .001, context='CG', sRNA=True)
plt.savefig(figurepath+'endoDSR_24nt_ends.pdf', dpi=350)



In [None]:
methylends_format(embryoDSR_24nt_ends, 'sRNA', '', srnalegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 0.056, .01, context='CG', sRNA=True)
plt.savefig(figurepath+'embryoDSR_24nt_ends.pdf', dpi=350)



In [None]:
methylends_format(embryoDSR_mCG_ends, 'endo1rep', '', endolegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'embryoDSR_mCG_ends_bulk1rep.pdf', dpi=350)



In [None]:
methylends_format(embryoDSR_mCHG_ends, 'endo1rep', '', endolegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 101, 10, context='CHG', sRNA=False)
plt.savefig(figurepath+'embryoDSR_mCHG_ends_bulk1rep.pdf', dpi=350)


In [None]:
methylends_format(embryoDSR_mCHH_ends, 'endo1rep', '', endolegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 14.1, 1, context='CHH', sRNA=False)
plt.savefig(figurepath+'embryoDSR_mCHH_ends_bulk1rep.pdf', dpi=350)


In [None]:
# NOTE(review): this cell passes the 'endo' sample set, while the sibling endoDSR mCHG/mCG cells
# pass 'endo1rep'; the output file is nevertheless named '..._bulk1rep.pdf'.
# Confirm which sample set is intended here.
methylends_format(endoDSR_mCHH_ends, 'endo', '', endolegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 6.1, 1, context='CHH', sRNA=False)
plt.savefig(figurepath+'endoDSR_mCHH_ends_bulk1rep.pdf', dpi=350)



In [None]:
methylends_format(endoDSR_mCHG_ends, 'endo1rep', '', endolegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 12.1, 1, context='CHG', sRNA=False)
plt.savefig(figurepath+'endoDSR_mCHG_ends_bulk1rep.pdf', dpi=350)



In [None]:
methylends_format(endoDSR_mCG_ends, 'endo1rep', '', endolegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 30.1, 5, context='CG', sRNA=False)
plt.savefig(figurepath+'endoDSR_mCG_ends_bulk1rep.pdf', dpi=350)


In [None]:
methylends_format(endoDSR_mCG_ends, 'matpatcol', '', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 36,5, context='CG', sRNA=False)
plt.savefig(figurepath+'endoDSR_mCG_ends_allelic_ColG_allreps.pdf', dpi=350)


In [None]:
methylends_format(endoDSR_mCG_ends, 'col1rep', '', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 36,5, context='CG', sRNA=False)
plt.savefig(figurepath+'endoDSR_mCG_ends_allelic_ColG_1reps.pdf', dpi=350)


In [None]:
methylends_format(endoDSR_mCHG_ends, 'matpatcol', '', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 13,2, context='CHG', sRNA=False)
plt.savefig(figurepath+'endoDSR_mCHG_ends_allelic_ColG_allreps.pdf', dpi=350)


In [None]:
methylends_format(endoDSR_mCHG_ends, 'col1rep', '', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 13,2, context='CHG', sRNA=False)
plt.savefig(figurepath+'endoDSR_mCHG_ends_allelic_ColG_1reps.pdf', dpi=350)


In [None]:
methylends_format(endoDSR_mCHH_ends, 'matpatcol', '', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 4.25,0.5, context='CHH', sRNA=False)
plt.savefig(figurepath+'endoDSR_mCHH_ends_allelic_ColG_allreps.pdf', dpi=350)


In [None]:
methylends_format(endoDSR_mCHH_ends, 'col1rep', '', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DSR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 4.25,0.5, context='CHH', sRNA=False)
plt.savefig(figurepath+'endoDSR_mCHH_ends_allelic_ColG_1reps.pdf', dpi=350)


#### allelic plot over bulk DMRs

In [None]:
#### 1rep for main fig

In [None]:
methylends_format(r3_allCr_mCG_ends, 'col1rep', 'r3 endosperm targets', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCG_ends_matpat_Col01rep.pdf', dpi=350)


In [None]:
methylends_format(r3_allCr_mCHH_ends, 'col1rep', 'r3 endosperm targets', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 13, 2, context='CHH', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCHH_ends_matpatcol1rep.pdf', dpi=350)


In [None]:
methylends_format(r3_allCr_mCHG_ends, 'col1rep', 'r3 endosperm targets', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 31, 5, context='CHG', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCHG_ends_matpatcol1rep.pdf', dpi=350)


In [None]:
# all reps for other figs

In [None]:
methylends_format(r3_allCr_mCG_ends, 'matpatcol', 'r3 endosperm targets', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCG_ends_matpat_Col0.pdf', dpi=350)



In [None]:
methylends_format(r3_allCr_mCG_ends, 'matpat1rep', 'r3 endosperm targets', matpatlegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCG_ends_matpat1rep.pdf', dpi=350)



In [None]:
methylends_format(r3_allCr_mCHG_ends, 'matpatcol', 'r3 endosperm targets', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 31, 5, context='CHG', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCHG_ends_matpatcol.pdf', dpi=350)


In [None]:
methylends_format(r3_allCr_mCG_ends, 'matpatc24', 'r3 endosperm targets', matpat_c24legend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCG_ends_matpat_C24.pdf', dpi=350)


In [None]:
methylends_format(r3_allCr_mCHG_ends, 'matpatc24', 'r3 endosperm targets', matpat_c24legend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 31, 5, context='CHG', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCHG_ends_matpatc24.pdf', dpi=350)


In [None]:
methylends_format(r3_allCr_mCHH_ends, 'matpatcol', 'r3 endosperm targets', matpat_collegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 13, 2, context='CHH', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCHH_ends_matpatcol.pdf', dpi=350)


In [None]:
methylends_format(r3_allCr_mCHH_ends, 'matpatc24', 'r3 endosperm targets', matpat_c24legend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 13, 2, context='CHH', sRNA=False)
plt.savefig(figurepath+'r3_allCr_mCHH_ends_matpatc24.pdf', dpi=350)


In [None]:
methylends_format(r7_allCr_mCG_ends, 'matpat1rep', 'r7 endosperm targets', matpatlegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'r7_allCr_mCG_ends_matpat1rep.pdf', dpi=350)



In [None]:
methylends_format(r7_allCr_mCHG_ends, 'matpat1rep', 'r7 endosperm targets', matpatlegend, in100_out1000_50bp_ticks, in100_out1000_50bp_ticks_DMR_labels,
                  in100_out1000_50bp_vlines, in100_out1000_50bp_vlines_center, 0, 26, 5, context='CHG', sRNA=False)
plt.savefig(figurepath+'r7_allCr_mCHG_ends_matpat1rep.pdf', dpi=350)



#### mat hypo DMRs

In [None]:
methylends_format(dmedom_CG, 'matpatdme1rep', '', matpatdmelegend1rep, in400_out1000_50bp_ticks, in400_out1000_50bp_ticks_labels,
                  in400_out1000_50bp_vlines, in400_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'dmedom_CG_matpatdme_ends_1rep.pdf', dpi=350)



In [None]:
methylends_format(dmedom_CG, 'matpatdme', '', matpatdmelegend, in400_out1000_50bp_ticks, in400_out1000_50bp_ticks_labels,
                  in400_out1000_50bp_vlines, in400_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'dmedom_CG_matpatdme_ends.pdf', dpi=350)



In [None]:
methylends_format(dmedom_CG, 'dme', '', dmelegend, in400_out1000_50bp_ticks, in400_out1000_50bp_ticks_labels,
                  in400_out1000_50bp_vlines, in400_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'dmedom_CG_dme_ends_1rep.pdf', dpi=350)


In [None]:
methylends_format(dmedom_CHG, 'matpatdme', '', matpatdmelegend, in400_out1000_50bp_ticks, in400_out1000_50bp_ticks_labels,
                  in400_out1000_50bp_vlines, in400_out1000_50bp_vlines_center, 0, 41, 5, context='CHG', sRNA=False)
plt.savefig(figurepath+'dmedom_CHG_matpatdme_ends.pdf', dpi=350)


In [None]:
methylends_format(dmedom_CHH, 'matpatdme', '', matpatdmelegend, in400_out1000_50bp_ticks, in400_out1000_50bp_ticks_labels,
                  in400_out1000_50bp_vlines, in400_out1000_50bp_vlines_center, 0, 21, 2, context='CHH', sRNA=False)
plt.savefig(figurepath+'dmedom_CHH_matpatdme_ends.pdf', dpi=350)


In [None]:
methylends_format(ros1ant_CG, 'dme', '', dmelegend, in200_out1000_50bp_ticks, in200_out1000_50bp_ticks_DMR_labels,
                  in200_out1000_50bp_vlines, in200_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'ros1ant_CG_dme_ends_1rep.pdf', dpi=350)


In [None]:
methylends_format(ros1ant_CG, 'matpatdme1rep', '', matpatdmelegend1rep, in200_out1000_50bp_ticks, in200_out1000_50bp_ticks_DMR_labels,
                  in200_out1000_50bp_vlines, in200_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'ros1ant_CG_matpatdme_ends_1rep.pdf', dpi=350)


In [None]:
methylends_format(ros1ant_CG, 'matpatdme', '', matpatdmelegend, in200_out1000_50bp_ticks, in200_out1000_50bp_ticks_DMR_labels,
                  in200_out1000_50bp_vlines, in200_out1000_50bp_vlines_center, 0, 101, 10, context='CG', sRNA=False)
plt.savefig(figurepath+'ros1ant_CG_matpatdme_ends.pdf', dpi=350)


In [None]:
methylends_format(ros1ant_CHG, 'matpatdme', '', matpatdmelegend, in200_out1000_50bp_ticks, in200_out1000_50bp_ticks_DMR_labels,
                  in200_out1000_50bp_vlines, in200_out1000_50bp_vlines_center, 0, 26.5, 2, context='CHG', sRNA=False)
plt.savefig(figurepath+'ros1ant_CHG_matpatdme_ends.pdf', dpi=350)


In [None]:
methylends_format(ros1ant_CHH, 'matpatdme', '', matpatdmelegend, in200_out1000_50bp_ticks, in200_out1000_50bp_ticks_DMR_labels,
                  in200_out1000_50bp_vlines, in200_out1000_50bp_vlines_center, 0, 14.5, 2, context='CHH', sRNA=False)
plt.savefig(figurepath+'ros1ant_CHH_matpatdme_ends.pdf', dpi=350)


# Comparing ROS1-antagonized and DME-dominant regions to published regions of interest

In [None]:
# Column layout for the "<feature>_1kb_<DMR set>" tables read further down:
# six feature columns followed by three DMR coordinate columns.
windowcol=['chr','start','end','feature','score','strand','dmr_chr','dmr_start','dmr_end']


# Published regions to compare against; all files are headerless and tab-separated.
genes=pd.read_csv(featurepath+"regions2compare/genes.bed", sep=tab, header=None, names=['chr','start','end','feature','score','strand'])
# NOTE(review): dmegenes reads the same file as dmebed below but supplies a single column name.
# If the file really has four columns, pandas will presumably use the leading columns as the
# index and keep only the last one as 'feature' — confirm this is intended (len() is unaffected).
dmegenes=pd.read_csv(featurepath+"regions2compare/dme_genes_gehring.bed", sep=tab, header=None, names=['feature'])
endohypo=pd.read_csv(featurepath+"regions2compare/Col_endo_vs_embryo_CpG_neg_DMRs.sorted.uniq.bed", sep=tab, header=None, names=['chr','start','end'])
endohyper=pd.read_csv(featurepath+"regions2compare/Col_endo_vs_embryo_CpG_pos_DMRs.sorted.uniq.bed", sep=tab, header=None, names=['chr','start','end'])
dmebed=pd.read_csv(featurepath+"regions2compare/dme_genes_gehring.bed", sep=tab, header=None, names=['chr','start','end','feature'])
drddcgonly=pd.read_csv(featurepath+"regions2compare/drdd_CG_hyperdmrs_CGonly.bed", sep=tab, header=None, names=['chr','start','end','score'])



## genes and tes

In [None]:
#features near DMRs
# Each table is read with the windowcol layout (feature columns followed by DMR coordinates).
# "WT_and_ros1" files feed the DMEdom_* tables.
DMEdom_genes1kb=pd.read_csv(featurepath+"genes_1kb_WT_and_ros1_matVpathypo.bed", sep=tab, header=None, names=windowcol)
DMEdom_TEfrag1kb=pd.read_csv(featurepath+"TE_fragments_1kb_WT_and_ros1_matVpathypo.bed", sep=tab, header=None, names=windowcol)
DMEdom_TEmerge1kb=pd.read_csv(featurepath+"TEmerge_named_1kb_WT_and_ros1_matVpathypo.bed", sep=tab, header=None, names=windowcol)

# "ros1_needed" files feed the ROS1ant_* tables.
ROS1ant_genes1kb=pd.read_csv(featurepath+"genes_1kb_ros1_needed_matVpathypo.bed", sep=tab, header=None, names=windowcol)
ROS1ant_TEfrag1kb=pd.read_csv(featurepath+"TE_fragments_1kb_ros1_needed_matVpathypo.bed", sep=tab, header=None, names=windowcol)
ROS1ant_TEmerge1kb=pd.read_csv(featurepath+"TEmerge_named_1kb_ros1_needed_matVpathypo.bed", sep=tab, header=None, names=windowcol)


In [None]:
# get_feat_count is defined elsewhere in this file; its four return values are unpacked here as
# neither / gene only / TE fragment only / both for each DMR class.
# NOTE(review): presumably these are counts of DMRs — confirm against get_feat_count.
DMEdom_neither, DMEdom_geneonly, DMEdom_teonly, DMEdom_both=get_feat_count(DMEdom, DMEdom_genes1kb, DMEdom_TEfrag1kb)
ROS1ant_neither, ROS1ant_geneonly, ROS1ant_teonly, ROS1ant_both=get_feat_count(ROS1only, ROS1ant_genes1kb, ROS1ant_TEfrag1kb)

In [None]:
# Stacked bar plot of DMR counts per class, split by which features are nearby.
# "no feature" is the class total minus every DMR that has a gene, a TE fragment, or both.
# The DME-dominant entry used to subtract DMEdom_neither rather than DMEdom_geneonly,
# which did not match the ROS1 entry; both entries now use the same formula.
feature_barplot(("DME-dominant targets", "ROS1-ant targets"), {
    "no feature": np.array([len(DMEdom)-(DMEdom_geneonly+DMEdom_teonly+DMEdom_both), len(ROS1only)-(ROS1ant_geneonly+ROS1ant_teonly+ROS1ant_both)]),
    "gene only": np.array([DMEdom_geneonly, ROS1ant_geneonly]),
    "TE fragment only": np.array([DMEdom_teonly, ROS1ant_teonly]),
    "gene and TE fragment": np.array([DMEdom_both, ROS1ant_both])
}, "number of DMRs", limit=1800)
plt.savefig(figurepath+'mathypo_DMRs_features1kb.pdf', dpi=350)

## DME-genes and imprinted genes

In [None]:
# Report how many genes from dmebed also appear (same chr/start/end/feature) in
# DMEdom_genes1kb, counting each gene once, alongside the size of dmegenes.
print("Number of DME genes defined in Gehring et al that are within 1kb of a 'DME-dominant' DMR: "
      + str(dmebed.merge(DMEdom_genes1kb, how="inner", on=['chr','start','end','feature'])
            .drop_duplicates(subset='feature')
            .shape[0]))
print("out of total: " + str(dmegenes.shape[0]) + " genes")

## number of DMRs that are within 1kb of an imprinted gene

In [None]:
# Stand-alone one-column ('feature') copies of the gene IDs in each table.
dmelist = DMEdom_genes1kb[['feature']].copy()
ros1list = ROS1ant_genes1kb[['feature']].copy()

In [None]:
#dmelist.to_csv(featurepath+'TAIR_genes_dmedominant.csv', header=False, index=False)
#ros1list.to_csv(featurepath+'TAIR_genes_ros1_needed.csv', header=False, index=False)
#ros1patlist.to_csv(featurepath+'TAIR_genes_ros1_paternal.csv', header=False, index=False)

In [None]:
# Directory (under dmrpath) holding the imprinted-gene lists.
impath=dmrpath+"imprinting_analysis/"

In [None]:
# Imprinted-gene tables. Missing values become "no", the gene-ID column is
# renamed to 'feature' so it can be matched against the DMR/gene tables, and a
# constant 'gene class' label is attached to every row.
AtMEG = (pd.read_csv(impath+"thaliana_megs.txt", sep=tab, header=0)
         .fillna("no")
         .rename(columns={"locus_name": "feature"})
         .assign(**{'gene class': "AtMEG"}))
AtPEG = (pd.read_csv(impath+"thaliana_pegs.txt", sep=tab, header=0)
         .fillna("no")
         .rename(columns={"locus_name": "feature"})
         .assign(**{'gene class': "AtPEG"}))
AlMEG = (pd.read_csv(impath+"lyrata_megs.txt", sep=tab, header=0)
         .fillna("no")
         .rename(columns={"TAIR10 homologue": "feature"})
         .assign(**{'gene class': "AlMEG"}))
AlPEG = (pd.read_csv(impath+"lyrata_pegs.txt", sep=tab, header=0)
         .fillna("no")
         .rename(columns={"TAIR10 homologue": "feature"})
         .assign(**{'gene class': "AlPEG"}))

In [None]:
# function to get variables that are counts of individual regions of each category
def get_imprinted_gene_count(genedmr):
    """Count unique DMRs that sit next to a gene from each imprinted-gene list.

    Parameters
    ----------
    genedmr : pandas.DataFrame
        One row per gene/DMR pair. Must contain a 'feature' column (gene ID)
        and the DMR coordinates in 'dmr_chr', 'dmr_start' and 'dmr_end'.

    Returns
    -------
    tuple
        ``(AtMEG_count, AtPEG_count, AlMEG_count, AlPEG_count, not_imprinted)``.
        Each ``*_count`` is the number of distinct DMRs with at least one row
        whose 'feature' is listed in the module-level AtMEG / AtPEG / AlMEG /
        AlPEG table. ``not_imprinted`` is the number of distinct DMRs minus
        the sum of the four counts, so a DMR that hits more than one list is
        subtracted once per list.
    """
    dmr_key = ['dmr_chr', 'dmr_start', 'dmr_end']
    total_dmrs = len(genedmr.drop_duplicates(subset=dmr_key))

    def count_dmrs_near(gene_table):
        # Select the gene/DMR pairs whose gene is in the list *before*
        # collapsing to unique DMRs. Collapsing first keeps only the first
        # gene listed for each DMR, so a DMR near both a non-imprinted and an
        # imprinted gene could be missed depending on row order.
        hits = genedmr[genedmr['feature'].isin(gene_table['feature'])]
        return len(hits.drop_duplicates(subset=dmr_key))

    AtMEG_count = count_dmrs_near(AtMEG)
    AtPEG_count = count_dmrs_near(AtPEG)
    AlMEG_count = count_dmrs_near(AlMEG)
    AlPEG_count = count_dmrs_near(AlPEG)

    not_imprinted = total_dmrs - (AtMEG_count + AtPEG_count + AlMEG_count + AlPEG_count)

    return AtMEG_count, AtPEG_count, AlMEG_count, AlPEG_count, not_imprinted

In [None]:
#stacked barplot of features near regions of interest
def imprinting_barplot(genotypes_list, feat_count_dict, y_label, limit):
    """Draw a stacked bar chart of region counts per imprinted-gene class.

    Parameters
    ----------
    genotypes_list : sequence of str
        One x-axis label per bar.
    feat_count_dict : dict
        Maps a legend label to an array-like with one count per bar. Layers
        are stacked in the dict's iteration order. The labels
        "not imprinted", "AtMEG", "AtPEG", "AlMEG" and "AlPEG" get fixed
        colours; any other label falls back to matplotlib's default colour.
    y_label : str
        Y-axis label.
    limit : float
        Upper y-axis limit.

    Returns
    -------
    None
        The figure is left as the current matplotlib figure so the caller can
        save it with ``plt.savefig``.
    """
    #style
    sns.set_style('white')

    class_colors = {
        "not imprinted": "#DEE1E1",
        "AtMEG": "#8EA604",
        "AtPEG": "#DE1389",
        "AlMEG": "#CCCCFF",
        "AlPEG": "#D35269",
    }
    width = 0.6

    fig, ax = plt.subplots()
    # One running baseline per bar (previously hard-coded to exactly two bars).
    bottom = np.zeros(len(genotypes_list))
    ax.grid(axis='y')

    for gene_class, feat_count in feat_count_dict.items():
        # .get() returns None for an unknown label, which lets matplotlib pick
        # a colour instead of reusing the previous layer's colour or failing.
        ax.bar(genotypes_list, feat_count, width, label=gene_class,
               bottom=bottom, color=class_colors.get(gene_class))
        bottom = bottom + np.asarray(feat_count, dtype=float)

    # general layout
    ax.tick_params(axis='both', labelsize=17)
    ax.set_ylabel(y_label, fontsize=20)
    ax.legend(loc='upper right', title="Within 1kb of gene:", fontsize=15, title_fontsize=15)
    ax.set_ylim(0, limit)

    plt.tight_layout()


In [None]:
# Bar labels for the imprinting bar plots, in the order the counts are supplied (DME first, ROS1 second).
mathypo_list=("DME-dominant", "ROS1-antagonized")

In [None]:
# Per-class counts of DMRs near imprinted genes; the tuple order is AtMEG, AtPEG, AlMEG, AlPEG, not imprinted.
DME_atmeg, DME_atpeg, DME_almeg, DME_alpeg, DME_not=get_imprinted_gene_count(DMEdom_genes1kb)
ROS1_atmeg, ROS1_atpeg, ROS1_almeg, ROS1_alpeg, ROS1_not=get_imprinted_gene_count(ROS1ant_genes1kb)

In [None]:
# Stacked bars: not imprinted + AtMEG + AtPEG for each DMR class, saved as a PDF.
# NOTE(review): DME_not / ROS1_not come from get_imprinted_gene_count, which also subtracts the
# AlMEG and AlPEG counts that are not drawn here — confirm the stacked total is what is intended.
imprinting_barplot(mathypo_list, {
    "not imprinted": np.array([DME_not, ROS1_not]),
    "AtMEG": np.array([DME_atmeg, ROS1_atmeg]),
    "AtPEG": np.array([DME_atpeg, ROS1_atpeg])
}, y_label="# of gene-associated regions", limit=1020)
plt.savefig(figurepath+'mathypo_DMRs_near_imprintedgene.pdf', dpi=350)

In [None]:
# Same plot restricted to the AtMEG and AtPEG layers (smaller y-axis limit), saved as a separate PDF.
imprinting_barplot(mathypo_list, {
    "AtMEG": np.array([DME_atmeg, ROS1_atmeg]),
    "AtPEG": np.array([DME_atpeg, ROS1_atpeg])
}, y_label="# of imprinted gene\n-associated regions", limit=60)
plt.savefig(figurepath+'mathypo_DMRs_near_imprintedgene_onlyimprintedgenes.pdf', dpi=350)

## ISRs from Erdmann et al.

In [None]:
# Full ISR region sets (chr/start/end): Col_Cvi (CV_*) and Col_Ler (CL_*), mat and pat.
# These are the denominators for the "fraction of ISRs near a DMR" values computed later.
CV_matISR=pd.read_csv(featurepath+'regions2compare/Col_Cvi_matISR.bed', sep=tab, header=None, names=bed_columns)
CV_patISR=pd.read_csv(featurepath+'regions2compare/Col_Cvi_patISR.bed', sep=tab, header=None, names=bed_columns)
CL_matISR=pd.read_csv(featurepath+'regions2compare/Col_Ler_matISR.bed', sep=tab, header=None, names=bed_columns)
CL_patISR=pd.read_csv(featurepath+'regions2compare/Col_Ler_patISR.bed', sep=tab, header=None, names=bed_columns)


In [None]:
# ISR/DMR pair tables: the first three columns are the ISR coordinates, the last three the DMR coordinates.
# NOTE(review): presumably one row per ISR-DMR pair within 1 kb (per the file names) — confirm how the files were made.
# "ros1_needed" files -> *_ros1ant tables.
CV_matISR_ros1ant=pd.read_csv(featurepath+'Col_Cvi_matISR_1kb_ros1_needed_matVpathypo.bed', sep=tab, header=None, names=['chr','start','end','dmr_chr','dmr_start','dmr_end'])
CL_matISR_ros1ant=pd.read_csv(featurepath+'Col_Ler_matISR_1kb_ros1_needed_matVpathypo.bed', sep=tab, header=None, names=['chr','start','end','dmr_chr','dmr_start','dmr_end'])

CV_patISR_ros1ant=pd.read_csv(featurepath+'Col_Cvi_patISR_1kb_ros1_needed_matVpathypo.bed', sep=tab, header=None, names=['chr','start','end','dmr_chr','dmr_start','dmr_end'])
CL_patISR_ros1ant=pd.read_csv(featurepath+'Col_Ler_patISR_1kb_ros1_needed_matVpathypo.bed', sep=tab, header=None, names=['chr','start','end','dmr_chr','dmr_start','dmr_end'])

# "WT_and_ros1" files -> *_dmedom tables.
CV_matISR_dmedom=pd.read_csv(featurepath+'Col_Cvi_matISR_1kb_WT_and_ros1_matVpathypo.bed', sep=tab, header=None, names=['chr','start','end','dmr_chr','dmr_start','dmr_end'])
CL_matISR_dmedom=pd.read_csv(featurepath+'Col_Ler_matISR_1kb_WT_and_ros1_matVpathypo.bed', sep=tab, header=None, names=['chr','start','end','dmr_chr','dmr_start','dmr_end'])

CV_patISR_dmedom=pd.read_csv(featurepath+'Col_Cvi_patISR_1kb_WT_and_ros1_matVpathypo.bed', sep=tab, header=None, names=['chr','start','end','dmr_chr','dmr_start','dmr_end'])
CL_patISR_dmedom=pd.read_csv(featurepath+'Col_Ler_patISR_1kb_WT_and_ros1_matVpathypo.bed', sep=tab, header=None, names=['chr','start','end','dmr_chr','dmr_start','dmr_end'])


In [None]:
# Number of distinct DMRs in each ISR/DMR pair table: rows are collapsed on the DMR coordinates
# so that a DMR paired with several ISRs is counted once.
ros1ant_near_CVmatISR=len(CV_matISR_ros1ant.drop_duplicates(['dmr_chr','dmr_start','dmr_end']))
ros1ant_near_CLmatISR=len(CL_matISR_ros1ant.drop_duplicates(['dmr_chr','dmr_start','dmr_end']))

ros1ant_near_CVpatISR=len(CV_patISR_ros1ant.drop_duplicates(['dmr_chr','dmr_start','dmr_end']))
ros1ant_near_CLpatISR=len(CL_patISR_ros1ant.drop_duplicates(['dmr_chr','dmr_start','dmr_end']))

dmedom_near_CVmatISR=len(CV_matISR_dmedom.drop_duplicates(['dmr_chr','dmr_start','dmr_end']))
dmedom_near_CLmatISR=len(CL_matISR_dmedom.drop_duplicates(['dmr_chr','dmr_start','dmr_end']))

dmedom_near_CVpatISR=len(CV_patISR_dmedom.drop_duplicates(['dmr_chr','dmr_start','dmr_end']))
dmedom_near_CLpatISR=len(CL_patISR_dmedom.drop_duplicates(['dmr_chr','dmr_start','dmr_end']))


In [None]:
# Fraction of each DMR class that appears in an ISR pair table:
# distinct-DMR count divided by the size of the full DMR set (DMEdom or ROS1only, defined elsewhere).
dmedom_fraction_near_CVmatISR=dmedom_near_CVmatISR/len(DMEdom)
dmedom_fraction_near_CVpatISR=dmedom_near_CVpatISR/len(DMEdom)
dmedom_fraction_near_CLmatISR=dmedom_near_CLmatISR/len(DMEdom)
dmedom_fraction_near_CLpatISR=dmedom_near_CLpatISR/len(DMEdom)

ros1ant_fraction_near_CVmatISR=ros1ant_near_CVmatISR/len(ROS1only)
ros1ant_fraction_near_CVpatISR=ros1ant_near_CVpatISR/len(ROS1only)
ros1ant_fraction_near_CLmatISR=ros1ant_near_CLmatISR/len(ROS1only)
ros1ant_fraction_near_CLpatISR=ros1ant_near_CLpatISR/len(ROS1only)


In [None]:
# Print the DMR-side fractions, one per line, in a single call.
print("\n".join([
    "fraction ros1-ant near CV mat ISR:"+str(ros1ant_fraction_near_CVmatISR),
    "fraction ros1-ant near CL mat ISR:"+str(ros1ant_fraction_near_CLmatISR),
    "fraction ros1-ant near CV pat ISR:"+str(ros1ant_fraction_near_CVpatISR),
    "fraction ros1-ant near CL pat ISR:"+str(ros1ant_fraction_near_CLpatISR),
    "fraction dme dom near CV mat ISR:"+str(dmedom_fraction_near_CVmatISR),
    "fraction dme dom near CL mat ISR:"+str(dmedom_fraction_near_CLmatISR),
    "fraction dme dom near CV pat ISR:"+str(dmedom_fraction_near_CVpatISR),
    "fraction dme dom near CL pat ISR:"+str(dmedom_fraction_near_CLpatISR),
]))


In [None]:
# Fraction of each ISR set that appears in an ISR/DMR pair table.
# The pair tables are collapsed on the ISR coordinates first, so an ISR paired with several
# DMRs is counted once; this mirrors how the DMR-side counts collapse on the DMR coordinates.
# NOTE(review): the denominators assume each ISR set has one row per ISR — confirm.
CV_matISR_fraction_near_ros1ant=len(CV_matISR_ros1ant.drop_duplicates(['chr','start','end']))/len(CV_matISR)
CL_matISR_fraction_near_ros1ant=len(CL_matISR_ros1ant.drop_duplicates(['chr','start','end']))/len(CL_matISR)

CV_patISR_fraction_near_ros1ant=len(CV_patISR_ros1ant.drop_duplicates(['chr','start','end']))/len(CV_patISR)
CL_patISR_fraction_near_ros1ant=len(CL_patISR_ros1ant.drop_duplicates(['chr','start','end']))/len(CL_patISR)

CV_matISR_fraction_near_dmedom=len(CV_matISR_dmedom.drop_duplicates(['chr','start','end']))/len(CV_matISR)
CL_matISR_fraction_near_dmedom=len(CL_matISR_dmedom.drop_duplicates(['chr','start','end']))/len(CL_matISR)

CV_patISR_fraction_near_dmedom=len(CV_patISR_dmedom.drop_duplicates(['chr','start','end']))/len(CV_patISR)
CL_patISR_fraction_near_dmedom=len(CL_patISR_dmedom.drop_duplicates(['chr','start','end']))/len(CL_patISR)


In [None]:
# Print the ISR-side fractions, one per line, in a single call.
print("\n".join([
    "fraction CV mat ISR near ros1-ant:"+str(CV_matISR_fraction_near_ros1ant),
    "fraction CL mat ISR near ros1-ant:"+str(CL_matISR_fraction_near_ros1ant),
    "fraction CV pat ISR near ros1-ant:"+str(CV_patISR_fraction_near_ros1ant),
    "fraction CL pat ISR near ros1-ant:"+str(CL_patISR_fraction_near_ros1ant),
    "fraction CV mat ISR near dme dom:"+str(CV_matISR_fraction_near_dmedom),
    "fraction CL mat ISR near dme dom:"+str(CL_matISR_fraction_near_dmedom),
    "fraction CV pat ISR near dme dom:"+str(CV_patISR_fraction_near_dmedom),
    "fraction CL pat ISR near dme dom:"+str(CL_patISR_fraction_near_dmedom),
]))


# Sumby ros1 parent of origin data at allelic DMRs

In [None]:
# Directory (under path) that readindata() prepends to every file name it is given.
sumpath=path+"sumby_features/2parent_v_1parent_ros1_analysis/"

### functions

In [None]:
def readindata(file):
    """Load one headerless sumby table from ``sumpath`` and drop its 'name' column.

    *file* is a file name relative to the module-level ``sumpath``. The file
    is read with the module-level ``tab`` separator into the columns
    chr, start, end, name, avg_methy, nC; the returned DataFrame keeps every
    column except 'name'.
    """
    table = pd.read_csv(
        sumpath+file,
        sep=tab,
        header=None,
        names=['chr','start','end','name','avg_methy','nC'],
    )
    return table.drop(columns=['name'])
def makeavgdf(datadf):
    """Average the three replicate columns of each cross/allele combination.

    *datadf* must contain chr/start/end plus columns named
    ``<cross>_<rep>_<parent>`` for rep 1-3. The result keeps chr/start/end
    and adds, in this order, C24xr3_pat, r3xC24_pat, r3xC24_mat and
    C24xr3_mat, each being (rep1 + rep2 + rep3) / 3.
    """
    avgdf = datadf[['chr','start','end']].copy()
    combos = (('C24xr3', 'pat'), ('r3xC24', 'pat'), ('r3xC24', 'mat'), ('C24xr3', 'mat'))
    for cross, parent in combos:
        rep1, rep2, rep3 = [datadf['{}_{}_{}'.format(cross, rep, parent)] for rep in (1, 2, 3)]
        avgdf['{}_{}'.format(cross, parent)] = (rep1 + rep2 + rep3) / 3
    return avgdf

# Column labels applied (via set_axis) to the 27-column table returned by makedatadf:
# coordinates, then a methylation value and its nC column for each replicate of
# C24xr3 pat, r3xC24 pat, r3xC24 mat and C24xr3 mat.
# The final label used to repeat 'nC_C24xr3_2_mat', which produced a duplicated column name.
poi_columns=['chr','start','end','C24xr3_1_pat','nC_C24xr3_1_pat','C24xr3_2_pat','nC_C24xr3_2_pat','C24xr3_3_pat','nC_C24xr3_3_pat',
             'r3xC24_1_pat','nC_r3xC24_1_pat','r3xC24_2_pat','nC_r3xC24_2_pat','r3xC24_3_pat','nC_r3xC24_3_pat',
             'r3xC24_1_mat','nC_r3xC24_1_mat','r3xC24_2_mat','nC_r3xC24_2_mat','r3xC24_3_mat','nC_r3xC24_3_mat',
             'C24xr3_1_mat','nC_C24xr3_1_mat','C24xr3_2_mat','nC_C24xr3_2_mat','C24xr3_3_mat','nC_C24xr3_3_mat']

In [None]:
import scipy as sp

def get_correlation (df, col1, col2):
    """Return ``(r, p)`` from scipy's Pearson correlation of two columns of *df*."""
    xs, ys = df[col1], df[col2]
    return tuple(sp.stats.pearsonr(x=xs, y=ys))

def scatter_dmrmatpat(df, x, y, xlabel, ylabel, hue='DMR class', xlim=(-0.02,1.02), ylim=(-0.02, 1.02),correlate=False, left_text=True, saveas=False):
    """Scatter two columns of *df*, optionally coloured by *hue* and annotated with Pearson's r.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot.
    x, y : str
        Column names for the x and y values.
    xlabel, ylabel : str
        Axis labels.
    hue : str or None
        Column used to colour the points; ``None`` draws a single colour and
        no legend. The values "_lefton_TE" and "_lefton_gene" replace the
        automatic legend with a fixed two-entry legend.
    xlim, ylim : pair of float
        Axis limits.
    correlate : bool
        When true, print r and p from ``get_correlation`` and write them on
        the axes.
    left_text : bool
        Put the correlation text at the upper left (true) or lower right
        (false) of the axes.
    saveas : str or False
        Output path. When falsy the figure is not saved.

    Returns
    -------
    ``(fig, ax)`` when the figure is not saved, otherwise ``None``.
    """
    fig, ax = plt.subplots()

    if hue is None:
        sns.scatterplot(data=df, x=x, y=y, ax=ax)
    else:
        sns.scatterplot(data=df, x=x, y=y,hue=hue, ax=ax)
        sns.move_legend(ax, 'lower right')
        # NOTE(review): the "_lefton_TE" hue gets gene labels and the "_lefton_gene" hue gets
        # TE labels — confirm this pairing matches how those indicator columns were built.
        custom_labels = {
            "_lefton_TE": ['no gene within 1kb','gene within 1kb or intersecting'],
            "_lefton_gene": ['no TE within 1kb','TE within 1kb or intersecting'],
        }
        labels = custom_labels.get(hue) if isinstance(hue, str) else None
        if labels is not None:
            legend_elements = [Line2D([0], [0], marker='o', color='w', label='Scatter',markerfacecolor=face, markersize=7)
                               for face in ('b', 'g')]
            ax.legend(handles=legend_elements, labels=labels)

    if correlate:
        r, p = get_correlation(df, x, y)
        print('r='+str(r))
        print('p='+str(p))
        # Axes-fraction positions for the r line and the p line.
        if left_text:
            r_pos, p_pos = (.05, .8), (.05, .75)
        else:
            r_pos, p_pos = (.65, .4), (.65, .35)
        ax.text(r_pos[0], r_pos[1], "Pearson's r ={:.2f}".format(r), transform=ax.transAxes)
        ax.text(p_pos[0], p_pos[1], "p value ={:.6f}".format(p), transform=ax.transAxes)

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_xlabel(xlabel, fontsize=15)
    ax.set_ylabel(ylabel, fontsize=15)
    ax.tick_params(labelsize=12)

    if saveas:
        print('plot saved')
        fig.savefig(saveas, dpi=350)
        return None

    print('plot not saved')
    # The original had a plt.show() after this return; it could never run and has been removed.
    return fig, ax
            

### read in data

In [None]:
# One table per cross replicate (1-3), allele file (Col_spiked / C24_pseudo) and context (CG / CHG / CHH).
# Variable naming used throughout this cell:
#   C24xr3 files: Col_spiked -> r3_pat_*,  C24_pseudo -> C24_mat_*
#   r3xC24 files: Col_spiked -> r3_mat_*,  C24_pseudo -> C24_pat_*
# "ros1_only_NEITHERcheck" files -> *_sumby_ros1ant; "WT_and_ros1" files -> *_sumby_dmedom.

# --- ros1ant regions, C24xr3 cross ---
r3_pat_1_CG_sumby_ros1ant=readindata("C24xr3_1_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_1_CHG_sumby_ros1ant=readindata("C24xr3_1_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_1_CHH_sumby_ros1ant=readindata("C24xr3_1_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

C24_mat_1_CG_sumby_ros1ant=readindata("C24xr3_1_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_1_CHG_sumby_ros1ant=readindata("C24xr3_1_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_1_CHH_sumby_ros1ant=readindata("C24xr3_1_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

r3_pat_2_CG_sumby_ros1ant=readindata("C24xr3_2_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_2_CHG_sumby_ros1ant=readindata("C24xr3_2_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_2_CHH_sumby_ros1ant=readindata("C24xr3_2_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

C24_mat_2_CG_sumby_ros1ant=readindata("C24xr3_2_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_2_CHG_sumby_ros1ant=readindata("C24xr3_2_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_2_CHH_sumby_ros1ant=readindata("C24xr3_2_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

r3_pat_3_CG_sumby_ros1ant=readindata("C24xr3_3_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_3_CHG_sumby_ros1ant=readindata("C24xr3_3_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_3_CHH_sumby_ros1ant=readindata("C24xr3_3_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

C24_mat_3_CG_sumby_ros1ant=readindata("C24xr3_3_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_3_CHG_sumby_ros1ant=readindata("C24xr3_3_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_3_CHH_sumby_ros1ant=readindata("C24xr3_3_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")


# --- ros1ant regions, r3xC24 cross ---
r3_mat_1_CG_sumby_ros1ant=readindata("r3xC24_1_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_1_CHG_sumby_ros1ant=readindata("r3xC24_1_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_1_CHH_sumby_ros1ant=readindata("r3xC24_1_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

C24_pat_1_CG_sumby_ros1ant=readindata("r3xC24_1_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_1_CHG_sumby_ros1ant=readindata("r3xC24_1_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_1_CHH_sumby_ros1ant=readindata("r3xC24_1_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

r3_mat_2_CG_sumby_ros1ant=readindata("r3xC24_2_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_2_CHG_sumby_ros1ant=readindata("r3xC24_2_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_2_CHH_sumby_ros1ant=readindata("r3xC24_2_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

C24_pat_2_CG_sumby_ros1ant=readindata("r3xC24_2_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_2_CHG_sumby_ros1ant=readindata("r3xC24_2_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_2_CHH_sumby_ros1ant=readindata("r3xC24_2_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

r3_mat_3_CG_sumby_ros1ant=readindata("r3xC24_3_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_3_CHG_sumby_ros1ant=readindata("r3xC24_3_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_3_CHH_sumby_ros1ant=readindata("r3xC24_3_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

C24_pat_3_CG_sumby_ros1ant=readindata("r3xC24_3_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_3_CHG_sumby_ros1ant=readindata("r3xC24_3_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_3_CHH_sumby_ros1ant=readindata("r3xC24_3_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

# --- dmedom regions, C24xr3 cross ---
r3_pat_1_CG_sumby_dmedom=readindata("C24xr3_1_Col_spiked_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_pat_1_CHG_sumby_dmedom=readindata("C24xr3_1_Col_spiked_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_pat_1_CHH_sumby_dmedom=readindata("C24xr3_1_Col_spiked_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

C24_mat_1_CG_sumby_dmedom=readindata("C24xr3_1_C24_pseudo_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_mat_1_CHG_sumby_dmedom=readindata("C24xr3_1_C24_pseudo_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_mat_1_CHH_sumby_dmedom=readindata("C24xr3_1_C24_pseudo_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

r3_pat_2_CG_sumby_dmedom=readindata("C24xr3_2_Col_spiked_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_pat_2_CHG_sumby_dmedom=readindata("C24xr3_2_Col_spiked_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_pat_2_CHH_sumby_dmedom=readindata("C24xr3_2_Col_spiked_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

C24_mat_2_CG_sumby_dmedom=readindata("C24xr3_2_C24_pseudo_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_mat_2_CHG_sumby_dmedom=readindata("C24xr3_2_C24_pseudo_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_mat_2_CHH_sumby_dmedom=readindata("C24xr3_2_C24_pseudo_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

r3_pat_3_CG_sumby_dmedom=readindata("C24xr3_3_Col_spiked_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_pat_3_CHG_sumby_dmedom=readindata("C24xr3_3_Col_spiked_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_pat_3_CHH_sumby_dmedom=readindata("C24xr3_3_Col_spiked_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

C24_mat_3_CG_sumby_dmedom=readindata("C24xr3_3_C24_pseudo_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_mat_3_CHG_sumby_dmedom=readindata("C24xr3_3_C24_pseudo_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_mat_3_CHH_sumby_dmedom=readindata("C24xr3_3_C24_pseudo_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

# --- dmedom regions, r3xC24 cross ---
r3_mat_1_CG_sumby_dmedom=readindata("r3xC24_1_Col_spiked_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_mat_1_CHG_sumby_dmedom=readindata("r3xC24_1_Col_spiked_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_mat_1_CHH_sumby_dmedom=readindata("r3xC24_1_Col_spiked_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

C24_pat_1_CG_sumby_dmedom=readindata("r3xC24_1_C24_pseudo_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_pat_1_CHG_sumby_dmedom=readindata("r3xC24_1_C24_pseudo_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_pat_1_CHH_sumby_dmedom=readindata("r3xC24_1_C24_pseudo_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

r3_mat_2_CG_sumby_dmedom=readindata("r3xC24_2_Col_spiked_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_mat_2_CHG_sumby_dmedom=readindata("r3xC24_2_Col_spiked_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_mat_2_CHH_sumby_dmedom=readindata("r3xC24_2_Col_spiked_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

C24_pat_2_CG_sumby_dmedom=readindata("r3xC24_2_C24_pseudo_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_pat_2_CHG_sumby_dmedom=readindata("r3xC24_2_C24_pseudo_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_pat_2_CHH_sumby_dmedom=readindata("r3xC24_2_C24_pseudo_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

r3_mat_3_CG_sumby_dmedom=readindata("r3xC24_3_Col_spiked_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_mat_3_CHG_sumby_dmedom=readindata("r3xC24_3_Col_spiked_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
r3_mat_3_CHH_sumby_dmedom=readindata("r3xC24_3_Col_spiked_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")

C24_pat_3_CG_sumby_dmedom=readindata("r3xC24_3_C24_pseudo_CG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_pat_3_CHG_sumby_dmedom=readindata("r3xC24_3_C24_pseudo_CHG_sumby_WT_and_ros1_matVpat_hypo_allC.bed")
C24_pat_3_CHH_sumby_dmedom=readindata("r3xC24_3_C24_pseudo_CHH_sumby_WT_and_ros1_matVpat_hypo_allC.bed")


### make averaged dfs from 1parent data

In [None]:
def makedatadf(r3_pat1,r3_pat2,r3_pat3,C24_pat1,C24_pat2,C24_pat3,r3_mat1,r3_mat2,r3_mat3,C24_mat1,C24_mat2,C24_mat3):
    """Combine twelve per-replicate tables into one wide table of complete rows.

    Each argument is a DataFrame with chr/start/end plus 'avg_methy' and 'nC'.
    The three replicates of each group are outer-merged on the coordinates,
    the four groups are then outer-merged in the order r3 pat, C24 pat,
    r3 mat, C24 mat, and any row containing a missing value is dropped.
    The result has 27 columns: the coordinates followed by six value columns
    per group.
    """
    coords = ['chr','start','end']

    def combine_replicates(rep1, rep2, rep3):
        # Replicates 1 and 2 get the _1/_2 suffixes; replicate 3 keeps the
        # bare 'avg_methy'/'nC' names and joins on every shared column.
        first_two = rep1.merge(rep2, on=coords, how="outer", suffixes=["_1", "_2"])
        return first_two.merge(rep3, how="outer")

    r3pat = combine_replicates(r3_pat1, r3_pat2, r3_pat3)
    C24pat = combine_replicates(C24_pat1, C24_pat2, C24_pat3)
    r3mat = combine_replicates(r3_mat1, r3_mat2, r3_mat3)
    C24mat = combine_replicates(C24_mat1, C24_mat2, C24_mat3)

    # Start from the r3 pat group with fully explicit column names.
    out = r3pat[coords + ['avg_methy_1', 'nC_1', 'avg_methy_2', 'nC_2', 'avg_methy', 'nC']].rename(columns={
        'avg_methy_1': 'avg_methy_r3pat_1',
        'nC_1': 'nC_r3pat_1',
        'avg_methy_2': 'avg_methy_r3pat_2',
        'nC_2': 'nC_r3pat_2',
        'avg_methy': 'avg_methy_r3pat_3',
        'nC': 'nC_r3pat_3',
    })

    # Append the remaining groups; only each group's replicate-3 columns are
    # renamed, the replicate-1/2 columns keep whatever names the merge gives them.
    for group_df, tag in ((C24pat, 'C24pat'), (r3mat, 'r3mat'), (C24mat, 'C24mat')):
        out = out.merge(group_df, on=coords, how="outer")
        out = out.rename(columns={"avg_methy": "avg_methy_" + tag + "_3", "nC": "nC_" + tag + "_3"})

    return out.dropna()
    
    
    

In [None]:
# CG, ros1ant tables: merge the twelve replicate tables, relabel the columns with poi_columns,
# average the replicates, and tag the rows with their DMR class.
r3ant_POI_mCG=makedatadf(r3_pat_1_CG_sumby_ros1ant, r3_pat_2_CG_sumby_ros1ant, r3_pat_3_CG_sumby_ros1ant, 
                          C24_pat_1_CG_sumby_ros1ant, C24_pat_2_CG_sumby_ros1ant, C24_pat_3_CG_sumby_ros1ant,
                          r3_mat_1_CG_sumby_ros1ant, r3_mat_2_CG_sumby_ros1ant, r3_mat_3_CG_sumby_ros1ant, 
                          C24_mat_1_CG_sumby_ros1ant, C24_mat_2_CG_sumby_ros1ant, C24_mat_3_CG_sumby_ros1ant).set_axis(poi_columns, axis='columns')

r3ant_POI_mCG_avg=makeavgdf(r3ant_POI_mCG) 
r3ant_POI_mCG_avg['DMR class']="ROS1-antagonized"

In [None]:
# CG, dmedom tables: merge the twelve replicate tables, relabel the columns with poi_columns,
# average the replicates, and tag the rows with their DMR class.
dmedom_POI_mCG=makedatadf(r3_pat_1_CG_sumby_dmedom, r3_pat_2_CG_sumby_dmedom, r3_pat_3_CG_sumby_dmedom, 
                          C24_pat_1_CG_sumby_dmedom, C24_pat_2_CG_sumby_dmedom, C24_pat_3_CG_sumby_dmedom,
                          r3_mat_1_CG_sumby_dmedom, r3_mat_2_CG_sumby_dmedom, r3_mat_3_CG_sumby_dmedom, 
                          C24_mat_1_CG_sumby_dmedom, C24_mat_2_CG_sumby_dmedom, C24_mat_3_CG_sumby_dmedom).set_axis(poi_columns, axis='columns')

dmedom_POI_mCG_avg=makeavgdf(dmedom_POI_mCG) 
dmedom_POI_mCG_avg['DMR class']="DME-dominant"

In [None]:
# Stack both DMR classes (DME-dominant rows first) into one CG table; the original row indexes are kept.
mathypo_avgmCG=pd.concat([dmedom_POI_mCG_avg,r3ant_POI_mCG_avg])

In [None]:
# CHG, ros1ant tables: merge the twelve replicate tables, relabel the columns with poi_columns,
# average the replicates, and tag the rows with their DMR class.
r3ant_POI_mCHG=makedatadf(r3_pat_1_CHG_sumby_ros1ant, r3_pat_2_CHG_sumby_ros1ant, r3_pat_3_CHG_sumby_ros1ant, 
                          C24_pat_1_CHG_sumby_ros1ant, C24_pat_2_CHG_sumby_ros1ant, C24_pat_3_CHG_sumby_ros1ant,
                          r3_mat_1_CHG_sumby_ros1ant, r3_mat_2_CHG_sumby_ros1ant, r3_mat_3_CHG_sumby_ros1ant, 
                          C24_mat_1_CHG_sumby_ros1ant, C24_mat_2_CHG_sumby_ros1ant, C24_mat_3_CHG_sumby_ros1ant).set_axis(poi_columns, axis='columns')

r3ant_POI_mCHG_avg=makeavgdf(r3ant_POI_mCHG) 
r3ant_POI_mCHG_avg['DMR class']="ROS1-antagonized"

In [None]:
# CHG, dmedom tables: merge the twelve replicate tables, relabel the columns with poi_columns,
# average the replicates, and tag the rows with their DMR class.
dmedom_POI_mCHG=makedatadf(r3_pat_1_CHG_sumby_dmedom, r3_pat_2_CHG_sumby_dmedom, r3_pat_3_CHG_sumby_dmedom, 
                          C24_pat_1_CHG_sumby_dmedom, C24_pat_2_CHG_sumby_dmedom, C24_pat_3_CHG_sumby_dmedom,
                          r3_mat_1_CHG_sumby_dmedom, r3_mat_2_CHG_sumby_dmedom, r3_mat_3_CHG_sumby_dmedom, 
                          C24_mat_1_CHG_sumby_dmedom, C24_mat_2_CHG_sumby_dmedom, C24_mat_3_CHG_sumby_dmedom).set_axis(poi_columns, axis='columns')

dmedom_POI_mCHG_avg=makeavgdf(dmedom_POI_mCHG) 
dmedom_POI_mCHG_avg['DMR class']="DME-dominant"

In [None]:
# Stack both DMR classes (DME-dominant rows first) into one CHG table; the original row indexes are kept.
mathypo_avgmCHG=pd.concat([dmedom_POI_mCHG_avg,r3ant_POI_mCHG_avg])


In [None]:
# CHH: same three steps as for CG and CHG, done for both DMR classes in one cell.
# ros1ant tables -> merged, relabelled with poi_columns, averaged, tagged.
r3ant_POI_mCHH=makedatadf(r3_pat_1_CHH_sumby_ros1ant, r3_pat_2_CHH_sumby_ros1ant, r3_pat_3_CHH_sumby_ros1ant, 
                          C24_pat_1_CHH_sumby_ros1ant, C24_pat_2_CHH_sumby_ros1ant, C24_pat_3_CHH_sumby_ros1ant,
                          r3_mat_1_CHH_sumby_ros1ant, r3_mat_2_CHH_sumby_ros1ant, r3_mat_3_CHH_sumby_ros1ant, 
                          C24_mat_1_CHH_sumby_ros1ant, C24_mat_2_CHH_sumby_ros1ant, C24_mat_3_CHH_sumby_ros1ant).set_axis(poi_columns, axis='columns')

r3ant_POI_mCHH_avg=makeavgdf(r3ant_POI_mCHH) 
r3ant_POI_mCHH_avg['DMR class']="ROS1-antagonized"

# dmedom tables -> merged, relabelled with poi_columns, averaged, tagged.
dmedom_POI_mCHH=makedatadf(r3_pat_1_CHH_sumby_dmedom, r3_pat_2_CHH_sumby_dmedom, r3_pat_3_CHH_sumby_dmedom, 
                          C24_pat_1_CHH_sumby_dmedom, C24_pat_2_CHH_sumby_dmedom, C24_pat_3_CHH_sumby_dmedom,
                          r3_mat_1_CHH_sumby_dmedom, r3_mat_2_CHH_sumby_dmedom, r3_mat_3_CHH_sumby_dmedom, 
                          C24_mat_1_CHH_sumby_dmedom, C24_mat_2_CHH_sumby_dmedom, C24_mat_3_CHH_sumby_dmedom).set_axis(poi_columns, axis='columns')

dmedom_POI_mCHH_avg=makeavgdf(dmedom_POI_mCHH) 
dmedom_POI_mCHH_avg['DMR class']="DME-dominant"

# Stack both DMR classes (DME-dominant rows first); the original row indexes are kept.
mathypo_avgmCHH=pd.concat([dmedom_POI_mCHH_avg,r3ant_POI_mCHH_avg])

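### Read in data for the 2-parent ros1 comparison (this re-uses, and therefore overwrites, the `*_sumby_ros1ant` variable names loaded above, so cell order matters)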

In [None]:
r3_pat_1_CG_sumby_ros1ant=readindata("r1xr3_1_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_1_CHG_sumby_ros1ant=readindata("r1xr3_1_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_1_CHH_sumby_ros1ant=readindata("r1xr3_1_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

r3_pat_2_CG_sumby_ros1ant=readindata("r1xr3_2_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_2_CHG_sumby_ros1ant=readindata("r1xr3_2_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_2_CHH_sumby_ros1ant=readindata("r1xr3_2_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

r3_pat_3_CG_sumby_ros1ant=readindata("r1xr3_3_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_3_CHG_sumby_ros1ant=readindata("r1xr3_3_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_pat_3_CHH_sumby_ros1ant=readindata("r1xr3_3_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")


C24_mat_1_CG_sumby_ros1ant=readindata("C24xCol_1_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_1_CHG_sumby_ros1ant=readindata("C24xCol_1_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_1_CHH_sumby_ros1ant=readindata("C24xCol_1_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

C24_mat_2_CG_sumby_ros1ant=readindata("C24xCol_2_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_2_CHG_sumby_ros1ant=readindata("C24xCol_2_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_2_CHH_sumby_ros1ant=readindata("C24xCol_2_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

# C24xCol replicate 3, "C24_pseudo" tables (CG / CHG / CHH) -> C24_mat_3_*.
C24_mat_3_CG_sumby_ros1ant=readindata("C24xCol_3_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_3_CHG_sumby_ros1ant=readindata("C24xCol_3_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_mat_3_CHH_sumby_ros1ant=readindata("C24xCol_3_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")


# r3xr1 replicates 1-3, "Col_spiked" tables (CG / CHG / CHH) -> r3_mat_<rep>_*.
# NOTE(review): variable names suggest these are the maternal (r3) allele — confirm.
r3_mat_1_CG_sumby_ros1ant=readindata("r3xr1_1_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_1_CHG_sumby_ros1ant=readindata("r3xr1_1_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_1_CHH_sumby_ros1ant=readindata("r3xr1_1_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

r3_mat_2_CG_sumby_ros1ant=readindata("r3xr1_2_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_2_CHG_sumby_ros1ant=readindata("r3xr1_2_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_2_CHH_sumby_ros1ant=readindata("r3xr1_2_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

r3_mat_3_CG_sumby_ros1ant=readindata("r3xr1_3_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_3_CHG_sumby_ros1ant=readindata("r3xr1_3_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_mat_3_CHH_sumby_ros1ant=readindata("r3xr1_3_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")


# ColxC24 replicates 1-3, "C24_pseudo" tables (CG / CHG / CHH) -> C24_pat_<rep>_*.
C24_pat_1_CG_sumby_ros1ant=readindata("ColxC24_1_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_1_CHG_sumby_ros1ant=readindata("ColxC24_1_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_1_CHH_sumby_ros1ant=readindata("ColxC24_1_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

C24_pat_2_CG_sumby_ros1ant=readindata("ColxC24_2_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_2_CHG_sumby_ros1ant=readindata("ColxC24_2_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_2_CHH_sumby_ros1ant=readindata("ColxC24_2_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

C24_pat_3_CG_sumby_ros1ant=readindata("ColxC24_3_C24_pseudo_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_3_CHG_sumby_ros1ant=readindata("ColxC24_3_C24_pseudo_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
C24_pat_3_CHH_sumby_ros1ant=readindata("ColxC24_3_C24_pseudo_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")



# ColxC24 replicates 1-3, "Col_spiked" tables (CG / CHG / CHH) -> Col_mat_<rep>_*.
# These come from the same cross/replicate as the C24_pat_* tables above, but from the other genome's file.
Col_mat_1_CG_sumby_ros1ant=readindata("ColxC24_1_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_mat_1_CHG_sumby_ros1ant=readindata("ColxC24_1_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_mat_1_CHH_sumby_ros1ant=readindata("ColxC24_1_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

Col_mat_2_CG_sumby_ros1ant=readindata("ColxC24_2_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_mat_2_CHG_sumby_ros1ant=readindata("ColxC24_2_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_mat_2_CHH_sumby_ros1ant=readindata("ColxC24_2_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

Col_mat_3_CG_sumby_ros1ant=readindata("ColxC24_3_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_mat_3_CHG_sumby_ros1ant=readindata("ColxC24_3_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_mat_3_CHH_sumby_ros1ant=readindata("ColxC24_3_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")


# C24xCol replicates 1-3, "Col_spiked" tables (CG / CHG / CHH) -> Col_pat_<rep>_*.
Col_pat_1_CG_sumby_ros1ant=readindata("C24xCol_1_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_pat_1_CHG_sumby_ros1ant=readindata("C24xCol_1_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_pat_1_CHH_sumby_ros1ant=readindata("C24xCol_1_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

Col_pat_2_CG_sumby_ros1ant=readindata("C24xCol_2_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_pat_2_CHG_sumby_ros1ant=readindata("C24xCol_2_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_pat_2_CHH_sumby_ros1ant=readindata("C24xCol_2_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

Col_pat_3_CG_sumby_ros1ant=readindata("C24xCol_3_Col_spiked_CG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_pat_3_CHG_sumby_ros1ant=readindata("C24xCol_3_Col_spiked_CHG_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
Col_pat_3_CHH_sumby_ros1ant=readindata("C24xCol_3_Col_spiked_CHH_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")


In [None]:
r3ant_POI_mCG=makedatadf(r3_pat_1_CG_sumby_ros1ant, r3_pat_2_CG_sumby_ros1ant, r3_pat_3_CG_sumby_ros1ant, 
                          C24_pat_1_CG_sumby_ros1ant, C24_pat_2_CG_sumby_ros1ant, C24_pat_3_CG_sumby_ros1ant,
                          r3_mat_1_CG_sumby_ros1ant, r3_mat_2_CG_sumby_ros1ant, r3_mat_3_CG_sumby_ros1ant, 
                          C24_mat_1_CG_sumby_ros1ant, C24_mat_2_CG_sumby_ros1ant, C24_mat_3_CG_sumby_ros1ant).set_axis(poi_columns, axis='columns')

r3ant_POI_mCG_avg=makeavgdf(r3ant_POI_mCG).set_axis(['chr','start','end','r3_pat','C24_pat','r3_mat','C24_mat'], axis="columns")

In [None]:
CG_f1_data_merge=r3ant_POI_mCG_avg.merge(mathypo_avgmCG.drop(labels=['DMR class'], axis=1), on= ['chr','start','end'])

In [None]:
r3ant_POI_mCHG=makedatadf(r3_pat_1_CHG_sumby_ros1ant, r3_pat_2_CHG_sumby_ros1ant, r3_pat_3_CHG_sumby_ros1ant, 
                          C24_pat_1_CHG_sumby_ros1ant, C24_pat_2_CHG_sumby_ros1ant, C24_pat_3_CHG_sumby_ros1ant,
                          r3_mat_1_CHG_sumby_ros1ant, r3_mat_2_CHG_sumby_ros1ant, r3_mat_3_CHG_sumby_ros1ant, 
                          C24_mat_1_CHG_sumby_ros1ant, C24_mat_2_CHG_sumby_ros1ant, C24_mat_3_CHG_sumby_ros1ant).set_axis(poi_columns, axis='columns')

r3ant_POI_mCHG_avg=makeavgdf(r3ant_POI_mCHG).set_axis(['chr','start','end','r3_pat','C24_pat','r3_mat','C24_mat'], axis="columns")

In [None]:
CHG_f1_data_merge=r3ant_POI_mCHG_avg.merge(mathypo_avgmCHG.drop(labels=['DMR class'], axis=1), on= ['chr','start','end'])

In [None]:
r3ant_POI_mCHH=makedatadf(r3_pat_1_CHH_sumby_ros1ant, r3_pat_2_CHH_sumby_ros1ant, r3_pat_3_CHH_sumby_ros1ant, 
                          C24_pat_1_CHH_sumby_ros1ant, C24_pat_2_CHH_sumby_ros1ant, C24_pat_3_CHH_sumby_ros1ant,
                          r3_mat_1_CHH_sumby_ros1ant, r3_mat_2_CHH_sumby_ros1ant, r3_mat_3_CHH_sumby_ros1ant, 
                          C24_mat_1_CHH_sumby_ros1ant, C24_mat_2_CHH_sumby_ros1ant, C24_mat_3_CHH_sumby_ros1ant).set_axis(poi_columns, axis='columns')

r3ant_POI_mCHH_avg=makeavgdf(r3ant_POI_mCHH).set_axis(['chr','start','end','r3_pat','C24_pat','r3_mat','C24_mat'], axis="columns")

In [None]:
CHH_f1_data_merge=r3ant_POI_mCHH_avg.merge(mathypo_avgmCHH.drop(labels=['DMR class'], axis=1), on= ['chr','start','end'])

### scatterplot figures

In [None]:
scatter_dmrmatpat(CG_f1_data_merge, x="C24xr3_mat", y="C24_mat", xlabel="C24xr3 maternal allele mCG", ylabel="C24xCol maternal allele mCG", hue=None,correlate=True, left_text=False, saveas=figurepath+"C24mat_F1s_mCG_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CG_f1_data_merge, x="C24xr3_mat", y="C24_mat", xlabel="C24xr3 maternal allele mCG", ylabel="C24xCol maternal allele mCG", hue=None,correlate=True, left_text=False, saveas=figurepath+"C24mat_F1s_mCG_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CG_f1_data_merge, x="r3xC24_pat", y="C24_pat", xlabel="r3xC24 paternal allele mCG", ylabel="ColxC24 paternal allele mCG", hue=None,correlate=True, left_text=False, saveas=figurepath+"C24pat_F1s_mCG_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CG_f1_data_merge, x="r3xC24_mat", y="r3_mat", xlabel="r3xC24 maternal allele mCG", ylabel="r3xr1 maternal allele mCG", hue=None,correlate=True, left_text=False, saveas=figurepath+"r3mat_F1s_mCG_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CG_f1_data_merge, x="C24xr3_pat", y="r3_pat", xlabel="C24xr3 paternal allele mCG", ylabel="r1xr3 paternal allele mCG", hue=None,correlate=True, left_text=False, saveas=figurepath+"r3pat_F1s_mCG_in_ros1ant_scatter.png")


In [None]:
# Filter regions of interest for browsing: keep rows whose "r3_pat" value exceeds
# the "C24xr3_pat" value by more than 1.5-fold / 1.25-fold, and write each subset
# out with make_bed().
# NOTE(review): this cell reads `f1_data_merge`, which is not assigned in the
# surrounding cells (they build CG_/CHG_/CHH_f1_data_merge) — confirm it is the
# intended table and is still defined when this cell runs.
f1_data_merge_1pt5diffpat = f1_data_merge[
    f1_data_merge["r3_pat"].div(f1_data_merge["C24xr3_pat"]).gt(1.5)
]
make_bed(f1_data_merge_1pt5diffpat, figurepath+"f1_r1r3pat_greaterthan_C24r3pat_1pt5diff.bed")

f1_data_merge_1pt25diffpat = f1_data_merge[
    f1_data_merge["r3_pat"].div(f1_data_merge["C24xr3_pat"]).gt(1.25)
]
make_bed(f1_data_merge_1pt25diffpat, figurepath+"f1_r1r3pat_greaterthan_C24r3pat_1pt25diff.bed")


In [None]:
scatter_dmrmatpat(CHG_f1_data_merge, x="C24xr3_mat", y="C24_mat", xlabel="C24xr3 mat avg mCHG", ylabel="C24xCol mat avg mCHG", hue=None,correlate=True, left_text=False, saveas=figurepath+"C24mat_F1s_mCHG_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CHG_f1_data_merge, x="r3xC24_pat", y="C24_pat", xlabel="r3xC24 pat avg mCHG", ylabel="ColxC24 pat avg mCHG", hue=None,correlate=True, left_text=False, saveas=figurepath+"C24pat_F1s_mCHG_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CHG_f1_data_merge, x="r3xC24_mat", y="r3_mat", xlabel="r3xC24 mat avg mCHG", ylabel="r3xr1 mat avg mCHG", hue=None,correlate=True, left_text=False, saveas=figurepath+"r3mat_F1s_mCHG_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CHG_f1_data_merge, x="C24xr3_pat", y="r3_pat", xlabel="C24xr3 pat avg mCHG", ylabel="r1xr3 pat avg mCHG", hue=None,correlate=True, left_text=False, saveas=figurepath+"r3pat_F1s_mCHG_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CHH_f1_data_merge, x="C24xr3_mat", y="C24_mat", xlabel="C24xr3 mat avg mCHH", ylabel="C24xCol mat avg mCHH", hue=None,correlate=True, left_text=False, saveas=figurepath+"C24mat_F1s_mCHH_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CHH_f1_data_merge, x="r3xC24_pat", y="C24_pat", xlabel="r3xC24 pat avg mCHH", ylabel="ColxC24 pat avg mCHH", hue=None,correlate=True, left_text=False, saveas=figurepath+"C24pat_F1s_mCHH_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CHH_f1_data_merge, x="r3xC24_mat", y="r3_mat", xlabel="r3xC24 mat avg mCHH", ylabel="r3xr1 mat avg mCHH", hue=None,correlate=True, left_text=False, saveas=figurepath+"r3mat_F1s_mCHH_in_ros1ant_scatter.png")


In [None]:
scatter_dmrmatpat(CHH_f1_data_merge, x="C24xr3_pat", y="r3_pat", xlabel="C24xr3 pat avg mCHH", ylabel="r1xr3 pat avg mCHH", hue=None,correlate=True, left_text=False, saveas=figurepath+"r3pat_F1s_mCHH_in_ros1ant_scatter.png")


## summed mat-hypo DMRs

In [None]:
# Column labels applied (via set_axis) to the makedatadf() tables built for the
# Col-genome comparison: three coordinate columns, then a value column and an
# 'nC_' column for each of 12 samples (4 groups x 3 replicates).
# Fix: the final label used to repeat 'nC_r3xr1_2_mat', which gave the table a
# duplicated column name and no label for replicate 3; it is now 'nC_r3xr1_3_mat'.
# NOTE(review): the labels run r1xr3-pat, C24xCol-pat, ColxC24-mat, r3xr1-mat,
# while the makedatadf() calls that use this list pass r3_pat, Col_pat, r3_mat,
# Col_mat. If makedatadf() keeps its argument order, the two maternal groups are
# labelled the wrong way round — confirm against makedatadf().
colG_columns = [
    'chr', 'start', 'end',
    'r1xr3_1_pat', 'nC_r1xr3_1_pat', 'r1xr3_2_pat', 'nC_r1xr3_2_pat', 'r1xr3_3_pat', 'nC_r1xr3_3_pat',
    'C24xCol_1_pat', 'nC_C24xCol_1_pat', 'C24xCol_2_pat', 'nC_C24xCol_2_pat', 'C24xCol_3_pat', 'nC_C24xCol_3_pat',
    'ColxC24_1_mat', 'nC_ColxC24_1_mat', 'ColxC24_2_mat', 'nC_ColxC24_2_mat', 'ColxC24_3_mat', 'nC_ColxC24_3_mat',
    'r3xr1_1_mat', 'nC_r3xr1_1_mat', 'r3xr1_2_mat', 'nC_r3xr1_2_mat', 'r3xr1_3_mat', 'nC_r3xr1_3_mat',
]

In [None]:
# Per-replicate tables for the Col-genome comparison, one per sequence context.
# Each call passes r3-pat, Col-pat, r3-mat and Col-mat inputs (three replicates
# each, in that order) and relabels the result with colG_columns.
r3ant_colG_mCG = makedatadf(
    r3_pat_1_CG_sumby_ros1ant, r3_pat_2_CG_sumby_ros1ant, r3_pat_3_CG_sumby_ros1ant,
    Col_pat_1_CG_sumby_ros1ant, Col_pat_2_CG_sumby_ros1ant, Col_pat_3_CG_sumby_ros1ant,
    r3_mat_1_CG_sumby_ros1ant, r3_mat_2_CG_sumby_ros1ant, r3_mat_3_CG_sumby_ros1ant,
    Col_mat_1_CG_sumby_ros1ant, Col_mat_2_CG_sumby_ros1ant, Col_mat_3_CG_sumby_ros1ant,
)
r3ant_colG_mCG = r3ant_colG_mCG.set_axis(colG_columns, axis='columns')

r3ant_colG_mCHG = makedatadf(
    r3_pat_1_CHG_sumby_ros1ant, r3_pat_2_CHG_sumby_ros1ant, r3_pat_3_CHG_sumby_ros1ant,
    Col_pat_1_CHG_sumby_ros1ant, Col_pat_2_CHG_sumby_ros1ant, Col_pat_3_CHG_sumby_ros1ant,
    r3_mat_1_CHG_sumby_ros1ant, r3_mat_2_CHG_sumby_ros1ant, r3_mat_3_CHG_sumby_ros1ant,
    Col_mat_1_CHG_sumby_ros1ant, Col_mat_2_CHG_sumby_ros1ant, Col_mat_3_CHG_sumby_ros1ant,
)
r3ant_colG_mCHG = r3ant_colG_mCHG.set_axis(colG_columns, axis='columns')

r3ant_colG_mCHH = makedatadf(
    r3_pat_1_CHH_sumby_ros1ant, r3_pat_2_CHH_sumby_ros1ant, r3_pat_3_CHH_sumby_ros1ant,
    Col_pat_1_CHH_sumby_ros1ant, Col_pat_2_CHH_sumby_ros1ant, Col_pat_3_CHH_sumby_ros1ant,
    r3_mat_1_CHH_sumby_ros1ant, r3_mat_2_CHH_sumby_ros1ant, r3_mat_3_CHH_sumby_ros1ant,
    Col_mat_1_CHH_sumby_ros1ant, Col_mat_2_CHH_sumby_ros1ant, Col_mat_3_CHH_sumby_ros1ant,
)
r3ant_colG_mCHH = r3ant_colG_mCHH.set_axis(colG_columns, axis='columns')

In [None]:
# Directory of the sperm "sumby mat-hypo DMRs" tables.
# NOTE(review): presumably the location readindata() reads from — confirm against readindata().
sumpath=path+"sumby_features/sperm_sumby_mathypo_dmrs/"

In [None]:
# "ros1_sc" replicate 1: CG (file tagged CpG_min5_CGcon_pass_fixed), CHG and CHH tables.
r3_sc_1_CG_sumby_ros1ant=readindata("ros1_sc_1_all_CpG_min5_CGcon_pass_fixed_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_sc_1_CHG_sumby_ros1ant=readindata("ros1_sc_1_all_CHG_min5_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_sc_1_CHH_sumby_ros1ant=readindata("ros1_sc_1_all_CHH_min5_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

# "ros1_sc" replicate 2.
r3_sc_2_CG_sumby_ros1ant=readindata("ros1_sc_2_all_CpG_min5_CGcon_pass_fixed_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_sc_2_CHG_sumby_ros1ant=readindata("ros1_sc_2_all_CHG_min5_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
r3_sc_2_CHH_sumby_ros1ant=readindata("ros1_sc_2_all_CHH_min5_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

# "wt_sc" replicate 1.
wt_sc_1_CG_sumby_ros1ant=readindata("wt_sc_1_all_CpG_min5_CGcon_pass_fixed_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
wt_sc_1_CHG_sumby_ros1ant=readindata("wt_sc_1_all_CHG_min5_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
wt_sc_1_CHH_sumby_ros1ant=readindata("wt_sc_1_all_CHH_min5_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")

# "wt_sc" replicate 2.
wt_sc_2_CG_sumby_ros1ant=readindata("wt_sc_2_all_CpG_min5_CGcon_pass_fixed_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
wt_sc_2_CHG_sumby_ros1ant=readindata("wt_sc_2_all_CHG_min5_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")
wt_sc_2_CHH_sumby_ros1ant=readindata("wt_sc_2_all_CHH_min5_sumby_ros1_only_NEITHERcheck_matVpat_hypo_allC.bed")


In [None]:
def makedatadf_sperm(r3_sc1,r3_sc2,wt_sc1,wt_sc2):
    """Combine two r3 and two wt per-region tables into one wide table.

    Each input needs 'chr', 'start', 'end', 'avg_methy' and 'nC' columns.
    Replicates are outer-joined on the region coordinates, the r3 and wt
    pairs are then outer-joined with each other, and every row that still
    contains a missing value is dropped.

    Returns a DataFrame with the coordinate columns followed by
    avg_methy_/nC_ columns for r3sc_1, r3sc_2, wtsc_1 and wtsc_2.
    """
    region_keys = ['chr', 'start', 'end']
    replicate_tags = ["_1", "_2"]

    r3_pair = r3_sc1.merge(r3_sc2, on=region_keys, how="outer", suffixes=replicate_tags)
    wt_pair = wt_sc1.merge(wt_sc2, on=region_keys, how="outer", suffixes=replicate_tags)

    # Keep only the coordinate and replicate columns of the r3 pair, under r3sc names.
    r3_labels = {
        "avg_methy_1": "avg_methy_r3sc_1",
        "nC_1": "nC_r3sc_1",
        "avg_methy_2": "avg_methy_r3sc_2",
        "nC_2": "nC_r3sc_2",
    }
    combined = r3_pair[region_keys + list(r3_labels)].rename(columns=r3_labels)

    # The wt pair still carries the generic suffixed names here; rename after joining.
    wt_labels = {
        "avg_methy_1": "avg_methy_wtsc_1",
        "nC_1": "nC_wtsc_1",
        "avg_methy_2": "avg_methy_wtsc_2",
        "nC_2": "nC_wtsc_2",
    }
    combined = combined.merge(wt_pair, on=region_keys, how="outer").rename(columns=wt_labels)

    return combined.dropna()
    
def makeavgdf_sperm(datadf):
    """Collapse the two replicates per genotype in a makedatadf_sperm()-style table.

    For both 'r3' and 'wt', adds '<genotype>_sc' (the plain mean of the two
    replicate avg_methy columns) and '<genotype>_sc_totalnC' (the sum of the
    two replicate nC columns) next to a copy of the coordinate columns.
    The input frame is not modified.
    """
    summary = datadf.loc[:, ['chr', 'start', 'end']].copy()
    for genotype in ('r3', 'wt'):
        methy_1 = datadf[f'avg_methy_{genotype}sc_1']
        methy_2 = datadf[f'avg_methy_{genotype}sc_2']
        summary[f'{genotype}_sc'] = (methy_1 + methy_2) / 2
        summary[f'{genotype}_sc_totalnC'] = datadf[f'nC_{genotype}sc_1'] + datadf[f'nC_{genotype}sc_2']
    return summary
   

In [None]:
sperm_CG_ros1_ant=makedatadf_sperm(r3_sc_1_CG_sumby_ros1ant,r3_sc_2_CG_sumby_ros1ant,wt_sc_1_CG_sumby_ros1ant,wt_sc_2_CG_sumby_ros1ant)

sperm_ColG_CG_ros1_ant=sperm_CG_ros1_ant.merge(r3ant_colG_mCG, on=['chr','start','end'], how="left")

In [None]:
sperm_CHG_ros1_ant=makedatadf_sperm(r3_sc_1_CHG_sumby_ros1ant,r3_sc_2_CHG_sumby_ros1ant,wt_sc_1_CHG_sumby_ros1ant,wt_sc_2_CHG_sumby_ros1ant)

sperm_ColG_CHG_ros1_ant=sperm_CHG_ros1_ant.merge(r3ant_colG_mCHG, on=['chr','start','end'], how="left")

In [None]:
sperm_CHH_ros1_ant=makedatadf_sperm(r3_sc_1_CHH_sumby_ros1ant,r3_sc_2_CHH_sumby_ros1ant,wt_sc_1_CHH_sumby_ros1ant,wt_sc_2_CHH_sumby_ros1ant)

sperm_ColG_CHH_ros1_ant=sperm_CHH_ros1_ant.merge(r3ant_colG_mCHH, on=['chr','start','end'], how="left")

In [None]:
# Display label -> column name for every sample passed to sumby_boxplot();
# insertion order is the order the entries are listed here.
sperm_ColG_ros1_ant_dict = {
    "Col mat 1": 'ColxC24_1_mat',
    "Col mat 2": 'ColxC24_2_mat',
    "Col mat 3": 'ColxC24_3_mat',
    "r3 mat 1": 'r3xr1_1_mat',
    "r3 mat 2": 'r3xr1_2_mat',
    "r3 mat 3": 'r3xr1_3_mat',
    "Col pat 1": 'C24xCol_1_pat',
    "Col pat 2": 'C24xCol_2_pat',
    "Col pat 3": 'C24xCol_3_pat',
    "r3 pat 1": 'r1xr3_1_pat',
    "r3 pat 2": 'r1xr3_2_pat',
    "r3 pat 3": 'r1xr3_3_pat',
    'Col sperm 1': 'avg_methy_wtsc_1',
    'Col sperm 2': 'avg_methy_wtsc_2',
    'r3 sperm 1': 'avg_methy_r3sc_1',
    'r3 sperm 2': 'avg_methy_r3sc_2',
}

# Replicate-1-only subset of the mapping above, in its own display order.
sperm_ColG_ros1_ant_1rep_dict = {
    label: sperm_ColG_ros1_ant_dict[label]
    for label in ("Col mat 1", "r3 mat 1", "Col pat 1", "r3 pat 1", 'Col sperm 1', 'r3 sperm 1')
}

In [None]:
fig,ax=sumby_boxplot(sperm_ColG_CG_ros1_ant,sperm_ColG_ros1_ant_dict,"CG")
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(10,6)
plt.tight_layout()
plt.savefig(figurepath+'ros1_ant_sumbymCG_sperm_ColG_allreps.pdf', dpi=350)


In [None]:
fig,ax=sumby_boxplot(sperm_ColG_CG_ros1_ant,sperm_ColG_ros1_ant_1rep_dict,"CG")
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
plt.savefig(figurepath+'ros1_ant_sumbymCG_sperm_ColG_1rep.pdf', dpi=350)


In [None]:
fig,ax=sumby_boxplot(sperm_ColG_CHG_ros1_ant,sperm_ColG_ros1_ant_dict,"CHG")
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(10,6)
plt.tight_layout()
plt.savefig(figurepath+'ros1_ant_sumbymCHG_sperm_ColG_allreps.pdf', dpi=350)


In [None]:
fig,ax=sumby_boxplot(sperm_ColG_CHH_ros1_ant,sperm_ColG_ros1_ant_dict,"CHH")
ax.tick_params(axis='x', labelsize=15, labelrotation=35)
fig.set_size_inches(10,6)
plt.tight_layout()
plt.savefig(figurepath+'ros1_ant_sumbymCHH_sperm_ColG_allreps.pdf', dpi=350)


# Demethylase expression in Picard dataset




In [None]:
# Tab-separated inputs for the expression analysis: the CPM expression table
# (indexed by its "locus_name" column) and the nuclei_code table.
sc = "/lab/solexa_gehring/elizabeth/ros1_endo_code_and_underlying_data/expression_in_scdata/"
gene_exp_picard = pd.read_csv(
    f"{sc}GSE157145_CPM_total_expression.txt",
    sep=tab,
    header=0,
    index_col="locus_name",
)
clusters = pd.read_csv(f"{sc}nuclei_code.txt", sep=tab, header=0)

In [None]:
endoclusters=clusters[clusters['tissu']=="endo"]
seedcoatclusters=clusters[clusters['tissu']=="seedcoat"]

endodict=endoclusters.set_index('nuclei')['tissu'].to_dict()
seedcoatdict=seedcoatclusters.set_index('nuclei')['tissu'].to_dict()

In [None]:
endo_data= gene_exp_picard[list(endodict.keys())]
seedcoat_data= gene_exp_picard[list(seedcoatdict.keys())]


In [None]:
demethylases=['AT2G36490', 'AT3G10010', 'AT4G34060', 'AT5G04560']

In [None]:
endo_sums=pd.DataFrame(endo_data.loc[demethylases].sum(axis=1), columns=['endo_sum'])

In [None]:
seedcoat_sums=pd.DataFrame(seedcoat_data.loc[demethylases].sum(axis=1), columns=['seedcoat_sum'])

In [None]:
seed_sums=endo_sums.join(seedcoat_sums)

In [None]:
seed_sums
## plotting in prism