In [None]:
# import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import mannwhitneyu

In [None]:
# Global matplotlib defaults (font and resolution) applied from this cell onward
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    'font.size': 18,
    'figure.dpi': 200,
})

In [None]:
# Supplementary Fig. 3a
# ChIP-seq table: tab-separated, first column is used as the row index.
file_path = "../data/ChIP-seq/ChIP_H3K4me2_H2A.Z_H2Aub_H3K27me3_WT_atx345.rpkm.tsv"
df_chip = pd.read_csv(file_path, sep="\t", index_col=0)

# ATX3/4/5 target list: headerless BED file, of which only the 4th column is kept
# (presumably the gene ID matching the index of df_chip -- confirm).
atx345_bed = pd.read_csv(
    "../data/list_of_target_genes/ATX345-target(n=5842).bed",
    sep="\t",
    header=None,
)
atx345_target = atx345_bed.iloc[:, 3]

In [None]:
hm_list = ["H3K4me2","H2A.Z","H2Aub","H3K27me3"]
n = len(hm_list)

# True for rows of df_chip whose index appears in the ATX3/4/5 target list
mask = df_chip.index.isin(atx345_target)

fig, ax = plt.subplots(1,n,figsize=(5*n,5))

# Layers are drawn in this order: non-target genes first (grey),
# then ATX3/4/5 target genes on top (blue).
layers = [
    (df_chip[~mask], "tab:grey"),
    (df_chip[mask], "blue"),
]

for i, hm in enumerate(hm_list):
    panel = ax[i]
    wt_col = f"{hm}_WT_rep2"
    mut_col = f"{hm}_atx3/4/5_rep2"

    # plot: WT (x axis) against atx3/4/5 (y axis), replicate 2 columns
    for subset, point_color in layers:
        sns.scatterplot(data=subset,
                        x=wt_col,
                        y=mut_col,
                        s=2,alpha=0.5,ax=panel,color=point_color)

    # format figure
    # Both axes are clipped at the 99th percentile of the WT column, so the
    # panel stays square and the two axes share the same range.
    x = df_chip[wt_col]
    upper = np.percentile(x,99)
    panel.set_xlim(0,upper)
    panel.set_ylim(0,upper)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_aspect('equal', adjustable='box')

In [None]:
# Supplementary Fig. 3c
# One stacked bar per (mark, replicate): ATX3/4/5 target genes are binned by
# the change in signal (atx3/4/5 - WT) and each segment is labelled with the
# percentage of target genes it contains.

df_tmp = df_chip.loc[atx345_target]

plt.figure(figsize=(6,6))
plt.rcParams['font.size'] = 15  # note: changes the global default from here on

hm_list = ["H2A.Z", "H2Aub", "H3K27me3"]
color_list = ["#3B88BD", "#66A2CB", "#A3C7DF", "#EC9FA0"] 

total_genes = len(df_tmp)

for k, hm in enumerate(hm_list):
    for i in range(2):
        bar_x = 2*k + i
        delta = df_tmp[f"{hm}_atx3/4/5_rep{i+1}"] - df_tmp[f"{hm}_WT_rep{i+1}"]

        # classify genes into four groups:
        # delta < -5, -5 <= delta < -2.5, -2.5 <= delta < 0, and the rest.
        # Cumulative counts below each cut-off, closed by the total gene count.
        cumulative = [(delta < cutoff).sum() for cutoff in (-5, -2.5, 0)]
        cumulative.append(total_genes)
        heights = [upper - lower
                   for lower, upper in zip([0] + cumulative[:-1], cumulative)]

        bottom = 0
        for j, h in enumerate(heights):
            if h <= 0:
                # empty group: nothing to draw or label
                continue

            # plot
            plt.bar(bar_x, h, bottom=bottom, color=color_list[j])

            # show ratio of genes, centred in the segment
            percent = h / total_genes * 100
            label_color = "white" if j<2 else "black"
            plt.text(bar_x, bottom + h/2,
                     f"{percent:.1f}%",
                     ha='center', va='center',
                     fontsize=12,
                     color=label_color)
            bottom += h

# format figure
plt.xticks(np.arange(6), ["rep1", "rep2"] * 3)
plt.show()

In [None]:
# Supplementary Fig. 3e
# Load the table used for this panel (tab-separated, first column as index).
# This rebinds df_chip, replacing the table loaded for the earlier panels.
file_path = "../previous_data/ChIP_H3K4me123_WT_atx345.rpkm.tsv"
df_chip = pd.read_csv(
    file_path,
    sep="\t",
    index_col=0,
)

In [None]:
# data: per-row change (atx3-1/4/5 minus WT) for each mark
x = df_chip["H3K4me2_atx3-1/4/5"] - df_chip["H3K4me2_WT"]
y = df_chip["H3K4me3_atx3-1/4/5"] - df_chip["H3K4me3_WT"]

# classification: split the plane into four quadrants at the same cut-off
# on both axes
threshold = -5
x_below, x_above = x < threshold, x >= threshold
y_below, y_above = y < threshold, y >= threshold
mask1 = x_below & y_below  # Group A: both below the cut-off
mask2 = x_above & y_below  # Group B: only y below the cut-off
mask3 = x_below & y_above  # Group C: only x below the cut-off
mask4 = x_above & y_above  # Group D: neither below the cut-off
masks = [mask1,mask2,mask3,mask4]

# plot: one colour per group, and print the size of each group
color_list = ["tab:red","tab:orange","tab:blue","tab:grey"]
plt.figure(figsize=(5,5),dpi=200)
for i, (mask, color) in enumerate(zip(masks, color_list)):
    print(f"Group {chr(65+i)} : {mask.sum()}")
    plt.scatter(x[mask],y[mask],s=1,alpha=0.5,c=color)

# format figure: draw the cut-off lines across the full plotted range
x_range = 60
y_range = 30
plt.vlines(threshold,-y_range,y_range,color="k")
plt.hlines(threshold,-x_range,x_range,color="k")
plt.xlim(-x_range,x_range)
plt.ylim(-y_range,y_range)
plt.xlabel("ΔH3K4me2")
plt.ylabel("ΔH3K4me3")

In [None]:
# Supplementary Fig. 3f
# Reload the ChIP-seq table used for Supplementary Fig. 3a (tab-separated,
# first column as index); df_chip is rebound to it.
file_path = "../data/ChIP-seq/ChIP_H3K4me2_H2A.Z_H2Aub_H3K27me3_WT_atx345.rpkm.tsv"
df_chip = pd.read_csv(
    file_path,
    sep="\t",
    index_col=0,
)

In [None]:
hm_list = ["H2A.Z","H2Aub","H3K27me3"]
y_list = [30,30,30]
colors = ["tab:red","tab:orange","tab:blue","tab:grey"]
n_tests = 3  # groups A, B and C are each compared against group D

# NOTE: mask1..mask4 (groups A-D) are defined in the Supplementary Fig. 3e cell
# from a different table than the df_chip loaded for this panel. That cell must
# have been run first, and the boolean masks are aligned to df_chip by index
# label when used below.

fig, ax = plt.subplots(1,3,figsize=(12,4.5))
fig.subplots_adjust(wspace=0.5)

for i in range(3):
    hm = hm_list[i]
    # change in signal (atx3/4/5 - WT), replicate 1 columns
    df_tmp = df_chip[f"{hm}_atx3/4/5_rep1"] - df_chip[f"{hm}_WT_rep1"]
    
    data = {"a":df_tmp[mask1],
            "b":df_tmp[mask2],
            "c":df_tmp[mask3],
            "d":df_tmp[mask4]}

    # plot
    sns.boxplot(data,ax=ax[i],palette=colors,fill=False,zorder=1)

    # format figure
    ax[i].hlines(0,-0.5,3.5,color="k",ls="--",zorder=2)
    ax[i].set_xlim(-0.5,3.5)
    ax[i].set_ylim(-y_list[i],y_list[i])
    ax[i].spines['right'].set_visible(False)
    ax[i].spines['top'].set_visible(False)
    ax[i].set_xticks(np.arange(4))
    ax[i].set_xticklabels(["A","B","C","D"])
    ax[i].set_title(hm)

    # Bonferroni-corrected p-values from two-sided Mann-Whitney U tests.
    # The printed label is derived from the group key, so it always matches the
    # comparison that was actually run. The corrected value is capped at 1;
    # np.minimum is used (rather than min) so a NaN p-value stays NaN.
    print(hm)
    for group in ("a", "b", "c"):
        p_raw = mannwhitneyu(data[group], data["d"], alternative="two-sided").pvalue
        p_adj = np.minimum(p_raw * n_tests, 1.0)
        print(f"{group.upper()}-D : {p_adj:.2e}")