In [None]:
# import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from scipy.stats import mannwhitneyu
from matplotlib.gridspec import GridSpec
import matplotlib.cm as cm

In [None]:
# Global matplotlib defaults for every figure in this notebook:
# Arial as the sans-serif face, 18 pt text, 200 dpi rendering.
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    'font.size': 18,
    "figure.dpi": 200,
})

In [None]:
# Fig. 2b
# ChIP-seq signal table (RPKM, going by the file name); the first column
# becomes the row index.
file_path = "../data/ChIP-seq/ChIP_H3K4me2_H2A.Z_H2Aub_H3K27me3_WT_atx345.rpkm.tsv"
df_chip = pd.read_table(file_path, index_col=0)

# Header-less BED file of ATX3/4/5 targets; only its fourth column is kept
# (presumably gene identifiers -- it is matched against df_chip.index below).
atx345_target = pd.read_table(
    "../data/list_of_target_genes/ATX345-target(n=5842).bed",
    header=None,
).iloc[:, 3]

In [None]:
hm_list = ["H3K4me2","H2A.Z","H2Aub","H3K27me3"]
n = len(hm_list)

# True for rows of df_chip whose index is listed in atx345_target
mask = df_chip.index.isin(atx345_target)

# one square panel per mark, laid out in a single row
fig, ax = plt.subplots(1, n, figsize=(5 * n, 5))

for i, hm in enumerate(hm_list):
    panel = ax[i]
    wt_col = f"{hm}_WT_rep1"
    mut_col = f"{hm}_atx3/4/5_rep1"

    # plot: non-target genes first (grey), then target genes on top (blue)
    for subset, colour in ((df_chip[~mask], "tab:grey"),
                           (df_chip[mask], "blue")):
        sns.scatterplot(data=subset, x=wt_col, y=mut_col,
                        s=2, alpha=0.5, ax=panel, color=colour)

    # format figure: both axes run from 0 to the 99th percentile of the
    # WT column, so the two axes share one scale
    x = df_chip[wt_col]
    upper = np.percentile(x, 99)
    panel.set_xlim(0, upper)
    panel.set_ylim(0, upper)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_aspect('equal', adjustable='box')

In [None]:
# Fig. 2c
random.seed(1)  # fixed seed: the random control set is the same on every run

# Target genes, and an equally sized random draw from the remaining genes
mask1 = df_chip.index.isin(atx345_target)
N = mask1.sum()
non_targets = list(df_chip.index[~mask1])
mask2 = df_chip.index.isin(random.sample(non_targets, N))

df_chip1 = df_chip[mask1]   # targets
df_chip2 = df_chip[mask2]   # random non-target controls

hm_list = ["H3K4me2","H2A.Z","H2Aub","H3K27me3"]
n = len(hm_list)
y_list = [50,20,20,30]      # per-mark half-range of the symmetric y axis


def _delta(df, hm, rep):
    """Return the atx3/4/5 column minus the WT column for one mark and replicate."""
    return df[f"{hm}_atx3/4/5_rep{rep}"] - df[f"{hm}_WT_rep{rep}"]


fig, ax = plt.subplots(1, n, figsize=(5 * n, 5))

for i, (hm, y_lim) in enumerate(zip(hm_list, y_list)):
    panel = ax[i]

    # box order: control rep1, target rep1, control rep2, target rep2
    data = {
        "a": _delta(df_chip2, hm, 1),
        "b": _delta(df_chip1, hm, 1),
        "c": _delta(df_chip2, hm, 2),
        "d": _delta(df_chip1, hm, 2),
    }

    # plot
    sns.boxplot(data, ax=panel, width=0.7, fill=False,
                palette=["tab:grey","tab:blue"]*2)

    # format figure: dashed zero line, one tick centred under each replicate pair
    panel.hlines(0, -0.5, 3.5, color="k", ls="--")
    panel.set_xlim(-0.5, 3.5)
    panel.set_ylim(-y_lim, y_lim)
    panel.set_xticks([0.5, 2.5])
    panel.set_xticklabels(["Replicate 1","Replicate 2"])
    for side in ('top', 'right'):
        panel.spines[side].set_visible(False)

    # Mann-Whitney U test (two-sided), control vs. target within each replicate
    p1 = mannwhitneyu(data["a"], data["b"], alternative="two-sided").pvalue
    p2 = mannwhitneyu(data["c"], data["d"], alternative="two-sided").pvalue
    print(f"{hm:10s} rep1 : p={p1:.2e}")
    print(f"{hm:10s} rep2 : p={p2:.2e}")

In [None]:
# Fig. 2e
def plot_box_with_hist(x,y,np_arange,xlab="",ylab=""):
    """Draw box plots of ``y`` grouped by bins of ``x``, topped by a bar chart of bin sizes.

    ``x`` is cut into right-closed intervals whose inner edges are
    ``np_arange``; open-ended bins ``(-inf, first]`` and ``(last, inf]`` are
    added at both ends, giving ``len(np_arange) + 1`` bins in total.

    Parameters
    ----------
    x : array-like
        Values to bin (passed to ``pd.cut``).
    y : array-like
        Values summarised per bin; combined with the binned ``x`` in one
        DataFrame.
    np_arange : array-like
        Inner bin edges; must be increasing, as required by ``pd.cut``.
    xlab, ylab : str
        Axis labels of the box-plot panel.

    Returns
    -------
    (fig, ax_box, ax_bar)
        The figure, the lower (box plot) axes and the upper (bar chart) axes.
    """
    bins = np.concatenate(([-np.inf], np_arange, [np.inf]))
    labels = [f"({bins[i]}, {bins[i+1]}]" for i in range(len(bins) - 1)]
    x_binned = pd.cut(x, bins=bins, labels=labels)
    data = pd.DataFrame({'x_binned': x_binned, 'y': y})
    # one Series of y values per bin, in bin order (empty bins stay as empty Series)
    boxplot_data = [data.loc[data['x_binned'] == label, 'y'] for label in labels]
    counts = [len(group) for group in boxplot_data]

    fig = plt.figure(figsize=(7, 7))
    gs = GridSpec(2, 1, height_ratios=[1, 4], hspace=0.1)

    # bar plot
    ax_bar = fig.add_subplot(gs[0])
    ax_bar.bar(range(len(counts)), counts, color='tab:grey', alpha=0.7, width=0.6)

    # format bar plot
    ax_bar.set_ylabel("The number of genes")
    ax_bar.set_xticks(range(len(labels)))
    ax_bar.set_xticklabels([])
    ax_bar.set_xlim(-0.5, len(labels) - 0.5)

    # box plot
    ax_box = fig.add_subplot(gs[1])
    bplot = ax_box.boxplot(boxplot_data, tick_labels=labels, patch_artist=True)

    # format box plot
    # One colour per bin, sampled from the bwr colormap. The count follows the
    # number of bins (it used to be fixed at 8), so every box is coloured and
    # the gradient spans the full range whatever the length of np_arange.
    colors = cm.bwr(np.linspace(0.1, 0.9, len(labels)))
    for patch, color in zip(bplot['boxes'], colors):
        patch.set_facecolor(color)

    for median in bplot['medians']:
        median.set_color('black')
        median.set_linewidth(2)

    ax_box.set_xlabel(xlab)
    ax_box.set_ylabel(ylab)
    ax_box.set_xticklabels(labels, rotation=45)
    ax_box.grid(axis="y")

    return fig, ax_box, ax_bar

In [None]:
hm_list = ["H2A.Z","H2Aub","H3K27me3"]
n = len(hm_list)

# Change in H3K4me2 (atx3/4/5 minus WT, replicate 1); identical for every
# panel, so it is computed once.
x = df_chip["H3K4me2_atx3/4/5_rep1"] - df_chip["H3K4me2_WT_rep1"]

for i, hm in enumerate(hm_list):
    # change in this mark, same replicate
    y = df_chip[f"{hm}_atx3/4/5_rep1"] - df_chip[f"{hm}_WT_rep1"]

    # plot: x binned in steps of 5 between -15 and 15, plus open-ended end bins
    fig, ax_box, ax_bar = plot_box_with_hist(
        x, y, np.arange(-15, 16, 5),
        "ΔH3K4me2 (atx3/4/5-WT)",
        f"Δ{hm} (atx3/4/5-WT)",
    )

    # format figure: clip the y axis to the 0.1-99.9 percentile range of y
    # and blank all axis labels
    ymin, ymax = np.percentile(y, [0.1, 99.9])
    ax_box.set_ylim(ymin, ymax)
    ax_box.set_xlabel("")
    ax_box.set_ylabel("")
    ax_bar.set_ylabel("")