In [None]:
# import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Global matplotlib settings applied to every figure created after this cell:
# sans-serif font family with Arial listed as the sans-serif face,
# base font size 18 and figure resolution 200 dpi.
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
    'font.size': 18,
    "figure.dpi": 200,
})

In [None]:
# Supplementary Fig. 2a
# Input tables (both tab-separated):
#   df_chip     - ChIP-seq table; its first column becomes the row index
#   ldl3_target - fourth column (position 3) of the header-less LDL3-target BED file
file_path = "../data/ChIP-seq/ChIP_H3K4me2_H2A.Z_H2Aub_H3K27me3_WT_ldl3.rpkm.tsv"
df_chip = pd.read_table(file_path, index_col=0)

bed_table = pd.read_table(
    "../data/list_of_target_genes/LDL3-target(n=7115).bed",
    header=None,
)
ldl3_target = bed_table.iloc[:, 3]

In [None]:
# One scatter panel per mark: WT replicate 2 (x) against ldl3 replicate 2 (y).
hm_list = ["H3K4me2","H2A.Z","H2Aub","H3K27me3"]
n = len(hm_list)

# True for rows of df_chip whose index label appears in ldl3_target
mask = df_chip.index.isin(ldl3_target)

fig, ax = plt.subplots(1,n,figsize=(5*n,5))

# (row selector, colour) pairs, drawn in this order so that the
# ldl3-target points are painted on top of the non-target points
layers = [
    (~mask, "tab:grey"),  # non ldl3-target genes
    (mask, "tab:red"),    # ldl3 target genes
]

for i, hm in enumerate(hm_list):
    panel = ax[i]
    x_col = f"{hm}_WT_rep2"
    y_col = f"{hm}_ldl3_rep2"

    # plot
    for selector, point_color in layers:
        sns.scatterplot(
            data=df_chip[selector],
            x=x_col,
            y=y_col,
            s=2,
            alpha=0.5,
            ax=panel,
            color=point_color,
        )

    # format figure
    # both axes share the same upper limit: the 99th percentile of the
    # WT column over all rows of df_chip
    upper = np.percentile(df_chip[x_col], 99)
    panel.set_xlim(0, upper)
    panel.set_ylim(0, upper)
    panel.set_title(hm)
    panel.set_xlabel("")
    panel.set_ylabel("")
    panel.set_aspect('equal', adjustable='box')

In [None]:
# Supplementary Fig. 2c
# Stacked bar chart: for each mark in hm_list and each of two replicates, the
# rows selected by ldl3_target are split into four groups according to the
# difference (ldl3 column - WT column), and the share of each group is printed
# as a percentage next to its bar segment.

# NOTE(review): .loc with a list of labels raises KeyError if any label is
# missing from df_chip.index and repeats rows for duplicated labels, whereas
# the scatter-plot cell uses index.isin(); confirm both select the same genes.
df_tmp = df_chip.loc[ldl3_target]

plt.figure(figsize=(6,6))
# global setting: stays in effect for any figure created after this cell
plt.rcParams['font.size'] = 15

hm_list = ["H2A.Z", "H2Aub", "H3K27me3"]
# one colour per group, from the lowest difference bin to the highest
color_list = ["#A3C7DF", "#EC9FA0", "#E36F70", "#DC494B"]

total_genes = len(df_tmp)

for k, hm in enumerate(hm_list):
    for i in range(2):
        rep = i + 1
        # difference between mutant and wild type, computed once per replicate
        delta = df_tmp[f"{hm}_ldl3_rep{rep}"] - df_tmp[f"{hm}_WT_rep{rep}"]

        # classify genes into four groups using cumulative counts:
        # delta < 0, 0 <= delta < 2.5, 2.5 <= delta < 5, and the remainder
        # (rows with a NaN difference fail every comparison and therefore
        # fall into the last group)
        a = (delta < 0).sum()
        b = (delta < 2.5).sum()
        c = (delta < 5).sum()
        d = total_genes

        heights = [a, b - a, c - b, d - c]

        x_pos = 2*k + i  # bars of one mark sit side by side: rep1, rep2
        bottom = 0
        for j, h in enumerate(heights):
            # empty groups are neither drawn nor labelled
            if h <= 0:
                continue

            # plot
            plt.bar(x_pos, h, bottom=bottom, color=color_list[j])

            # show ratio of genes
            percent = h / total_genes * 100
            # fine-tuning of text position: every label is shifted up by 100
            # data units from the segment centre, except the third segment of
            # the third mark, which is shifted down by 100 instead
            diff = -100 if (k == 2 and j == 2) else 100

            plt.text(x_pos, bottom + h/2 + diff,
                     f"{percent:.1f}%",
                     ha='center', va='center',
                     fontsize=12,
                     color="black")
            bottom += h

# format figure
n_bars = 2 * len(hm_list)
plt.xticks(np.arange(n_bars), ["rep1", "rep2"] * len(hm_list))
plt.show()