In [None]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from matplotlib_venn import venn2
import scipy
from scipy.stats import mannwhitneyu

In [None]:
# Global matplotlib defaults applied to every figure drawn after this cell:
# Arial as the sans-serif face, 18 pt text, 200 dpi figures.
plt.rcParams.update({
    "font.family": "sans-serif",
    "font.sans-serif": ["Arial"],
    "font.size": 18,
    "figure.dpi": 200,
})

In [None]:
# Time-course ChIP-seq data in WT and ldl3.
# The file is tab-separated; its first column becomes the row index, which
# later cells match against the gene lists.
file_path = "../data/ChIP-seq/Time-course-ChIP_WT_ldl3_rep1.rpkm.tsv"
df_chip = pd.read_table(file_path, index_col=0)
df_chip.head()

In [None]:
# Gene lists are read from header-less BED files; the identifier used in the
# following cells is taken from the 4th column (integer label 3).

# H3K4me2 diel oscillating genes (DOGs), kept as an array of values
file_path = "../data/list_of_DOGs/H3K4me2_DOGs.bed"
dogs_bed = pd.read_csv(file_path, sep="\t", header=None)
H3K4me2_DOGs = dogs_bed[3].values

# LDL3-target genes, kept as a pandas Series
ldl3_bed_path = "../data/list_of_target_genes/LDL3-target(n=7115).bed"
ldl3_target = pd.read_csv(ldl3_bed_path, sep="\t", header=None)[3]

In [None]:
# Supplementary Fig. 11a
# Venn diagram of H3K4me2 DOGs vs. LDL3-target genes, followed by a one-sided
# Fisher's exact test on the size of their overlap.

# Deduplicate each list once so that the Venn diagram, the overlap count and
# the contingency table are all derived from the same unique-gene sets.
# (Using len() of the raw lists would over-count any gene ID that appears on
# more than one BED row and make the table inconsistent with the diagram.)
dogs_set = set(H3K4me2_DOGs)
target_set = set(ldl3_target)

venn2([dogs_set, target_set],
      set_colors=["red","blue"],
      set_labels=["H3K4me2 DOGs","LDL3-target"])


N = 27443 # All protein-coding genes
n1 = len(dogs_set)
n2 = len(target_set)
m = len(dogs_set & target_set)

# Fisher's exact test
# 2x2 table: rows = DOG / not DOG, columns = LDL3-target / not LDL3-target.
# NOTE(review): the bottom-right cell assumes both gene lists are subsets of
# the N protein-coding genes — confirm against how the lists were built.
contingency = np.array([[m, n1 - m],
                        [n2 - m, N - (n1 + n2 - m)]])
# alternative='greater' tests whether the overlap is larger than expected
# under independence (odds ratio > 1).
p = scipy.stats.fisher_exact(contingency, alternative='greater')[1]
print(f"p={p:.3e}")

In [None]:
# Supplementary Fig. 11b
# One violin panel per summary statistic (amplitude, max, min), showing the
# per-gene change (ldl3 - WT) for H3K4me2 DOGs that are not LDL3 targets
# (left, grey) and for those that are (right, orange).
label_list = ["Amplitude", "Max", "Min"]

# y-axis limits for each panel, in the same order as label_list
max_list = [20, 40, 40]
min_list = [-20, -40, -40]

# Row masks over df_chip for the two gene groups being compared
mask_h3k4me2_dogs = df_chip.index.isin(H3K4me2_DOGs)
mask_ldl3_target = df_chip.index.isin(ldl3_target)
mask1 = mask_h3k4me2_dogs & mask_ldl3_target # n=199
mask2 = mask_h3k4me2_dogs & ~mask_ldl3_target # n=582
print(mask1.sum(),mask2.sum())

# H3K4me2 in WT and ldl3 (column blocks selected by position; the slices do
# not depend on the panel, so they are taken once up front)
df_wt = df_chip.iloc[:, 4:8]
df_mt = df_chip.iloc[:, 20:24]

# Row-wise reducers, one per panel, in the same order as label_list
reducers = [
    lambda frame: frame.max(axis=1) - frame.min(axis=1),  # amplitude
    lambda frame: frame.max(axis=1),                      # max
    lambda frame: frame.min(axis=1),                      # min
]

fig, ax = plt.subplots(1, 3, figsize=(15, 5))
for i, reduce_rows in enumerate(reducers):
    panel = ax[i]

    # calculation of amplitude, max, or min for every row
    ser_wt = reduce_rows(df_wt)
    ser_mt = reduce_rows(df_mt)

    # change (ldl3 - WT) within each gene group
    delta_target = ser_mt[mask1] - ser_wt[mask1]        # DOGs and LDL3-target
    delta_non_target = ser_mt[mask2] - ser_wt[mask2]    # DOGs but not LDL3-target

    # plot
    sns.violinplot(
        {"1": delta_non_target, "2": delta_target},
        palette=["tab:grey", "tab:orange"],
        fill=False,
        cut=0,
        ax=panel,
        inner_kws={"box_width": 10, "whis_width": 2},
    )

    # format figure
    panel.set_ylim(min_list[i], max_list[i])
    for side in ("right", "top"):
        panel.spines[side].set_visible(False)
    # dashed reference line at zero change
    panel.hlines(0, -0.5, 1.5, color="black", linestyle="--", zorder=0)
    panel.set_xlim(-0.5, 1.5)
    panel.set_xticks([0, 1])
    panel.set_xticklabels([])
    panel.set_title(label_list[i])

    # Mann-Whitney U test (two-sided) between the two gene groups
    p = mannwhitneyu(delta_non_target, delta_target, alternative="two-sided").pvalue
    print(f"{label_list[i]:10s} p={p:.2e}")