In [None]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Global matplotlib settings: Arial as the sans-serif font at size 18,
# figures rendered at 200 dpi.
plt.rcParams.update({
    "font.family": "sans-serif",
    "font.sans-serif": ["Arial"],
    "font.size": 18,
    "figure.dpi": 200,
})

In [None]:
# Supplementary Fig. 7a
class TimeCourseAnalysis:
    """Time-course ChIP-seq / RNA-seq tables plus a per-gene plotting helper.

    On construction, two ChIP-seq replicate tables and one RNA-seq table are
    read from tab-separated RPKM files under ``../data`` (first column used as
    the index), and the RNA-seq table is reduced to a per-time-point mean and
    standard error of the mean.

    Column layout relied on by the positional slicing below (not validated
    here -- TODO confirm against the actual files):
      * ChIP-seq tables: the first ``tp_n`` columns are the normalisation
        track (named ``y_h3`` in the code, presumably H3), followed by one
        ``tp_n``-wide block of columns per mark, in ``hm_list`` order.
      * RNA-seq table: ``RNA_REP_N`` consecutive columns per time point, in
        ``tp_list`` order.
    """

    def __init__(self):
        """Set plotting constants, load the data tables, summarise RNA-seq."""
        self.tp_list = ["ZT0","ZT6","ZT12","ZT18"]
        self.hm_list = ["H3K4me2","H3K4me3","H2A.Z","H2Aub"]
        self.c_list = ["tab:orange","tab:cyan","tab:red","tab:green"]  # one colour per mark
        self.maker_list = ["o","^"]  # one scatter marker per ChIP-seq replicate

        self.hm_n = len(self.hm_list)
        self.tp_n = len(self.tp_list)
        self.CHIP_REP_N = 2
        self.RNA_REP_N = 3

        # ChIP-seq data
        df_chip_rep1 = pd.read_csv("../data/ChIP-seq/Time-course-ChIP_WT_rep1.rpkm.tsv",
                                   sep="\t", index_col=0)
        df_chip_rep2 = pd.read_csv("../data/ChIP-seq/Time-course-ChIP_WT_rep2.rpkm.tsv",
                                   sep="\t", index_col=0)
        self.df_chip_list = [df_chip_rep1, df_chip_rep2]

        # RNA-seq data
        # NOTE(review): the file name says "rep1", yet RNA_REP_N columns per
        # time point are averaged below -- confirm the file holds all replicates.
        self.df_rna = pd.read_csv("../data/RNA-seq/Time-course-RNA_WT_rep1.rpkm.tsv",
                                  sep="\t", index_col=0)

        # Per-time-point mean and SEM across the RNA_REP_N replicate columns.
        rna_mean = {}
        rna_sem = {}
        for i, tp in enumerate(self.tp_list):
            rep_cols = self.df_rna.iloc[:, self.RNA_REP_N*i:self.RNA_REP_N*(i+1)]
            rna_mean[tp] = rep_cols.mean(axis=1)
            rna_sem[tp] = rep_cols.sem(axis=1)
        self.df_rna_mean = pd.DataFrame(rna_mean)
        self.df_rna_sem = pd.DataFrame(rna_sem)

    # plot levels of H3K4me2, H3K4me3, H2A.Z, H2Aub, and mRNA per gene
    def plot(self, gene_id, ax, gene_name=""):
        """Draw the mark/normalisation ratios and the mRNA level of one gene.

        For every mark in ``hm_list`` each replicate's ratio (mark columns
        divided by the first ``tp_n`` columns) is drawn as scatter points and
        the mean over replicates as a line on ``ax``. The mRNA mean with SEM
        error bars goes on a twin y-axis created with ``ax.twinx()``.

        Parameters
        ----------
        gene_id : index label
            Row label looked up with ``.loc`` in the ChIP-seq and RNA-seq
            tables; a missing label raises ``KeyError``.
        ax : matplotlib Axes
            Axes to draw on.
        gene_name : str, optional
            Title of the panel (rendered in italics).

        Returns
        -------
        (fig, ax)
            The figure that owns ``ax`` (``ax.figure``) and ``ax`` itself.
            The twin axis is not returned.
        """
        x = np.arange(self.tp_n)

        # ChIP-seq
        for i in range(self.hm_n):
            hm = self.hm_list[i]
            c = self.c_list[i]
            # Column block of mark i; block 0 is the normalisation track.
            col_start = self.tp_n*(i+1)
            col_stop = self.tp_n*(i+2)

            rep_ratios = []
            for j in range(self.CHIP_REP_N):
                df_chip = self.df_chip_list[j]
                y_h3 = df_chip.iloc[:, 0:self.tp_n].loc[gene_id].values
                y_hm = df_chip.iloc[:, col_start:col_stop].loc[gene_id].values
                y = y_hm / y_h3
                rep_ratios.append(y)
                # Cycle through the markers so extra replicates do not raise IndexError.
                m = self.maker_list[j % len(self.maker_list)]
                ax.scatter(x, y, c=c, marker=m)

            # Mean over all replicates (equals (rep1 + rep2) / 2 when CHIP_REP_N == 2).
            y_mean = np.mean(rep_ratios, axis=0)
            ax.plot(x, y_mean, c=c, marker="", ls="-", label=hm)

        # mRNA-seq
        ax2 = ax.twinx() # 2nd y-axis
        y = self.df_rna_mean.loc[gene_id].values
        y_sem = self.df_rna_sem.loc[gene_id].values

        # plot
        ax2.plot(x, y, c="tab:purple", marker="o", linestyle="-", label="mRNA")
        ax2.errorbar(x, y, yerr=y_sem,
                     capsize=5, ecolor='tab:purple', color='tab:purple', linestyle=" ")

        # format figure
        ax.set_title(gene_name, fontdict=dict(family="arial", style="italic"))
        ax.set_xticks(x)
        ax.set_xticklabels(self.tp_list)
        X_MIN, X_MAX = -0.2, 3.2
        ax.set_xlim(X_MIN, X_MAX)
        # Gray bands left of the first tick and right of the third tick.
        ax.axvspan(X_MIN, 0, color="gray", alpha=0.3, zorder=0)
        ax.axvspan(2, X_MAX, color="gray", alpha=0.3, zorder=0)

        # Take the figure from the axes itself rather than from a notebook-global
        # `fig`, which may be undefined or belong to a different figure.
        return ax.figure, ax

In [None]:
# Gene name -> gene ID for the genes plotted below, in panel order.
circadian_gene_dict = {
    "LHY": "AT1G01060",
    "CCA1": "AT2G46830",
    "PRR9": "AT2G46790",
    "PRR7": "AT5G02810",
    "PRR5": "AT5G24470",
    "GI": "AT1G22770",
    "TOC1": "AT5G61380",
    "LUX": "AT3G46640",
    "ELF4": "AT2G40080",
    "ELF3": "AT2G25930",
}

In [None]:
# One panel per gene in circadian_gene_dict, laid out row by row on a 3 x 4 grid.
TCA = TimeCourseAnalysis()

fig, ax = plt.subplots(
    3, 4,
    figsize=(25, 12),
    gridspec_kw={"hspace": 0.4, "wspace": 0.3},
)

for i, (gene_name, gene_id) in enumerate(circadian_gene_dict.items()):
    # ax.flat walks the grid row by row, i.e. ax.flat[i] is ax[i//4][i%4]
    TCA.plot(gene_id, ax.flat[i], gene_name)

# remove empty axes (the last two panels of the bottom row)
ax[2, 2].axis("off")
ax[2, 3].axis("off")