In [2]:
# Import required libraries
import pandas as pd

In [3]:
# Load epi clock results from excel
clocks_excel_file = "sup_data_2_43587_2023_462_MOESM3_ESM.xlsx"

# Specify the sheets to import 
sheets_to_import = list(range(15, 18)) #  sheets 16 to 18 (table S3.1-3.3)

# Load the sheets, each as a separate data frame
clocks_dfs = [pd.read_excel(clocks_excel_file, sheet_name=sheet, skiprows=[0, 1, 3]) for sheet in sheets_to_import] # skipping rows 1, 2, and 4 to remove the legends and intercept rows
 
# Separate the data frames into individual data frames (clocks 1, 2, and 3)
clock_1_df, clock_2_df, clock_3_df = clocks_dfs

# Create the overlapped dfs for clocks 1, 2 and 3 
overlap_1_2_df = pd.merge(clock_1_df, clock_2_df, on='Gene', how='inner')
overlap_1_2_3_df = pd.merge(overlap_1_2_df, clock_3_df, on='Gene', how='inner')
overlap_1_2_3_df = overlap_1_2_3_df.drop_duplicates(subset='Gene') # Drop duplicate columns resulting from the merge

# Create the overlapped dfs for clocks 2 and 3
overlap_2_3_df = pd.merge(clock_2_df, clock_3_df, on='Gene', how='inner')
overlap_2_3_df = overlap_2_3_df.drop_duplicates(subset='Gene') # Drop duplicate columns resulting from the merge

In [4]:
# Check size of data frames
print(f"Clock 1: {clock_1_df.shape}")
print(f"Clock 2: {clock_2_df.shape}")
print(f"Clock 3: {clock_3_df.shape}")
print(f"Overlap 1 and 2: {overlap_1_2_df.shape}")
print(f"Overlap 2 and 3: {overlap_2_3_df.shape}") # should be 401
print(f"Overlap 1, 2 and 3: {overlap_1_2_3_df.shape}") # should be 140

Clock 1: (335, 14)
Clock 2: (816, 14)
Clock 3: (760, 14)
Overlap 1 and 2: (435, 27)
Overlap 2 and 3: (401, 27)
Overlap 1, 2 and 3: (140, 40)


In [13]:
# Checking genes from Ciceri 2024 present in each clock

ciceri_genes = ['EZH2','MTA2','RBBP4','BRD1','RCOR2','SMARCE1',
                'KDM5B','SMARCD1','SMARCA4','MTF3', 'EPC1','RNF2',
                'CHD3','KMT5B','EPC2','SMARCAD2','CBX5','HDAC2',
                'DM1A','KLF12','SOX4','SOX11'
                ]

# Check if the genes are present in the clocks
ciceri_genes_in_clock_1 = clock_1_df[clock_1_df['Gene'].isin(ciceri_genes)]
ciceri_genes_in_clock_2 = clock_2_df[clock_2_df['Gene'].isin(ciceri_genes)]
ciceri_genes_in_clock_3 = clock_3_df[clock_3_df['Gene'].isin(ciceri_genes)]
ciceri_genes_in_overlap_1_2_3 = overlap_1_2_3_df[overlap_1_2_3_df['Gene'].isin(ciceri_genes)]
ciceri_genes_in_overlap_2_3 = overlap_2_3_df[overlap_2_3_df['Gene'].isin(ciceri_genes)]

# Make list of ciceri genes present in each clock
ciceri_genes_in_clock_1_list = ciceri_genes_in_clock_1['Gene'].tolist()
ciceri_genes_in_clock_2_list = ciceri_genes_in_clock_2['Gene'].tolist()
ciceri_genes_in_clock_3_list = ciceri_genes_in_clock_3['Gene'].tolist()
ciceri_genes_in_overlap_1_2_3_list = ciceri_genes_in_overlap_1_2_3['Gene'].tolist()
ciceri_genes_in_overlap_2_3_list = ciceri_genes_in_overlap_2_3['Gene'].tolist()

# Print the results
print(f"Ciceri genes in clock 1: {ciceri_genes_in_clock_1_list}")
print(f"Ciceri genes in clock 2: {ciceri_genes_in_clock_2_list}")
print(f"Ciceri genes in clock 3: {ciceri_genes_in_clock_3_list}")
print(f"Ciceri genes in overlap 1, 2 and 3: {ciceri_genes_in_overlap_1_2_3_list}")
print(f"Ciceri genes in overlap 2 and 3: {ciceri_genes_in_overlap_2_3_list}")

Ciceri genes in clock 1: []
Ciceri genes in clock 2: ['CHD3', 'SOX4', 'CHD3', 'SOX4', 'SOX4']
Ciceri genes in clock 3: ['SOX4', 'SOX4']
Ciceri genes in overlap 1, 2 and 3: []
Ciceri genes in overlap 2 and 3: ['SOX4']
