In [1]:
%load_ext autoreload
%autoreload 2

import math
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import statsmodels.formula.api as smf
from scipy.stats import pearsonr
from sklearn.metrics import mean_absolute_error
from statannotations.Annotator import Annotator

import omniage

# Configure seaborn's global plotting theme with the "whitegrid" style.
sns.set(style="whitegrid")

In [2]:
# --- 1. Loading data ---
print("1. Loading Blood Tissue Dataset...")

# Methylation table for the 50-sample Hannum example (first CSV column is the index).
beta_df = pd.read_csv(
    "../example/data/hannum_50samples_beta.csv",
    index_col=0,
)

# Phenotype table; a numeric 'Age' column is derived from the raw 'age' column.
# errors='coerce' turns any unparseable age into NaN instead of raising.
pheno_df = pd.read_csv(
    "../example/data/hannum_50samples_pheno.csv",
    index_col=0,
).assign(Age=lambda pheno: pd.to_numeric(pheno['age'], errors='coerce'))

1. Loading Blood Tissue Dataset...


In [3]:
# --- 2. Predict Chronological Age ---
print("\n2. Predicting Epigenetic Age...")
# NOTE(review): `target_clocks` is not referenced by the call below, which
# requests clocks="all" — confirm whether the subset was meant to be used.
target_clocks = ["Horvath2013", "ZhangClock"]

# Run every available clock on the Hannum example, supplying the numeric age
# and the sex column from the phenotype table; return_dict=False is requested
# so that `.head()` can be called on the result below.
pred_ages = omniage.cal_epimarker(
    beta_df,
    clocks="all",
    ages=pheno_df['Age'],
    sex=pheno_df['sex'],
    return_dict=False,
)

print("Prediction preview:")
print(pred_ages.head())


2. Predicting Epigenetic Age...
Calculating 76 clocks: AdaptAge, Bernabeu_cAge, BohlinGA, CausalAge, CentenarianClock_100, CentenarianClock_40, CompCHIP, CompCRP, CompIL6, CorticalClock, DNAmCTFClock, DNAmTL, DamAge, DunedinPACE, EPICGA, EnsembleAgeDynamic, EnsembleAgeHumanMouse, EnsembleAgeStatic, EpiCMIT_Hyper, EpiCMIT_Hypo, EpiScores, EpiTOC1, EpiTOC2, EpiTOC3, GliaIn, GliaSin, GrimAge1, GrimAge2, Hannum, Hep, Horvath2013, Horvath2018, HypoClock, IC_Clock, KnightGA, LeeControl, LeeRefinedRobust, LeeRobust, Lin, MayneGA, McCartneyAlcohol, McCartneyBMI, McCartneyBodyFat, McCartneyEducation, McCartneyHDL, McCartneyLDL, McCartneySmoking, McCartneyTotalCholesterol, McCartneyTotalHDLRatio, McCartneyWHR, NeuIn, NeuSin, PCDNAmTL, PCGrimAge1, PCHannum, PCHorvath2013, PCHorvath2018, PCPhenoAge, PanMammalianBlood, PanMammalianSkin, PanMammalianUniversal, PedBE, PhenoAge, RepliTali, Retro_age_V1, Retro_age_V2, StemTOC, StemTOCvitro, StocH, StocP, StocZ, SystemsAge, VidalBralo, Zhang10, ZhangCl

Running DNAmCTFClock:  13%|█▎        | 10/76 [00:00<00:03, 18.52it/s]       

[CorticalClock] Imputing 2 missing probes with reference values.


Running DunedinPACE:  16%|█▌        | 12/76 [00:12<01:46,  1.66s/it] 

Skipping DNAmCTFClock: Requires 'ctf' input.


Running EnsembleAgeDynamic:  18%|█▊        | 14/76 [00:12<01:15,  1.22s/it]

[EnsembleAge] Found 50 sub-clocks for version 'Dynamic'. Loading...


Running EpiScores:  25%|██▌       | 19/76 [00:14<00:38,  1.48it/s]            

[EnsembleAge] Found 1 sub-clocks for version 'HumanMouse'. Loading...
[EnsembleAge] Found 2 sub-clocks for version 'Static'. Loading...
[EpiScores] Loaded 109 protein scores covering 9101 CpGs.


Running Hep:  38%|███▊      | 29/76 [00:16<00:13,  3.39it/s]      


[Error] GliaIn failed execution: [Glia-In] 'ctf' (Cell Type Fractions) is required for bulk tissue predictions with Intrinsic clocks.


Running PCDNAmTL:  68%|██████▊   | 52/76 [00:16<00:01, 19.28it/s]                 


[Error] NeuIn failed execution: [Neu-In] 'ctf' (Cell Type Fractions) is required for bulk tissue predictions with Intrinsic clocks.


Running Retro_age_V2:  84%|████████▍ | 64/76 [00:24<00:03,  3.27it/s]         

Skipping PanMammalianBlood: Missing required 'sample_info'.
Skipping PanMammalianSkin: Missing required 'sample_info'.
Skipping PanMammalianUniversal: Missing required 'sample_info'.


Running DNAmFitAge: 100%|██████████| 76/76 [00:34<00:00,  2.20it/s]  

   -> Using dependency: GrimAge1['DNAmGrimAge1']
Prediction preview:
            AdaptAge  Bernabeu_cAge   BohlinGA  CausalAge  \
GSM990532  67.939661      43.795505  41.353410  32.857355   
GSM990292  44.817962      38.594093  41.546013  24.570410   
GSM989979  74.196327      62.331541  43.780415  48.357441   
GSM989900  72.089042      87.425859  40.424986  71.994774   
GSM990054  60.789089      79.084025  42.272794  69.184372   

           CentenarianClock_100  CentenarianClock_40  CompCHIP_TET2  \
GSM990532            101.899257            45.444042      -0.187777   
GSM990292             99.200049            38.534047      -0.541435   
GSM989979            101.366379            63.871899       0.267374   
GSM989900            100.839813            87.695101      -0.504409   
GSM990054            101.212559            83.958868       0.178283   

           CompCHIP_AnyCHIP  CompCHIP_DNMT3A  CompCHIP_ASXL1  ...   Zhang10  \
GSM990532         -0.503069        -1.568649       -0.1334




In [4]:
# Place phenotype columns and clock predictions side by side, keeping only the
# index labels that appear in both tables (inner join on the index).
plot_data = pd.concat(
    objs=[pheno_df, pred_ages],
    axis="columns",
    join="inner",
)


In [5]:
# Inspect the column labels of the merged phenotype + prediction table.
plot_data.columns

Index(['SampleID', 'age', 'sex', 'Age', 'AdaptAge', 'Bernabeu_cAge',
       'BohlinGA', 'CausalAge', 'CentenarianClock_100', 'CentenarianClock_40',
       ...
       'Zhang10', 'ZhangClock', 'DNAmGait_noAge', 'DNAmGrip_noAge',
       'DNAmGait_wAge', 'DNAmGrip_wAge', 'DNAmFEV1_wAge', 'DNAmVO2max',
       'DNAmFitAge', 'FitAgeAccel'],
      dtype='object', length=296)

In [6]:
# Directory that receives the consistency-check results. It defaults to the
# original location; set the OMNIAGE_RESULTS_DIR environment variable to write
# somewhere else without editing the notebook.
results_dir = Path(
    os.environ.get(
        "OMNIAGE_RESULTS_DIR",
        "/mnt/local-disk/data/duzhaozhen/AgingBiomarker_work/Check_consistency",
    )
)
# Save the merged Hannum phenotype + prediction table.
plot_data.to_csv(results_dir / "OmniAge_python_Hannum50_res.csv")

In [7]:
##### Check some clocks that were missed: EnsembleAge and PanMammalianBlood, as well as GliaIn

In [8]:
## First, the CTS clocks

In [8]:
# --- 1. Loading data ---
print("1. Loading Murphy Dataset...")

# Methylation table for the Murphy example; this rebinds `beta_df`.
beta_df = pd.read_csv(
    "../example/data/Murphy_beta_m.csv",
    index_col=0,
)
# Companion table passed as `ctf` (cell type fractions) to the clock call.
ctf_df = pd.read_csv(
    "../example/data/Murphy_CTF_m.csv",
    index_col=0,
)

1. Loading Murphy Dataset...


In [10]:
# Run the two Neu clocks on the Murphy data with data_type="bulk", supplying
# the cell-type-fraction table through `ctf`.
pred_ages = omniage.cal_epimarker(
    beta_df,
    ctf=ctf_df,
    clocks=["NeuIn", "NeuSin"],
    data_type="bulk",
    return_dict=False,
)

print("Prediction preview:")
print(pred_ages.head())

Calculating 2 clocks: NeuIn, NeuSin


Running NeuSin: 100%|██████████| 2/2 [00:00<00:00, 35.65it/s]

[Neu-In] Regressing out cell type effects for 38 samples...
Prediction preview:
                NeuIn     NeuSin
GSM2350800  46.861960  32.793556
GSM2350802  76.091003  73.838187
GSM2350804  42.489604  35.701511
GSM2350808  37.547937  19.754201
GSM2350809  28.147765  18.796313





In [11]:
# Directory that receives the consistency-check results. It defaults to the
# original location; set the OMNIAGE_RESULTS_DIR environment variable to write
# somewhere else without editing the notebook.
results_dir = Path(
    os.environ.get(
        "OMNIAGE_RESULTS_DIR",
        "/mnt/local-disk/data/duzhaozhen/AgingBiomarker_work/Check_consistency",
    )
)
# Save the NeuIn / NeuSin predictions for the Murphy data.
pred_ages.to_csv(results_dir / "Neu_In_Sin_Murphy_res.csv")

In [12]:
# Load the Pai example methylation table; this rebinds `beta_df`.
beta_df = pd.read_csv(
    "../example/data/Pai_beta_m.csv",
    index_col=0,
)
# Run the same two Neu clocks with data_type="sorted"; no `ctf` table is supplied.
pred_ages = omniage.cal_epimarker(
    beta_df,
    clocks=["NeuIn", "NeuSin"],
    data_type="sorted",
    return_dict=False,
)


Calculating 2 clocks: NeuIn, NeuSin


Running NeuSin: 100%|██████████| 2/2 [00:00<00:00, 87.07it/s]


In [14]:
# Preview the first rows of the NeuIn / NeuSin predictions for the Pai data.
pred_ages.head()

Unnamed: 0,NeuIn,NeuSin
GSM3059446,71.550636,94.362779
GSM3059459,61.288175,80.396628
GSM3059460,71.124285,92.874396
GSM3059461,59.837719,80.213533
GSM3059462,52.124075,71.409499


In [15]:
# Directory that receives the consistency-check results. It defaults to the
# original location; set the OMNIAGE_RESULTS_DIR environment variable to write
# somewhere else without editing the notebook.
results_dir = Path(
    os.environ.get(
        "OMNIAGE_RESULTS_DIR",
        "/mnt/local-disk/data/duzhaozhen/AgingBiomarker_work/Check_consistency",
    )
)
# Save the NeuIn / NeuSin predictions for the Pai data.
pred_ages.to_csv(results_dir / "Neu_In_Sin_Pai_res.csv")

In [None]:
#### EnsembleAge clocks

In [16]:
# Load the GSE223748 mouse example table; this rebinds `beta_df`.
beta_df = pd.read_csv(
    filepath_or_buffer="../example/data/GSE223748_10_mouse.csv",
    index_col=0,
)

In [17]:
# Preview the mouse table to check its orientation (which axis holds samples vs. probes).
beta_df.head()

Unnamed: 0,cg00000165,cg00001209,cg00001364,cg00001582,cg00002920,cg00003994,cg00004555,cg00005112,cg00005271,cg00006213,...,rs7746156_II_F_C_37550,rs798149_II_F_C_37528,rs845016_II_F_C_37529,rs877309_II_F_C_37552,rs9292570_I_F_C_37499,rs9363764_II_F_C_37541,rs939290_II_F_C_37535,rs951295_I_F_C_37507,rs966367_II_F_C_37551,rs9839873_II_F_C_37532
202897220093_R01C01,0.48099,0.934681,0.911639,0.060784,0.655778,0.029122,0.059606,0.866695,0.967462,0.803999,...,0.503242,0.50786,0.510814,0.505766,0.459316,0.478933,0.626849,0.47896,0.516974,0.503603
202897220093_R02C01,0.483132,0.946624,0.907298,0.057917,0.628036,0.027994,0.061197,0.874569,0.967608,0.802775,...,0.496286,0.513652,0.494576,0.488226,0.48424,0.477256,0.605927,0.511858,0.509821,0.513164
202897220093_R03C01,0.494042,0.940368,0.912401,0.055478,0.697029,0.027779,0.062433,0.87936,0.957365,0.799859,...,0.475041,0.510109,0.496343,0.485479,0.486639,0.493799,0.59379,0.486963,0.495449,0.50427
202897220093_R04C01,0.505628,0.935963,0.90147,0.064311,0.690823,0.025568,0.060846,0.865023,0.962606,0.798414,...,0.492356,0.504153,0.486705,0.489979,0.500472,0.505229,0.600432,0.501462,0.509107,0.508495
202897220093_R05C01,0.496484,0.926762,0.91235,0.060102,0.678101,0.026342,0.060356,0.877332,0.958239,0.803769,...,0.481962,0.510084,0.486484,0.492601,0.495459,0.476187,0.568832,0.508603,0.501258,0.514085


In [20]:
# The mouse CSV is laid out with samples as rows and probes as columns (see the
# preview of `beta_df`), so it is transposed before being passed in.
# NOTE(review): presumably cal_epimarker expects probes as rows — confirm.
pred_ages = omniage.cal_epimarker(
    beta_df.T,
    clocks=["EnsembleAgeHumanMouse", "EnsembleAgeStatic"],
    return_dict=False,
)

print("Prediction preview:")
print(pred_ages.head())

Calculating 2 clocks: EnsembleAgeHumanMouse, EnsembleAgeStatic


Running EnsembleAgeStatic: 100%|██████████| 2/2 [00:00<00:00, 37.82it/s]

[EnsembleAge] Found 1 sub-clocks for version 'HumanMouse'. Loading...
[EnsembleAge] Found 2 sub-clocks for version 'Static'. Loading...
Prediction preview:
                     HumanMouse_HumanMouse  Static_Static_Top  Static_Static
202897220093_R01C01               0.149667           0.395621       0.379750
202897220093_R02C01               0.139055           0.512518       0.488571
202897220093_R03C01               0.129409           0.539827       0.525144
202897220093_R04C01               0.123896           0.426661       0.444212
202897220093_R05C01               0.110012           0.455485       0.427305





In [21]:
# Directory that receives the consistency-check results. It defaults to the
# original location; set the OMNIAGE_RESULTS_DIR environment variable to write
# somewhere else without editing the notebook.
results_dir = Path(
    os.environ.get(
        "OMNIAGE_RESULTS_DIR",
        "/mnt/local-disk/data/duzhaozhen/AgingBiomarker_work/Check_consistency",
    )
)
# Save the EnsembleAge predictions for the mouse data.
pred_ages.to_csv(results_dir / "EnsembleAge_py_res.csv")

In [None]:
### PanMammalian

In [None]:
# Load the PanMammalian example methylation table; this rebinds `beta_df`.
# NOTE(review): this cell was never executed and the following cell repeats the
# same read — it looks like a leftover duplicate; consider removing it.
beta_df = pd.read_csv("../example/data/PanMammalian_example_beta_m.csv", index_col=0)


In [22]:
# PanMammalian example inputs: the methylation table (rebinds `beta_df`) and
# the table later passed to the clocks as `sample_info`.
beta_df = pd.read_csv(
    "../example/data/PanMammalian_example_beta_m.csv",
    index_col=0,
)
sample_info = pd.read_csv(
    "../example/data/PanMammalian_example_info.csv",
    index_col=0,
)

In [27]:
# Run the three PanMammalian clocks, this time providing `sample_info`
# (the all-clocks run on the Hannum data skipped them for lack of it).
pred_ages = omniage.cal_epimarker(
    beta_df,
    clocks=["PanMammalianUniversal", "PanMammalianBlood", "PanMammalianSkin"],
    sample_info=sample_info,
    return_dict=False,
)

Calculating 3 clocks: PanMammalianUniversal, PanMammalianBlood, PanMammalianSkin


Running PanMammalianSkin: 100%|██████████| 3/3 [00:00<00:00, 23.94it/s]


In [28]:
# Preview the first rows of the PanMammalian clock predictions.
pred_ages.head()

Unnamed: 0,Sample,SpeciesLatinName,DNAmAgePanMammalianClock1,DNAmRelativeAge,DNAmAgePanMammalianClock2,DNAmRelativeAdultAge,DNAmAgePanMammalianClock3,Sample.1,SpeciesLatinName.1,DNAmRelativeAge_Blood,DNAmAgePanMammalianBlood2,DNAmRelativeAdultAge_Blood,DNAmAgePanMammalianBlood3,Sample.2,SpeciesLatinName.2,DNAmRelativeAge_Skin,DNAmAgePanMammalianSkin2,DNAmRelativeAdultAge_Skin,DNAmAgePanMammalianSkin3
0,202894750036_R05C02,Tursiops truncatus,12.579836,0.192074,15.897342,1.358574,12.503491,202894750036_R05C02,Tursiops truncatus,0.006499,-0.457382,1.778671,16.688355,202894750036_R05C02,Tursiops truncatus,0.738541,64.057575,1.0544,9.473421
1,202897220027_R01C02,Tursiops truncatus,14.117004,0.23474,19.657555,2.305307,21.934506,202897220027_R01C02,Tursiops truncatus,0.009667,-0.17814,3.060126,29.453752,202897220027_R01C02,Tursiops truncatus,0.762979,66.211337,1.339767,12.316146
2,202897220027_R02C02,Tursiops truncatus,18.797731,0.25795,21.70302,2.332115,22.201559,202897220027_R02C02,Tursiops truncatus,0.018608,0.609817,3.264395,31.488605,202897220027_R02C02,Tursiops truncatus,0.790722,68.656286,1.76269,16.529149
3,202897220027_R06C01,Tursiops truncatus,16.069038,0.212144,17.666178,1.506717,13.97924,202897220027_R06C01,Tursiops truncatus,0.006816,-0.429403,1.721757,16.121395,202897220027_R06C01,Tursiops truncatus,0.764205,66.319384,1.330296,12.221801
4,202897220040_R02C01,Tursiops truncatus,13.503053,0.18959,15.678451,1.442985,13.344361,202897220040_R02C01,Tursiops truncatus,0.005795,-0.519383,1.941106,18.306473,202897220040_R02C01,Tursiops truncatus,0.726966,63.037484,1.014021,9.071183


In [25]:
# Directory that receives the consistency-check results. It defaults to the
# original location; set the OMNIAGE_RESULTS_DIR environment variable to write
# somewhere else without editing the notebook.
results_dir = Path(
    os.environ.get(
        "OMNIAGE_RESULTS_DIR",
        "/mnt/local-disk/data/duzhaozhen/AgingBiomarker_work/Check_consistency",
    )
)
# Save the PanMammalian clock predictions.
pred_ages.to_csv(results_dir / "PanMammalian_py_res.csv")