In [9]:
import numpy as np
import cptac
import pandas as pd
from predict_protein import get_proteins, learn_cptac
from sklearn.preprocessing import StandardScaler, RobustScaler

In [10]:
# List the datasets currently offered by the cptac package.
# The call is the cell's last expression, so its return value is what the
# notebook renders below (name, description, reuse status, publication link).
cptac.list_datasets()


Unnamed: 0_level_0,Description,Data reuse status,Publication link
Dataset name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Brca,breast cancer,no restrictions,https://pubmed.ncbi.nlm.nih.gov/33212010/
Ccrcc,clear cell renal cell carcinoma (kidney),no restrictions,https://pubmed.ncbi.nlm.nih.gov/31675502/
Colon,colorectal cancer,no restrictions,https://pubmed.ncbi.nlm.nih.gov/31031003/
Endometrial,endometrial carcinoma (uterine),no restrictions,https://pubmed.ncbi.nlm.nih.gov/32059776/
Gbm,glioblastoma,no restrictions,https://pubmed.ncbi.nlm.nih.gov/33577785/
Hnscc,head and neck squamous cell carcinoma,no restrictions,https://pubmed.ncbi.nlm.nih.gov/33417831/
Lscc,lung squamous cell carcinoma,no restrictions,https://pubmed.ncbi.nlm.nih.gov/34358469/
Luad,lung adenocarcinoma,no restrictions,https://pubmed.ncbi.nlm.nih.gov/32649874/
Ovarian,high grade serous ovarian cancer,no restrictions,https://pubmed.ncbi.nlm.nih.gov/27372738/
Pdac,pancreatic ductal adenocarcinoma,password access only,unpublished


In [11]:
# Fetch (or refresh) the local copy of every tumor dataset used in this notebook.
# The datasets are requested in the same order as before; the last request is
# kept as the final expression so the cell still displays its return value.
for tumor_name in ("endometrial", "ovarian", "colon", "brca", "luad", "ccrcc"):
    cptac.download(dataset=tumor_name)
cptac.download(dataset="gbm")

Checking that gbm index is up-to-date.....                                                                                                                                                                                                                                                             

True

In [12]:
# Load each CPTAC tumor dataset and build its joined transcriptomics + proteomics
# table. The per-tumor join/flatten step is shared through a small helper.


def _join_rna_to_protein(dataset):
    """Join a dataset's transcriptomics to its proteomics with flat column labels.

    Parameters
    ----------
    dataset : cptac dataset object
        Any object exposing ``join_omics_to_omics`` (e.g. ``cptac.Brca()``).

    Returns
    -------
    pandas.DataFrame
        The result of ``join_omics_to_omics('transcriptomics', 'proteomics')``.
        When the joined table has multi-level columns, level 1 is dropped so the
        column labels are plain strings; tables whose columns are already
        single-level are returned untouched.
    """
    joined = dataset.join_omics_to_omics('transcriptomics', 'proteomics')
    # Only some datasets come back with a column MultiIndex; droplevel() raises
    # on a single-level index, so guard it.
    # NOTE(review): level 1 is presumably a database/isoform identifier - confirm.
    if joined.columns.nlevels > 1:
        joined.columns = joined.columns.droplevel(1)
    return joined


en = cptac.Endometrial()
ov = cptac.Ovarian()
co = cptac.Colon()
br = cptac.Brca()
lu = cptac.Luad()
cc = cptac.Ccrcc()
gb = cptac.Gbm()

# Endometrial
en_rna = en.get_transcriptomics()
en_pro = en.get_proteomics()
a = _join_rna_to_protein(en)

# Ovarian
ov_rna = ov.get_transcriptomics()
ov_pro = ov.get_proteomics()
b = _join_rna_to_protein(ov)

# Colon
co_rna = co.get_transcriptomics()
co_pro = co.get_proteomics()
c = _join_rna_to_protein(co)

# Breast
br_rna = br.get_transcriptomics()
br_pro = br.get_proteomics()
d = _join_rna_to_protein(br)

# Lung adenocarcinoma
# Fixed: this table was previously joined from `br`, which made `e` a copy of
# the breast-cancer table instead of the Luad data.
lu_rna = lu.get_transcriptomics()
lu_pro = lu.get_proteomics()
e = _join_rna_to_protein(lu)

# Clear cell renal cell carcinoma
cc_rna = cc.get_transcriptomics()
cc_pro = cc.get_proteomics()
f = _join_rna_to_protein(cc)

# Glioblastoma
gb_rna = gb.get_transcriptomics()
gb_pro = gb.get_proteomics()
g = _join_rna_to_protein(gb)

Formatting dataframes..........date.....   Loading endometrial v2.1.1....Loading endometrial v2.1.1.......Loading endometrial v2.1.1.........Loading endometrial v2.1.1............                                                                    Loading ovarian v0.0.1...Loading ovarian v0.0.1......Loading ovarian v0.0.1........                                                                  Loading colon v0.0.1....Loading colon v0.0.1......Loading colon v0.0.1........Loading colon v0.0.1...........                                                                 Loading brca v5.4..Loading brca v5.4....Loading brca v5.4......                                                                 Loading luad v3.1.1...Loading luad v3.1.1.....Loading luad v3.1.1.......Loading luad v3.1.1.........Loading luad v3.1.1...........                                                                  Loading ccrcc v0.1.1..Loading ccrcc v0.1.1....Loading ccrcc v0.1.1......Loading ccrcc v0.1.1.........



# Transform

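Note: The transcriptomics values appear to be on a log or variance-stabilizing
transformation (VST) scale, whereas the proteomics values are already
standardized protein-wise. Only the transcriptomics columns are therefore
standardized in the next cell.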

In [13]:
# Standardize the transcriptomics columns of every joined table (one scaler per
# tumor) and tag each sample ID with a two-letter tumor prefix.


def _standardize_transcriptomics(joined, prefix, dedupe_columns=True):
    """Return a z-scored, prefixed copy of one tumor's joined RNA/protein table.

    Parameters
    ----------
    joined : pandas.DataFrame
        Joined table whose column labels are strings; transcriptomics columns
        are recognised by the ``'transcriptomics'`` suffix.
    prefix : str
        Text prepended to every index label (sample ID) so samples from
        different tumors stay distinguishable after concatenation.
    dedupe_columns : bool, default True
        When True, keep only the first occurrence of each duplicated column
        label before scaling.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The transformed copy (``joined`` itself is not modified) and the list of
        transcriptomics column labels that were scaled.
    """
    frame = joined.copy()
    if dedupe_columns:
        frame = frame.loc[:, ~frame.columns.duplicated(keep='first')]
    tx_cols = [col for col in frame.columns if col.endswith('transcriptomics')]
    # Fit a fresh scaler per tumor; proteomics columns are left as they are.
    frame[tx_cols] = StandardScaler().fit_transform(frame[tx_cols])
    frame.index = prefix + frame.index
    return frame, tx_cols


# Endometrial and colon were never de-duplicated, so that step stays off for them.
a_std, a_tx_cols = _standardize_transcriptomics(a, 'EN', dedupe_columns=False)
b_std, b_tx_cols = _standardize_transcriptomics(b, 'OV')
c_std, c_tx_cols = _standardize_transcriptomics(c, 'CO', dedupe_columns=False)
d_std, d_tx_cols = _standardize_transcriptomics(d, 'BR')
e_std, e_tx_cols = _standardize_transcriptomics(e, 'LU')
f_std, f_tx_cols = _standardize_transcriptomics(f, 'CC')
# Fixed: Gbm samples were previously prefixed 'CC', the same tag as Ccrcc, which
# made the two tumors' sample IDs indistinguishable (and potentially colliding).
g_std, g_tx_cols = _standardize_transcriptomics(g, 'GB')

print(True)

True


In [14]:
# Worked example: stack the endometrial, ovarian and colon tables row-wise and
# train one model per protein on the pooled samples.
# NOTE(review): tx_to_include="self" presumably restricts each protein's
# predictors to its own transcript, and train_method="elastic" selects an
# elastic net - confirm against predict_protein.learn_cptac.
# TODO: Can we speed this up?

z_df_3tumors = pd.concat([a_std, b_std, c_std])
comb_3tumors = learn_cptac.LearnCPTAC(z_df_3tumors)
self_elastic_result = comb_3tumors.learn_all_proteins(
    tx_to_include="self",
    train_method="elastic",
)


  0%|          | 1/11924 [00:00<3:09:40,  1.05it/s]

0: A1BG, r: 0.69, R2: 0.305, med.r: 0.69, med.R2: 0.305, med.NRMSE: 0.391


  1%|          | 101/11924 [00:48<1:38:25,  2.00it/s]

100: ACOT13, r: 0.255, R2: 0.034, med.r: 0.452, med.R2: 0.116, med.NRMSE: 0.282


  2%|▏         | 201/11924 [01:35<1:33:33,  2.09it/s]

200: ADH4, r: 0.072, R2: -0.029, med.r: 0.47, med.R2: 0.124, med.NRMSE: 0.29


  3%|▎         | 301/11924 [02:22<1:34:16,  2.05it/s]

300: AKAP9, r: 0.477, R2: 0.226, med.r: 0.469, med.R2: 0.118, med.NRMSE: 0.286


  3%|▎         | 401/11924 [03:10<1:35:56,  2.00it/s]

400: ANAPC4, r: 0.226, R2: 0.04, med.r: 0.463, med.R2: 0.114, med.NRMSE: 0.289


  4%|▍         | 501/11924 [03:58<1:31:21,  2.08it/s]

500: AP4S1, r: 0.109, R2: -0.114, med.r: 0.463, med.R2: 0.107, med.NRMSE: 0.286


  5%|▌         | 601/11924 [04:45<1:21:46,  2.31it/s]

600: ARHGAP32, r: 0.38, R2: 0.053, med.r: 0.451, med.R2: 0.098, med.NRMSE: 0.284


  6%|▌         | 701/11924 [05:33<1:35:23,  1.96it/s]

700: ARX, r: 0.227, R2: -1.684, med.r: 0.445, med.R2: 0.092, med.NRMSE: 0.282


  7%|▋         | 801/11924 [06:23<1:29:38,  2.07it/s]

800: ATP5MF, r: 0, R2: -0.098, med.r: 0.431, med.R2: 0.077, med.NRMSE: 0.284


  8%|▊         | 901/11924 [07:12<1:28:58,  2.06it/s]

900: BARD1, r: 0.666, R2: 0.313, med.r: 0.426, med.R2: 0.074, med.NRMSE: 0.285


  8%|▊         | 1001/11924 [08:00<1:32:43,  1.96it/s]

1000: BMT2, r: -0.314, R2: -1.982, med.r: 0.423, med.R2: 0.077, med.NRMSE: 0.285


  9%|▉         | 1101/11924 [08:47<1:30:21,  2.00it/s]

1100: C12orf43, r: 0.207, R2: -0.006, med.r: 0.416, med.R2: 0.074, med.NRMSE: 0.284


 10%|█         | 1201/11924 [09:34<1:10:35,  2.53it/s]

1200: C5orf15, r: 0.374, R2: -0.083, med.r: 0.413, med.R2: 0.071, med.NRMSE: 0.286


 11%|█         | 1301/11924 [10:20<1:26:43,  2.04it/s]

1300: CANX, r: 0.037, R2: -0.064, med.r: 0.415, med.R2: 0.072, med.NRMSE: 0.288


 12%|█▏        | 1401/11924 [11:09<1:24:42,  2.07it/s]

1400: CCDC167, r: 0.247, R2: 0.052, med.r: 0.42, med.R2: 0.074, med.NRMSE: 0.286


 13%|█▎        | 1501/11924 [11:56<1:27:05,  1.99it/s]

1500: CD2BP2, r: 0.31, R2: 0.071, med.r: 0.415, med.R2: 0.072, med.NRMSE: 0.288


 13%|█▎        | 1601/11924 [12:45<1:29:39,  1.92it/s]

1600: CDK5RAP1, r: -0.168, R2: -0.064, med.r: 0.416, med.R2: 0.074, med.NRMSE: 0.29


 14%|█▍        | 1701/11924 [13:33<1:28:55,  1.92it/s]

1700: CFAP298, r: 0.845, R2: 0.689, med.r: 0.417, med.R2: 0.075, med.NRMSE: 0.291


 15%|█▌        | 1801/11924 [14:22<1:25:27,  1.97it/s]

1800: CHURC1, r: 0.034, R2: -0.032, med.r: 0.416, med.R2: 0.075, med.NRMSE: 0.291


 16%|█▌        | 1901/11924 [15:10<1:26:55,  1.92it/s]

1900: CMTR1, r: 0.847, R2: 0.534, med.r: 0.417, med.R2: 0.076, med.NRMSE: 0.291


 17%|█▋        | 2001/11924 [15:53<1:28:18,  1.87it/s]

2000: COPB1, r: 0.506, R2: 0.006, med.r: 0.416, med.R2: 0.074, med.NRMSE: 0.291


 18%|█▊        | 2101/11924 [16:41<1:21:30,  2.01it/s]

2100: CRCP, r: 0, R2: -0.018, med.r: 0.413, med.R2: 0.071, med.NRMSE: 0.292


 18%|█▊        | 2201/11924 [17:27<1:21:25,  1.99it/s]

2200: CTNNAL1, r: 0.512, R2: 0.076, med.r: 0.414, med.R2: 0.071, med.NRMSE: 0.292


 19%|█▉        | 2301/11924 [18:11<1:06:07,  2.43it/s]

2300: CYP4F12, r: 0.211, R2: -0.363, med.r: 0.417, med.R2: 0.074, med.NRMSE: 0.292


 20%|██        | 2401/11924 [18:58<1:20:09,  1.98it/s]

2400: DDT, r: 0.457, R2: 0.1, med.r: 0.417, med.R2: 0.074, med.NRMSE: 0.292


 21%|██        | 2501/11924 [19:45<1:17:49,  2.02it/s]

2500: DHRS3, r: 0.537, R2: 0.256, med.r: 0.415, med.R2: 0.073, med.NRMSE: 0.291


 22%|██▏       | 2601/11924 [20:32<1:15:46,  2.05it/s]

2600: DNAJC2, r: 0.479, R2: 0.207, med.r: 0.417, med.R2: 0.074, med.NRMSE: 0.291


 23%|██▎       | 2701/11924 [21:18<1:16:49,  2.00it/s]

2700: DTNB, r: 0.274, R2: 0.062, med.r: 0.418, med.R2: 0.074, med.NRMSE: 0.29


 23%|██▎       | 2801/11924 [22:04<57:43,  2.63it/s]  

2800: EFCAB14, r: 0.518, R2: 0.229, med.r: 0.418, med.R2: 0.074, med.NRMSE: 0.289


 24%|██▍       | 2901/11924 [22:51<57:32,  2.61it/s]  

2900: ELL, r: 0.111, R2: -0.057, med.r: 0.416, med.R2: 0.073, med.NRMSE: 0.289


 25%|██▌       | 3001/11924 [23:40<1:15:04,  1.98it/s]

3000: EPM2AIP1, r: 0.688, R2: 0.434, med.r: 0.416, med.R2: 0.073, med.NRMSE: 0.289


 26%|██▌       | 3101/11924 [24:47<1:19:38,  1.85it/s]

3100: EXOG, r: 0.322, R2: 0.065, med.r: 0.417, med.R2: 0.074, med.NRMSE: 0.288


 27%|██▋       | 3201/11924 [25:35<1:13:01,  1.99it/s]

3200: FAM198B, r: 0.68, R2: 0.459, med.r: 0.416, med.R2: 0.073, med.NRMSE: 0.289


 28%|██▊       | 3301/11924 [26:22<1:16:20,  1.88it/s]

3300: FBXO22, r: 0.324, R2: 0.008, med.r: 0.416, med.R2: 0.074, med.NRMSE: 0.289


 29%|██▊       | 3401/11924 [27:10<1:11:15,  1.99it/s]

3400: FKBP1B, r: 0.005, R2: -0.079, med.r: 0.416, med.R2: 0.074, med.NRMSE: 0.289


 29%|██▉       | 3501/11924 [27:58<1:11:49,  1.95it/s]

3500: FUK, r: 0.706, R2: 0.482, med.r: 0.416, med.R2: 0.076, med.NRMSE: 0.289


 30%|███       | 3601/11924 [28:44<1:11:16,  1.95it/s]

3600: GBP3, r: 0.457, R2: 0.163, med.r: 0.417, med.R2: 0.076, med.NRMSE: 0.289


 31%|███       | 3701/11924 [29:31<1:10:14,  1.95it/s]

3700: GLG1, r: 0.074, R2: -0.024, med.r: 0.418, med.R2: 0.078, med.NRMSE: 0.289


 32%|███▏      | 3801/11924 [30:20<1:07:40,  2.00it/s]

3800: GP6, r: 0.202, R2: -0.176, med.r: 0.419, med.R2: 0.079, med.NRMSE: 0.289


 33%|███▎      | 3901/11924 [31:04<43:29,  3.07it/s]  

3900: GSTA3, r: 0.633, R2: 0.31, med.r: 0.42, med.R2: 0.081, med.NRMSE: 0.289


 34%|███▎      | 4001/11924 [31:50<1:06:57,  1.97it/s]

4000: HAUS6, r: 0.468, R2: 0.196, med.r: 0.42, med.R2: 0.079, med.NRMSE: 0.289


 34%|███▍      | 4101/11924 [32:37<1:07:14,  1.94it/s]

4100: HIST1H1T, r: -0.056, R2: -0.722, med.r: 0.418, med.R2: 0.078, med.NRMSE: 0.289


 35%|███▌      | 4201/11924 [33:26<1:09:05,  1.86it/s]

4200: HOXA10, r: 0.385, R2: 0.09, med.r: 0.417, med.R2: 0.078, med.NRMSE: 0.289


 36%|███▌      | 4301/11924 [34:12<49:14,  2.58it/s]  

4300: HYOU1, r: 0.637, R2: 0.4, med.r: 0.418, med.R2: 0.079, med.NRMSE: 0.289


 37%|███▋      | 4401/11924 [34:58<49:41,  2.52it/s]  

4400: IKZF5, r: 0.727, R2: 0.305, med.r: 0.42, med.R2: 0.081, med.NRMSE: 0.289


 38%|███▊      | 4501/11924 [35:44<53:36,  2.31it/s]  

4500: IRAK4, r: 0.262, R2: -0.232, med.r: 0.419, med.R2: 0.08, med.NRMSE: 0.289


 39%|███▊      | 4601/11924 [36:32<1:01:46,  1.98it/s]

4600: JMJD8, r: 0.034, R2: -0.351, med.r: 0.42, med.R2: 0.082, med.NRMSE: 0.289


 39%|███▉      | 4701/11924 [37:18<1:00:42,  1.98it/s]

4700: KIAA0319L, r: 0.667, R2: 0.393, med.r: 0.42, med.R2: 0.082, med.NRMSE: 0.289


 41%|████      | 4901/11924 [38:45<37:58,  3.08it/s]  

4900: KYAT1, r: 0.342, R2: 0.087, med.r: 0.42, med.R2: 0.08, med.NRMSE: 0.289


 42%|████▏     | 5001/11924 [39:32<58:51,  1.96it/s]  

5000: LGALSL, r: 0.643, R2: 0.413, med.r: 0.421, med.R2: 0.08, med.NRMSE: 0.289


 43%|████▎     | 5101/11924 [40:18<58:44,  1.94it/s]  

5100: LRP10, r: 0.268, R2: 0.053, med.r: 0.423, med.R2: 0.082, med.NRMSE: 0.29


 44%|████▎     | 5201/11924 [41:04<58:43,  1.91it/s]  

5200: LZTR1, r: -0.156, R2: -0.1, med.r: 0.423, med.R2: 0.082, med.NRMSE: 0.29


 44%|████▍     | 5301/11924 [41:51<59:28,  1.86it/s]  

5300: MAPK1IP1L, r: 0, R2: -0.121, med.r: 0.423, med.R2: 0.082, med.NRMSE: 0.289


 45%|████▌     | 5401/11924 [42:38<55:48,  1.95it/s]  

5400: ME2, r: 0.307, R2: -0.152, med.r: 0.423, med.R2: 0.082, med.NRMSE: 0.289


 46%|████▌     | 5501/11924 [43:26<54:51,  1.95it/s]  

5500: MGP, r: 0.714, R2: 0.158, med.r: 0.421, med.R2: 0.081, med.NRMSE: 0.289


 47%|████▋     | 5601/11924 [44:13<58:15,  1.81it/s]

5600: MOB2, r: 0.478, R2: 0.208, med.r: 0.421, med.R2: 0.081, med.NRMSE: 0.289


 48%|████▊     | 5701/11924 [45:01<53:21,  1.94it/s]

5700: MRPL49, r: 0.011, R2: -0.044, med.r: 0.419, med.R2: 0.079, med.NRMSE: 0.289


 49%|████▊     | 5801/11924 [45:48<53:08,  1.92it/s]

5800: MTHFD1, r: 0.734, R2: 0.436, med.r: 0.418, med.R2: 0.078, med.NRMSE: 0.289


 49%|████▉     | 5901/11924 [46:37<56:36,  1.77it/s]

5900: MYSM1, r: 0.126, R2: 0.006, med.r: 0.418, med.R2: 0.079, med.NRMSE: 0.289


 50%|█████     | 6001/11924 [47:26<52:19,  1.89it/s]  

6000: NCL, r: 0.33, R2: -0.09, med.r: 0.418, med.R2: 0.079, med.NRMSE: 0.289


 51%|█████     | 6101/11924 [48:10<51:06,  1.90it/s]

6100: NELFB, r: 0.375, R2: -0.014, med.r: 0.416, med.R2: 0.078, med.NRMSE: 0.289


 52%|█████▏    | 6201/11924 [48:57<48:34,  1.96it/s]

6200: NMT1, r: 0.392, R2: -0.041, med.r: 0.416, med.R2: 0.078, med.NRMSE: 0.289


 53%|█████▎    | 6301/11924 [49:46<53:45,  1.74it/s]

6300: NSFL1C, r: 0.631, R2: 0.349, med.r: 0.416, med.R2: 0.078, med.NRMSE: 0.289


 54%|█████▎    | 6401/11924 [50:34<47:51,  1.92it/s]

6400: OAS1, r: 0.64, R2: 0.395, med.r: 0.416, med.R2: 0.078, med.NRMSE: 0.288


 55%|█████▍    | 6501/11924 [51:22<47:03,  1.92it/s]

6500: P4HA1, r: 0.689, R2: 0.356, med.r: 0.416, med.R2: 0.078, med.NRMSE: 0.288


 55%|█████▌    | 6601/11924 [52:06<46:09,  1.92it/s]

6600: PBDC1, r: 0.461, R2: 0.212, med.r: 0.418, med.R2: 0.079, med.NRMSE: 0.288


 56%|█████▌    | 6701/11924 [52:51<50:22,  1.73it/s]

6700: PDIA2, r: 0.862, R2: 0.497, med.r: 0.417, med.R2: 0.079, med.NRMSE: 0.288


 57%|█████▋    | 6801/11924 [53:40<45:42,  1.87it/s]

6800: PGP, r: 0.532, R2: 0.19, med.r: 0.418, med.R2: 0.079, med.NRMSE: 0.288


 58%|█████▊    | 6901/11924 [54:28<41:48,  2.00it/s]

6900: PIP4K2B, r: 0.545, R2: 0.219, med.r: 0.416, med.R2: 0.079, med.NRMSE: 0.288


 59%|█████▊    | 7001/11924 [55:18<46:34,  1.76it/s]

7000: PLLP, r: 0.716, R2: 0.328, med.r: 0.419, med.R2: 0.081, med.NRMSE: 0.288


 60%|█████▉    | 7101/11924 [56:06<43:35,  1.84it/s]

7100: POLR2F, r: 0, R2: -0.228, med.r: 0.419, med.R2: 0.082, med.NRMSE: 0.287


 60%|██████    | 7201/11924 [56:55<43:22,  1.81it/s]

7200: PPP1R16A, r: -0.058, R2: -0.112, med.r: 0.418, med.R2: 0.081, med.NRMSE: 0.287


 61%|██████    | 7301/11924 [57:43<49:17,  1.56it/s]

7300: PRKAR2A, r: 0.4, R2: 0.1, med.r: 0.417, med.R2: 0.079, med.NRMSE: 0.287


 62%|██████▏   | 7401/11924 [58:29<40:34,  1.86it/s]

7400: PSMA4, r: 0.319, R2: 0.001, med.r: 0.417, med.R2: 0.079, med.NRMSE: 0.287


 63%|██████▎   | 7501/11924 [59:19<39:40,  1.86it/s]

7500: PTPRE, r: 0.511, R2: 0.184, med.r: 0.416, med.R2: 0.078, med.NRMSE: 0.287


 64%|██████▎   | 7601/11924 [1:00:08<39:46,  1.81it/s]

7600: RAB29, r: 0.58, R2: 0.296, med.r: 0.416, med.R2: 0.078, med.NRMSE: 0.287


 65%|██████▍   | 7701/11924 [1:00:55<37:53,  1.86it/s]

7700: RAP1A, r: -0.04, R2: -0.326, med.r: 0.416, med.R2: 0.078, med.NRMSE: 0.286


 65%|██████▌   | 7801/11924 [1:01:43<30:42,  2.24it/s]

7800: RBP5, r: 0.725, R2: 0.375, med.r: 0.415, med.R2: 0.077, med.NRMSE: 0.286


 66%|██████▋   | 7901/11924 [1:02:31<34:18,  1.95it/s]

7900: RHBDF1, r: 0.682, R2: 0.281, med.r: 0.416, med.R2: 0.077, med.NRMSE: 0.286


 67%|██████▋   | 8001/11924 [1:03:14<34:48,  1.88it/s]

8000: RNF220, r: 0.136, R2: -0.088, med.r: 0.415, med.R2: 0.077, med.NRMSE: 0.286


 68%|██████▊   | 8101/11924 [1:04:03<34:38,  1.84it/s]

8100: RPP25, r: 0.58, R2: 0.226, med.r: 0.412, med.R2: 0.075, med.NRMSE: 0.286


 69%|██████▉   | 8201/11924 [1:04:50<31:54,  1.94it/s]

8200: RTF2, r: 0.433, R2: 0.173, med.r: 0.41, med.R2: 0.073, med.NRMSE: 0.286


 70%|██████▉   | 8301/11924 [1:05:38<34:21,  1.76it/s]

8300: SBDS, r: 0.468, R2: 0.051, med.r: 0.411, med.R2: 0.074, med.NRMSE: 0.286


 71%|███████▏  | 8501/11924 [1:07:13<30:37,  1.86it/s]

8500: SETD1B, r: -0.026, R2: -0.18, med.r: 0.411, med.R2: 0.073, med.NRMSE: 0.286


 72%|███████▏  | 8601/11924 [1:08:00<29:46,  1.86it/s]

8600: SHROOM1, r: 0.065, R2: -0.081, med.r: 0.411, med.R2: 0.074, med.NRMSE: 0.286


 73%|███████▎  | 8701/11924 [1:08:48<29:36,  1.81it/s]

8700: SLC25A44, r: 0.23, R2: -0.66, med.r: 0.411, med.R2: 0.073, med.NRMSE: 0.286


 74%|███████▍  | 8801/11924 [1:09:36<34:51,  1.49it/s]

8800: SMARCD3, r: 0.532, R2: 0.246, med.r: 0.411, med.R2: 0.074, med.NRMSE: 0.286


 75%|███████▍  | 8901/11924 [1:10:24<28:04,  1.79it/s]

8900: SNX13, r: 0.414, R2: 0.102, med.r: 0.409, med.R2: 0.072, med.NRMSE: 0.286


 76%|███████▋  | 9101/11924 [1:11:59<25:28,  1.85it/s]

9100: SSBP3, r: 0.602, R2: 0.36, med.r: 0.409, med.R2: 0.072, med.NRMSE: 0.286


 77%|███████▋  | 9201/11924 [1:12:45<20:59,  2.16it/s]

9200: STRN4, r: 0.063, R2: -0.081, med.r: 0.409, med.R2: 0.073, med.NRMSE: 0.286


 78%|███████▊  | 9301/11924 [1:13:31<23:57,  1.82it/s]

9300: SYT2, r: 0.12, R2: -0.414, med.r: 0.409, med.R2: 0.073, med.NRMSE: 0.286


 79%|███████▉  | 9401/11924 [1:14:19<23:05,  1.82it/s]

9400: TBCD, r: 0.305, R2: -0.121, med.r: 0.409, med.R2: 0.072, med.NRMSE: 0.286


 80%|███████▉  | 9501/11924 [1:15:06<20:57,  1.93it/s]

9500: TFAP2A, r: 0.486, R2: 0.202, med.r: 0.409, med.R2: 0.072, med.NRMSE: 0.286


 81%|████████  | 9601/11924 [1:15:54<21:16,  1.82it/s]

9600: TJAP1, r: -0.185, R2: -0.124, med.r: 0.41, med.R2: 0.073, med.NRMSE: 0.286


 81%|████████▏ | 9701/11924 [1:16:41<20:05,  1.84it/s]

9700: TMEM219, r: 0.335, R2: -0.084, med.r: 0.409, med.R2: 0.073, med.NRMSE: 0.286


 82%|████████▏ | 9801/11924 [1:17:29<19:31,  1.81it/s]

9800: TNS3, r: 0.518, R2: 0.258, med.r: 0.409, med.R2: 0.073, med.NRMSE: 0.287


 83%|████████▎ | 9901/11924 [1:18:33<18:27,  1.83it/s]  

9900: TRAPPC4, r: 0.121, R2: -0.036, med.r: 0.409, med.R2: 0.073, med.NRMSE: 0.287


 84%|████████▍ | 10001/11924 [1:19:20<14:33,  2.20it/s]

10000: TSKU, r: 0.594, R2: 0.143, med.r: 0.409, med.R2: 0.073, med.NRMSE: 0.286


 85%|████████▍ | 10101/11924 [1:20:04<17:13,  1.76it/s]

10100: TUT1, r: 0.159, R2: -0.074, med.r: 0.409, med.R2: 0.073, med.NRMSE: 0.286


 86%|████████▌ | 10201/11924 [1:20:52<15:32,  1.85it/s]

10200: UBE4B, r: 0.476, R2: -0.029, med.r: 0.409, med.R2: 0.073, med.NRMSE: 0.286


 86%|████████▋ | 10301/11924 [1:21:39<15:02,  1.80it/s]

10300: USB1, r: 0.03, R2: -0.098, med.r: 0.408, med.R2: 0.073, med.NRMSE: 0.286


 87%|████████▋ | 10401/11924 [1:22:28<14:31,  1.75it/s]

10400: VIM, r: 0.819, R2: 0.559, med.r: 0.408, med.R2: 0.073, med.NRMSE: 0.285


 88%|████████▊ | 10501/11924 [1:23:16<14:07,  1.68it/s]

10500: WDR36, r: 0.007, R2: -0.158, med.r: 0.407, med.R2: 0.073, med.NRMSE: 0.285


 89%|████████▉ | 10601/11924 [1:24:03<12:31,  1.76it/s]

10600: XRCC6, r: 0.186, R2: -0.191, med.r: 0.407, med.R2: 0.073, med.NRMSE: 0.285


 90%|████████▉ | 10701/11924 [1:24:52<11:13,  1.82it/s]

10700: ZCCHC10, r: 0, R2: -0.0, med.r: 0.405, med.R2: 0.071, med.NRMSE: 0.285


 91%|█████████▏| 10901/11924 [1:26:19<09:05,  1.87it/s]

10900: ZNF618, r: 0.279, R2: 0.026, med.r: 0.403, med.R2: 0.07, med.NRMSE: 0.285


 92%|█████████▏| 11001/11924 [1:27:04<08:21,  1.84it/s]

11000: ADAMTS4, r: 0.626, R2: -0.264, med.r: 0.402, med.R2: 0.068, med.NRMSE: 0.285


 93%|█████████▎| 11101/11924 [1:27:40<03:56,  3.49it/s]

11100: CETP, r: -0.353, R2: -0.801, med.r: 0.4, med.R2: 0.067, med.NRMSE: 0.286


 94%|█████████▍| 11201/11924 [1:28:20<06:08,  1.96it/s]

11200: FASTKD3, r: 0.765, R2: -0.05, med.r: 0.399, med.R2: 0.066, med.NRMSE: 0.286


 95%|█████████▍| 11301/11924 [1:28:53<04:17,  2.42it/s]

11300: LMF1, r: 0.072, R2: -0.375, med.r: 0.398, med.R2: 0.064, med.NRMSE: 0.287


 96%|█████████▌| 11401/11924 [1:29:31<04:22,  2.00it/s]

11400: P2RX7, r: 0, R2: -0.119, med.r: 0.397, med.R2: 0.063, med.NRMSE: 0.287


 96%|█████████▋| 11501/11924 [1:30:00<03:04,  2.30it/s]

11500: SLC23A2, r: 0.27, R2: 0.055, med.r: 0.397, med.R2: 0.062, med.NRMSE: 0.288


 98%|█████████▊| 11701/11924 [1:31:16<00:55,  4.03it/s]

11700: AQP8, r: 0.257, R2: -3.732, med.r: 0.394, med.R2: 0.06, med.NRMSE: 0.289


100%|█████████▉| 11901/11924 [1:32:03<00:08,  2.86it/s]

11900: UGT1A10, r: 0.857, R2: -1.178, med.r: 0.395, med.R2: 0.059, med.NRMSE: 0.289


100%|██████████| 11924/11924 [1:32:07<00:00,  2.16it/s]
