In [52]:
from itertools import product
import polars as pl

In [53]:
tissues = ["BAT", "BLOOD", "LIVER", "HEART", "WAT-SC", "SKM-GN", "HYPOTH"]
assays = ["ATAC", "TRNSCRPT", "METHYL"]

products = list(product(tissues, assays))

In [11]:
annotations = {
    "ATAC": pl.read_csv(
        "data/ATAC_feature_annotation.csv", schema_overrides={"chrom": pl.String}
    ).select(pl.col("feature_ID", "ensembl_gene")),
    "TRNSCRPT": pl.read_csv("data/TRNSCRPT_feature_annotation.csv").select(
        pl.col("feature_ID", "gene_id")
    ),
    "METHYL": pl.read_csv(
        "data/METHYL_feature_annotation.csv", schema_overrides={"EntrezID": pl.String}
    ).select(pl.col("feature_ID", "EntrezID")),
}

In [61]:
genomic_data = pl.read_csv("data/TRAINING_REGULATED_NORM_DATA.csv", null_values=["NA"]).filter(
    pl.col("assay").is_in(assays)
).drop("dataset")
feature_to_gene = pl.read_csv("data/FEATURE_TO_GENE.csv", null_values=["NA"])

In [62]:
data: pl.DataFrame = genomic_data.join(
    feature_to_gene.select(pl.col("feature_ID", "ensembl_gene")),
    on="feature_ID",
    how="left",
).drop(pl.selectors.by_index(0))

In [69]:
data.drop("tissue", "assay", "feature_ID").group_by("feature", "ensembl_gene").mean().sort("ensembl_gene")

feature,ensembl_gene,10027327,10043527,10502300,10411690,10185585,10046461,10024735,10871349,10700102,10561072,10337199,10023259,10046380,10025707,10734945,10641858,10045228,10953744,10106383,10971645,10025626,10044922,10027599,10950486,10486003,10306471,10044841,10026193,10901434,10026355,10729518,10095241,10044337,10589325,10044760,10381414,10503110,10046119,10220917,10680837,10027165,10335064,10587543,10934529,10044256,10672656,10043950,10026517,10314733,10204989,10045309,10422463,10833331,10761160,10025979,10026789,10139044,10417702,10424911,10027408,10046542,10025464,10026274,10043799,10046704,10059369,10152148,10315624,10027912,10677887,10619259
str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""TRNSCRPT;ADRNL;ENSRNOG00000000…","""ENSRNOG00000000008""",-0.7387,-0.94139,-0.59007,-2.14966,-1.43455,-0.76078,-0.63198,-0.62297,-1.79689,-1.08953,-0.02867,-0.58012,1.2856,-1.14739,-0.66794,-0.94411,-1.2895,2.72672,-1.40085,0.263,-0.48365,,-0.20343,-1.67123,-0.77388,-1.20951,,-0.80985,-0.92411,-0.43588,-1.63783,-1.02324,0.764,,-0.76695,-1.50401,-0.18138,1.08491,-3.51724,,,-0.94709,-0.76845,,-2.44742,-1.35228,-1.05139,,,,-1.5542,-0.85559,-1.7128,-3.51724,,,,,,,,,,,,,,,,,
"""TRNSCRPT;ADRNL;ENSRNOG00000000…","""ENSRNOG00000000012""",-0.96029,-1.35582,0.02768,-0.86926,-0.61576,-1.56168,1.69249,-0.74119,-0.30582,1.73951,1.10789,-2.29582,3.03771,-2.00485,-2.91582,-1.94174,-2.90721,4.1858,0.06307,1.43275,-1.31742,,-0.87082,-0.24669,-0.36551,-0.27441,,0.28046,-0.92411,-1.27477,-0.63691,-2.16988,2.02577,,-0.76695,-0.47941,3.52712,1.74053,-2.37123,,,-0.37653,0.57194,,-2.44742,-1.71444,-0.93783,,,,-1.78647,2.4819,-0.06152,-1.06518,,,,,,,,,,,,,,,,,
"""TRNSCRPT;SKM-VL;ENSRNOG0000000…","""ENSRNOG00000000012""",3.33676,2.72366,3.34911,3.22432,3.07093,2.56057,3.01327,2.65165,3.39886,2.80459,3.19826,1.91861,2.85838,3.07432,2.81308,2.84335,3.41182,2.69129,3.17792,3.05436,2.83334,,2.79843,3.00107,2.74596,3.75096,,2.77907,2.97214,2.41595,2.97353,3.129,2.40007,,3.33575,3.20343,2.51432,3.00123,2.40892,,,3.00997,2.32237,,2.95545,2.87391,3.10745,,,,2.77594,2.63467,2.89851,2.84704,,,,,,,,,,,,,,,,,
"""TRNSCRPT;BLOOD;ENSRNOG00000000…","""ENSRNOG00000000033""",10.32644,10.51219,10.61489,10.6903,10.24134,11.42502,10.7625,10.73769,11.28948,10.79336,10.68634,10.62481,10.59316,10.71052,10.79884,10.7835,10.78103,11.80194,10.64148,10.80058,11.88018,,9.96763,10.93355,10.98993,11.07955,,11.02782,11.068,10.35532,10.83963,10.73762,11.3904,,11.27262,10.49174,10.7671,10.66741,10.45913,,,10.7103,11.06256,,10.93289,11.03946,10.82259,,,,11.0404,10.66166,11.17609,11.30269,,,,,,,,,,,,,,,,,
"""TRNSCRPT;SPLEEN;ENSRNOG0000000…","""ENSRNOG00000000034""",4.92402,5.25481,4.91776,4.95928,4.98145,4.93014,5.14171,4.76962,4.94224,4.89234,4.828,5.01863,4.96145,5.00147,5.05976,4.82992,4.87617,4.964,4.88907,4.75455,5.11117,,4.77346,4.85299,4.90152,4.83097,,5.14089,4.89248,4.83576,4.93872,4.94595,5.06155,,4.67974,4.87997,4.89266,4.89037,5.09387,,,4.91719,4.7266,,5.08951,4.83033,4.83019,,,,4.83834,4.84122,4.91153,4.90224,,,,,,,,,,,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TRNSCRPT;CORTEX;ENSRNOG0000006…","""ENSRNOG00000062298""",7.80153,7.86228,7.95315,7.98049,7.90078,7.95287,7.84731,7.78866,7.73203,7.85542,7.82086,7.8974,7.89062,7.76701,7.74935,7.78271,7.95547,7.80914,7.80084,7.76056,7.90315,,7.91171,7.72874,7.83716,7.83823,,7.83111,7.83367,7.93508,7.86892,7.78294,7.96183,,7.95439,7.92551,7.8466,7.88103,7.86496,,,7.77309,7.74828,,8.05267,7.86358,7.94966,,,,7.87404,7.93179,7.86661,7.93896,,,,,,,,,,,,,,,,,
"""TRNSCRPT;BLOOD;ENSRNOG00000062…","""ENSRNOG00000062298""",9.73794,9.72359,9.72547,9.6957,9.67605,10.14496,10.22612,9.71573,9.85822,9.66096,9.54961,9.62056,9.55499,10.09507,9.55427,9.86695,9.81229,9.9568,10.26058,9.87158,10.17272,,9.57886,9.72736,9.97563,9.85875,,10.28257,9.61318,9.65669,9.46311,9.80264,9.8856,,9.93612,9.49881,9.8243,9.69264,9.74389,,,9.65447,9.61257,,9.76,9.57007,9.75,,,,9.72034,9.73638,9.70099,9.82906,,,,,,,,,,,,,,,,,
"""TRNSCRPT;HEART;ENSRNOG00000062…","""ENSRNOG00000062298""",7.93414,7.90423,7.8066,7.87698,7.84096,7.83103,7.90266,7.8111,7.90872,7.84575,7.93611,7.92842,7.91631,7.83973,7.82637,7.82781,7.84139,7.78762,7.82569,7.83604,7.84247,,7.90824,7.88587,7.85107,7.9139,,7.84159,7.8302,7.90798,7.8593,7.79202,7.90021,,7.75029,7.89467,7.80775,7.8895,7.82621,,,7.86794,7.87521,,7.73686,7.79369,7.84159,,,,7.85342,7.84073,7.87179,7.82162,,,,,,,,,,,,,,,,,
"""TRNSCRPT;KIDNEY;ENSRNOG0000006…","""ENSRNOG00000062298""",8.25875,8.40485,8.22936,8.37623,8.2416,8.53105,8.25432,8.44644,8.46474,8.21529,8.39677,8.24983,8.38762,8.36441,8.5007,8.30406,8.47471,8.45478,8.38536,8.32947,8.34141,,8.17706,8.54627,8.32635,8.40126,,8.36505,8.41879,8.148,8.50455,8.27352,8.40506,,8.52065,8.47443,8.35024,8.36505,8.19715,,,8.42142,8.2137,,8.41388,8.2989,8.36806,,,,8.56321,8.25514,8.4836,8.43781,,,,,,,,,,,,,,,,,
