In [2]:
import pandas as pd
import polars as pl


In [None]:
#cat Homo_sapiens.GRCh38.114.gtf | awk '$3=="gene"' | tr ';' '\t' | tr ' ' '\t' | sed 's/"//g' | cut -f1,4,5,7,10 | awk '{print $1"\t"$2"\t"$3"\t"$5"\t0\t"$4}' > Homo_sapiens.GRCh38.114.bed

In [56]:
# Import BED file from GTF file
df = pd.read_csv("Homo_sapiens.GRCh38.114.bed", sep="\t", header=None,
                 names=["chr", "start", "end", "gene_id", "score", "strand"])

In [57]:
# Create promotor region +- 500 bp from TSS
def get_promoter(row):
    if row["strand"] == "+":
        tss = row["start"]
    else:
        tss = row["end"]
    start = max(tss - 500, 0)  # unikamy ujemnych wartości
    end = tss + 500
    return pd.Series([row["chr"], start, end, row["gene_id"], row["strand"]])

promoters = df.apply(get_promoter, axis=1)
promoters.columns = ["chr", "prom_start", "prom_end", "gene_id", "strand"]

In [58]:
# Save BED file
promoters[["chr", "prom_start", "prom_end", "gene_id"]].to_csv("promoters_500bp.bed",
                                                               sep="\t", header=False, index=False)

In [6]:
# 1. Import cellid from Seurat stst file
whitelist = pd.read_csv("k562.dis.stat.txt", sep="\t", usecols=["cellid"])
cellids = set(whitelist["cellid"])

In [None]:
# Creating ATAC BED file: cat k562.allele.flt.M.fragment.unsorted.tsv | sed 's/chr//' | cut -f1-4 > k562.allele.flt.M.fragment.unsorted.bed

In [7]:
# 2. Import ATAC BED file

atac_cols = ["chr", "start", "end", "barcode", "count"]
atac = pd.read_csv("k562.allele.flt.M.fragment.unsorted.bed", sep="\t", names=atac_cols, header=None, dtype={"chr": str} )

In [8]:
# 3. Filter ATAC by cellid
atac_filtered = atac[atac["barcode"].isin(cellids)]

In [9]:
# 4. Save filtered file
atac_filtered.to_csv("k562.atac.filtered.bed", sep="\t", index=False, header=False)

In [None]:
# make TSV file showiong localisation od ATAC signal on chromosomes and in gene promotores
#bedtools intersect -a k562.atac.filtered.bed -b promoters_500bp.bed -wa -wb > atac_mapped_to_genes.tsv

In [10]:
# Read the ATAC localisation file
cols = ["chr_a", "start_a", "end_a", "barcode", "count",
        "chr_b", "prom_start", "prom_end", "gene_id"]
mapped = pd.read_csv("atac_mapped_to_genes.tsv", sep="\t", names=cols, header=None, dtype={"chr_a": str, "chr_b": str})

In [12]:
# Count number of ATAC peaks for every gene in every cell
atac_counts = mapped.groupby(["barcode", "gene_id"]).size().unstack(fill_value=0)

In [13]:
# NORMALIZACJA
atac_norm = atac_counts.div(atac_counts.sum(axis=1), axis=0) * 1e4

In [14]:
# SAve matrix cells x genes
atac_norm.to_csv("atac_norm_matrix.tsv", sep="\t")

In [3]:
atac_norm = pl.read_csv("atac_norm_matrix.tsv", separator="\t")

In [4]:
atac_norm

barcode,ENSG00000000460,ENSG00000000971,ENSG00000001460,ENSG00000001561,ENSG00000002330,ENSG00000002726,ENSG00000002745,ENSG00000002822,ENSG00000002834,ENSG00000002919,ENSG00000003509,ENSG00000003987,ENSG00000004139,ENSG00000004455,ENSG00000004487,ENSG00000004766,ENSG00000004809,ENSG00000005102,ENSG00000005108,ENSG00000005175,ENSG00000005206,ENSG00000005249,ENSG00000005421,ENSG00000005448,ENSG00000005486,ENSG00000005700,ENSG00000006025,ENSG00000006059,ENSG00000006071,ENSG00000006116,ENSG00000006125,ENSG00000006459,ENSG00000006606,ENSG00000006740,ENSG00000006747,ENSG00000007062,…,ENSG00000310399,ENSG00000310402,ENSG00000310409,ENSG00000310411,ENSG00000310414,ENSG00000310415,ENSG00000310419,ENSG00000310420,ENSG00000310422,ENSG00000310425,ENSG00000310427,ENSG00000310435,ENSG00000310436,ENSG00000310437,ENSG00000310438,ENSG00000310441,ENSG00000310457,ENSG00000310458,ENSG00000310471,ENSG00000310476,ENSG00000310477,ENSG00000310483,ENSG00000310486,ENSG00000310489,ENSG00000310493,ENSG00000310500,ENSG00000310501,ENSG00000310504,ENSG00000310507,ENSG00000310511,ENSG00000310515,ENSG00000310516,ENSG00000310525,ENSG00000310529,ENSG00000310564,ENSG00000310566,ENSG00000310567
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""SCG0074_AAACAGCCAAACATAG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""SCG0074_AAACAGCCACCGTTCC-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""SCG0074_AAACAGCCATCGCTTT-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""SCG0074_AAACAGCCATTGTGAT-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""SCG0074_AAACATGCAACATAAG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""SCG0080_TTTGTGTTCTACCTCA-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""SCG0080_TTTGTGTTCTCAATAG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""SCG0080_TTTGTTGGTTAGGTGC-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""SCG0080_TTTGTTGGTTAGTACG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Save to Parquet 
atac_norm.write_parquet("atac_norm_matrix.parquet")

In [None]:
# Import RNAseq expression and combine RNA with ATAC

In [6]:
# Import RNAseq expression 
df_expr_norm = pl.read_csv("k562.allele.flt.M_df_expr_norm.tsv", separator="\t", has_header=True)
df_expr_norm = df_expr_norm.rename({df_expr_norm.columns[0]: "barcode"})

In [7]:
df_expr_norm

barcode,ENSG00000000457,ENSG00000000460,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001167,ENSG00000001460,ENSG00000001461,ENSG00000001630,ENSG00000002330,ENSG00000002746,ENSG00000002822,ENSG00000002834,ENSG00000002919,ENSG00000003147,ENSG00000003249,ENSG00000003436,ENSG00000003509,ENSG00000003987,ENSG00000004139,ENSG00000004142,ENSG00000004455,ENSG00000004468,ENSG00000004487,ENSG00000004766,ENSG00000004779,ENSG00000004864,ENSG00000004866,ENSG00000004897,ENSG00000004939,ENSG00000004948,ENSG00000005001,ENSG00000005007,ENSG00000005020,ENSG00000005059,ENSG00000005102,…,ENSG00000287820,ENSG00000287832,ENSG00000287836,ENSG00000287839,ENSG00000287845,ENSG00000287860,ENSG00000287865,ENSG00000287877,ENSG00000287883,ENSG00000287896,ENSG00000287907,ENSG00000287919,ENSG00000287920,ENSG00000287935,ENSG00000287937,ENSG00000287938,ENSG00000287943,ENSG00000287972,ENSG00000287973,ENSG00000287976,ENSG00000288002,ENSG00000288005,ENSG00000288009,ENSG00000288061,ENSG00000288066,ENSG00000288067,ENSG00000288071,ENSG00000288075,ENSG00000288091,ENSG00000288093,ENSG00000288156,ENSG00000288253,ENSG00000288321,ENSG00000288380,ENSG00000288398,Phase,metacell
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str
"""SCG0074_AAACAGCCAAACATAG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""G1""","""G1_3"""
"""SCG0074_AAACAGCCACCGTTCC-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_34"""
"""SCG0074_AAACAGCCATCGCTTT-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""G2M""","""G2M_35"""
"""SCG0074_AAACAGCCATTGTGAT-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_15"""
"""SCG0074_AAACATGCAACATAAG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_36"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""SCG0080_TTTGTGTTCTACCTCA-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""G1""","""G1_7"""
"""SCG0080_TTTGTGTTCTCAATAG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_22"""
"""SCG0080_TTTGTTGGTTAGGTGC-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_32"""
"""SCG0080_TTTGTTGGTTAGTACG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""G2M""","""G2M_28"""


In [8]:
# extracting metadeta from table
df_metadata = df_expr_norm.select([
    pl.col("barcode"),
    pl.col("Phase"),
    pl.col("metacell"),
    pl.col("metacell").str.extract(r"_(\d+)", 1).cast(pl.Int32).alias("Pseudotime")
])


In [9]:
df_metadata

barcode,Phase,metacell,Pseudotime
str,str,str,i32
"""SCG0074_AAACAGCCAAACATAG-1""","""G1""","""G1_3""",3
"""SCG0074_AAACAGCCACCGTTCC-1""","""S""","""S_34""",34
"""SCG0074_AAACAGCCATCGCTTT-1""","""G2M""","""G2M_35""",35
"""SCG0074_AAACAGCCATTGTGAT-1""","""S""","""S_15""",15
"""SCG0074_AAACATGCAACATAAG-1""","""S""","""S_36""",36
…,…,…,…
"""SCG0080_TTTGTGTTCTACCTCA-1""","""G1""","""G1_7""",7
"""SCG0080_TTTGTGTTCTCAATAG-1""","""S""","""S_22""",22
"""SCG0080_TTTGTTGGTTAGGTGC-1""","""S""","""S_32""",32
"""SCG0080_TTTGTTGGTTAGTACG-1""","""G2M""","""G2M_28""",28


In [11]:
df_merged = atac_norm.join(
    df_metadata.select(["barcode", "Phase", "metacell", "Pseudotime"]),
    on="barcode",
    how="inner"
)


In [12]:
df_merged

barcode,ENSG00000000460,ENSG00000000971,ENSG00000001460,ENSG00000001561,ENSG00000002330,ENSG00000002726,ENSG00000002745,ENSG00000002822,ENSG00000002834,ENSG00000002919,ENSG00000003509,ENSG00000003987,ENSG00000004139,ENSG00000004455,ENSG00000004487,ENSG00000004766,ENSG00000004809,ENSG00000005102,ENSG00000005108,ENSG00000005175,ENSG00000005206,ENSG00000005249,ENSG00000005421,ENSG00000005448,ENSG00000005486,ENSG00000005700,ENSG00000006025,ENSG00000006059,ENSG00000006071,ENSG00000006116,ENSG00000006125,ENSG00000006459,ENSG00000006606,ENSG00000006740,ENSG00000006747,ENSG00000007062,…,ENSG00000310411,ENSG00000310414,ENSG00000310415,ENSG00000310419,ENSG00000310420,ENSG00000310422,ENSG00000310425,ENSG00000310427,ENSG00000310435,ENSG00000310436,ENSG00000310437,ENSG00000310438,ENSG00000310441,ENSG00000310457,ENSG00000310458,ENSG00000310471,ENSG00000310476,ENSG00000310477,ENSG00000310483,ENSG00000310486,ENSG00000310489,ENSG00000310493,ENSG00000310500,ENSG00000310501,ENSG00000310504,ENSG00000310507,ENSG00000310511,ENSG00000310515,ENSG00000310516,ENSG00000310525,ENSG00000310529,ENSG00000310564,ENSG00000310566,ENSG00000310567,Phase,metacell,Pseudotime
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str,str,i32
"""SCG0074_AAACAGCCAAACATAG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""G1""","""G1_3""",3
"""SCG0074_AAACAGCCACCGTTCC-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_34""",34
"""SCG0074_AAACAGCCATCGCTTT-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""G2M""","""G2M_35""",35
"""SCG0074_AAACAGCCATTGTGAT-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_15""",15
"""SCG0074_AAACATGCAACATAAG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_36""",36
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""SCG0080_TTTGTGTTCTACCTCA-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""G1""","""G1_7""",7
"""SCG0080_TTTGTGTTCTCAATAG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_22""",22
"""SCG0080_TTTGTTGGTTAGGTGC-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""S""","""S_32""",32
"""SCG0080_TTTGTTGGTTAGTACG-1""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"""G2M""","""G2M_28""",28


In [23]:
# Group by "metacell" and calculate the mean of all other columns
atac_by_metacell = (
    df_merged
    .group_by("metacell")
    .agg(
        pl.all().exclude(["barcode", "Phase", "Pseudotime", "metacell"]).mean()
    )
)


In [24]:
atac_by_metacell

metacell,ENSG00000000460,ENSG00000000971,ENSG00000001460,ENSG00000001561,ENSG00000002330,ENSG00000002726,ENSG00000002745,ENSG00000002822,ENSG00000002834,ENSG00000002919,ENSG00000003509,ENSG00000003987,ENSG00000004139,ENSG00000004455,ENSG00000004487,ENSG00000004766,ENSG00000004809,ENSG00000005102,ENSG00000005108,ENSG00000005175,ENSG00000005206,ENSG00000005249,ENSG00000005421,ENSG00000005448,ENSG00000005486,ENSG00000005700,ENSG00000006025,ENSG00000006059,ENSG00000006071,ENSG00000006116,ENSG00000006125,ENSG00000006459,ENSG00000006606,ENSG00000006740,ENSG00000006747,ENSG00000007062,…,ENSG00000310399,ENSG00000310402,ENSG00000310409,ENSG00000310411,ENSG00000310414,ENSG00000310415,ENSG00000310419,ENSG00000310420,ENSG00000310422,ENSG00000310425,ENSG00000310427,ENSG00000310435,ENSG00000310436,ENSG00000310437,ENSG00000310438,ENSG00000310441,ENSG00000310457,ENSG00000310458,ENSG00000310471,ENSG00000310476,ENSG00000310477,ENSG00000310483,ENSG00000310486,ENSG00000310489,ENSG00000310493,ENSG00000310500,ENSG00000310501,ENSG00000310504,ENSG00000310507,ENSG00000310511,ENSG00000310515,ENSG00000310516,ENSG00000310525,ENSG00000310529,ENSG00000310564,ENSG00000310566,ENSG00000310567
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""G2M_4""",0.0,0.0,0.726111,0.0,0.0,0.0,0.0,0.0,1.395015,1.880927,0.0,0.0,0.0,0.0,0.84076,1.521375,0.0,0.679763,0.0,0.0,1.064963,1.645851,0.0,0.726111,3.279378,1.362685,1.064963,0.0,0.0,0.0,1.37899,0.0,0.0,0.0,0.570516,3.331568,…,0.998403,0.0,0.0,0.0,0.0,2.438204,0.287828,0.0,0.0,0.0,0.0,0.0,2.785287,0.0,1.364318,0.0,0.0,0.0,0.0,0.0,0.0,1.331203,0.0,2.110444,0.819202,1.030609,0.0,0.0,2.282063,0.0,0.0,0.0,1.996805,0.0,1.558482,3.991585,0.0
"""G2M_28""",1.619171,0.0,0.0,0.386668,0.0,0.0,0.0,1.559603,1.233654,2.374784,0.0,0.0,1.12638,0.0,1.103372,0.835701,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.590674,2.081791,3.330866,0.0,0.0,0.0,1.77204,0.0,0.0,0.0,0.498206,0.0,…,0.0,0.0,0.0,0.551207,0.0,0.815327,1.992252,0.0,0.0,0.37546,0.411218,0.0,0.0,1.363512,1.239981,0.0,0.809585,0.0,0.761963,0.0,0.0,0.925241,0.0,0.681756,0.0,0.893336,0.0,5.590945,1.978987,0.863558,0.0,2.158895,0.0,0.0,0.0,3.097635,0.0
"""G1_27""",2.828054,0.0,0.0,0.0,3.480682,0.0,0.0,2.262443,4.211475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.508296,0.0,1.103631,0.0,1.028383,0.0,2.619868,0.76693,1.160227,0.0,0.0,4.691703,3.770739,0.0,0.0,0.0,0.0,…,2.513826,0.0,0.0,0.0,0.0,0.0,3.878474,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.967342,0.0,0.0,1.560306,0.0,0.0,6.464124,5.923729,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.47323,0.0
"""G2M_19""",0.915751,0.0,0.981162,0.0,0.947329,0.0,0.0,0.0,2.930955,0.0,0.0,0.0,0.450369,0.0,5.037753,0.538677,0.0,0.0,0.0,0.0,0.538677,0.610501,6.729382,0.0,1.017501,0.981162,0.886211,1.017501,0.0,0.0,1.308216,0.0,0.0,0.0,2.087932,1.528578,…,0.0,0.0,0.0,0.4995,0.0,0.947329,2.407315,0.0,0.0,0.0,0.0,0.597229,6.033183,0.0,0.0,0.0,1.098901,0.0,0.0,1.056636,0.0,0.0,0.0,1.194458,0.670062,0.981162,0.0,2.655037,0.0,0.528318,0.0,0.0,6.024677,0.0,0.0,4.701073,0.0
"""S_10""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.771426,0.486334,3.089379,0.0,0.0,0.0,0.0,0.520264,0.0,0.0,0.0,0.0,0.0,0.0,3.173201,0.0,0.0,0.828569,2.457461,0.721657,0.0,0.0,0.0,0.93214,0.828569,0.828569,0.0,0.324223,0.520264,…,0.0,0.0,0.0,0.0,0.0,0.699105,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.485707,2.689115,0.497141,0.0,0.860437,1.402194,2.485707,0.67792,0.0,10.918303,0.860437,2.237136,0.0,1.190633,0.0,0.972668,2.60233,0.0,0.0,0.0,0.324223,5.698926,1.48655
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""G2M_32""",0.0,1.047669,1.235712,0.0,0.0,0.0,0.688468,0.669344,0.0,0.0,0.777303,0.0,0.0,0.730194,0.0,0.708717,0.0,0.0,0.0,0.0,0.0,3.228667,0.0,0.0,0.617856,1.417434,1.57774,0.0,0.0,0.0,0.634115,3.466702,0.0,0.0,1.338688,0.0,…,0.0,0.0,0.0,0.0,0.491763,0.0,2.51004,0.0,0.0,0.0,0.0,0.560381,0.388651,0.0,0.708717,0.0,0.0,0.0,0.0,0.0,0.0,1.09529,0.83091,1.660136,0.376506,0.0,0.803213,9.942333,0.803213,0.803213,1.807229,0.0,0.0,0.0,0.926784,2.17177,0.0
"""G1_24""",0.0,0.0,0.0,1.182732,1.02145,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.898876,0.0,0.0,0.0,1.248439,1.02145,1.955348,0.0,0.0,2.425945,6.36746,1.070091,0.0,0.0,0.0,3.487404,0.93633,0.0,0.0,0.0,0.93633,…,0.0,0.0,0.0,0.0,0.0,0.0,1.884979,0.0,0.0,0.0,0.0,1.123596,0.548095,0.0,1.728608,1.315023,0.468165,0.0,0.432152,1.212972,0.0,0.875529,1.498127,1.43143,2.475053,0.0,1.123596,2.80266,0.898876,0.0,0.0,0.0,0.749064,0.0,2.196502,1.510717,0.898876
"""S_5""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.413976,0.598659,0.0,0.0,0.0,0.0,1.38626,1.674386,4.285522,0.0,0.0,0.0,0.0,2.282572,1.539409,0.0,2.025702,0.0,2.521505,0.448994,0.0,0.0,0.0,1.845016,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,1.465139,0.0,0.0,1.045073,0.0,0.0,0.718391,2.062048,0.0,0.0,0.0,0.0,0.0,0.828912,0.0,0.653083,0.0,0.0,0.0,0.0,0.0,2.787642,0.0,0.489812,0.0,0.0,1.632706,0.0,0.862069,3.139032,3.592879
"""G2M_30""",0.0,0.0,0.0,0.731636,0.0,0.0,0.0,1.510306,0.0,0.802439,0.0,0.0,0.0,0.0,0.995025,4.580638,0.0,0.802439,0.0,0.0,2.715949,2.763958,0.0,0.0,1.441207,0.0,1.381979,0.0,0.0,0.0,0.0,0.273358,0.0,0.888415,0.0,1.243781,…,0.0,0.0,0.0,0.802439,0.0,0.690989,1.463272,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.478377,0.0,0.0,0.518242,0.637836,1.13071,0.0,0.0,0.299706,0.0,0.0,0.690989,0.606722,0.0,0.0,1.13071,0.0,2.460814,0.0,0.303361,0.956755,0.518242


In [25]:
avg_by_metacell = pl.read_csv("k562.allele.flt.M_avg_expression_by_metacell.tsv", separator="\t")

In [26]:
avg_by_metacell

metacell,ENSG00000000457,ENSG00000000460,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001167,ENSG00000001460,ENSG00000001461,ENSG00000001630,ENSG00000002330,ENSG00000002746,ENSG00000002822,ENSG00000002834,ENSG00000002919,ENSG00000003147,ENSG00000003249,ENSG00000003436,ENSG00000003509,ENSG00000003987,ENSG00000004139,ENSG00000004142,ENSG00000004455,ENSG00000004468,ENSG00000004487,ENSG00000004766,ENSG00000004779,ENSG00000004864,ENSG00000004866,ENSG00000004897,ENSG00000004939,ENSG00000004948,ENSG00000005001,ENSG00000005007,ENSG00000005020,ENSG00000005059,ENSG00000005102,…,ENSG00000287820,ENSG00000287832,ENSG00000287836,ENSG00000287839,ENSG00000287845,ENSG00000287860,ENSG00000287865,ENSG00000287877,ENSG00000287883,ENSG00000287896,ENSG00000287907,ENSG00000287919,ENSG00000287920,ENSG00000287935,ENSG00000287937,ENSG00000287938,ENSG00000287943,ENSG00000287972,ENSG00000287973,ENSG00000287976,ENSG00000288002,ENSG00000288005,ENSG00000288009,ENSG00000288061,ENSG00000288066,ENSG00000288067,ENSG00000288071,ENSG00000288075,ENSG00000288091,ENSG00000288093,ENSG00000288156,ENSG00000288253,ENSG00000288321,ENSG00000288380,ENSG00000288398,n_cells,metacell_duplicated_0
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,str
"""G1_0""",0.370865,0.305418,0.479272,16.673967,1.539511,0.0,0.677231,4.84134,0.0,0.0,0.0,1.838872,1.278494,0.420982,0.0,0.0,0.394337,0.0,0.0,0.0,0.519211,0.0,0.0,0.662822,0.0,0.0,2.174577,0.0,1.777231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,1.582475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.003154,0.0,0.0,0.0,0.0,0.610836,0.0,0.0,0.47201,0.0,0.0,0.0,0.285804,0.0,0.0,0.0,0.48676,321,"""G1_0"""
"""G1_1""",0.0,1.04295,1.038497,10.669787,0.0,0.0,0.0,1.446281,0.0,0.0,0.631313,0.439174,0.473485,0.0,0.0,0.0,0.0,0.445633,0.0,0.0,0.340483,0.0,0.0,0.30303,0.273,0.0,1.821933,0.0,3.699755,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.513611,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.109338,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,330,"""G1_1"""
"""G1_10""",0.393871,0.0,0.0,16.50342,1.038643,0.577067,0.0,2.090662,0.0,0.0,0.0,0.590807,2.274607,0.0,0.620347,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.708968,0.427826,0.0,0.0,0.0,1.090721,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.435332,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.989517,0.993798,0.0,0.0,0.0,1.084305,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,403,"""G1_10"""
"""G1_11""",0.0,0.543478,0.0,13.889672,0.384615,0.735294,0.403226,1.653888,0.0,0.0,0.0,2.323546,0.0,0.438596,0.409836,0.0,0.0,0.0,0.0,0.0,2.401961,0.0,0.531915,0.0,0.0,0.0,1.388889,0.531915,1.656313,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.187579,0.888695,0.0,0.0,0.0,1.808054,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,400,"""G1_11"""
"""G1_12""",0.727167,0.688895,0.384971,16.934428,0.0,0.0,0.88909,2.137424,0.0,0.0,0.0,1.214182,4.134936,0.0,0.727167,0.0,0.707514,0.327225,0.0,1.67707,1.04712,0.0,0.0,0.0,1.026025,0.0,2.692928,0.0,1.587185,0.0,0.0,0.0,0.0,0.0,0.0,0.294135,…,0.0,0.0,0.0,0.0,0.0,0.513294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.888981,0.384971,0.0,0.0,0.0,1.871016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,382,"""G1_12"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""S_5""",0.0,0.828347,0.0,8.581129,0.366059,0.0,0.493754,2.276732,0.0,0.0,0.0,0.0,1.990446,0.606612,0.0,0.0,1.080195,1.169267,0.0,0.0,0.0,0.0,0.0,0.0,2.075508,0.0,1.754417,0.493754,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.732118,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.183175,0.482532,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,471,"""S_5"""
"""S_6""",0.0,1.122827,0.0,11.985345,0.0,0.0,1.528651,5.978404,0.0,0.0,0.0,0.698812,0.960277,1.103387,0.0,0.0,0.776458,1.546778,0.0,0.0,0.0,0.0,0.0,0.0,0.598982,0.0,2.091142,0.0,1.159393,0.524109,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.470956,0.0,0.0,0.0,0.0,1.464251,0.0,0.0,0.0,0.806322,0.0,0.0,0.0,0.0,0.0,0.0,0.0,477,"""S_6"""
"""S_7""",0.93423,0.0,0.747384,10.128937,0.0,0.0,0.605987,1.86846,0.0,0.0,0.0,0.533846,0.0,1.711116,0.0,0.0,0.896861,0.0,0.0,0.0,0.62282,0.0,0.0,0.0,1.508065,0.0,2.810669,0.0,0.521431,0.93423,0.0,0.0,0.0,0.0,1.323493,0.0,…,0.0,0.0,0.0,0.0,0.0,1.373868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.396212,0.533846,0.0,0.0,0.0,0.560538,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,446,"""S_7"""
"""S_8""",0.0,0.717223,0.0,15.113024,0.0,0.577501,0.436072,1.780627,0.0,0.0,0.763126,0.994376,2.929138,1.286339,0.0,0.0,0.0,0.0,0.0,0.0,0.508751,0.0,0.0,0.0,0.0,0.0,3.156248,0.0,1.417004,0.0,0.0,0.0,0.0,0.0,0.508751,0.0,…,0.0,0.0,0.0,0.0,0.0,0.736811,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.372155,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,468,"""S_8"""


In [38]:
# Extract only common genes between ATAC and RNAseq
common_genes = list(set(avg_by_metacell.columns) & set(atac_by_metacell.columns))
# exclude columns
exclude_cols = {"n_cells", "metacell_duplicated_0", "metacell"}
common_genes = [g for g in common_genes if g not in exclude_cols]

In [41]:
rna_filtered = avg_by_metacell.select(["metacell"] + common_genes)

atac_filtered = atac_by_metacell.select(["metacell"] + common_genes)

In [42]:
rna_filtered

metacell,ENSG00000225830,ENSG00000273925,ENSG00000159348,ENSG00000166816,ENSG00000172497,ENSG00000142634,ENSG00000186174,ENSG00000118898,ENSG00000127578,ENSG00000276449,ENSG00000172575,ENSG00000187260,ENSG00000138780,ENSG00000143486,ENSG00000184937,ENSG00000253976,ENSG00000168300,ENSG00000198865,ENSG00000178028,ENSG00000254418,ENSG00000169330,ENSG00000147684,ENSG00000135362,ENSG00000141219,ENSG00000070729,ENSG00000286409,ENSG00000221990,ENSG00000168275,ENSG00000287424,ENSG00000055070,ENSG00000138028,ENSG00000271806,ENSG00000213793,ENSG00000164406,ENSG00000128951,ENSG00000115317,…,ENSG00000112425,ENSG00000205937,ENSG00000161405,ENSG00000251438,ENSG00000117242,ENSG00000144278,ENSG00000250447,ENSG00000162947,ENSG00000176401,ENSG00000183597,ENSG00000248150,ENSG00000166352,ENSG00000109171,ENSG00000115355,ENSG00000263327,ENSG00000113758,ENSG00000275580,ENSG00000171222,ENSG00000198589,ENSG00000049541,ENSG00000155849,ENSG00000180104,ENSG00000150783,ENSG00000169914,ENSG00000228496,ENSG00000250244,ENSG00000112319,ENSG00000105568,ENSG00000251022,ENSG00000108883,ENSG00000074695,ENSG00000117298,ENSG00000133401,ENSG00000143420,ENSG00000113163,ENSG00000148719,ENSG00000120915
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""G1_0""",0.273269,0.0,0.0,0.0,0.0,3.272833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.587786,1.693571,0.78469,0.0,0.0,0.0,0.0,0.839503,0.0,0.0,0.0,2.803738,0.0,0.841963,0.0,0.0,0.0,0.291146,0.890076,0.0,…,0.291146,0.275687,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.318839,0.0,0.0,1.269182,0.0,0.458127,0.0,0.420982,0.0,0.981481,0.0,0.0,6.679282,0.0,0.0,0.0,0.0,0.0,0.819807,0.451488,0.0,0.0,1.433736,0.0,1.359237,0.342337,0.0,0.677543
"""G1_1""",0.0,0.0,0.356506,0.0,0.0,1.887511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.865801,0.631313,1.604278,0.0,0.0,0.0,0.0,0.445633,0.0,0.426803,0.0,1.011225,0.0,0.0,0.0,0.0,0.420875,0.0,0.0,0.0,…,0.0,2.591159,0.0,0.0,0.0,0.0,0.0,0.0,0.369549,2.281611,0.0,0.473485,0.325839,0.0,0.333,0.0,0.0,0.0,2.005066,0.0,0.541126,3.702337,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.138801,0.0,1.256205,0.0,0.550964,1.188354,0.0,0.439174
"""G1_10""",0.217666,0.0,0.0,0.0,0.0,1.666194,0.0,0.0,0.0,0.0,0.0,0.527955,0.0,0.0,0.0,0.0,0.708968,1.127904,2.229672,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.954381,0.670646,0.0,0.0,0.0,0.516956,0.0,0.0,…,0.516956,0.760256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.832825,0.0,0.0,3.926721,0.0,2.023201,0.0,0.0,0.0,2.422309,0.0,0.0,3.260438,0.0,0.0,0.0,0.0,0.0,0.0,0.55142,0.0,0.0,0.516956,0.0,1.557438,0.913444,0.187984,0.0
"""G1_11""",0.5,0.0,0.595238,0.0,0.0,1.520649,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.785714,0.0,2.132227,0.892857,1.322751,0.510204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,1.061508,0.0,0.0,0.0,0.0,0.0,0.0,0.555556,5.460329,0.0,0.0,2.092052,0.0,0.0,0.0,0.0,0.297619,0.845509,0.0,0.0,1.675214,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.892857,0.0,2.065587,0.0,0.625,0.727901,0.510204,0.0
"""G1_12""",0.390717,0.0,0.0,0.0,0.0,2.515142,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.817784,1.5714,2.092687,0.0,0.0,0.0,0.0,0.233732,0.0,0.671231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,1.942966,0.0,0.0,0.0,0.0,0.0,0.0,1.048798,3.748429,0.0,0.0,1.4939,0.0,1.381617,0.0,0.0,0.0,0.0,0.0,0.0,1.290721,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.707514,0.0,1.266186,0.0,4.034542,0.379391,1.071614,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""S_5""",0.884643,0.0,0.758265,0.0,0.0,4.566113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.346723,1.592357,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.51784,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,1.695905,0.0,0.0,0.0,0.0,0.0,0.0,1.061571,3.515741,0.0,0.0,1.214215,0.0,0.0,0.0,0.0,0.0,0.544395,0.0,0.0,2.014572,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.061571,1.998143,0.758265,0.386026,2.229429,0.849257,1.061571
"""S_6""",0.0,0.0,0.0,0.0,0.0,2.499902,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.44605,1.11575,0.0,0.0,0.0,0.0,0.0,0.698812,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,1.259525,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.317983,0.0,0.952925,1.043365,0.0,1.321891,0.0,0.0,0.0,0.842829,0.0,0.0,3.07131,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.655136,0.455747,2.399821,0.0,2.319246,0.873515,0.0,0.0
"""S_7""",0.0,0.0,0.477054,0.0,0.0,1.994009,0.0,0.0,0.0,0.0,0.67944,0.0,0.0,0.0,0.0,0.0,2.629771,0.546866,1.18008,0.0,0.700673,0.0,0.0,0.0,0.0,0.0,0.477054,0.0,0.50958,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.747384,2.018638,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.617891,0.896861,0.50958,0.0,0.0,0.0,0.747384,0.605987,0.0,3.302133,0.0,0.0,0.0,0.0,0.0,0.700673,0.0,2.648434,0.659457,0.546866,1.86846,2.854141,0.67944,0.605987,0.477054
"""S_8""",0.0,0.0,0.0,0.0,0.0,1.834729,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.446679,0.0,0.0,0.0,0.0,0.0,0.0,0.577501,0.0,0.0,1.017501,0.0,0.0,0.0,0.0,0.0,0.0,0.610501,0.0,0.0,…,0.610501,3.161358,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.153682,0.0,0.929023,0.586402,0.0,0.837942,0.0,0.0,0.0,0.890313,0.0,0.0,1.16866,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.821828,3.118641,0.0,1.171043,2.881041,1.712598,0.356125


In [43]:
atac_filtered

metacell,ENSG00000225830,ENSG00000273925,ENSG00000159348,ENSG00000166816,ENSG00000172497,ENSG00000142634,ENSG00000186174,ENSG00000118898,ENSG00000127578,ENSG00000276449,ENSG00000172575,ENSG00000187260,ENSG00000138780,ENSG00000143486,ENSG00000184937,ENSG00000253976,ENSG00000168300,ENSG00000198865,ENSG00000178028,ENSG00000254418,ENSG00000169330,ENSG00000147684,ENSG00000135362,ENSG00000141219,ENSG00000070729,ENSG00000286409,ENSG00000221990,ENSG00000168275,ENSG00000287424,ENSG00000055070,ENSG00000138028,ENSG00000271806,ENSG00000213793,ENSG00000164406,ENSG00000128951,ENSG00000115317,…,ENSG00000112425,ENSG00000205937,ENSG00000161405,ENSG00000251438,ENSG00000117242,ENSG00000144278,ENSG00000250447,ENSG00000162947,ENSG00000176401,ENSG00000183597,ENSG00000248150,ENSG00000166352,ENSG00000109171,ENSG00000115355,ENSG00000263327,ENSG00000113758,ENSG00000275580,ENSG00000171222,ENSG00000198589,ENSG00000049541,ENSG00000155849,ENSG00000180104,ENSG00000150783,ENSG00000169914,ENSG00000228496,ENSG00000250244,ENSG00000112319,ENSG00000105568,ENSG00000251022,ENSG00000108883,ENSG00000074695,ENSG00000117298,ENSG00000133401,ENSG00000143420,ENSG00000113163,ENSG00000148719,ENSG00000120915
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""G2M_4""",1.064963,0.0,0.0,0.0,0.819202,0.580889,0.0,0.0,0.0,0.0,1.452222,0.0,0.0,1.101686,1.962533,0.0,1.375146,0.912825,0.614402,1.141031,0.0,0.833935,0.0,2.057304,0.0,0.0,2.950162,4.091266,0.0,0.0,0.0,0.0,2.484913,1.414326,0.0,0.0,…,5.451595,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.141031,2.537341,0.0,1.210185,0.0,1.228803,0.570516,0.476849,0.0,0.0,0.0,0.0,0.614402,2.950162,1.101686,0.84076,0.0,0.0,1.521375,0.0,0.476849,0.0,0.0,1.730765,0.968148,3.098491,4.798021,5.678778,0.0
"""G2M_28""",0.0,0.0,1.518906,0.996413,0.0,0.700182,0.681756,0.0,0.0,0.0,0.0,0.0,0.0,0.925241,3.806472,0.0,0.631872,1.036269,0.0,0.0,0.0,1.568254,0.0,0.616827,0.0,0.0,0.46262,3.165336,0.0,0.551207,0.0,0.539724,2.590674,0.0,0.0,0.0,…,0.719632,0.0,0.996413,0.0,0.0,0.0,0.0,0.740192,0.0,1.453949,0.0,1.364338,0.518135,0.0,0.551207,0.0,0.0,0.0,1.156551,2.004688,0.0,0.46262,0.0,5.990933,0.0,0.0,0.0,0.0,2.610858,1.523926,0.0,0.863558,0.616827,0.700182,0.0,6.975403,0.0
"""G1_27""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.277729,0.0,0.822707,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.88537,5.00732,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,1.371178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.524887,0.0,0.0,0.0,0.0,0.0,4.524887,0.0,0.0,5.00732,0.0,16.718576,0.0,0.0,0.0,0.0,1.19076,1.740341,0.0,1.616031,0.0,0.0,0.0,5.236776,0.0
"""G2M_19""",0.654108,0.0,4.197192,0.0,0.0,0.0,1.056636,0.0,0.597229,0.0,1.557829,0.0,0.0,0.0,2.219688,0.0,0.624376,2.875469,0.0,0.422654,0.0,0.41625,0.0,1.486087,0.0,2.918956,2.486788,0.0,0.0,0.0,0.0,3.098664,0.0,1.717033,0.549451,0.0,…,0.0,0.0,0.0,0.0,0.0,1.717033,0.0,0.0,0.981162,1.373626,0.0,5.152533,0.51835,0.0,1.962323,0.0,0.704424,0.0,0.410038,1.556136,0.0,2.486788,0.0,6.410256,0.0,0.0,0.584522,0.0,0.0,0.704424,0.538677,0.808016,0.981162,0.0,4.177055,1.569859,0.0
"""S_10""",0.0,0.0,0.0,0.46607,0.972668,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.438159,0.438654,0.0,1.315963,0.520264,1.616512,0.0,0.0,1.984233,0.0,0.0,0.699105,0.0,3.449943,0.894855,0.0,0.798977,0.0,1.065303,1.32571,1.39821,0.621427,0.0,…,1.01688,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.860437,0.0,0.0,1.403693,1.572775,0.0,0.0,0.0,0.0,0.0,0.0,1.911735,0.621427,3.449943,0.497141,0.0,0.0,0.860437,0.0,0.0,0.0,0.50844,0.721657,2.858563,0.0,0.798977,1.335901,5.250865,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""G2M_32""",0.0,0.547645,4.908523,0.0,0.0,2.309893,0.0,0.0,0.0,0.0,0.0,0.0,1.047669,0.0,4.99868,0.651254,2.409639,0.0,0.0,0.0,0.0,4.107338,0.83091,0.0,0.0,0.0,0.502008,0.634115,1.204819,0.0,0.0,0.463392,0.512689,0.0,0.0,0.0,…,1.09529,0.688468,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.417434,0.0,1.116043,0.708717,0.0,0.0,0.0,0.0,0.0,0.0,0.502008,0.0,5.129988,0.0,0.0,0.0,0.0,2.731393,2.019844,0.708717,1.022767,0.0,0.334672,0.0,6.408974,0.0
"""G1_24""",0.0,0.0,1.464335,0.749064,0.749064,0.0,0.0,1.301103,0.0,0.0,0.0,0.0,1.884979,0.774893,0.7249,0.0,0.0,1.321877,0.0,0.0,0.0,1.159265,0.0,0.591366,1.02145,0.0,0.307834,1.702417,0.0,0.0,0.548095,0.535045,0.478126,0.944595,0.0,0.0,…,0.0,0.0,0.832293,0.0,0.0,0.387447,1.605136,0.0,0.832293,0.0,1.180373,1.592694,1.182732,0.591366,0.0,0.0,0.0,0.0,0.0,1.096191,2.496879,0.307834,0.0,4.187161,0.0,0.0,0.0,0.749064,0.0,0.722602,0.93633,0.0,0.0,1.040366,0.0,5.115316,0.0
"""S_5""",0.0,0.0,0.0,2.561833,0.0,0.828912,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.307882,1.959248,0.0,1.657825,1.782673,0.0,0.0,0.0,1.744456,0.0,1.267748,0.695217,0.0,3.883796,0.695217,0.0,3.176668,0.0,0.653083,0.653083,1.346983,0.0,0.0,…,2.481284,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.145195,0.891237,0.0,0.0,0.798212,0.0,0.0,0.769704,0.0,0.0,0.489812,0.0,0.0,3.883796,0.0,0.695217,0.0,0.0,0.0,0.0,1.562103,0.633874,0.0,0.633874,0.0,2.394636,1.248632,6.132046,0.0
"""G2M_30""",0.921319,0.0,0.421621,0.0,0.0,0.777363,0.0,0.0,0.0,1.243781,0.0,0.0,0.518242,0.0,3.533196,0.0,0.0,0.0,0.0,0.0,0.0,1.961917,0.0,0.0,0.0,1.554726,1.131294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,…,0.0,0.672314,0.0,0.0,0.0,0.0,0.0,0.0,1.309243,1.858912,0.0,1.878074,0.956755,1.236984,0.637836,0.0,0.0,0.0,0.0,0.376903,1.391643,1.131294,0.621891,1.948826,0.0,0.0,0.0,0.0,0.578503,1.081549,1.184553,2.544821,0.690989,0.0,4.975124,3.645238,0.0


In [22]:
# Make tables
rna_long = rna_filtered.reset_index().melt(id_vars="Phase", var_name="gene", value_name="RNA_expr")
atac_long = atac_filtered.reset_index().melt(id_vars="Phase", var_name="gene", value_name="ATAC_access")

In [47]:
rna_long = rna_filtered.melt(
    id_vars="metacell",
    variable_name="gene",
    value_name="RNA_expr"
)

atac_long = atac_filtered.melt(
    id_vars="metacell",
    variable_name="gene",
    value_name="ATAC_access"
)


  rna_long = rna_filtered.melt(
  atac_long = atac_filtered.melt(


In [48]:
rna_long

metacell,gene,RNA_expr
str,str,f64
"""G1_0""","""ENSG00000225830""",0.273269
"""G1_1""","""ENSG00000225830""",0.0
"""G1_10""","""ENSG00000225830""",0.217666
"""G1_11""","""ENSG00000225830""",0.5
"""G1_12""","""ENSG00000225830""",0.390717
…,…,…
"""S_5""","""ENSG00000120915""",1.061571
"""S_6""","""ENSG00000120915""",0.0
"""S_7""","""ENSG00000120915""",0.477054
"""S_8""","""ENSG00000120915""",0.356125


In [49]:
atac_long

metacell,gene,ATAC_access
str,str,f64
"""G2M_4""","""ENSG00000225830""",1.064963
"""G2M_28""","""ENSG00000225830""",0.0
"""G1_27""","""ENSG00000225830""",0.0
"""G2M_19""","""ENSG00000225830""",0.654108
"""S_10""","""ENSG00000225830""",0.0
…,…,…
"""G2M_32""","""ENSG00000120915""",0.0
"""G1_24""","""ENSG00000120915""",0.0
"""S_5""","""ENSG00000120915""",0.0
"""G2M_30""","""ENSG00000120915""",0.0


In [None]:
# Merge RNA and ATAC in one table
merged = rna_long.join(atac_long, on=["metacell", "gene"], how="inner")

In [51]:
merged

metacell,gene,RNA_expr,ATAC_access
str,str,f64,f64
"""G2M_4""","""ENSG00000225830""",0.0,1.064963
"""G2M_28""","""ENSG00000225830""",0.0,0.0
"""G1_27""","""ENSG00000225830""",0.0,0.0
"""G2M_19""","""ENSG00000225830""",0.782779,0.654108
"""S_10""","""ENSG00000225830""",0.0,0.0
…,…,…,…
"""G2M_32""","""ENSG00000120915""",0.485909,0.0
"""G1_24""","""ENSG00000120915""",0.633513,0.0
"""S_5""","""ENSG00000120915""",1.061571,0.0
"""G2M_30""","""ENSG00000120915""",0.0,0.0


In [53]:
# SAve merged table
merged.write_csv("k562.allele.flt.M_atacrnamerged.tsv", separator="\t")
