### Goal: Build a TF–gene mapping sheet (gene → TFs and TF → genes)

In [None]:
import numpy as np
import pandas as pd
import os
import scanpy as sc
import scanpy.external as sce
import sys
import muon as mu
import muon.atac as ac
import matplotlib.pyplot as plt
import seaborn as sns
# Input locations: the single-cell .h5ad folder and the ATAC / RNA figure folders.
sc_file = '/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/single_cell_files/share'
atac_dir = '/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/figures/atac'
rna_dir = '/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/figures/rna'

# Output location for this notebook; created up front so later writes cannot
# fail on a missing directory.
figures = '/home/carsten/alvira_bioinformatics/postnatal_lung_multiome/data/pilot/231013_tf_gene_sheet'
os.makedirs(figures, exist_ok=True)

# Scanpy plotting defaults: 300 dpi PNGs, saved into the output folder above.
sc.set_figure_params(dpi=300, format="png")
sc.settings.figdir = figures


#### Load the multiome data (RNA, ATAC, TF)

In [None]:
# Read the three processed P7 multiome objects; the file names differ only in
# the modality tag, so build them from one template.
rna, atac, tf = (
    sc.read(f'{sc_file}/p7_multiome_{modality}_processed.gz.h5ad')
    for modality in ('rna', 'atac', 'tf')
)

In [None]:
# Rebuild the RNA expression matrix from the 'soupx' layer
# (presumably SoupX ambient-corrected counts — confirm against the upstream
# processing notebook), copying so the layer itself is not modified in place.
soupx_counts = rna.layers['soupx']
rna.X = soupx_counts.copy()

# Scale every cell to a total of 1e4, then take log10(1 + x).
sc.pp.normalize_total(rna, target_sum=1e4)
sc.pp.log1p(rna, base=10)

In [None]:
# Rebuild the ATAC matrix from the 'counts' layer, copying so the stored
# layer is left untouched by the in-place normalisation below.
peak_counts = atac.layers['counts']
atac.X = peak_counts.copy()

# Same treatment as the RNA object: scale each cell to 1e4, then log10(1 + x).
sc.pp.normalize_total(atac, target_sum=1e4)
sc.pp.log1p(atac, base=10)

In [None]:
# Build two lookup tables from the per-feature ATAC annotations in `atac.var`:
#   gene_tf_dt : gene name -> sorted list of unique TFs seen on any row naming that gene
#   tf_gene_dt : TF name   -> sorted list of unique genes seen on any row naming that TF
# 'annotated_gene' is a ','-separated string and 'tfs' is a '&'-separated string.
df_tf_gene = atac.var[['tfs','annotated_gene']]
df = df_tf_gene.copy()

# Keep only rows that carry both a gene annotation and at least one TF.
# Missing (NaN) gene annotations are dropped explicitly: `!= ''` alone lets
# NaN through, and NaN has no `.split`.
has_gene = df['annotated_gene'].notna() & (df['annotated_gene'] != '')
has_tf = df['tfs'].notna()
df = df.loc[has_gene & has_tf]

# Accumulate into sets and sort once at the end instead of re-sorting the
# growing list on every row.
gene_tf_sets = {}
tf_gene_sets = {}
# Iterate the two columns directly; a per-row `df.loc[label]` lookup is slow
# and returns a DataFrame (not a row) when the index has duplicate labels.
for gene_field, tf_field in zip(df['annotated_gene'], df['tfs']):
    genes = gene_field.split(',')
    tfs = tf_field.split('&')
    for gene in genes:
        gene_tf_sets.setdefault(gene, set()).update(tfs)
    # Named `tf_name`, not `tf`, so the loop does not overwrite the `tf`
    # AnnData object loaded earlier in the notebook.
    for tf_name in tfs:
        tf_gene_sets.setdefault(tf_name, set()).update(genes)

# Final maps: same key order (first appearance) and sorted, de-duplicated values.
gene_tf_dt = {gene: sorted(tf_names) for gene, tf_names in gene_tf_sets.items()}
tf_gene_dt = {tf_name: sorted(gene_names) for tf_name, gene_names in tf_gene_sets.items()}
        
            
    

In [None]:
# Export both lookup tables into a single workbook, one sheet per direction.
# NOTE(review): the values are Python lists — confirm how they render in the
# cells and that long TF -> gene lists fit within Excel's per-cell text limit.
workbook_path = f"{figures}/gene_tf_map.xlsx"
sheets = (
    (gene_tf_dt, 'TFs', "gene to tf"),
    (tf_gene_dt, 'Genes', "tf to gene"),
)
with pd.ExcelWriter(workbook_path, engine="xlsxwriter") as writer:
    for mapping, column_name, sheet_name in sheets:
        pd.Series(mapping, name=column_name).to_excel(writer, sheet_name=sheet_name)