# Set-up

In [1]:
import os
import sys
import glob
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

In [2]:
results_dir = "/cellar/users/aklie/projects/igvf/topic_grn_links/eval/results"
dataset_name = "meta_analysis"
in_date = "03Sep23"

In [3]:
if not os.path.exists(os.path.join(results_dir, dataset_name, in_date, "baselines")):
    os.makedirs(os.path.join(results_dir, dataset_name, in_date, "baselines"))

# Load in the metadata

In [4]:
# Read in all the networks
metadata_df = pd.read_csv(os.path.join(results_dir, dataset_name, in_date, "grn_metadata.tsv"), sep="\t")
len(metadata_df)

120

In [5]:
# Get the unique h5ad in_files
h5ad_df = metadata_df[metadata_df["in_file_path"].str.contains("h5ad")]
h5ad_df = h5ad_df[h5ad_df["method"] == "celloracle"]
h5ad_df

Unnamed: 0,in_file_path,out_dir_path,tf_list,pval,num_boostraps,tf_type,run_id,method,dataset,base_grn,...,annotation_db,network_type,net_tsv_exists,cells,genes,normalization,genotype,grn_name,output_table_path,output_table_exists
24,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,raw,B6J,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True
25,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,raw,CASTJ,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True
26,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,raw,both,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True
27,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,log1p_cp10k,B6J,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True
28,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,log1p_cp10k,CASTJ,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True
29,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,log1p_cp10k,both,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True
30,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,pf_log1p_pf,B6J,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True
31,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,pf_log1p_pf,CASTJ,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True
32,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,pf_log1p_pf,both,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True
33,/cellar/users/aklie/data/igvf/topic_grn_links/...,/cellar/users/aklie/projects/igvf/topic_grn_li...,,,,,2304921235_20230831-161949,celloracle,igvf_b01_LeftCortex,mouse_scATAC_atlas,...,,,True,balanced_genotype_microglia,0.05,cpm_rank,B6J,igvf_b01_LeftCortex_celloracle_balanced_genoty...,/cellar/users/aklie/projects/igvf/topic_grn_li...,True


# Expression baseline

In [6]:
# Read in the tfs
tf_list = pd.read_csv("/cellar/users/aklie/opt/igvf-ucsd/scenic_pipeline/data/tf_lists/allTFs_mm.txt", header=None)[0].values
tf_list[:5]

array(['Bcl6b', 'Zscan26', 'Mtf1', 'Klf9', 'Zic5'], dtype=object)

In [35]:
grn_tf_metadata = pd.read_csv("/cellar/users/aklie/projects/igvf/topic_grn_links/eval/results/meta_analysis/03Sep23/tf_metadata_topics.tsv", sep="\t", index_col=0)
grn_tf_metadata.head

<bound method NDFrame.head of          in_scenic  in_aracne  in_celloracle  in_literature  number_grns  \
Runx1t1      False       True          False          False           41   
Prdm15        True       True          False          False           27   
Zfp445        True       True          False          False           26   
Gm14308       True      False          False          False           21   
Gas7         False       True          False          False           31   
...            ...        ...            ...            ...          ...   
Gm6710        True      False          False          False           22   
Suclg1        True      False          False          False            6   
Nfia          True       True           True          False           88   
Hcfc2         True      False          False          False           25   
Gmeb1         True       True          False          False           32   

         in_topics  
Runx1t1       True  
Prdm15        T

In [36]:
from scipy import sparse
baseline_metadata = pd.DataFrame(columns=["grn_name", "dataset", "method", "cells", "genes", "normalization", "genotype", "expression_path"])
for i, (_, row) in tqdm(enumerate(h5ad_df.iterrows()), total=len(h5ad_df)):
    
    # Read in the adata
    h5ad_file = row["in_file_path"]
    adata = sc.read_h5ad(h5ad_file)

    # Get the run name
    run = row["dataset"] + "_" + "expression_baseline" + "_" + row["cells"] + "_"  + row["genotype"] + "_" + str(row["genes"]) + "_" + row["normalization"]

    # Check if TFs are in the adata
    grn_tf_metadata[f"in_{row['dataset']}"] = grn_tf_metadata.index.isin(adata.var.index)
                          
    # Intersect with tf list
    tfs_in_adata = np.intersect1d(tf_list, adata.var_names)
    tf_adata = adata[:, tfs_in_adata].copy()

    # Rank the TFs in the list by expression level in the tf_adata
    if sparse.issparse(tf_adata.layers["normalized_counts"]):
        tf_adata.var["rank_weight"] = tf_adata.layers["normalized_counts"].A.mean(axis=0)
    else:
        tf_adata.var["rank_weight"] = tf_adata.layers["normalized_counts"].mean(axis=0)
    tf_adata.var["rank"] = tf_adata.var["rank_weight"].rank(ascending=False)
    tf_adata_ranked = tf_adata.var.sort_values("rank")

    # Clean up
    tf_adata_ranked = tf_adata_ranked[["rank_weight", "rank"]]
    tf_adata_ranked.index.name = None
    
    # Save the ranked TFs
    if not os.path.exists(os.path.join(results_dir, dataset_name, in_date, "baselines", run)):
        os.makedirs(os.path.join(results_dir, dataset_name, in_date, "baselines", run))
    out_file = os.path.join(results_dir, dataset_name, in_date, "baselines", run, "expression_rank.tsv")
    tf_adata_ranked.to_csv(out_file, sep="\t")

    # Add to the metadata
    baseline_metadata.loc[i] = [run, row["dataset"], "expression_baseline", row["cells"], row["genes"], row["normalization"], row["genotype"], out_file]

  0%|          | 0/24 [00:00<?, ?it/s]

In [37]:
grn_tf_metadata["in_both"] = grn_tf_metadata["in_igvf_b01_LeftCortex"] & grn_tf_metadata["in_Bridge_Satpathy"]
grn_tf_metadata["in_neither"] = ~grn_tf_metadata["in_igvf_b01_LeftCortex"] & ~grn_tf_metadata["in_Bridge_Satpathy"]

In [38]:
grn_tf_metadata["in_both"].sum(), grn_tf_metadata["in_neither"].sum()

(473, 0)

In [39]:
grn_tf_metadata.to_csv(os.path.join(results_dir, dataset_name, in_date, "tf_metadata_topics_baselines.tsv"), sep="\t")

# Generic network baseline

In [8]:
import decoupler as dc

In [9]:
net = dc.get_dorothea(organism='mouse', split_complexes=False)
net

Unnamed: 0,source,confidence,target,weight
0,Myc,A,Tert,1.0
1,Myc,A,Cct5,1.0
2,Myc,A,Gnl3,1.0
3,Myc,A,Nol7,1.0
4,Myc,A,Dpy30,1.0
...,...,...,...,...
2800,Jun,A,Myb,1.0
2801,Jund,A,Myb,1.0
2802,Lhx3,A,Fshb,1.0
2803,Usf1,A,Hmox1,1.0


In [10]:
from scipy import sparse
ulm_files, viper_files, aucell_files = [], [], []
for i, (_, row) in tqdm(enumerate(h5ad_df.iterrows()), total=len(h5ad_df)):
    
    # Read in the adata
    h5ad_file = row["in_file_path"]
    adata = sc.read_h5ad(h5ad_file)

    # Get the run name
    run = row["dataset"] + "_" + "expression_baseline" + "_" + row["cells"] + "_"  + row["genotype"] + "_" + str(row["genes"]) + "_" + row["normalization"]

    # Make sure the adata uses the normalized counts
    adata.raw = adata.copy()
    if sparse.issparse(adata.layers["normalized_counts"]):
        adata.X = adata.layers["normalized_counts"].A
    else:
        adata.X = adata.layers["normalized_counts"]

   # Run decoupler with ULM
    dc.run_ulm(
        mat=adata,
        net=net,
        source='source',
        target='target',
        weight='weight',
        verbose=True,
        use_raw=False
    )

    # Grab the ULM estimates
    acts = dc.get_acts(adata, obsm_key='ulm_estimate')
    acts.var["rank_weight"] = acts.X.mean(axis=0)
    acts.var["rank"] = acts.var["rank_weight"].rank(ascending=False)
    acts_ranked = acts.var.sort_values("rank")

    # Clean up
    acts_ranked = acts_ranked[["rank_weight", "rank"]]
    acts_ranked.index.name = None

    # Save the ranked TFs
    out_file = os.path.join(results_dir, dataset_name, in_date, "baselines", run, "dorothea_ulm_rank.tsv")
    acts_ranked.to_csv(out_file, sep="\t")
    ulm_files.append(out_file)

    # Run decoupler with VIPER
    dc.run_viper(
        mat=adata,
        net=net,
        source='source',
        target='target',
        weight='weight',
        verbose=True,
        use_raw=False
    )

    # Grab the VIPER estimates
    acts = dc.get_acts(adata, obsm_key='viper_estimate')
    acts.var["rank_weight"] = acts.X.mean(axis=0)
    acts.var["rank"] = acts.var["rank_weight"].rank(ascending=False)
    acts_ranked = acts.var.sort_values("rank")

    # Clean up
    acts_ranked = acts_ranked[["rank_weight", "rank"]]
    acts_ranked.index.name = None

    # Save the ranked TFs
    out_file = os.path.join(results_dir, dataset_name, in_date, "baselines", run, "dorothea_viper_rank.tsv")
    acts_ranked.to_csv(out_file, sep="\t")
    viper_files.append(out_file)

    # Run decoupler with AUCell
    dc.run_aucell(
        mat=adata,
        net=net,
        source='source',
        target='target',
        verbose=True,
        use_raw=False
    )

    # Grab the AUCell estimates
    acts = dc.get_acts(adata, obsm_key='aucell_estimate')
    acts.var["rank_weight"] = acts.X.mean(axis=0)
    acts.var["rank"] = acts.var["rank_weight"].rank(ascending=False)
    acts_ranked = acts.var.sort_values("rank")

    # Clean up
    acts_ranked = acts_ranked[["rank_weight", "rank"]]
    acts_ranked.index.name = None

    # Save the ranked TFs
    out_file = os.path.join(results_dir, dataset_name, in_date, "baselines", run, "dorothea_aucell_rank.tsv")
    acts_ranked.to_csv(out_file, sep="\t")
    aucell_files.append(out_file)

baseline_metadata["ulm_path"] = ulm_files
baseline_metadata["viper_path"] = viper_files
baseline_metadata["aucell_path"] = aucell_files

  0%|          | 0/24 [00:00<?, ?it/s]

1 features of mat are empty, they will be removed.
Running ulm on mat with 1206 samples and 4076 targets for 24 sources.
1 features of mat are empty, they will be removed.
Running viper on mat with 1206 samples and 4076 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1206/1206 [00:00<00:00, 6491.19it/s]


1 features of mat are empty, they will be removed.
Running aucell on mat with 1206 samples and 4076 targets for 24 sources.


100%|██████████| 1206/1206 [00:00<00:00, 1450.21it/s]


2 features of mat are empty, they will be removed.
Running ulm on mat with 1206 samples and 4075 targets for 24 sources.
2 features of mat are empty, they will be removed.
Running viper on mat with 1206 samples and 4075 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1206/1206 [00:00<00:00, 9120.47it/s]

2 features of mat are empty, they will be removed.





Running aucell on mat with 1206 samples and 4075 targets for 24 sources.


100%|██████████| 1206/1206 [00:00<00:00, 1732.18it/s]


Running ulm on mat with 2412 samples and 4077 targets for 24 sources.
Running viper on mat with 2412 samples and 4077 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 2412/2412 [00:00<00:00, 7264.84it/s]


Running aucell on mat with 2412 samples and 4077 targets for 24 sources.


100%|██████████| 2412/2412 [00:01<00:00, 1994.82it/s]


1 features of mat are empty, they will be removed.
Running ulm on mat with 1206 samples and 4076 targets for 24 sources.
1 features of mat are empty, they will be removed.
Running viper on mat with 1206 samples and 4076 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1206/1206 [00:00<00:00, 8668.26it/s]

1 features of mat are empty, they will be removed.





Running aucell on mat with 1206 samples and 4076 targets for 24 sources.


100%|██████████| 1206/1206 [00:00<00:00, 1893.49it/s]


2 features of mat are empty, they will be removed.
Running ulm on mat with 1206 samples and 4075 targets for 24 sources.
2 features of mat are empty, they will be removed.
Running viper on mat with 1206 samples and 4075 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1206/1206 [00:00<00:00, 9003.66it/s]

2 features of mat are empty, they will be removed.





Running aucell on mat with 1206 samples and 4075 targets for 24 sources.


100%|██████████| 1206/1206 [00:00<00:00, 1733.03it/s]


Running ulm on mat with 2412 samples and 4077 targets for 24 sources.
Running viper on mat with 2412 samples and 4077 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 2412/2412 [00:00<00:00, 7536.81it/s]


Running aucell on mat with 2412 samples and 4077 targets for 24 sources.


100%|██████████| 2412/2412 [00:01<00:00, 1686.75it/s]


1 features of mat are empty, they will be removed.
Running ulm on mat with 1206 samples and 4076 targets for 24 sources.
1 features of mat are empty, they will be removed.
Running viper on mat with 1206 samples and 4076 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1206/1206 [00:00<00:00, 8894.35it/s]

1 features of mat are empty, they will be removed.





Running aucell on mat with 1206 samples and 4076 targets for 24 sources.


100%|██████████| 1206/1206 [00:00<00:00, 2093.36it/s]


2 features of mat are empty, they will be removed.
Running ulm on mat with 1206 samples and 4075 targets for 24 sources.
2 features of mat are empty, they will be removed.
Running viper on mat with 1206 samples and 4075 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1206/1206 [00:00<00:00, 8956.36it/s]

2 features of mat are empty, they will be removed.





Running aucell on mat with 1206 samples and 4075 targets for 24 sources.


100%|██████████| 1206/1206 [00:00<00:00, 1771.50it/s]


Running ulm on mat with 2412 samples and 4077 targets for 24 sources.
Running viper on mat with 2412 samples and 4077 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 2412/2412 [00:00<00:00, 8874.82it/s]


Running aucell on mat with 2412 samples and 4077 targets for 24 sources.


100%|██████████| 2412/2412 [00:01<00:00, 1624.04it/s]


Running ulm on mat with 1206 samples and 4077 targets for 24 sources.
Running viper on mat with 1206 samples and 4077 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1206/1206 [00:01<00:00, 888.96it/s]


Running aucell on mat with 1206 samples and 4077 targets for 24 sources.


100%|██████████| 1206/1206 [00:01<00:00, 1002.13it/s]


Running ulm on mat with 1206 samples and 4077 targets for 24 sources.
Running viper on mat with 1206 samples and 4077 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1206/1206 [00:01<00:00, 834.69it/s]


Running aucell on mat with 1206 samples and 4077 targets for 24 sources.


100%|██████████| 1206/1206 [00:01<00:00, 1127.66it/s]


Running ulm on mat with 2412 samples and 4077 targets for 24 sources.
Running viper on mat with 2412 samples and 4077 targets for 24 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 2412/2412 [00:02<00:00, 893.94it/s]


Running aucell on mat with 2412 samples and 4077 targets for 24 sources.


100%|██████████| 2412/2412 [00:02<00:00, 1066.62it/s]


Running ulm on mat with 1498 samples and 6277 targets for 33 sources.
Running viper on mat with 1498 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1498/1498 [00:00<00:00, 6041.97it/s]


Running aucell on mat with 1498 samples and 6277 targets for 33 sources.


100%|██████████| 1498/1498 [00:01<00:00, 1090.52it/s]


Running ulm on mat with 1498 samples and 6277 targets for 33 sources.
Running viper on mat with 1498 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1498/1498 [00:00<00:00, 6721.62it/s]


Running aucell on mat with 1498 samples and 6277 targets for 33 sources.


100%|██████████| 1498/1498 [00:01<00:00, 1246.48it/s]


Running ulm on mat with 2996 samples and 6277 targets for 33 sources.
Running viper on mat with 2996 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 2996/2996 [00:00<00:00, 8583.11it/s]


Running aucell on mat with 2996 samples and 6277 targets for 33 sources.


100%|██████████| 2996/2996 [00:02<00:00, 1239.13it/s]


Running ulm on mat with 1498 samples and 6277 targets for 33 sources.
Running viper on mat with 1498 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1498/1498 [00:00<00:00, 6532.44it/s]


Running aucell on mat with 1498 samples and 6277 targets for 33 sources.


100%|██████████| 1498/1498 [00:01<00:00, 1347.50it/s]


Running ulm on mat with 1498 samples and 6277 targets for 33 sources.
Running viper on mat with 1498 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1498/1498 [00:00<00:00, 8823.52it/s]


Running aucell on mat with 1498 samples and 6277 targets for 33 sources.


100%|██████████| 1498/1498 [00:01<00:00, 940.70it/s]


Running ulm on mat with 2996 samples and 6277 targets for 33 sources.
Running viper on mat with 2996 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 2996/2996 [00:00<00:00, 8695.14it/s]


Running aucell on mat with 2996 samples and 6277 targets for 33 sources.


100%|██████████| 2996/2996 [00:02<00:00, 1135.09it/s]


Running ulm on mat with 1498 samples and 6277 targets for 33 sources.
Running viper on mat with 1498 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1498/1498 [00:00<00:00, 6656.81it/s]


Running aucell on mat with 1498 samples and 6277 targets for 33 sources.


100%|██████████| 1498/1498 [00:01<00:00, 956.20it/s]


Running ulm on mat with 1498 samples and 6277 targets for 33 sources.
Running viper on mat with 1498 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1498/1498 [00:00<00:00, 6532.69it/s]


Running aucell on mat with 1498 samples and 6277 targets for 33 sources.


100%|██████████| 1498/1498 [00:01<00:00, 1302.94it/s]


Running ulm on mat with 2996 samples and 6277 targets for 33 sources.
Running viper on mat with 2996 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 2996/2996 [00:00<00:00, 7712.73it/s]


Running aucell on mat with 2996 samples and 6277 targets for 33 sources.


100%|██████████| 2996/2996 [00:02<00:00, 1135.49it/s]


Running ulm on mat with 1498 samples and 6277 targets for 33 sources.
Running viper on mat with 1498 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1498/1498 [00:02<00:00, 602.88it/s]


Running aucell on mat with 1498 samples and 6277 targets for 33 sources.


100%|██████████| 1498/1498 [00:02<00:00, 632.72it/s]


Running ulm on mat with 1498 samples and 6277 targets for 33 sources.
Running viper on mat with 1498 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 1498/1498 [00:02<00:00, 540.90it/s]


Running aucell on mat with 1498 samples and 6277 targets for 33 sources.


100%|██████████| 1498/1498 [00:02<00:00, 688.35it/s]


Running ulm on mat with 2996 samples and 6277 targets for 33 sources.
Running viper on mat with 2996 samples and 6277 targets for 33 sources.
Infering activities on 1 batches.
Computing pleiotropy correction.


100%|██████████| 2996/2996 [00:05<00:00, 558.35it/s]


Running aucell on mat with 2996 samples and 6277 targets for 33 sources.


100%|██████████| 2996/2996 [00:04<00:00, 697.46it/s]


# Write metadata

In [11]:
baseline_metadata.head()

Unnamed: 0,grn_name,dataset,method,cells,genes,normalization,genotype,expression_path,ulm_path,viper_path,aucell_path
0,igvf_b01_LeftCortex_expression_baseline_balanc...,igvf_b01_LeftCortex,expression_baseline,balanced_genotype_microglia,0.05,raw,B6J,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
1,igvf_b01_LeftCortex_expression_baseline_balanc...,igvf_b01_LeftCortex,expression_baseline,balanced_genotype_microglia,0.05,raw,CASTJ,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
2,igvf_b01_LeftCortex_expression_baseline_balanc...,igvf_b01_LeftCortex,expression_baseline,balanced_genotype_microglia,0.05,raw,both,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
3,igvf_b01_LeftCortex_expression_baseline_balanc...,igvf_b01_LeftCortex,expression_baseline,balanced_genotype_microglia,0.05,log1p_cp10k,B6J,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
4,igvf_b01_LeftCortex_expression_baseline_balanc...,igvf_b01_LeftCortex,expression_baseline,balanced_genotype_microglia,0.05,log1p_cp10k,CASTJ,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...


In [12]:
baseline_metadata.to_csv(os.path.join(results_dir, dataset_name, in_date, "cleaned_baseline_metadata.tsv"), sep="\t", index=False)

In [16]:
df = pd.read_csv("/cellar/users/aklie/projects/igvf/topic_grn_links/eval/results/meta_analysis/03Sep23/cleaned_grn_metadata.tsv", index_col=0, sep="\t")
df

Unnamed: 0,grn_name,dataset,method,cells,genes,normalization,genotype,scale_free_topology_score,mean_connectivity,median_connectivity,max_connectivity,number_tfs,filtered_links_path,degree_centrality_out_path,eigenvector_centrality_path,betweenness_centrality_path,ulm_path,viper_path,aucell_path
0,igvf_b01_LeftCortex_aracne_balanced_genotype_m...,igvf_b01_LeftCortex,aracne,balanced_genotype_microglia,0.05,raw,B6J,0.782986,6.568144,3.0,186.0,99,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
1,igvf_b01_LeftCortex_aracne_balanced_genotype_m...,igvf_b01_LeftCortex,aracne,balanced_genotype_microglia,0.05,raw,CASTJ,0.784745,3.970117,2.0,96.0,208,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
2,igvf_b01_LeftCortex_aracne_balanced_genotype_m...,igvf_b01_LeftCortex,aracne,balanced_genotype_microglia,0.05,raw,both,0.799480,7.920792,4.0,239.0,84,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
3,igvf_b01_LeftCortex_aracne_balanced_genotype_m...,igvf_b01_LeftCortex,aracne,balanced_genotype_microglia,0.05,log1p_cp10k,B6J,0.837918,10.416667,5.0,128.0,50,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
4,igvf_b01_LeftCortex_aracne_balanced_genotype_m...,igvf_b01_LeftCortex,aracne,balanced_genotype_microglia,0.05,log1p_cp10k,CASTJ,0.835523,10.666667,5.0,135.0,59,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,Bridge_Satpathy_wgcna_balanced_genotype_microg...,Bridge_Satpathy,wgcna,balanced_genotype_microglia,0.05,log1p_cp10k,CASTJ,0.813745,9.070295,3.0,337.0,58,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
116,Bridge_Satpathy_wgcna_balanced_genotype_microg...,Bridge_Satpathy,wgcna,balanced_genotype_microglia,0.05,log1p_cp10k,both,0.833126,9.216590,3.0,336.0,51,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
117,Bridge_Satpathy_wgcna_balanced_genotype_microg...,Bridge_Satpathy,wgcna,balanced_genotype_microglia,0.05,pf_log1p_pf,B6J,0.874247,8.368201,3.0,310.0,55,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...
118,Bridge_Satpathy_wgcna_balanced_genotype_microg...,Bridge_Satpathy,wgcna,balanced_genotype_microglia,0.05,pf_log1p_pf,CASTJ,0.848092,8.097166,2.0,328.0,68,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...,/cellar/users/aklie/projects/igvf/topic_grn_li...


In [17]:
df.to_csv("/cellar/users/aklie/projects/igvf/topic_grn_links/eval/results/meta_analysis/03Sep23/cleaned_grn_metadata2.tsv", sep="\t", index=False)

660

Unnamed: 0_level_0,gene_id,gene_name,n_cells,mt,n_cells_by_counts,mean_counts,pct_dropout_by_counts,total_counts
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Xkr4,ENSMUSG00000051951.6,Xkr4,89155,False,89155,5.561202,18.411516,607694.625000
Lypla1,ENSMUSG00000025903.15,Lypla1,9408,False,9408,0.048942,91.390450,5348.098633
Gm37988,ENSMUSG00000104217.2,Gm37988,25578,False,25578,0.183453,76.592785,20046.699219
Tcea1,ENSMUSG00000033813.16,Tcea1,24118,False,24118,0.199689,77.928876,21820.904297
Rgs20,ENSMUSG00000002459.18,Rgs20,28775,False,28775,0.455780,73.667112,49804.886719
...,...,...,...,...,...,...,...,...
mt-Co2,ENSMUSG00000064354.1,mt-Co2,22589,True,22589,0.131671,79.328111,14388.107422
mt-Atp6,ENSMUSG00000064357.1,mt-Atp6,20593,True,20593,0.117913,81.154712,12884.748047
mt-Co3,ENSMUSG00000064358.1,mt-Co3,40051,True,40051,0.275835,63.348097,30141.630859
mt-Nd4,ENSMUSG00000064363.1,mt-Nd4,17880,True,17880,0.173955,83.637462,19008.792969


# DONE!

---