In [1]:
import scanpy as sc
import os
from flecs.utils import get_project_root
import numpy as np
from operator import itemgetter
import pandas as pd

## Load

In [2]:
# Read the processed AnnData file stored under <project root>/figure2/processed.
adata_path = os.path.join(get_project_root(), "figure2", "processed", "adata_processed_with_paths_magic.h5ad")
adata = sc.read_h5ad(adata_path)



In [3]:
# Gene names in the order of adata.var (`var_names` is AnnData's alias for `var.index`).
# This list is read as a global by save_most_affected_genes to map indices to names.
gene_name_list = list(adata.var_names)

## GENIE3

Source: GENIE3 reference Python implementation — https://github.com/vahuynh/GENIE3/blob/master/GENIE3_python/GENIE3.py

In [4]:
def get_link_list(VIM):
    """Rank all off-diagonal entries of a 2-D score matrix.

    Parameters
    ----------
    VIM : array-like, 2-D
        Score matrix; ``VIM[i, j]`` is the score attached to the ordered
        pair ``(i, j)``.

    Returns
    -------
    list of tuple
        ``(i, j, score)`` triples for every entry with ``i != j``, sorted by
        decreasing ``score``. Entries with equal scores keep their row-major
        order because ``sorted`` is stable, also with ``reverse=True``.
    """
    # Diagonal entries (i == j) are self-links and are never reported.
    # Every row index is a valid source, so no further filtering is needed.
    links = [(i, j, score) for (i, j), score in np.ndenumerate(VIM) if i != j]

    # Rank the links by score, strongest first.
    return sorted(links, key=itemgetter(2), reverse=True)


In [5]:
def extract_most_affected_genes(link_list, target_len):
    """Pick the genes that occur most often among the top-ranked links.

    Starting from the first ``target_len // 2`` links, the prefix of
    ``link_list`` is grown one link at a time until it mentions at least
    ``target_len`` distinct genes. Genes are then ordered by how many times
    they appear in that prefix (as first or second element of a link) and
    the first ``target_len`` are returned.

    Parameters
    ----------
    link_list : sequence of tuple
        Ranked links whose first two elements are gene identifiers, e.g. the
        output of ``get_link_list``.
    target_len : int
        Number of genes wanted.

    Returns
    -------
    list
        At most ``target_len`` gene identifiers, most frequent first. The
        order among equally frequent genes is whatever
        ``DataFrame.value_counts`` produces. If ``link_list`` contains fewer
        than ``target_len`` distinct genes, all of them are returned (the
        search previously never terminated in that case). An empty
        ``link_list`` or a non-positive ``target_len`` yields ``[]``.
    """
    n_links = len(link_list)
    if target_len <= 0 or n_links == 0:
        return []

    # Seed with the first target_len // 2 links (each contributes up to 2 genes).
    n_top_edges = min(target_len // 2, n_links)
    seen_genes = set()
    for link in link_list[:n_top_edges]:
        seen_genes.update((link[0], link[1]))

    # Extend the prefix incrementally; stop when enough distinct genes were
    # collected or the list is exhausted, so the loop always terminates.
    while len(seen_genes) < target_len and n_top_edges < n_links:
        link = link_list[n_top_edges]
        seen_genes.update((link[0], link[1]))
        n_top_edges += 1

    top_links = link_list[:n_top_edges]
    genes_most_affected = [t[0] for t in top_links] + [t[1] for t in top_links]

    # value_counts sorts by decreasing frequency; on a one-column frame its
    # index holds 1-tuples, hence the g[0] unpacking.
    ranked = list(pd.DataFrame(genes_most_affected).value_counts().index)[:target_len]
    return [g[0] for g in ranked]

In [6]:
def save_most_affected_genes(res, file_name, target_len=33, gene_names=None):
    """Write the names of the most affected genes of a score matrix to a file.

    The matrix is turned into a ranked link list with ``get_link_list``,
    ``extract_most_affected_genes`` selects gene indices from it, and the
    corresponding names are written to ``file_name``, one per line.

    Parameters
    ----------
    res : 2-D array
        Score matrix passed to ``get_link_list``.
    file_name : str
        Path of the text file to create (overwritten if it exists).
    target_len : int, optional
        Number of genes to select. Defaults to 33, the value that used to be
        hard-coded here.
    gene_names : sequence of str, optional
        Names indexed by gene position. Defaults to the notebook-level
        ``gene_name_list``.
    """
    if gene_names is None:
        # Keep the original behaviour: fall back on the notebook global.
        gene_names = gene_name_list

    link_list = get_link_list(res)
    most_affected_gene_idx = extract_most_affected_genes(link_list, target_len)

    # Retrieve gene names
    most_affected_gene_names = [gene_names[idx] for idx in most_affected_gene_idx]

    # Save
    with open(file_name, 'w') as f:
        f.writelines(f"{gene}\n" for gene in most_affected_gene_names)

## Evaluation

In [7]:
# Load the three saved `genie3_res_*` arrays from <project root>/figure2/processed.
processed_dir = os.path.join(get_project_root(), "figure2", "processed")
genie3_res_unsorted = np.load(os.path.join(processed_dir, "genie3_res_unsorted.npy"))
genie3_res_cebpa = np.load(os.path.join(processed_dir, "genie3_res_cebpa.npy"))
genie3_res_cebpe = np.load(os.path.join(processed_dir, "genie3_res_cebpe.npy"))


In [8]:
# Write one gene list per loaded `genie3_res_*` array (files go to the working directory).
for matrix, out_name in (
    (genie3_res_unsorted, 'genie3_affected_genes_unsorted.txt'),
    (genie3_res_cebpa, 'genie3_affected_genes_cebpa.txt'),
    (genie3_res_cebpe, 'genie3_affected_genes_cebpe.txt'),
):
    save_most_affected_genes(matrix, out_name)

In [8]:
# Load the three saved `genie3_grn_res_*` arrays from <project root>/figure2/processed.
processed_dir = os.path.join(get_project_root(), "figure2", "processed")
genie3_grn_res_unsorted = np.load(os.path.join(processed_dir, "genie3_grn_res_unsorted.npy"))
genie3_grn_res_cebpa = np.load(os.path.join(processed_dir, "genie3_grn_res_cebpa.npy"))
genie3_grn_res_cebpe = np.load(os.path.join(processed_dir, "genie3_grn_res_cebpe.npy"))

In [10]:
# Write one gene list per loaded `genie3_grn_res_*` array (files go to the working directory).
for matrix, out_name in (
    (genie3_grn_res_unsorted, 'genie3_grn_affected_genes_unsorted.txt'),
    (genie3_grn_res_cebpa, 'genie3_grn_affected_genes_cebpa.txt'),
    (genie3_grn_res_cebpe, 'genie3_grn_affected_genes_cebpe.txt'),
):
    save_most_affected_genes(matrix, out_name)

## Differences

In [9]:
# Element-wise absolute difference between the "unsorted" array and each of the
# cebpa / cebpe arrays.
affected_cebpa_adj_mat, affected_cebpe_adj_mat = (
    np.abs(genie3_res_unsorted - other)
    for other in (genie3_res_cebpa, genie3_res_cebpe)
)

In [10]:
# Write the gene lists derived from the two absolute-difference matrices.
for matrix, out_name in (
    (affected_cebpa_adj_mat, 'genie3_difference_affected_genes_cebpa.txt'),
    (affected_cebpe_adj_mat, 'genie3_difference_affected_genes_cebpe.txt'),
):
    save_most_affected_genes(matrix, out_name)

In [11]:
# Element-wise absolute difference between the "unsorted" GRN array and each of
# the cebpa / cebpe GRN arrays.
affected_cebpa_grn_adj_mat, affected_cebpe_grn_adj_mat = (
    np.abs(genie3_grn_res_unsorted - other)
    for other in (genie3_grn_res_cebpa, genie3_grn_res_cebpe)
)

In [12]:
# Write the gene lists derived from the two GRN absolute-difference matrices.
for matrix, out_name in (
    (affected_cebpa_grn_adj_mat, 'genie3_difference_grn_affected_genes_cebpa.txt'),
    (affected_cebpe_grn_adj_mat, 'genie3_difference_grn_affected_genes_cebpe.txt'),
):
    save_most_affected_genes(matrix, out_name)