In [9]:
import scanpy as sc
import os
from flecs.utils import get_project_root
import numpy as np
from operator import itemgetter
import pandas as pd

## Load

In [10]:
# Read the processed Perturb-seq AnnData file from the project's datasets folder.
adata = sc.read_h5ad(
    os.path.join(
        get_project_root(),
        "datasets",
        "PerturbSeq",
        "processed",
        "adata_processed_with_all_pseudotime_paths.h5ad",
    )
)

In [11]:
# Gene names in the order of `adata.var`; position in this list is the gene index.
gene_name_list = adata.var.index.tolist()

## Genie3

Source (GENIE3 reference Python implementation): https://github.com/vahuynh/GENIE3/blob/master/GENIE3_python/GENIE3.py

In [12]:
def get_link_list(VIM):
    """Rank every off-diagonal entry of ``VIM`` from highest to lowest score.

    Args:
        VIM: 2-D array of link scores; entry ``[i, j]`` is reported as the
            link ``(i, j)``.

    Returns:
        List of ``(i, j, score)`` tuples, one per entry with ``i != j``,
        sorted by descending score. Entries with equal scores keep their
        row-major order.
    """
    # Gather all regulatory links, leaving out the diagonal (self-links).
    links = []
    for (row, col), weight in np.ndenumerate(VIM):
        if row != col:
            links.append((row, col, weight))

    # Strongest links first; the sort is stable, so ties stay in scan order.
    links.sort(key=itemgetter(2), reverse=True)
    return links


In [13]:
def extract_most_affected_genes(link_list, target_len):
    """Pick the ``target_len`` genes that occur most often among the top links.

    Starting from the first ``target_len // 2`` links, the window of top links
    is widened one link at a time until it touches at least ``target_len``
    distinct genes or ``link_list`` is exhausted. The genes inside the window
    are then ranked by how many times they appear as either endpoint.

    Args:
        link_list: Sequence of ``(gene_a, gene_b, score)`` tuples, already
            ranked with the strongest link first. Only the first two fields
            of each tuple are read.
        target_len: Number of genes to return.

    Returns:
        List of at most ``target_len`` gene identifiers, most frequent first.
        Fewer are returned when ``link_list`` does not contain enough distinct
        genes; an empty list is returned for an empty ``link_list`` or a
        non-positive ``target_len``. The order among equally frequent genes is
        whatever ``DataFrame.value_counts`` yields.
    """
    n_links = len(link_list)
    if target_len <= 0 or n_links == 0:
        return []

    # Track distinct genes incrementally instead of rebuilding the endpoint
    # lists and calling np.unique on every iteration.
    n_top_edges = min(target_len // 2, n_links)
    seen_genes = set()
    for edge in link_list[:n_top_edges]:
        seen_genes.update((edge[0], edge[1]))

    # Stop at the end of link_list: without this bound the loop would never
    # terminate when fewer than target_len distinct genes are available.
    while len(seen_genes) < target_len and n_top_edges < n_links:
        edge = link_list[n_top_edges]
        seen_genes.update((edge[0], edge[1]))
        n_top_edges += 1

    top_links = link_list[:n_top_edges]
    endpoints = [t[0] for t in top_links] + [t[1] for t in top_links]

    # value_counts on a one-column frame returns a MultiIndex of 1-tuples
    # sorted by descending frequency; unwrap each tuple to the bare gene id.
    ranked = list(pd.DataFrame(endpoints).value_counts().index)[:target_len]
    return [g[0] for g in ranked]

In [14]:
def save_most_affected_genes(res, file_name, n_genes=33, gene_names=None):
    """Write the names of the most affected genes in ``res`` to a text file.

    ``res`` is ranked with ``get_link_list``, the most frequent genes among
    the top links are selected with ``extract_most_affected_genes``, and their
    names are written one per line.

    Args:
        res: Score matrix passed to ``get_link_list``.
        file_name: Path of the output text file; it is opened in ``'w'`` mode,
            so any existing content is overwritten.
        n_genes: Number of genes to request (default 33, the value that was
            previously hard-coded here).
        gene_names: Sequence mapping a gene index to its name. Defaults to the
            notebook-level ``gene_name_list``.
    """
    if gene_names is None:
        # Fall back to the notebook global so existing two-argument calls
        # behave exactly as before.
        gene_names = gene_name_list

    link_list = get_link_list(res)
    most_affected_gene_idx = extract_most_affected_genes(link_list, n_genes)

    # Retrieve gene names
    most_affected_gene_names = [gene_names[idx] for idx in most_affected_gene_idx]

    # Save
    with open(file_name, 'w') as f:
        f.writelines(f"{gene}\n" for gene in most_affected_gene_names)

## Evaluation

In [15]:
# Knock-out genes evaluated in the loops below.
KO_genes = [
    'ARGLU1', 'ASF1B', 'MSN', 'DNAJC3',
    'MLLT6', 'STK38L', 'HSPG2', 'CHPF',
    'TBX1', 'BUB1', 'KIF11', 'RCN1',
]

In [None]:
# For each knock-out, compare its `genie3_grn_res_*` matrix with the
# non-targeting one and save the genes whose links changed the most.
# NOTE(review): this cell reads `genie3_grn_res_*.npy`, while another cell in
# this notebook reads `genie3_res_*.npy` — confirm both file families exist.
processed_dir = os.path.join(get_project_root(), "figure4", "processed")
logs_dir = os.path.join(get_project_root(), "figure4", "logs")

# The non-targeting matrix is the same for every knock-out, so load it once
# instead of on every iteration.
grn_res_unperturbed = np.load(os.path.join(processed_dir, "genie3_grn_res_non-targeting.npy"))

for ko in KO_genes:
    grn_res_ko = np.load(os.path.join(processed_dir, "genie3_grn_res_" + ko + ".npy"))

    # Element-wise absolute change of each score relative to non-targeting.
    affected_ko_adj_mat = np.abs(grn_res_ko - grn_res_unperturbed)

    save_most_affected_genes(affected_ko_adj_mat,
                             os.path.join(logs_dir, "genie3_grn_affected_genes_" + ko + ".txt"))

In [16]:
# For each knock-out, compare its `genie3_res_*` matrix with the
# non-targeting one and save the genes whose links changed the most.
processed_dir = os.path.join(get_project_root(), "figure4", "processed")
logs_dir = os.path.join(get_project_root(), "figure4", "logs")

# The non-targeting matrix is the same for every knock-out, so load it once
# instead of on every iteration.
grn_res_unperturbed = np.load(os.path.join(processed_dir, "genie3_res_non-targeting.npy"))

for ko in KO_genes:
    grn_res_ko = np.load(os.path.join(processed_dir, "genie3_res_" + ko + ".npy"))

    # Element-wise absolute change of each score relative to non-targeting.
    affected_ko_adj_mat = np.abs(grn_res_ko - grn_res_unperturbed)

    save_most_affected_genes(affected_ko_adj_mat,
                             os.path.join(logs_dir, "genie3_affected_genes_" + ko + ".txt"))