## Process SCENIC results

In [None]:
import scanpy as sc
import loompy as lp
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import random
import numpy as np
from math import sqrt, ceil, floor
import seaborn as sns
import glob
import subprocess
import os
import sys
import pickle as pkl
import json
import zlib
import base64
from datetime import datetime
import warnings

# Notebook display configuration (IPython magics; only meaningful inside Jupyter/IPython).
# Give saved/rendered figures a white background (only needed for docs rendering).
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
# Use the 'retina' (high-resolution) format for inline figures.
%config InlineBackend.figure_format='retina'

# HPC figures: draw matplotlib output inline in the notebook.
%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" # print multiple outputs per code cell (not just last)
seed = 250


def set_seed(seed: int = seed) -> None:
    """Seed Python's and NumPy's global random number generators.

    The original signature read ``seed=int``, which made the *type* ``int``
    the default value instead of annotating the parameter, so calling
    ``set_seed()`` without an argument failed inside the seeding calls.

    Parameters
    ----------
    seed : int
        Seed value. Defaults to the notebook-level ``seed`` defined just
        above (the default is bound once, when this cell is executed).

    Notes
    -----
    ``PYTHONHASHSEED`` is also exported. The code only sets the environment
    variable; whether the current interpreter is affected is not something
    this cell controls (presumably it only matters for processes started
    afterwards - confirm if hash stability is required).
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    print('Seed set to', seed)


set_seed(seed)

In [93]:
# Create the output directory for the per-sample edge lists.
# makedirs(..., exist_ok=True) keeps the cell re-runnable: plain os.mkdir raises
# FileExistsError on the second execution (and fails if a parent is missing).
os.makedirs("/scratch/gent/vo/000/gvo00027/projects/CBIGR/21HPP_GRN_neuroinfl/singlecell/SCENIC_rebuttal_VZZ/Sample_Edges", exist_ok=True)

### MDD

In [None]:
def inverse_rank(importance):
    """Return 1/rank for every importance score, where rank 1 is the highest score.

    ``np.argsort`` returns the permutation that sorts the scores, not the rank
    of each row, so using it directly only yields correct ranks when the input
    happens to be sorted in descending order without ties. ``Series.rank``
    computes the per-row rank for any input order.

    Parameters
    ----------
    importance : array-like of float
        Importance scores, one per edge.

    Returns
    -------
    numpy.ndarray
        ``1 / rank`` in the same order as the input. Ties receive distinct
        ranks in order of appearance (``method="first"``).
    """
    ranks = pd.Series(importance, dtype=float).rank(method="first", ascending=False)
    return 1.0 / ranks.to_numpy()


for loom_path in glob.glob("/scratch/gent/vo/000/gvo00027/projects/CBIGR/21HPP_GRN_neuroinfl/singlecell/SCENIC_rebuttal_VZZ/MDD_*"):
    sample = os.path.basename(loom_path)

    # Skip directories with these name prefixes
    # (presumably the sex-stratified runs - confirm).
    if sample.startswith(("MDD_female_", "MDD_M")):
        continue
    print("Processing sample", sample)

    # Read the regulon incidence matrix (regulons x genes after transposing).
    # The context manager closes the loom file even if reading fails, so no
    # file handle is leaked per sample.
    with lp.connect(f"{loom_path}/scenic/{sample}/SCENIC_output_{sample}.loom", mode='r', validate=False) as lf:
        incmat = pd.DataFrame(lf.ra.MotifRegulons, index=lf.ra.Gene).T

    # Long format: one row per (regulon, gene) pair, keeping only members (value 1)
    edge_list = incmat.stack().reset_index()
    edge_list = edge_list[edge_list[0] == 1].drop(columns=[0])
    edge_list.columns = ["TF", "target"]

    # Strip the regulon suffix (everything from the first "_") so TF names
    # match the names used in the adjacency table
    edge_list["TF"] = [regulon.split("_")[0] for regulon in edge_list["TF"]]

    # Load the TF-target importance table
    weights = pd.read_csv(f"{loom_path}/scenic/{sample}/arboreto_with_multiprocessing/{sample}__adj.tsv", sep="\t")

    # Normalize importance scores as 1/rank (rank 1 = most important edge)
    weights["rank_inverse"] = inverse_rank(weights["importance"])

    # Attach weights to the regulon edges; edges without a weight get NaN
    edge_list = edge_list.merge(weights, on=["TF", "target"], how="left")

    # Strongest edges first
    edge_list = edge_list.sort_values("rank_inverse", ascending=False)

    # Drop edges that have no entry in the adjacency table (NaN importance
    # after the left merge) and save. The original note called this "remove
    # self-edges" - presumably self-edges are the ones lacking a weight; confirm.
    edge_list.dropna(subset=["importance"]).to_csv(f"/scratch/gent/vo/000/gvo00027/projects/CBIGR/21HPP_GRN_neuroinfl/singlecell/SCENIC_rebuttal_VZZ/Sample_Edges/{sample}_finaledges.csv")

### AD

In [None]:
def inverse_rank(importance):
    """Return 1/rank for every importance score, where rank 1 is the highest score.

    ``np.argsort`` returns the permutation that sorts the scores, not the rank
    of each row, so using it directly only yields correct ranks when the input
    happens to be sorted in descending order without ties. ``Series.rank``
    computes the per-row rank for any input order.

    Parameters
    ----------
    importance : array-like of float
        Importance scores, one per edge.

    Returns
    -------
    numpy.ndarray
        ``1 / rank`` in the same order as the input. Ties receive distinct
        ranks in order of appearance (``method="first"``).
    """
    ranks = pd.Series(importance, dtype=float).rank(method="first", ascending=False)
    return 1.0 / ranks.to_numpy()


for loom_path in glob.glob("/scratch/gent/vo/000/gvo00027/projects/CBIGR/21HPP_GRN_neuroinfl/singlecell/SCENIC_rebuttal_VZZ/AD_*"):
    sample = os.path.basename(loom_path)

    print("Processing sample", sample)

    # Read the regulon incidence matrix (regulons x genes after transposing).
    # The context manager closes the loom file even if reading fails, so no
    # file handle is leaked per sample.
    with lp.connect(f"{loom_path}/scenic/{sample}/SCENIC_output_{sample}.loom", mode='r', validate=False) as lf:
        incmat = pd.DataFrame(lf.ra.MotifRegulons, index=lf.ra.Gene).T

    # Long format: one row per (regulon, gene) pair, keeping only members (value 1)
    edge_list = incmat.stack().reset_index()
    edge_list = edge_list[edge_list[0] == 1].drop(columns=[0])
    edge_list.columns = ["TF", "target"]

    # Strip the regulon suffix (everything from the first "_") so TF names
    # match the names used in the adjacency table
    edge_list["TF"] = [regulon.split("_")[0] for regulon in edge_list["TF"]]

    # Load the TF-target importance table
    weights = pd.read_csv(f"{loom_path}/scenic/{sample}/arboreto_with_multiprocessing/{sample}__adj.tsv", sep="\t")

    # Normalize importance scores as 1/rank (rank 1 = most important edge)
    weights["rank_inverse"] = inverse_rank(weights["importance"])

    # Attach weights to the regulon edges; edges without a weight get NaN
    edge_list = edge_list.merge(weights, on=["TF", "target"], how="left")

    # Strongest edges first
    edge_list = edge_list.sort_values("rank_inverse", ascending=False)

    # Drop edges that have no entry in the adjacency table (NaN importance
    # after the left merge) and save. The original note called this "remove
    # self-edges" - presumably self-edges are the ones lacking a weight; confirm.
    edge_list.dropna(subset=["importance"]).to_csv(f"/scratch/gent/vo/000/gvo00027/projects/CBIGR/21HPP_GRN_neuroinfl/singlecell/SCENIC_rebuttal_VZZ/Sample_Edges/{sample}_finaledges.csv")