In [1]:
from collections import defaultdict
from itertools import product
import math

import numpy as np
import pandas as pd

In [2]:
# Load data
print('Loading Metadata...')
# Root folder for every input/output path built in this notebook.
# NOTE(review): 'grpahs' looks like a misspelling of 'graphs'; presumably it
# matches the on-disk folder name -- verify before renaming.
data_folder = 'data/PsychAD_freeze2_personalized_grpahs/'
metadata_path = f'{data_folder}syn26527784_latest.csv'
meta = pd.read_csv(metadata_path)

Loading Metadata...


In [3]:
# Utility functions
# Defaultdict with depth
def deepdefaultdict(depth, default=None):
    """Build a defaultdict nested ``depth`` levels deep.

    Args:
        depth: Number of nested defaultdict levels. ``0`` yields the leaf
            value itself rather than a dict.
        default: Leaf produced below the innermost level. When callable
            (e.g. ``list``) it is called to create a fresh leaf on every
            miss; otherwise the object itself is used. Defaults to ``None``.

    Returns:
        A ``defaultdict`` whose missing keys create the next level on access,
        or the leaf value when ``depth`` is 0.
    """
    if depth == 0:
        # The leaf used to be hard-coded to None, silently ignoring `default`.
        return default() if callable(default) else default
    return defaultdict(lambda: deepdefaultdict(depth - 1, default=default))

# Prune entries whose value has zero length from a flat (depth-1) dict
def prunedict(dic):
    """Return a new dict with only the entries of ``dic`` whose value is non-empty.

    A value is considered empty when ``len(value)`` is 0. The input mapping is
    not modified and key order is preserved.
    """
    return {key: value for key, value in dic.items() if len(value) > 0}

# Recursively convert nested defaultdicts to plain dicts
def dd2d(dd):
    """Recursively convert nested defaultdicts into plain dicts.

    Args:
        dd: A (possibly nested) ``defaultdict``, or any other object.

    Returns:
        A new plain ``dict`` in which every ``defaultdict`` value has been
        converted in turn. Anything that is not a ``defaultdict`` (including a
        plain ``dict``) is returned unchanged and not descended into.

    The input is left untouched: the conversion builds new dicts instead of
    overwriting the caller's nested defaultdicts in place.
    """
    if isinstance(dd, defaultdict):
        return {key: dd2d(value) for key, value in dd.items()}
    return dd

In [4]:
# Hyperparameters
# Metadata columns used to split subjects into groups (options: BRAAK_AD, Sex),
# kept in sorted order.
stratify_cols = sorted(['BRAAK_AD', 'Sex'])
# Cell type used to pick regulons: EN, Endo, Glial, Immune, IN, Mural, or None
# to skip the cell-type filter.
cell_type = 'Mural'
# Number of top-scoring regulons kept for the chosen cell type.
cell_type_top_regulons = 20
# Percentile of edge coefficients an edge must exceed to be kept.
edge_percentile = 0
# Percentile of per-edge occurrence counts used as the minimum support.
edge_present_pct = 99

In [5]:
# Stratify by column
print('Gathering...')
graph_ids = {}
# Sorted distinct values of each stratification column. Missing values show up
# as NaN here; since NaN never compares equal, those combinations match no
# rows and are dropped by the final prune.
unique_vals = [np.unique(meta[col]) for col in stratify_cols]
for vals in product(*unique_vals):
    labels = [f'{col}: {val}' for col, val in zip(stratify_cols, vals)]
    print('\t'.join(labels), end='')
    # Start from "every row" and narrow down one column at a time
    row_mask = np.ones(meta.shape[0], dtype=bool)
    for col, val in zip(stratify_cols, vals):
        row_mask &= (meta[col] == val).to_numpy()
    graph_ids[vals] = meta.loc[row_mask, 'SubID'].tolist()
    print(f'\t({len(graph_ids[vals])} IDs)')
# Discard value combinations that matched no subject
graph_ids = prunedict(graph_ids)

Gathering...
BRAAK_AD: 0.0	Sex: Female	(58 IDs)
BRAAK_AD: 0.0	Sex: Male	(128 IDs)
BRAAK_AD: 1.0	Sex: Female	(64 IDs)
BRAAK_AD: 1.0	Sex: Male	(65 IDs)
BRAAK_AD: 2.0	Sex: Female	(105 IDs)
BRAAK_AD: 2.0	Sex: Male	(82 IDs)
BRAAK_AD: 3.0	Sex: Female	(104 IDs)
BRAAK_AD: 3.0	Sex: Male	(80 IDs)
BRAAK_AD: 4.0	Sex: Female	(86 IDs)
BRAAK_AD: 4.0	Sex: Male	(52 IDs)
BRAAK_AD: 5.0	Sex: Female	(108 IDs)
BRAAK_AD: 5.0	Sex: Male	(44 IDs)
BRAAK_AD: 6.0	Sex: Female	(253 IDs)
BRAAK_AD: 6.0	Sex: Male	(153 IDs)
BRAAK_AD: nan	Sex: Female	(0 IDs)
BRAAK_AD: nan	Sex: Male	(0 IDs)


In [6]:
# Aggregate graphs
# For every stratum, pool the edges of all individual graphs that can be loaded
# into processed_graphs[stratum][TF][TG] -> list of coefs.
print('Processing...')
processed_graphs = {}
num_graphs = defaultdict(int)  # stratum -> number of graphs actually used
for k, v in graph_ids.items():
    print('\t'.join([f'{col}: {val}' for col, val in zip(stratify_cols, k)]), end='')
    running_graph = defaultdict(lambda: defaultdict(list))
    for graph_id in v:
        # Load individual graph
        try:
            graph = pd.read_csv(data_folder + f'regulon_grn/{graph_id}_regulon_list.csv')[['TF', 'gene', 'CoexWeight', 'regulon']]
            graph = graph.rename(columns={'gene': 'TG', 'CoexWeight': 'coef'})  # TF, TG, coef, regulon
        except Exception:
            # Best-effort: subjects whose graph file is missing or unreadable
            # are skipped. `Exception` (not a bare except) so Ctrl-C and
            # SystemExit still stop the run.
            continue
        # Filter to regulons based on cell-type
        if cell_type is not None:
            try:
                rss = pd.read_csv(data_folder + f'rss/{graph_id}_6_celltype_rss.csv', index_col=0)
                # Explicit check instead of `assert`, which is stripped under -O
                if cell_type not in rss.index:
                    continue
                # Top x regulons
                regulons = rss.loc[cell_type].nlargest(cell_type_top_regulons).index
                graph = graph.loc[np.isin(np.array(graph['regulon']), regulons)]
            except Exception:
                # Same best-effort policy as above for the RSS table
                continue
            # Top x percent of coefs
            # NOTE(review): the strict '>' drops the minimum-coef edge(s) even
            # when edge_percentile is 0, and this filter only runs when
            # cell_type is set -- confirm both are intended.
            if not graph.empty:
                # Guarded because a percentile of zero values is undefined
                graph = graph.loc[np.array(graph['coef']) > np.percentile(graph['coef'], edge_percentile)]
        num_graphs[k] += 1
        # Column-wise iteration avoids building a Series per row (iterrows)
        for tf, tg, coef in zip(graph['TF'], graph['TG'], graph['coef']):
            running_graph[tf][tg].append(coef)
    print(f'\t({num_graphs[k]} Graphs)')
    processed_graphs[k] = dd2d(running_graph)
num_graphs = dict(num_graphs)

Processing...
BRAAK_AD: 0.0	Sex: Female	(9 Graphs)
BRAAK_AD: 0.0	Sex: Male	(42 Graphs)
BRAAK_AD: 1.0	Sex: Female	(10 Graphs)
BRAAK_AD: 1.0	Sex: Male	(14 Graphs)
BRAAK_AD: 2.0	Sex: Female	(9 Graphs)
BRAAK_AD: 2.0	Sex: Male	(8 Graphs)
BRAAK_AD: 3.0	Sex: Female	(1 Graphs)
BRAAK_AD: 3.0	Sex: Male	(7 Graphs)
BRAAK_AD: 4.0	Sex: Female	(6 Graphs)
BRAAK_AD: 4.0	Sex: Male	(3 Graphs)
BRAAK_AD: 5.0	Sex: Female	(19 Graphs)
BRAAK_AD: 5.0	Sex: Male	(6 Graphs)
BRAAK_AD: 6.0	Sex: Female	(91 Graphs)
BRAAK_AD: 6.0	Sex: Male	(57 Graphs)


In [7]:
# Keep edges which have some references in common
# For every stratum, keep only the TF -> TG edges seen at least as often as the
# `edge_present_pct` percentile of per-edge occurrence counts, and store each
# kept edge as [TF, TG, mean coef].
print('Filtering...')
edgelists = {}
for k0, v0 in processed_graphs.items():
    print('\t'.join([f'{col}: {val}' for col, val in zip(stratify_cols, k0)]), end='')
    # Number of collected coefs for every edge in this stratum
    edge_counts = [len(v2) for v1 in v0.values() for v2 in v1.values()]
    edges = []
    if edge_counts:
        # Calculate min edges
        min_edges = np.percentile(edge_counts, edge_present_pct)
        edges = [
            [k1, k2, np.mean(v2)]
            for k1, v1 in v0.items()
            for k2, v2 in v1.items()
            if len(v2) >= min_edges
        ]
    # else: no edges at all in this stratum (e.g. no graph could be loaded);
    # skip the percentile, which is undefined for an empty list, and let the
    # prune below drop the stratum.
    num_edges = len(edges)
    print(f'\t({num_edges} Edges)')
    edgelists[k0] = edges
edgelists = prunedict(edgelists)

Filtering...
BRAAK_AD: 0.0	Sex: Female	(210 Edges)
BRAAK_AD: 0.0	Sex: Male	(724 Edges)
BRAAK_AD: 1.0	Sex: Female	(191 Edges)
BRAAK_AD: 1.0	Sex: Male	(373 Edges)
BRAAK_AD: 2.0	Sex: Female	(412 Edges)
BRAAK_AD: 2.0	Sex: Male	(814 Edges)
BRAAK_AD: 3.0	Sex: Female	(1359 Edges)
BRAAK_AD: 3.0	Sex: Male	(426 Edges)
BRAAK_AD: 4.0	Sex: Female	(564 Edges)
BRAAK_AD: 4.0	Sex: Male	(482 Edges)
BRAAK_AD: 5.0	Sex: Female	(538 Edges)
BRAAK_AD: 5.0	Sex: Male	(178 Edges)
BRAAK_AD: 6.0	Sex: Female	(1393 Edges)
BRAAK_AD: 6.0	Sex: Male	(841 Edges)


In [8]:
print('Saving...')
# Optional "<cell_type>_" prefix of every output file name
cell_type_prefix = f'{cell_type}_' if cell_type is not None else ''
for k, edges in edgelists.items():
    print('\t'.join([f'{col}: {val}' for col, val in zip(stratify_cols, k)]))
    # e.g. "<col1>_<val1>_<col2>_<val2>" for the stratum being written
    strata_tag = '_'.join(f'{col}_{val}' for col, val in zip(stratify_cols, k))
    out_path = f'{data_folder}processed/grn_{cell_type_prefix}{strata_tag}.csv'
    # One CSV per stratum with columns TF, TG, coef (plus the default index)
    pd.DataFrame(edges, columns=['TF', 'TG', 'coef']).to_csv(out_path)

Saving...
BRAAK_AD: 0.0	Sex: Female
BRAAK_AD: 0.0	Sex: Male
BRAAK_AD: 1.0	Sex: Female
BRAAK_AD: 1.0	Sex: Male
BRAAK_AD: 2.0	Sex: Female
BRAAK_AD: 2.0	Sex: Male
BRAAK_AD: 3.0	Sex: Female
BRAAK_AD: 3.0	Sex: Male
BRAAK_AD: 4.0	Sex: Female
BRAAK_AD: 4.0	Sex: Male
BRAAK_AD: 5.0	Sex: Female
BRAAK_AD: 5.0	Sex: Male
BRAAK_AD: 6.0	Sex: Female
BRAAK_AD: 6.0	Sex: Male
