In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import graph_tool.all as gt
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from functions import *


# Switch matplotlib to the cairo backend (needed for Graph-Tool compatibility)
plt.switch_backend('cairo')
# Global seaborn theme for every figure in this notebook
sns.set_theme(context='talk', style='white', palette='Set2')
# Use font type 42 for both PDF and PostScript output
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Computation

In [3]:
# Load metadata
meta = get_meta()

# Subject preview: collect one summary line for every subject that
#   - has an NPS value recorded (`nps_MoodDysCurValue` is not NaN),
#   - is Female and not White, and
#   - has a graph that `load_graph_by_id` can load without raising.
filtered = []
for _, row in meta.iterrows():
    try:
        # Cheap metadata checks run first so that graphs are only loaded for
        # subjects that can still be selected.  Explicit conditions are used
        # instead of `assert`, which is stripped when Python runs with -O.
        if np.isnan(row['nps_MoodDysCurValue']):  # No NPS
            continue
        if row['Sex'] != 'Female' or row['Ethnicity'] == 'White':
            continue
        # Only checks that the graph is loadable; the result is discarded
        load_graph_by_id(row['SubID'])
    except Exception:
        # Best-effort filter: any failure (missing graph, non-numeric NPS
        # value, ...) skips the subject.  `Exception` rather than a bare
        # `except` so that interrupting the kernel still works.
        continue
    filtered.append(f'{row["SubID"]} {row["Ethnicity"]} {row["Sex"]}, {row["Age"]}, BRAAK {row["BRAAK_AD"]}')
# Hand-picked positions in `filtered`; the printed subjects are the ones
# listed in `individual_subject_ids` below
for i in (2, 4, 5, 6):
    print(filtered[i])

# Parameters
column_diff = 'att_D_AD_0_1'
column_data = 'att_D_no_prior_0'
coex_diff_compare_phenotype = 'nps_PsychoAgiCurValue'
diff_data_compare_phenotype = 'nps_WtLossCurValue'
individual_subject_ids = ['M19050', 'M59593', 'M83214', 'M36634']

M19050 Hispanic Female, 74.0, BRAAK 5.0
M59593 Hispanic Female, 76.0, BRAAK 5.0
M83214 Hispanic Female, 83.0, BRAAK 6.0
M36634 Hispanic Female, 87.0, BRAAK 6.0


In [4]:
# Build one graph per selected subject, for each of the three data sources

# Coexpression source: edge coefficients go through `scale_edge_coefs_list`
# with a factor of 1/60, the graph is computed with filter=.9, and the result
# is passed through `cull_isolated_leaves`
coex_g_individuals = [
    cull_isolated_leaves(
        compute_graph(
            scale_edge_coefs_list(load_graph_by_id(subject_id, source='coexpression'), 1./60),
            filter=.9,
        )
    )
    for subject_id in individual_subject_ids
]

# Graphs loaded from the `column_diff` column
diff_g_individuals = [
    compute_graph(load_graph_by_id(subject_id, column=column_diff))
    for subject_id in individual_subject_ids
]

# Graphs loaded from the `column_data` column
data_g_individuals = [
    compute_graph(load_graph_by_id(subject_id, column=column_data))
    for subject_id in individual_subject_ids
]

# Plots

### Coex Individual Trio Comparison

In [5]:
# Select the first three coexpression graphs and their matching subject IDs
graphs = coex_g_individuals[:3]
graphs_subject_ids = individual_subject_ids[:3]

# Single-row layout: one panel per graph plus a trailing panel for the summary
fig, axs = get_mosaic([[*range(len(graphs) + 1)]], scale=9)

# Summarize edges across the selected graphs
df, concatenated_graph = compute_edge_summary(
    graphs=graphs,
    subject_ids=graphs_subject_ids,
)

# Draw the per-subject graph panels
g = plot_graph_comparison(
    graphs,
    concatenated_graph=concatenated_graph,
    axs=axs,
    subject_ids=graphs_subject_ids,
)

# Draw the edge summary in the last panel
plot_edge_summary(
    graphs,
    df=df,
    ax=axs[len(graphs)],
    subject_ids=graphs_subject_ids,
)

# Write the figure to disk as a transparent PDF
plt.tight_layout()
fig.savefig('../plots/CoexIndividualTrioComparison.pdf', format='pdf', transparent=True, backend='cairo')

Removing duplicate edges...


100%|██████████████████████████████████████████████████████████████████████████| 14459/14459 [00:00<00:00, 20642.26it/s]


Collecting edges...


100%|██████████████████████████████████████████████████████████████████████████| 11096/11096 [00:00<00:00, 54996.41it/s]


Calculating positions...


  plt.tight_layout()


### Individual Trio Comparison

In [6]:
# Select the first three `column_data` graphs and their matching subject IDs
graphs = data_g_individuals[:3]
graphs_subject_ids = individual_subject_ids[:3]

# Single-row layout: one panel per graph plus a trailing panel for the summary
fig, axs = get_mosaic([[*range(len(graphs) + 1)]], scale=9)

# Summarize edges across the selected graphs
df, concatenated_graph = compute_edge_summary(
    graphs=graphs,
    subject_ids=graphs_subject_ids,
)

# Draw the per-subject graph panels
plot_graph_comparison(
    graphs,
    concatenated_graph=concatenated_graph,
    axs=axs,
    subject_ids=graphs_subject_ids,
)

# Draw the edge summary in the last panel
plot_edge_summary(
    graphs,
    df=df,
    ax=axs[len(graphs)],
    subject_ids=graphs_subject_ids,
)

# Write the figure to disk as a transparent PDF
plt.tight_layout()
fig.savefig('../plots/IndividualTrioComparison.pdf', format='pdf', transparent=True, backend='cairo')

Removing duplicate edges...


100%|████████████████████████████████████████████████████████████████████████████| 3100/3100 [00:00<00:00, 76408.41it/s]


Collecting edges...


100%|████████████████████████████████████████████████████████████████████████████| 2200/2200 [00:00<00:00, 57177.45it/s]


Calculating positions...


  plt.tight_layout()


### Aggregate Trio Comparison

In [7]:
# Parameters
contrast = 'c01x'
column = column_diff

# Resolve the contrast once instead of calling `get_contrast` for every use.
# The subgroup count is captured up front so the panel layout does not depend
# on anything the later calls might do to the returned object.
contrast_subgroups = get_contrast(contrast)
n_subgroups = len(contrast_subgroups)

# Create figure: one panel per subgroup plus a trailing panel for the summary
fig, axs = get_mosaic([list(range(n_subgroups + 1))], scale=9)

# Compute aggregate edge summaries.  The `column` parameter defined above is
# passed here so that changing it in the parameter section actually takes
# effect (previously `column_diff` was passed directly).
contrast_group = compute_aggregate_edge_summary(contrast_subgroups, column=column)

# Plot graph comparison
# contrast_group[0] holds the per-subgroup graphs; panel labels are the keys
# of contrast_group[1].
# NOTE(review): presumably both mappings share the same keys in the same
# order — confirm against `compute_aggregate_edge_summary`.
plot_graph_comparison(
    graphs=list(contrast_group[0].values()),
    subject_ids=list(contrast_group[1].keys()),
    axs=[axs[i] for i in range(n_subgroups)])

# Plot edge summary for subgroups in the last panel
plot_aggregate_edge_summary(ax=axs[n_subgroups], contrast=contrast_group)

# Save figure
plt.tight_layout()
fig.savefig('../plots/AggregateTrioComparison.pdf', format='pdf', transparent=True, backend='cairo')

Removing duplicate edges...


100%|█████████████████████████████████████████████████████████████████████████| 187003/187003 [00:29<00:00, 6364.90it/s]


Removing duplicate edges...


100%|██████████████████████████████████████████████████████████████████████████| 63192/63192 [00:05<00:00, 12598.02it/s]


Removing duplicate edges...


100%|███████████████████████████████████████████████████████████████████████████| 74862/74862 [00:20<00:00, 3677.98it/s]


Calculating positions...
Collecting edges...


100%|██████████████████████████████████████████████████████████████████████████| 50922/50922 [00:03<00:00, 15248.42it/s]


Collecting edges...


100%|██████████████████████████████████████████████████████████████████████████| 23940/23940 [00:00<00:00, 30584.85it/s]
  plt.tight_layout()


##### Linkage Analysis

In [11]:
# Record edge instances: for every edge of every subgroup graph, count how
# many of its coefficients are non-zero.  Rows are gathered in a list and the
# DataFrame is built once (appending to a DataFrame row by row is slow).
# NOTE(review): `tqdm`, `get_edge_string` and `detect_synthetic_vertices_graph`
# are not imported explicitly in this notebook; presumably they come in
# through `from functions import *` — confirm.
edge_records = []
for subgroup, subgroup_graph in contrast_group[0].items():
    for e in tqdm(subgroup_graph.edges(), total=subgroup_graph.num_edges()):
        edge_records.append({
            'Edge': get_edge_string(subgroup_graph, e),
            'Subgroup': subgroup,
            'Count': sum(c != 0 for c in subgroup_graph.ep.coefs[e]),
        })
df = pd.DataFrame(edge_records, columns=['Edge', 'Subgroup', 'Count'])

# Get edge counts (rows: edges, columns: subgroups); an edge that is absent
# from a subgroup gets a count of 0
count_table = df.pivot(index='Edge', columns='Subgroup', values='Count')
count_table = count_table.fillna(0)
# Max scale for fairness
subgroup_columns = list(contrast_group[0])
for subgroup in subgroup_columns:
    count_table[subgroup] /= count_table[subgroup].max()
# Compute differences
# TODO: REVISE DIFFERENCE METRIC
count_table['Difference'] = count_table['AD'] - count_table['Control']
# Range is taken over the subgroup columns only.  Taking max/min over the
# whole table would also include the signed 'Difference' column added above
# and inflate the range whenever the difference is negative.
count_table['Range'] = (
    count_table[subgroup_columns].max(axis=1)
    - count_table[subgroup_columns].min(axis=1)
)

# Get list of linkages by significance
# Synthetic vertex names are the same for every difference value, so they are
# collected once, into a set for fast membership tests.
synthetic_genes = set(
    np.concatenate([
        detect_synthetic_vertices_graph(subgroup_graph)
        for subgroup_graph in contrast_group[0].values()
    ]).ravel().tolist()
)
# Opening in 'w' mode truncates any previous list; the file is written in a
# single pass, from the largest difference value down to the smallest.
with open('../plots/AggregateTrioComparisonList.txt', 'w') as f:
    for difference in np.unique(count_table['Difference'])[::-1]:
        condition = (count_table['Difference'] == difference)
        significant_edges = list(count_table.loc[condition].index)
        # Edge strings are split on '--' into their two endpoint names.  The
        # flat comprehension also copes with an empty edge list (e.g. a NaN
        # difference never compares equal to itself), so no try/except is
        # needed.
        endpoint_names = [name for edge in significant_edges for name in edge.split('--')]
        significant_genes = np.unique([name for name in endpoint_names if name not in synthetic_genes])

        # Print significant genes
        if len(significant_genes) > 0:
            print(f'--- {difference} ---', file=f)
            for gene in significant_genes:
                print(gene, file=f)
            print(file=f)

100%|██████████████████████████████████████████████████████████████████████████| 50922/50922 [00:01<00:00, 29009.67it/s]
100%|██████████████████████████████████████████████████████████████████████████| 23940/23940 [00:00<00:00, 51391.97it/s]


### Differentially Expressed Edges

In [None]:
# # TODO: Fix nodes cutting off
# # Plot total and subplots for aggregate differences
# for prefix, individuals in zip(('diff', 'data'), (diff_g_individuals, data_g_individuals)):
#     plt.clf()
#     concat = concatenate_graphs(*individuals)
#     concat = get_intersection(concat)
#     concat = cull_isolated_leaves(concat)
#     concat = remove_text_by_centrality(concat)
#     concat = color_by_significance(concat)
#     visualize_graph(concat)
#     plt.gca().axis('off')
#     plt.tight_layout()
#     plt.savefig(f'../plots/{prefix}_concat.pdf', format='pdf', transparent=True, backend='cairo')

#     # Show all subsets of graph by cell type
#     for v_name in detect_synthetic_vertices_graph(concat):
#         plt.clf()
#         subset = subset_by_hub(concat, [v_name])
#         visualize_graph(subset)
#         plt.gca().axis('off')
#         plt.tight_layout()
#         plt.savefig(f'../plots/{prefix}_concat_{v_name}.pdf', format='pdf', transparent=True, backend='cairo')

### Enrichment

In [None]:
# from functions.plotting import plot_enrichment

# # Generate fake enrichment data
# import itertools
# # Get columns
# cell_type = ['OPC', 'Micro', 'Oligo', 'Endo']
# disease = ['Alzheimer\'s Disease', 'Schizophrenia', 'Bipolar Disorder', 'A', 'B', 'C', 'D', 'E', 'F', 'G']
# combined = [val for val in itertools.product(cell_type, disease)]
# cell_type = [val[0] for val in combined]
# disease = [val[1] for val in combined]
# # Get significance
# np.random.seed(42)
# significance = np.exp(-8 * np.random.rand(len(combined)))
# # Combine
# df = pd.DataFrame({'cell_type': cell_type, 'disease': disease, 'significance': significance})
# df = df.loc[df['significance'] < 5e-2]

# # Rename
# df = df.rename(columns={'cell_type': 'Cell Type', 'disease': 'Disease'})
# # Add significance scale
# df['-log10(p)'] = -np.log10(df['significance'])
# plt.clf()
# plot_enrichment(df)
# plt.tight_layout()
# plt.savefig(f'../plots/enrichment.pdf', format='pdf', transparent=True, backend='cairo')

### Individual Edge Comparison

In [None]:
# # NOTE: Manually annotate individual points/genes of interest
# for prefix, individuals in zip(('diff', 'data'), (diff_g_individuals, data_g_individuals)):
#     # Get graph
#     sample_ids = individual_sample_ids[:len(individuals)]
#     g = concatenate_graphs(*individuals)
#     g = get_intersection(g)
#     g = cull_isolated_leaves(g)

#     plt.clf()
#     df = plot_individual_edge_comparison(g, sample_ids)
#     plt.tight_layout()
#     plt.savefig(f'../plots/{prefix}_edge_comparison.pdf', format='pdf', transparent=True, backend='cairo')

### Legend

In [None]:
# Draw the standalone legend on a cleared current figure
plt.clf()
plot_legend()

# Hide the axes so that only the legend is visible
legend_ax = plt.gca()
legend_ax.axis('off')

# Write the legend to disk as a transparent PDF
plt.tight_layout()
plt.savefig('../plots/legend.pdf', format='pdf', transparent=True, backend='cairo')

  plt.tight_layout()
