In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# project modules are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [2]:
import graph_tool.all as gt
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from functions import *


# Graph-Tool compatibility: route all matplotlib rendering through cairo
plt.switch_backend('cairo')

# Style: seaborn theme first, then the font-embedding overrides
sns.set_theme(context='talk', style='white', palette='Set2')
# Type 42 (TrueType) fonts for both the PDF and PS writers
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

# Computation

In [3]:
# Load metadata
meta = get_meta()

# Subject preview: collect one summary line per subject that
#   * has a graph that loads without error,
#   * has a recorded `nps_MoodDysCurValue` (i.e. has NPS data),
#   * is Female, and
#   * has an Ethnicity other than 'White'.
filtered = []
for _, row in meta.iterrows():
    try:
        # Loading is only used as an availability check; the graph is discarded
        load_graph_by_id(row['SubID'])
        # Explicit boolean instead of `assert` so the filter still works under
        # `python -O`, which strips assert statements
        keep = (
            not np.isnan(row['nps_MoodDysCurValue'])  # Has NPS
            and row['Sex'] == 'Female'
            and row['Ethnicity'] != 'White'
        )
    except Exception:
        # Best-effort skip of unusable rows. `Exception` (not a bare except)
        # lets KeyboardInterrupt/SystemExit stop this potentially long loop.
        continue
    if not keep:
        continue
    filtered.append(f'{row["SubID"]} {row["Ethnicity"]} {row["Sex"]}, {row["Age"]}, BRAAK {row["BRAAK_AD"]}')

# Print the hand-picked preview rows (positions within `filtered`)
for preview_idx in (2, 4, 5, 6):
    print(filtered[preview_idx])

# Parameters
column_diff = 'att_D_AD_0_1'
column_data = 'att_D_no_prior_0'
coex_diff_compare_phenotype = 'nps_PsychoAgiCurValue'
diff_data_compare_phenotype = 'nps_WtLossCurValue'
individual_sample_ids = ['M19050', 'M59593', 'M83214', 'M36634']

M19050 Hispanic Female, 74.0, BRAAK 5.0
M59593 Hispanic Female, 76.0, BRAAK 5.0
M83214 Hispanic Female, 83.0, BRAAK 6.0
M36634 Hispanic Female, 87.0, BRAAK 6.0


In [4]:
# Compute summaries (every call is keyed on 'BRAAK_AD'; call order is kept
# because each one emits its own progress bar)

# Psycho-agitation phenotype: co-expression source, then the `column_diff` column
diff_graph_summary_coex = compute_statistics(
    meta, 'BRAAK_AD', coex_diff_compare_phenotype,
    source='coexpression',
)
diff_graph_summary_att = compute_statistics(
    meta, 'BRAAK_AD', coex_diff_compare_phenotype,
    column=column_diff,
)

# Weight-loss phenotype: `column_diff` column, then the `column_data` column
diff_graph_summary = compute_statistics(
    meta, 'BRAAK_AD', diff_data_compare_phenotype,
    column=column_diff,
)
data_graph_summary = compute_statistics(
    meta, 'BRAAK_AD', diff_data_compare_phenotype,
    column=column_data,
)

100%|███████████████████████████████████████████████████████████████████████████████████| 24/24 [01:54<00:00,  4.77s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 24/24 [00:01<00:00, 13.10it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 24/24 [00:01<00:00, 13.52it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 24/24 [00:01<00:00, 13.34it/s]


In [5]:
# Compute individual graphs
# Comparison between first two on diffusion head
diff_g_individuals = [
    compute_graph(load_graph_by_id(sample_id, column=column_diff))
    for sample_id in individual_sample_ids[:2]
]
# All selected individuals on the `column_data` column
data_g_individuals = [
    compute_graph(load_graph_by_id(sample_id, column=column_data))
    for sample_id in individual_sample_ids
]

# Plotting

## Differentially Expressed Edges

In [6]:
# Plot total and subplots for aggregate differences
for prefix, individuals in (('diff', diff_g_individuals), ('data', data_g_individuals)):
    plt.clf()

    # Build the combined graph: concatenate -> intersect -> cull -> de-label -> colour
    concat = get_intersection(concatenate_graphs(*individuals))
    concat = remove_text_by_centrality(cull_isolated_leaves(concat))
    concat = color_by_significance(concat)

    # Full graph
    visualize_graph(concat)
    plt.gca().axis('off')
    plt.tight_layout()
    plt.savefig(
        f'../plots/{prefix}_concat.pdf',
        format='pdf', transparent=True, backend='cairo',
    )

    # Show all subsets of graph by cell type: one PDF per synthetic vertex
    for v_name in detect_synthetic_vertices_graph(concat):
        plt.clf()
        subset = subset_by_hub(concat, [v_name])
        visualize_graph(subset)
        plt.gca().axis('off')
        plt.tight_layout()
        plt.savefig(
            f'../plots/{prefix}_concat_{v_name}.pdf',
            format='pdf', transparent=True, backend='cairo',
        )

## Enrichment

In [7]:
from functions.plotting import plot_enrichment

# Generate fake enrichment data
import itertools

# Get columns: every (cell type, disease) pairing, then split back into two
# parallel lists so each row of the table is one pairing
cell_type = ['OPC', 'Micro', 'Oligo', 'Endo']
disease = ['Alzheimer\'s Disease', 'Schizophrenia', 'Bipolar Disorder', 'A', 'B', 'C', 'D', 'E', 'F', 'G']
combined = list(itertools.product(cell_type, disease))
cell_type, disease = (list(column) for column in zip(*combined))

# Get significance: seeded so the placeholder values are reproducible
np.random.seed(42)
significance = np.exp(-8 * np.random.rand(len(combined)))

# Combine, keep only rows below the 5e-2 cutoff, rename for display, and add
# the significance scale used by the plot
df = (
    pd.DataFrame({'cell_type': cell_type, 'disease': disease, 'significance': significance})
    .loc[lambda frame: frame['significance'] < 5e-2]
    .rename(columns={'cell_type': 'Cell Type', 'disease': 'Disease'})
    .assign(**{'-log10(p)': lambda frame: -np.log10(frame['significance'])})
)

# Draw and export
plt.clf()
plot_enrichment(df)
plt.tight_layout()
plt.savefig(f'../plots/enrichment.pdf', format='pdf', transparent=True, backend='cairo')

## Graph Summaries

In [8]:
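# TODO: Finish the graph-summary figure. The draft below is not runnable as written:
#       the mosaic labels are 'AA'/'BB' but the axes are looked up as 'B1'/'B2',
#       and `ax` (and `scale`, unless provided by `functions`) is never defined.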
# plt.clf()
# mosaic = [['AA'], ['BB']]
# fig = plt.figure(figsize=(scale*len(mosaic[0]), scale*len(mosaic)), constrained_layout=True)
# axs = fig.subplot_mosaic(mosaic)
# axs['B1'].get_shared_x_axes().join(axs['B1'], axs['B2'])
# plot_statistic(diff_graph_summary_coex, col='Cliques', ax=ax)
# plt.tight_layout()
# plt.savefig(f'../plots/summary.pdf', format='pdf', transparent=True, backend='cairo')

## Individual Edge Comparison

In [9]:
# NOTE: Manually annotate individual points/genes of interest
for prefix, individuals in (('diff', diff_g_individuals), ('data', data_g_individuals)):
    # One sample id per graph in this group, taken from the front of the id list
    sample_ids = individual_sample_ids[:len(individuals)]

    # Get graph: concatenate -> intersect -> cull
    g = cull_isolated_leaves(get_intersection(concatenate_graphs(*individuals)))

    # Draw the comparison on a cleared figure and export it
    plt.clf()
    df = plot_individual_edge_comparison(g, sample_ids)
    plt.tight_layout()
    plt.savefig(
        f'../plots/{prefix}_edge_comparison.pdf',
        format='pdf', transparent=True, backend='cairo',
    )

## Legend

In [10]:
# Plot legend on a cleared figure, hide the axes, and export as a standalone PDF
plt.clf()
plot_legend()
plt.axis('off')  # pyplot wrapper around the current axes' `axis('off')`
plt.tight_layout()
plt.savefig(
    f'../plots/legend.pdf',
    format='pdf',
    transparent=True,
    backend='cairo',
)

## Sankey

In [11]:
# TODO, but not needed
# plt.clf()
# fig = plot_sankey(
#     meta,
#     [('Ethnicity', 'BRAAK_AD'), ('BRAAK_AD', 'nps_PsychoAgiCurValue'), ('BRAAK_AD', 'nps_WtLossCurValue')],
#     # order={'Ethnicity': 0, 'BRAAK_AD': 1, 'nps_PsychoAgiCurValue': 2, 'nps_WtLossCurValue': 2},
# )
# fig.write_image('../plots/sankey.pdf')