In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import graph_tool.all as gt
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from functions import *


# Graph-Tool compatibility: render matplotlib figures through the cairo backend
plt.switch_backend('cairo')
# Style
sns.set_theme(context='talk', style='white', palette='Set2')
# Font type 42 makes the PDF/PS writers embed TrueType fonts (instead of
# the default Type 3), so text in the saved figures stays editable.
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Computation

In [3]:
# Load metadata
meta = get_meta()

# Subject preview: build one summary line for every subject that
#   - has a non-NaN 'nps_MoodDysCurValue' (i.e. has NPS),
#   - is recorded as Female,
#   - is not recorded as White, and
#   - has a graph that `load_graph_by_id` can load without raising.
filtered = []
for _, row in meta.iterrows():
    try:
        # Explicit conditions rather than `assert`: assertions are meant for
        # invariants and are stripped entirely when Python runs with -O.
        if np.isnan(row['nps_MoodDysCurValue']):  # No NPS
            continue
        if row['Sex'] != 'Female' or row['Ethnicity'] == 'White':
            continue
        # Metadata checks run first so a graph is only loaded for rows that
        # already passed them; the set of accepted subjects is unchanged.
        load_graph_by_id(row['SubID'])
    except Exception:
        # Best-effort filter: any failure (missing column, unloadable graph,
        # non-numeric NPS value, ...) drops the subject. Catching `Exception`
        # instead of a bare `except:` lets KeyboardInterrupt / SystemExit
        # propagate, so the loop can still be interrupted.
        continue
    filtered.append(f'{row["SubID"]} {row["Ethnicity"]} {row["Sex"]}, {row["Age"]}, BRAAK {row["BRAAK_AD"]}')

# Positions within `filtered` of the subjects to preview. These depend on the
# row order of `meta`; an IndexError here means the filtered set has changed.
preview_indices = (2, 4, 5, 6)
for i in preview_indices:
    print(filtered[i])

# Parameters
column_diff = 'att_D_AD_0_1'
column_data = 'att_D_no_prior_0'
coex_diff_compare_phenotype = 'nps_PsychoAgiCurValue'
diff_data_compare_phenotype = 'nps_WtLossCurValue'
individual_subject_ids = ['M19050', 'M59593', 'M83214', 'M36634']

M19050 Hispanic Female, 74.0, BRAAK 5.0
M59593 Hispanic Female, 76.0, BRAAK 5.0
M83214 Hispanic Female, 83.0, BRAAK 6.0
M36634 Hispanic Female, 87.0, BRAAK 6.0


In [4]:
# Compute individual graphs.
# Each list holds one graph per entry of `individual_subject_ids`, in the same order.

# Coexpression graphs: edge coefficients are scaled by 1/60 before the graph is
# computed with filter=.9, then isolated leaves are culled.
# NOTE(review): the meaning of the 1/60 factor and of `filter` lives in the
# `functions` helpers - confirm there before changing either value.
coex_g_individuals = [
    cull_isolated_leaves(
        compute_graph(
            scale_edge_coefs_list(load_graph_by_id(subject_id, source='coexpression'), 1./60),
            filter=.9,
        )
    )
    for subject_id in individual_subject_ids
]

# Graphs built from the `column_diff` column
diff_g_individuals = [
    compute_graph(load_graph_by_id(subject_id, column=column_diff))
    for subject_id in individual_subject_ids
]

# Graphs built from the `column_data` column
data_g_individuals = [
    compute_graph(load_graph_by_id(subject_id, column=column_data))
    for subject_id in individual_subject_ids
]

# Plots

### Individual Plots

In [5]:
# Render one single-panel figure per (graph kind, subject) pair and save it to
# ../plots/mini_<subject id>_<graph kind>.pdf.
# The loop variable is named `graph_label` rather than `type` so that the
# builtin `type` is not shadowed for the rest of the notebook session.
for graph_label, graphs in zip(('AD', 'Data'), (diff_g_individuals, data_g_individuals)):
    for sid, g in zip(individual_subject_ids, graphs):
        fig, axs = get_mosaic([[0]], scale=9)

        # Work on a copy before filtering/relabelling.
        # NOTE(review): presumably so the graphs kept in *_g_individuals are
        # not modified by the steps below - confirm against the helpers.
        g_plot = g.copy()
        g_plot = filter_graph_by_synthetic_vertices(g_plot, vertex_ids=['OPC', 'Micro', 'Oligo'])
        g_plot = remove_text_by_centrality(g_plot)
        visualize_graph_base(g_plot, pos=get_graph_pos(g_plot), mplfig=axs[0])
        axs[0].axis('off')

        fig.savefig(f'../plots/mini_{sid}_{graph_label}.pdf', format='pdf', transparent=True, backend='cairo')
        # Release the figure once it is on disk; otherwise every figure
        # created in this loop stays registered with pyplot and held in memory.
        plt.close(fig)

Calculating positions...
Calculating positions...
Calculating positions...
Calculating positions...
Calculating positions...
Calculating positions...
Calculating positions...
Calculating positions...


### Individual Comparisons

In [16]:
# Compare edges between the first `n_compared` individual subjects, once for
# `column_diff` and once for `column_data`, saving each comparison to
# ../plots/edge_comparison_<column>.pdf.
n_compared = 2
# Same subjects for both columns, so this is computed once outside the loop
sample_ids = individual_subject_ids[:n_compared]

for column, graphs in zip(
    (column_diff, column_data),
    (diff_g_individuals[:n_compared], data_g_individuals[:n_compared]),
):
    # Get graph: concatenate the per-subject graphs, take the intersection,
    # then drop isolated leaves
    g = concatenate_graphs(*graphs, threshold=False)
    g = get_intersection(g)
    g = cull_isolated_leaves(g)

    fig, axs = get_mosaic([[0]], scale=6)
    # The return value of the plotting helper is not used here
    plot_individual_edge_comparison(g, sample_ids, ax=axs[0])
    # Lay out this figure explicitly instead of relying on pyplot's notion of
    # the "current" figure
    fig.tight_layout()
    fig.savefig(f'../plots/edge_comparison_{column}.pdf', format='pdf', transparent=True, backend='cairo')
    # Release the figure once it has been written
    plt.close(fig)

Removing duplicate edges...


100%|████████████████████████████████████████████████████████████████████████████| 1940/1940 [00:00<00:00, 32531.14it/s]
  plt.tight_layout()


Removing duplicate edges...


100%|████████████████████████████████████████████████████████████████████████████| 1940/1940 [00:00<00:00, 30745.38it/s]
  plt.tight_layout()


### Characteristic Curves

In [7]:
# Parameters
contrast_name = 'c06x'
contrast = get_contrast(contrast_name)

# One two-panel figure per column, saved to
# ../plots/characteristic_curve_<contrast name>_<column>.pdf
# NOTE(review): the figures are left open on purpose - the legend cell calls
# plt.clf() on whichever figure is current, so closing them here would change
# the figure the legend is drawn on.
for column in (column_diff, column_data):
    # Compute the contrast summary for this column
    df_subgroup = compute_contrast_summary(contrast, column=column)

    # Plot: default curve without legend on the first panel, curve sorted by
    # the 'Population' subgroup on the second
    fig, axs = get_mosaic([[0, 1]], scale=9)
    first_panel = axs[0]
    second_panel = axs[1]
    plot_contrast_curve(df_subgroup, ax=first_panel, legend=False)
    plot_contrast_curve(df_subgroup, sorting_subgroup='Population', ax=second_panel)

    # tight_layout is not applied to these figures (the call was disabled)
    output_path = f'../plots/characteristic_curve_{contrast_name}_{column}.pdf'
    fig.savefig(output_path, format='pdf', transparent=True, backend='cairo')

Removing duplicate edges...


100%|█████████████████████████████████████████████████████████████████████████| 187003/187003 [02:29<00:00, 1250.90it/s]


Filtered from 3703 vertices and 50922 edges to 823 vertices and 5699 edges via common edge filtering.
Removing duplicate edges...


100%|███████████████████████████████████████████████████████████████████████████| 63192/63192 [00:23<00:00, 2655.39it/s]


Filtered from 2498 vertices and 23940 edges to 473 vertices and 2955 edges via common edge filtering.
Collecting edges...


100%|█████████████████████████████████████████████████████████████████████████████| 5699/5699 [00:01<00:00, 3676.23it/s]


Collecting edges...


100%|█████████████████████████████████████████████████████████████████████████████| 2955/2955 [00:00<00:00, 6296.91it/s]


Removing duplicate edges...


100%|█████████████████████████████████████████████████████████████████████████| 250195/250195 [03:47<00:00, 1098.07it/s]


Filtered from 4002 vertices and 62619 edges to 801 vertices and 5752 edges via common edge filtering.
Collecting edges...


100%|█████████████████████████████████████████████████████████████████████████████| 5752/5752 [00:01<00:00, 2887.28it/s]


Removing duplicate edges...


100%|█████████████████████████████████████████████████████████████████████████| 187003/187003 [02:30<00:00, 1244.23it/s]


Filtered from 3703 vertices and 50922 edges to 823 vertices and 5699 edges via common edge filtering.
Removing duplicate edges...


100%|███████████████████████████████████████████████████████████████████████████| 63192/63192 [00:23<00:00, 2736.88it/s]


Filtered from 2498 vertices and 23940 edges to 473 vertices and 2955 edges via common edge filtering.
Collecting edges...


100%|█████████████████████████████████████████████████████████████████████████████| 5699/5699 [00:02<00:00, 2807.05it/s]


Collecting edges...


100%|█████████████████████████████████████████████████████████████████████████████| 2955/2955 [00:00<00:00, 8061.68it/s]


Removing duplicate edges...


100%|█████████████████████████████████████████████████████████████████████████| 250195/250195 [03:49<00:00, 1090.14it/s]


Filtered from 4002 vertices and 62619 edges to 801 vertices and 5752 edges via common edge filtering.
Collecting edges...


100%|█████████████████████████████████████████████████████████████████████████████| 5752/5752 [00:01<00:00, 2978.27it/s]


### Coex Individual Trio Comparison

In [8]:
# # Choose three graphs
# graphs = coex_g_individuals[:3]
# graphs_subject_ids = individual_subject_ids[:3]

# # Create figure
# fig, axs = get_mosaic([list(range(len(graphs)+1))], scale=9)

# # Compute edge summaries
# df, concatenated_graph = compute_edge_summary(graphs=graphs, subject_ids=graphs_subject_ids)

# # Show individual graph comparisons
# plot_graph_comparison(graphs, axs=axs, subject_ids=graphs_subject_ids)

# # Show edge summary
# plot_edge_summary(graphs, df=df, ax=axs[len(graphs)], subject_ids=graphs_subject_ids)

# # Save figure
# plt.tight_layout()
# fig.savefig(f'../plots/CoexIndividualTrioComparison.pdf', format='pdf', transparent=True, backend='cairo')

### Individual Trio Comparison

In [9]:
# # Choose three graphs
# graphs = data_g_individuals[:3]
# graphs_subject_ids = individual_subject_ids[:3]

# # Create figure
# plt.clf()
# fig, axs = get_mosaic([list(range(len(graphs)+1))], scale=9)

# # Compute edge summaries
# df, concatenated_graph = compute_edge_summary(graphs=graphs, subject_ids=graphs_subject_ids)

# # Show individual graph comparisons
# plot_graph_comparison(graphs, axs=axs, subject_ids=graphs_subject_ids)

# # Show edge summary
# plot_edge_summary(graphs, df=df, ax=axs[len(graphs)], subject_ids=graphs_subject_ids)

# # Save figure
# plt.tight_layout()
# fig.savefig(f'../plots/IndividualTrioComparison.pdf', format='pdf', transparent=True, backend='cairo')

### Aggregate Trio Comparison

In [10]:
# # Parameters
# contrast = 'c01x'
# column = column_diff

# # Create figure
# fig, axs = get_mosaic([list(range(len(get_contrast(contrast))+1))], scale=9)

# # Compute aggregate edge summaries
# contrast_group = compute_aggregate_edge_summary(get_contrast(contrast), column=column_diff)

# # Plot graph comparison
# plot_graph_comparison(
#     graphs=[v for k, v in contrast_group[0].items()],
#     subject_ids=[k for k, v in contrast_group[1].items()],
#     axs=[axs[i] for i in range(len(get_contrast(contrast)))])

# # Plot edge summary for subgroups
# plot_aggregate_edge_summary(ax=axs[len(get_contrast(contrast))], contrast=contrast_group)

# # Save figure
# plt.tight_layout()
# fig.savefig(f'../plots/AggregateTrioComparison.pdf', format='pdf', transparent=True, backend='cairo')

##### Linkage Analysis

In [11]:
# # Record edge instances
# # df = pd.DataFrame(columns=['Edge', 'Subgroup', 'Count'])
# df = {k: [] for k in ['Edge', 'Subgroup', 'Count']}
# for subgroup in contrast_group[0]:
#     g = contrast_group[0][subgroup]
#     for e in tqdm(g.edges(), total=g.num_edges()):
#         coefs = g.ep.coefs[e]
#         row = [get_edge_string(g, e), subgroup, sum([c!=0 for c in coefs])]
#         # df.loc[df.shape[0]] = row  # Slow
#         for k, v in zip(df, row):
#             df[k].append(v)
# df = pd.DataFrame(df)

# # Get edge counts
# count_table = df.pivot(index='Edge', columns='Subgroup', values='Count')
# count_table = count_table.fillna(0)
# # Max scale for fairness
# for subgroup in contrast_group[0]:
#     count_table[subgroup] /= count_table[subgroup].max()
# # Compute differences
# # TODO: REVISE DIFFERENCE METRIC
# count_table['Difference'] = count_table['AD'] - count_table['Control']
# count_table['Range'] = count_table.max(axis=1) - count_table.min(axis=1)

# # Get list of linkages by significance
# open(f'../plots/AggregateTrioComparisonList.txt', 'w').close()
# for i in np.unique(count_table['Difference'])[::-1]:
#     condition = (count_table['Difference'] == i)
#     significant_edges = list(count_table.loc[condition].index)
#     synthetic_genes = np.concatenate([detect_synthetic_vertices_graph(contrast_group[0][subgroup]) for subgroup in contrast_group[0]])
#     try: significant_genes = np.concatenate([e.split('--') for e in significant_edges])
#     except: significant_genes = []
#     significant_genes = np.unique([g for g in significant_genes if g not in synthetic_genes])

#     # Print significant genes
#     if len(significant_genes) > 0:
#         with open(f'../plots/AggregateTrioComparisonList.txt', 'a') as f:
#             print(f'--- {i} ---', file=f)
#             for g in significant_genes:
#                 print(g, file=f)
#             print(file=f)

### Differentially Expressed Edges

In [12]:
# # TODO: Fix nodes cutting off
# # Plot total and subplots for aggregate differences
# for prefix, individuals in zip(('diff', 'data'), (diff_g_individuals, data_g_individuals)):
#     plt.clf()
#     concat = concatenate_graphs(*individuals)
#     concat = get_intersection(concat)
#     concat = cull_isolated_leaves(concat)
#     concat = remove_text_by_centrality(concat)
#     concat = color_by_significance(concat)
#     visualize_graph(concat)
#     plt.gca().axis('off')
#     plt.tight_layout()
#     plt.savefig(f'../plots/{prefix}_concat.pdf', format='pdf', transparent=True, backend='cairo')

#     # Show all subsets of graph by cell type
#     for v_name in detect_synthetic_vertices_graph(concat):
#         plt.clf()
#         subset = subset_by_hub(concat, [v_name])
#         visualize_graph(subset)
#         plt.gca().axis('off')
#         plt.tight_layout()
#         plt.savefig(f'../plots/{prefix}_concat_{v_name}.pdf', format='pdf', transparent=True, backend='cairo')

### Enrichment

In [13]:
# from functions.plotting import plot_enrichment

# # Generate fake enrichment data
# import itertools
# # Get columns
# cell_type = ['OPC', 'Micro', 'Oligo', 'Endo']
# disease = ['Alzheimer\'s Disease', 'Schizophrenia', 'Bipolar Disorder', 'A', 'B', 'C', 'D', 'E', 'F', 'G']
# combined = [val for val in itertools.product(cell_type, disease)]
# cell_type = [val[0] for val in combined]
# disease = [val[1] for val in combined]
# # Get significance
# np.random.seed(42)
# significance = np.exp(-8 * np.random.rand(len(combined)))
# # Combine
# df = pd.DataFrame({'cell_type': cell_type, 'disease': disease, 'significance': significance})
# df = df.loc[df['significance'] < 5e-2]

# # Rename
# df = df.rename(columns={'cell_type': 'Cell Type', 'disease': 'Disease'})
# # Add significance scale
# df['-log10(p)'] = -np.log10(df['significance'])
# plt.clf()
# plot_enrichment(df)
# plt.tight_layout()
# plt.savefig(f'../plots/enrichment.pdf', format='pdf', transparent=True, backend='cairo')

### Legend

In [14]:
# Plot legend
# Clears and reuses pyplot's current figure, so the saved legend inherits the
# size of whichever figure was created last.
plt.clf()
plot_legend()
legend_ax = plt.gca()
legend_ax.axis('off')
plt.tight_layout()
legend_path = f'../plots/legend.pdf'
plt.savefig(legend_path, format='pdf', transparent=True, backend='cairo')

  plt.tight_layout()
