In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell runs, so edits to imported modules are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import graph_tool.all as gt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from functions import (
    combine_graphs,
    compute_graph,
    compute_statistics,
    figure_data_driven,
    figure_diffusion,
    load_graph_by_id,
    plot_sankey,
    simulate_diffusion,
    subset_graph,
)


# Graph-Tool compatibility: draw through the cairo backend
plt.switch_backend('cairo')

# Style: apply the seaborn theme first, then layer the rcParams overrides on
# top of it (the updates run after `sns.set`, so they win on any shared key).
sns.set(palette='Accent', style='white')
# Fonts
plt.rcParams.update({'font.weight': 'normal', 'font.size': 18})
# Axes titles and labels
plt.rcParams.update({'axes.titlesize': 'large', 'axes.labelsize': 'large'})
# Tick labels
plt.rcParams.update({'xtick.labelsize': 'small', 'ytick.labelsize': 'small'})

### Computation

In [3]:
# Load metadata
# NOTE(review): 'grpahs' is spelled this way in the path; it is left untouched
# because it has to match the directory name on disk -- confirm before renaming.
meta = pd.read_csv('../../data/PsychAD_freeze2_personalized_grpahs/syn26527784_latest.csv')

# Gene list: one entry per line of the text file.
with open('../../data/ting/2023-06-26/AD_genes.txt') as f:
    # Strip surrounding whitespace (not just the newline) and drop empty
    # entries, so trailing spaces or whitespace-only lines cannot produce
    # gene names that silently fail to match anything downstream.
    ad_genes = [gene for gene in (line.strip() for line in f) if gene]

# Graph summaries
# nps_WtLossCurValue, nps_MoodDysCurValue, Sex, nps_PsychoAgiCurValue
# TODO: Change `column` to respective heads

# Column selectors passed as `column=` to the helpers below
column_diff, column_data = 'att_D_AD_0_1', 'att_D_no_prior_0'

# Metadata columns each pair of summaries is computed against
coex_diff_compare_phenotype = 'nps_PsychoAgiCurValue'
diff_data_compare_phenotype = 'nps_WtLossCurValue'

# Sample ids used for the per-sample graphs
individual_sample_id, other_sample_id = 'M48247', 'M41496'

# Coexpression vs. `column_diff` summaries, both against the same phenotype
diff_graph_summary_coex = compute_statistics(
    meta,
    'BRAAK_AD',
    coex_diff_compare_phenotype,
    source='coexpression',
)
diff_graph_summary_att = compute_statistics(
    meta,
    'BRAAK_AD',
    coex_diff_compare_phenotype,
    column=column_diff,
)

# `column_diff` vs. `column_data` summaries, both against the same phenotype
diff_graph_summary = compute_statistics(
    meta,
    'BRAAK_AD',
    diff_data_compare_phenotype,
    column=column_diff,
)
data_graph_summary = compute_statistics(
    meta,
    'BRAAK_AD',
    diff_data_compare_phenotype,
    column=column_data,
)

# Graphs

# `column_diff` graph and coexpression graph for the individual sample
diff_g_individual = compute_graph(
    load_graph_by_id(individual_sample_id, column=column_diff)
)
diff_g_individual_coex = compute_graph(
    load_graph_by_id(individual_sample_id, source='coexpression')
)

# Diffusion simulated on the coexpression graph from the AD gene list, then
# subset against the individual's `column_diff` graph
diff_g_individual_diffusion = subset_graph(
    simulate_diffusion(diff_g_individual_coex, ad_genes),
    diff_g_individual,
)

# `column_diff` graph for the comparison sample
diff_g_other = compute_graph(
    load_graph_by_id(other_sample_id, column=column_diff)
)

# `column_data` graphs: individual sample, comparison sample, and the graph
# combined over every SubID listed in the metadata
data_g_individual = compute_graph(
    load_graph_by_id(individual_sample_id, column=column_data)
)
data_g_other = compute_graph(
    load_graph_by_id(other_sample_id, column=column_data)
)
data_g_group = compute_graph(
    combine_graphs(list(meta['SubID']), source='attention', column=column_data)
)

100%|███████████████████████████████████████████████████████████████████████████████████| 24/24 [01:45<00:00,  4.40s/it]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|███████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 29.34it/s]
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
100%|███████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 36.42it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 40.61it/s]
  graph['coef'] = graph.iloc[:, 2:].mean(axis=1)


In [6]:
# Plotting

# Diffusion figure, written out as SVG through the cairo backend
fig = figure_diffusion(
    diff_g_individual_diffusion,
    diff_g_individual,
    diff_g_other,
    diff_graph_summary_coex,
    diff_graph_summary_att,
    meta=meta,
    individual_sample_id=individual_sample_id,
    other_sample_id=other_sample_id,
)
fig.savefig('../plots/diffusion.svg', format='svg', backend='cairo')

# Data-driven figure; the group graph is subset against the individual's graph
# before being handed to the figure helper
fig = figure_data_driven(
    data_g_individual,
    diff_graph_summary,
    data_graph_summary,
    data_g_other,
    subset_graph(data_g_group, data_g_individual),
)
fig.savefig('../plots/data_driven.svg', format='svg', backend='cairo')

### Perform Analysis

In [6]:
# Collect a one-line description of every metadata row that
#   * has a non-missing `nps_MoodDysCurValue`,
#   * has Sex == 'Female' and Ethnicity != 'White', and
#   * has a graph that `load_graph_by_id` can load.
filtered = []
for _, row in meta.iterrows():
    # Cheap metadata checks first, so graphs are only loaded for candidates.
    # Explicit conditions replace `assert`, which is disabled under `python -O`.
    if pd.isna(row['nps_MoodDysCurValue']):
        continue
    if row['Sex'] != 'Female' or row['Ethnicity'] == 'White':
        continue
    try:
        load_graph_by_id(row['SubID'])
    except Exception:
        # No loadable graph for this sample -> skip it. `Exception` (rather
        # than a bare `except`) lets KeyboardInterrupt/SystemExit propagate.
        # NOTE(review): narrow this to whatever load_graph_by_id actually
        # raises for a missing graph once that is confirmed.
        continue
    filtered.append(f'{row["SubID"]} {row["Ethnicity"]} {row["Sex"]},  {row["Age"]}')

# Show the first and third matches; guard the index so a short result list
# does not raise IndexError.
for position in (0, 2):
    if position < len(filtered):
        print(filtered[position])

M48247 Black Female,  95.0
M19050 Hispanic Female,  74.0


In [9]:
# meta = pd.read_csv('../../data/PsychAD_freeze2_personalized_grpahs/syn26527784_latest.csv')
# plot_sankey(
#     meta,
#     (
#         ('Brain_bank','BRAAK_AD'),
#         ('BRAAK_AD','nps_WtLossCurValue'),
#     ),
#     {
#         'Brain_bank': 0,
#         'BRAAK_AD': 1,
#         'nps_WtLossCurValue': 2,
#     })

In [7]:
# # Novel Cluster Analysis
# diff_g_novel = compute_graph(combine_graphs(list(meta['SubID']), source='attention', column=column_diff))
# diff_state_novel = gt.minimize_nested_blockmodel_dl(diff_g_novel)
# vp_clusters = diff_state_novel.get_clabel(0)
# v_ids = [diff_g_novel.vp.ids[v] for v in diff_g_novel.vertices()]
# v_clusters = [vp_clusters[v] for v in diff_g_novel.vertices()]
# clusters = [[v_id for v_id, v_c in zip(v_ids, v_clusters) if v_c == c] for c in np.unique(v_clusters)]

# # Print specific clusters
# diff_state_novel.print_summary()
# for gene in clusters[17]:
#     print(gene)