In [None]:
import sys
import os
import os.path
import logging
import numpy as np
import pandas as pd
import scanpy as sc
import scipy.stats as sps
from anndata import AnnData
import anndata
from collections import defaultdict, OrderedDict
import plotly.express.colors as pxcolors
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

# local to this analysis
from util import adata_filter

# need to stop using
from rubedo.platform.client import dev_instance
from rubedo.platform.target_discovery.filters import create_binary_annotation

# for development
from importlib import reload


# Log format used by this notebook's logger: timestamp followed by the message.
FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(format=FORMAT)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# scanpy session settings: verbosity level, printed header, and figure DPI.
sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(dpi=120)

In [None]:
# Configuration

dataset = 'GSE154659'
variant = 'lin_norm'
# the injury model analysed here; cells of this model are compared against 'Naive'
model = 'ScNT'

# cells with more than this many counts are excluded by both filters below
max_n_counts = 15000

# Filters passed to util.adata_filter: each key is an obs column and each value is
# either a single accepted value, a list of accepted values, or a predicate.
# NOTE(review): exact matching semantics live in util.adata_filter - confirm there.
neuron_filter = {
    'model': [model, 'Naive'],
    'mouse': 'C57',
    'subtype': [
        'SST',
        'NP',
        'PEP1',
        'NF2',
        'NF1',
        'NF3',
        'p_cLTMR2',
        'cLTMR1',
        'PEP2',
    ],
    'n_counts': lambda x: x <= max_n_counts,
}
non_neuron_filter = {
    'model': [model, 'Naive'],
    'mouse': 'C57',
    'subtype': [
        'Schwann',
        'Repair schwann',
        'B cell',
        'Endothelial',
        'Fibroblast',
        'Macrophage',
        'Neutrophil',
        'Repair fibroblast',
        'Satglia',
        'Pericyte',
    ],
    'n_counts': lambda x: x <= max_n_counts,
}


# abs(log2 fold change) must be greater than this
l2fc_thresh = .6


# Directory that exported tables and figures are written to.
# (A previous assignment to a home-directory path was immediately overwritten by
# this one and has been dropped.)
# NOTE(review): absolute, machine-specific path - consider making it configurable.
output_dir = '/bigdata/adhoc/drg'

# per cell gene signatures to assess
# NOTE(review): a leading '-' presumably marks a gene expected to be absent/low;
# confirm against signatures.assess_signatures.
gene_signatures = [
    'CDKN2A,-LMNB1,-TOP2A',
    'CDKN1A,CDKN2A,-LMNB1,-TOP2A',
    'CDKN1A,-LMNB1,-TOP2A',
    'CDKN2A,-TOP2A',
]

# Coarse cell-type groups: maps a group name to the subtypes it contains.
# Used wherever per-subtype results are rolled up into larger groups.
cell_groups = {
    'neuron': [
        'SST', 'NP', 'PEP1',
        'NF2', 'NF1', 'NF3',
        'p_cLTMR2', 'cLTMR1', 'PEP2',
    ],
    'glia': [
        'Schwann', 'Repair schwann', 'Satglia',
    ],
    'other': [
        'B cell', 'Endothelial', 'Fibroblast',
        'Macrophage', 'Neutrophil', 'Repair fibroblast',
        'Pericyte',
    ],
}



In [None]:
# Load Dataset
# Fetch the configured dataset/variant through the rubedo platform client.
client = dev_instance()
broker = client.get_databroker(dataset)
adata = broker.load_variant(variant)
# NOTE(review): T is not referenced again in the visible notebook.
T = broker.get_taxonomy('ward')

# display a summary of the loaded object
adata

In [None]:
# Development helper: (re)import the local analysis modules so that edits to them
# are picked up without restarting the kernel, then bind the helpers used below
# to short names. The bindings must be refreshed after every reload because the
# old function objects belong to the pre-reload module state.
import util
reload(util)
import plotting
reload(plotting)
import de
reload(de)
import scoring
reload(scoring)
import signatures
reload(signatures)
adata_filter = util.adata_filter
adata_filter_mask = util.adata_filter_mask
differential_expression = de.differential_expression
flag_de = de.flag_de
get_de_genes = de.get_de_genes
perform_de = de.perform_de
# bound here as well so this cell exposes the same names as the later reload cell
summarize_de_genes = de.summarize_de_genes
dotplot = plotting.dotplot
plot_de_genes = plotting.plot_de_genes
plot_score_heatmap = plotting.plot_score_heatmap
score_within_key = scoring.score_within_key
assess_signatures = signatures.assess_signatures

In [None]:
# Split the dataset into neurons and non-neurons using the filters configured above.
# Each subset contains only the configured model (plus Naive) and cell types;
# .copy() is called on each filtered result so later obs columns are added to the copies.
adata_neuron = adata_filter(adata, neuron_filter).copy()
adata_non_neuron = adata_filter(adata, non_neuron_filter).copy()
adata_neuron


In [None]:
# summary of the non-neuron subset
adata_non_neuron

In [None]:

# add some categorizations here to help group cells
# label: A combination of model and time-point (Naive cells keep their model name)
# label_subtype: a combination of subtype and time-point ('<subtype>_0000_Naive' for Naive cells)

def _add_timepoint_labels(ad, treated_model):
    """Add 'label' and 'label_subtype' columns to ``ad.obs`` in place.

    Cells whose obs 'model' equals ``treated_model`` are labelled with the
    zero-padded hour; all other cells keep their own model name as 'label'
    and get a '<subtype>_0000_Naive' 'label_subtype'.
    """
    models = ad.obs['model']
    hours = ad.obs['hour']
    subtypes = ad.obs['subtype']
    ad.obs['label'] = [
        f'{treated_model}_{int(hour):04}h' if cmodel == treated_model else cmodel
        for (cmodel, hour) in zip(models, hours)
    ]
    ad.obs['label_subtype'] = [
        f'{subtype}_{int(hour):04}h' if cmodel == treated_model else f'{subtype}_{0:04}_Naive'
        for (cmodel, subtype, hour) in zip(models, subtypes, hours)
    ]


# Both subsets go through the same helper. Previously the non-neuron 'label' used
# the configured model name for non-model cells, so Naive cells were labelled 'ScNT'.
for _ad in (adata_neuron, adata_non_neuron):
    _add_timepoint_labels(_ad, model)

adata_neuron.obs['label_subtype'].unique()

In [None]:
# unique model/time-point labels present in the neuron subset
adata_neuron.obs['label'].unique()

In [None]:
# unique subtype/time-point labels present in the neuron subset
adata_neuron.obs['label_subtype'].unique()

In [None]:
# create an additional label that includes whether a cell has nonzero expression of ATF3

def atf3_label(l, a):
    """Return label ``l`` suffixed with the cell's ATF3 status.

    'Naive' is returned unchanged; any other label gets '_ATF3+' when ``a``
    is truthy and '_ATF3-' otherwise.
    """
    if l == 'Naive':
        return l
    return f'{l}_ATF3+' if a else f'{l}_ATF3-'
# per-cell mask from adata_filter_mask for the filter ATF3 > 0
atf3_expressed = adata_filter_mask(adata_neuron, {'ATF3': {'gt': 0.0}})
adata_neuron.obs['atf3_label'] = [
    atf3_label(cell_label, is_pos)
    for (cell_label, is_pos) in zip(adata_neuron.obs['label'], atf3_expressed)
]
adata_neuron.obs['atf3_label'].unique()

In [None]:
# create differential expression contrasts
# for each of label, atf3_label, and label_subtype, create a comparison between that
# grouping of cells and a grouping of Naive cells.
# In the resulting 'comparisons' dictionary:
# key is: "Foreground:Background" where Foreground and Background are the names of the
#   groups of cells, for example "ScNT_1440h:Naive"
# value is: a tuple of filter dictionaries (each composed of {"obs_key": "required_value"}
#   as accepted by the adata_filter_mask() function), one for foreground cells and one
#   for background cells

naive = None
naive_subtype = None
modeled = {}
modeled_subtype = {}
modeled_atf3 = {}

# Only the distinct label combinations matter, so iterate over de-duplicated rows
# (first occurrences, original order) instead of every cell. The loop variables are
# deliberately NOT called `model` / `atf3_label`: those names are the configured
# model and the atf3_label() function defined in earlier cells.
_contrast_cols = ['model', 'hour', 'label', 'label_subtype', 'atf3_label']
for _, _row in adata_neuron.obs[_contrast_cols].drop_duplicates().iterrows():
    cell_label = _row['label']
    cell_label_subtype = _row['label_subtype']
    cell_atf3_label = _row['atf3_label']

    if _row['model'] == 'Naive':
        # keep the first Naive label / label_subtype encountered
        if naive is None:
            naive = {'label': cell_label}
        if naive_subtype is None:
            naive_subtype = {'label_subtype': cell_label_subtype}
    else:
        # remember the hour alongside each filter so the contrasts can be ordered by time
        cell_hour = int(_row['hour'])
        modeled.setdefault(cell_label, (cell_hour, {'label': cell_label}))
        modeled_subtype.setdefault(cell_label_subtype, (cell_hour, {'label_subtype': cell_label_subtype}))
        modeled_atf3.setdefault(cell_atf3_label, (cell_hour, {'atf3_label': cell_atf3_label}))

if naive is None or naive_subtype is None:
    raise ValueError("adata_neuron contains no 'Naive' cells to use as the contrast background")

# order each set of foreground filters by hour (stable for equal hours)
modeled = [y[1] for y in sorted(modeled.values(), key=lambda x: x[0])]
modeled_atf3 = [y[1] for y in sorted(modeled_atf3.values(), key=lambda x: x[0])]
modeled_subtype = [y[1] for y in sorted(modeled_subtype.values(), key=lambda x: x[0])]

# NOTE(review): naive_subtype is the label_subtype of the FIRST Naive row only, so every
# subtype contrast uses that single subtype's Naive cells as background (hence keys such
# as 'NF3_1440h:SST_0000_Naive' used in later cells). If each subtype should be compared
# with its own Naive cells, this and the cells indexing by those keys need to change together.
comparisons = {}
for _fg_filters, _bg_filter in (
    (modeled, naive),
    (modeled_atf3, naive),
    (modeled_subtype, naive_subtype),
):
    _bg_name = list(_bg_filter.values())[0]
    for _fg_filter in _fg_filters:
        _fg_name = list(_fg_filter.values())[0]
        comparisons[f'{_fg_name}:{_bg_name}'] = (_fg_filter, _bg_filter)

comparisons
    

In [None]:
# compute differential expression for neurons based on the above configured contrasts
# differential_expression takes the dictionary of contrast_name -> contrast filters
# and returns a dictionary of contrast_name -> pandas.DataFrame,
# each DataFrame containing the per-gene differential expression statistics
des_neuron = differential_expression(adata_neuron, comparisons)
# NOTE(review): this key pairs NF3 cells with the SST Naive background - confirm intended.
des_neuron['NF3_1440h:SST_0000_Naive']

In [None]:
# use the flag_de function to add a boolean 'is-de' column to each DataFrame
# log2fc_thresh: abs(log2fc) must be greater than this
# p_column: which computed statistic should be used as a significance threshold
# p_thresh: the value of the p_column must be <= p_thresh
de.flag_de(des_neuron, log2fc_thresh=l2fc_thresh, p_column='ranksums-fdr-p', p_thresh=.05)

# show an example differential expression table, restricted to flagged genes
det = des_neuron['NF3_1440h:SST_0000_Naive']
det[det['is-de']]
    

In [None]:
# Partition the DE tables into three analysis groups by contrast name:
#   atf3          - name mentions 'ATF3'
#   label         - background is ':Naive' (and no 'ATF3')
#   label_subtype - everything else
des_neuron_label = {}
des_neuron_label_subtype = {}
des_neuron_atf3 = {}
for _contrast, _table in des_neuron.items():
    if 'ATF3' in _contrast:
        des_neuron_atf3[_contrast] = _table
    elif _contrast.endswith(':Naive'):
        des_neuron_label[_contrast] = _table
    else:
        des_neuron_label_subtype[_contrast] = _table



In [None]:
# plot differentially expressed genes from the included gene sets, grouped by 'label'
# shows only genes that are differentially expressed in at least one of the differential expression tables
# NOTE(review): the geneset names are resolved inside plotting.plot_de_genes - not visible here.
plot_de_genes(
    adata_neuron, 
    'label', 
    des_neuron_label, 
    genesets={'senmayo': 'senmayo', 'sasp_review': 'sasp_review'}, 
    genes=['CDKN1A', 'CDKN2A', 'ATF3', 'IL6', 'IL1B'],
    sort_genes_by='expr',
    smallest_dot=8.,
    dot_min=.0,
)

In [None]:
# select from the senmayo geneset (plus CDKN1A/CDKN2A) only the genes that have some kind
# of differential expression in any of the des_neuron contrasts
senmayo_de_genes = get_de_genes(des_neuron, genes=['CDKN1A', 'CDKN2A'], genesets={'senmayo': 'senmayo'})
print(f'SenMayo DE Genes (at any timepoint): {senmayo_de_genes}')

# using this subset of senmayo genes, score each single cell for its expression of genes in the gene-set
# this is normalized within each neuron subtype; the scores are read back from obs['senmayo_score'] below
score_within_key(
    adata_neuron, 
    list(senmayo_de_genes),
    'subtype',
    'senmayo_score',
)
    
        
    

In [None]:
# plot heatmap of senmayo scores across model timepoints and neuron subtypes
plot_score_heatmap(adata_neuron, 'subtype', 'hour', 'senmayo_score', y_as='int')

In [None]:
# create a new AnnData object whose only "gene" is the normalized senmayo score,
# carrying over the full obs table so the contrast filters still apply
adata_neuron_scores = AnnData(adata_neuron.obs[['senmayo_score']])
adata_neuron_scores.obs = adata_neuron.obs
adata_neuron_scores


In [None]:
# compute "differential expression" of senmayo scores in order to generate statistical significance values for changes in score
des_neuron_scores = differential_expression(adata_neuron_scores, comparisons, tests=['ranksums', 'ttest'])

In [None]:
# example: score statistics for the 24h time-point versus Naive
det = des_neuron_scores['ScNT_0024h:Naive']
det

In [None]:
# Collect the per-subtype score statistics into one table.
# The loop variable is named `table` rather than `de` so that the imported `de`
# module (used as de.flag_de above) is not overwritten by a DataFrame.
rows = []
for contrast, table in des_neuron_scores.items():
    # contrasts against the plain 'Naive' background (label and ATF3 groupings) are skipped
    if contrast.endswith(':Naive'):
        continue
    parts = contrast.split(':')
    foreground = parts[0]
    background = parts[1]
    # foreground looks like '<subtype>_<hour>h'; drop the trailing time-point
    subtype = '_'.join(foreground.split('_')[:-1])
    # the score AnnData has a single variable, so the statistics are in the first row
    first_row = table.iloc[0]
    rows.append([contrast, subtype, foreground, background, first_row['ranksums-p'], first_row['ttest-p']])
score_stats = pd.DataFrame(rows, columns=['contrast', 'subtype', 'foreground', 'background', 'ranksums_p', 'ttest_p'])
score_stats = score_stats.sort_values('contrast').set_index('contrast')
score_stats
    
    

In [None]:
# Development helper (repeat of the earlier reload cell, plus summarize_de_genes):
# re-import the local modules and re-bind the helper names used below.
import util
reload(util)
import plotting
reload(plotting)
import de
reload(de)
import scoring
reload(scoring)
import signatures
reload(signatures)
adata_filter = util.adata_filter
adata_filter_mask = util.adata_filter_mask
differential_expression = de.differential_expression
flag_de = de.flag_de
get_de_genes = de.get_de_genes
perform_de = de.perform_de
summarize_de_genes = de.summarize_de_genes
dotplot = plotting.dotplot
plot_de_genes = plotting.plot_de_genes
plot_score_heatmap = plotting.plot_score_heatmap
score_within_key = scoring.score_within_key
assess_signatures = signatures.assess_signatures

In [None]:
# assess configured gene signatures on all neurons and non-neurons
# data is log1p transformed and scaled to per-gene variance without zero centering

def _log1p_scaled_copy(ad):
    """Return a copy of ``ad`` after sc.pp.log1p and sc.pp.scale(zero_center=False)."""
    scaled = ad.copy()
    sc.pp.log1p(scaled)
    sc.pp.scale(scaled, zero_center=False)
    return scaled


adata_neuron_scaled = _log1p_scaled_copy(adata_neuron)
assess_signatures(adata_neuron_scaled, gene_signatures)

adata_non_neuron_scaled = _log1p_scaled_copy(adata_non_neuron)
assess_signatures(adata_non_neuron_scaled, gene_signatures)

In [None]:
# example raw signature scores ('bi:', 'ad:' and 'ex:' obs columns for one signature)
# NOTE(review): the meaning of the three prefixes is defined in signatures.assess_signatures.
adata_neuron_scaled.obs[['bi:CDKN2A,-LMNB1,-TOP2A', 'ad:CDKN2A,-LMNB1,-TOP2A', 'ex:CDKN2A,-LMNB1,-TOP2A']]

In [None]:
# summary statistics for number and percentages of cells for each cell-type cellgroup for each signature
# cells are considered to be positive for a signature only if they conform to it completely 


# one 'ex:<signature>' obs column per configured signature
sig_columns = [f'ex:{sig}' for sig in gene_signatures]
neuron_signatures = adata_neuron_scaled.obs[['hour', 'subtype'] + sig_columns].copy()
neuron_signatures['compartment'] = 'neuron'
non_neuron_signatures = adata_non_neuron_scaled.obs[['hour', 'subtype'] + sig_columns].copy()
non_neuron_signatures['compartment'] = 'non-neuron'

# stack both compartments into one per-cell table
sig_df = pd.concat([neuron_signatures, non_neuron_signatures])

def getgrp(x):
    """Return the name of the first cell_groups entry containing ``x``, or None."""
    return next((name for name, members in cell_groups.items() if x in members), None)

# coarse group for every cell (None when the subtype is in no group)
sig_df['cellgroup'] = [getgrp(s) for s in sig_df['subtype']]

# Build the named aggregations applied per (hour, cellgroup):
#   total                        - number of cells
#   <sig>_num_cells_positive     - cells whose 'ex:<sig>' value is > 0
#   subtype / compartment        - comma-joined sorted unique values
agg = {}
agg['total'] = pd.NamedAgg(column='hour', aggfunc='count')
for sig in gene_signatures:
    agg[f'{sig}_num_cells_positive'] = pd.NamedAgg(column=f'ex:{sig}', aggfunc=lambda x: np.sum((x > 0.0).astype('int')))
for col in ['subtype', 'compartment']:
    agg[col] = pd.NamedAgg(column=col, aggfunc=lambda x: ','.join(sorted(set(x))))
sig_df = sig_df.groupby(['hour', 'cellgroup']).agg(**agg)
# keep 'hour' and 'cellgroup' both as regular columns AND as the index
sig_df_no_i = sig_df.reset_index()
sig_df_no_i.index = sig_df.index
sig_df = sig_df_no_i
# total number of cells per hour across all cell groups, as {hour: total}
total_per_hour = sig_df[['total']]
total_per_hour.reset_index(inplace=True)
total_per_hour = total_per_hour.groupby('hour').agg({'total': 'sum'}).to_dict()['total']
for sig in gene_signatures:
    # percentage relative to the cells of this (hour, cellgroup) row
    sig_df[f'{sig}_percent_subtype_positive'] = (sig_df[f'{sig}_num_cells_positive'] / sig_df['total']) * 100
    # percentage relative to all cells at this hour
    sig_df[f'{sig}_percent_all_positive'] = [(p / total_per_hour[h]) * 100 for (p, h) in zip(sig_df[f'{sig}_num_cells_positive'], sig_df['hour'])]

# final column order: descriptive columns first, then three columns per signature
first_cols = ['hour', 'cellgroup', 'subtype', 'compartment', 'total']
rest_cols = []
for sig in gene_signatures:
    rest_cols.append(f'{sig}_num_cells_positive')
    rest_cols.append(f'{sig}_percent_subtype_positive')
    rest_cols.append(f'{sig}_percent_all_positive')
# temporary sort keys: integer hour, and a copy of cellgroup under a name that is
# not also an index level
sig_df['hour_int'] = [int(h) for h in sig_df['hour']]
sig_df['cellgroup_cp'] = sig_df['cellgroup']
sig_df = sig_df.sort_values(by=['hour_int', 'cellgroup_cp'])
# selecting the final columns drops the temporary sort keys
sig_df = sig_df[first_cols + rest_cols]
sig_df

In [None]:
# quick timecourse plot of signature positivity (percent of all cells at each hour),
# one line per cell group, for the CDKN2A,-LMNB1,-TOP2A signature
fig = px.line(sig_df, x='hour', y='CDKN2A,-LMNB1,-TOP2A_percent_all_positive', color='cellgroup')
fig.update_layout(height=800, width=1000)

In [None]:

# same plot for the CDKN1A,-LMNB1,-TOP2A signature
fig = px.line(sig_df, x='hour', y='CDKN1A,-LMNB1,-TOP2A_percent_all_positive', color='cellgroup')
fig.update_layout(height=800, width=1000)

In [None]:
# for plotting purposes, order the atf3 labels: Naive first, then all ATF3- labels by
# time-point, then all ATF3+ labels by time-point
atf3_labels = list(adata_neuron.obs['atf3_label'].unique())
def sort_order(atf3_label):
    """Sort key for atf3 labels: 'Naive', then ATF3- by hour, then ATF3+ by hour.

    Labels other than 'Naive' are expected to look like '<model>_<NNNN>h_<ATF3+|ATF3->'.
    Returns 0 for 'Naive', the integer hour for other labels, plus 10000 for
    'ATF3+' labels so they sort after every ATF3- label.

    Raises ValueError if the label does not contain the hour and ATF3 fields.
    """
    if atf3_label == 'Naive':
        return 0
    # split from the right so a model name that itself contains '_' still parses
    _, hour, atf3 = atf3_label.rsplit('_', 2)
    # strip the trailing 'h' from the hour field
    hv = int(hour[:-1])
    if atf3 == 'ATF3+':
        hv += 10000
    return hv
# sort in place with the key above, then display the resulting order
atf3_labels.sort(key=sort_order)
atf3_labels
    

In [None]:
# plot differentially expressed genes from the included gene sets, grouped by atf3_label
# shows only genes that are differentially expressed in at least one of the differential expression tables
# from this you can see that expression of senescence markers and senescence associated genes largely follow ATF3 expression
plot_de_genes(
    adata_neuron, 
    'atf3_label', 
    des_neuron_atf3, 
    genesets={'senmayo': 'senmayo', 'sasp_review': 'sasp_review'}, 
    genes=['CDKN1A', 'CDKN2A', 'ATF3', 'IL6', 'IL1B'],
    sort_genes_by='expr',
    smallest_dot=8.,
    dot_min=.0,
    categories_order=atf3_labels,
)

In [None]:
# summarize the same gene selection over the model-vs-Naive ('label') contrasts
summarize_de_genes(des_neuron_label, genesets={'senmayo': 'senmayo', 'sasp_review': 'sasp_review'}, genes=['CDKN1A', 'CDKN2A', 'ATF3', 'IL6', 'IL1B'])

In [None]:
# Deliberate stop: halts a top-to-bottom run here.
# NOTE(review): the cells below reference names (e.g. rb, qder, adata_nn, plot_gs_genes)
# that are not defined anywhere in the visible notebook - they look like legacy cells.
raise Exception('stop')

In [None]:
# Legacy: flag every cell as ATF3_pos / ATF3_neg by non-zero ATF3 expression and run DE
# between the two groups.
# NOTE(review): `rb` is not imported in the visible notebook.
adata.obs['ATF3_pos'] = ['ATF3_pos' if x > 0.0 else 'ATF3_neg' for x in adata[:, adata.var_names == 'ATF3'].X[:, 0]]
qder_atf3_s = rb.tl.quick_de(adata, groupby='ATF3_pos', gpu=False, is_log=False, log_base=2, fdr=True, statistical_tests=['approx-ranksums'])

In [None]:
# DE table for ATF3_pos vs ATF3_neg, then only the rows flagged 'is-de'
atf3_de = qder_atf3_s.de_table('ATF3_pos', 'ATF3_neg', log2fc=l2fc_thresh, expr_min=0.0, pct_expr=0.0)
atf3_de_sig = atf3_de[atf3_de['is-de']]
atf3_de_sig

In [None]:
# restrict the significant genes to the SenMayo gene list, largest log2fc first
senm_gs = client.get_geneset('SenMayo_genelist', weighted=False)
atf3_de_sig_senmayo = atf3_de_sig.loc[[x in senm_gs for x in atf3_de_sig.index]].sort_values(by='log2fc', ascending=False)
atf3_de_sig_senmayo

In [None]:
# NOTE(review): `adata_log_scaled` is not defined in the visible notebook.
adata.obs['senmayo_de_sc_score'] = adata_log_scaled.obs['senmayo_de_sc_score']
rb.pl.dotplot(adata, ['CDKN1A', 'CDKN2A', 'IL6', 'ATF3', 'senmayo_de_sc_score'] + list(atf3_de_sig_senmayo.index), groupby='ATF3_pos', max_scale='var')
# compact table of fold change and FDR-corrected p-value for the plotted genes
de_show = atf3_de.loc[['CDKN1A', 'CDKN2A', 'IL6', 'ATF3'] + list(atf3_de_sig_senmayo.index)][['log2fc', 'approx-ranksums-fdr-p']]
de_show.columns = ['log2fc', 'ranksums-fdr-p']
de_show.round(2)

In [None]:
# NOTE(review): `plot_gs_genes` and `qder` are not defined in the visible notebook.
plot_gs_genes(
    adata, 
    'label',
    qder, 
    {
        'senmayo': 'SenMayo_genelist', 
        'sasp_factors': 'sasp_review_all',
    }, 
    add_genes=['CDKN1A', 'CDKN2A', 'ATF3', 'IL6', 'IL1B', 'senmayo_de_sc_score'],
)

In [None]:
sc.pl.umap(adata, color='subtype')

In [None]:
# NOTE(review): `rb` is not imported in the visible notebook.
rb.gme.msigdb.get_gene_modules_overview()

In [None]:
# list the C3 gene modules whose name starts with 'ATF'
c3mods = rb.gme.msigdb.get_gene_modules(category='C3')
[k for k in c3mods.keys() if k.startswith('ATF')]

In [None]:
atf3_targets = c3mods['ATF3_Q6']

In [None]:
# DE rows (ATF3_pos vs ATF3_neg) for genes in the ATF3_Q6 module
atf3_targets_de = atf3_de[[x in atf3_targets for x in atf3_de.index]]
atf3_targets_de

In [None]:
# keep rows passing log2fc > .5 and approx-ranksums-p < .05
atf3_targets_de_sig = atf3_targets_de[create_binary_annotation(atf3_targets_de, {'log2fc': {'gt': .5}, 'approx-ranksums-p': {'lt': .05}})]
atf3_targets_de_sig

In [None]:
rb.pl.dotplot(adata, list(atf3_targets_de_sig.index), groupby='ATF3_pos', max_scale='var')
atf3_targets_de_sig[['log2fc', 'approx-ranksums-p', 'approx-ranksums-fdr-p']]

In [None]:
senmayo = client.get_geneset('SenMayo_genelist')
senmayo

In [None]:
# genes shared by the SenMayo list and the ATF3_Q6 module
set(senmayo).intersection(set(atf3_targets))

In [None]:
# NOTE(review): this rebinds `signatures`, which earlier cells import as a module
# (signatures.assess_signatures); re-running those cells after this one would fail
# unless the module is re-imported first.
signatures = [
    'ex:CDKN2A,-LMNB1,-TOP2A',
    'ex:CDKN1A,CDKN2A,-LMNB1,-TOP2A',
    'ex:CDKN1A,-LMNB1,-TOP2A',
    'ex:CDKN2A,-TOP2A',
]
# NOTE(review): `rb` and `adata_nn` are not defined in the visible notebook.
for s in signatures:
    rb.tl.signature_assess(adata, s)
    rb.tl.signature_assess(adata_nn, s)

In [None]:

# Legacy per-subtype / per-hour signature positivity table.
# NOTE(review): `adata_nn` is not defined in the visible notebook.
rows = []
extra_cols = []
# NOTE(review): total_total is not used below.
total_total = adata.shape[0] + adata_nn.shape[0]
# first pass: total number of cells per hour across both compartments
total_per_hour = defaultdict(int)
for (adt, compartment) in [(adata, 'neuronal'), (adata_nn, 'non-neuronal')]:
    for hour in list(sorted(adt.obs['hour'].unique())):
        adata_h = adt[adt.obs['hour'] == hour]
        total_per_hour[hour] += adata_h.shape[0]
    
# second pass: one row per (subtype, hour) with, for each signature, the number of
# positive cells, percent of that subtype/hour, and percent of all cells at that hour
for (adt, compartment) in [(adata, 'neuronal'), (adata_nn, 'non-neuronal')]:
    for subtype in list(sorted(adt.obs['subtype'].unique())):
        adata_s = adt[adt.obs['subtype'] == subtype]
        for hour in list(sorted(adata_s.obs['hour'].unique())):
            adata_h = adata_s[adata_s.obs['hour'] == hour]
            total = adata_h.shape[0]
            row = [subtype, compartment, hour, total]
            for s in signatures:
                npos = (adata_h.obs[s] > 0).sum()
                npct = (npos / total) * 100
                npctall = (npos / total_per_hour[hour]) * 100
                row.append(npos)
                row.append(npct)
                row.append(npctall)
                poscol = f'{s}_num_cells_positive'
                pctcol = f'{s}_percent_subtype_positive'
                pctallcol = f'{s}_percent_all_positive'
                # column names are registered once, in the same order values are appended
                for col in [poscol, pctcol, pctallcol]:
                    if col not in extra_cols:
                        extra_cols.append(col)
            rows.append(row)
signature = pd.DataFrame(rows, columns=['subtype', 'compartment', 'hour', 'total'] + extra_cols)
signature
    

In [None]:
# spot-check the rows of one subtype
signature[signature['subtype'] == 'NF2']

In [None]:
# integer hour for sorting, then a '<subtype>_<hour>' key used as both column and index
signature['hour_int'] = [int(x) for x in signature['hour']]
signature = signature.sort_values(by=['subtype', 'hour_int'])
# NOTE: '{h:4}' pads the hour with spaces to width 4 (not zeros, unlike the ':04' labels above)
signature['subtype_hour'] = [f'{s}_{h:4}' for s, h in zip(signature['subtype'], signature['hour_int'])]
signature.index = [f'{s}_{h:4}' for s, h in zip(signature['subtype'], signature['hour_int'])]
signature

In [None]:
# rank of each distinct hour (0, 1, 2, ...) so time-points can be plotted evenly spaced
hours = sorted(signature['hour_int'].unique())
hmap = {hour_value: position for position, hour_value in enumerate(hours)}
signature['hour_idx'] = [hmap[h] for h in signature['hour_int']]
    


In [None]:
def plot_sig(col, colname=None, color='subtype', symbol='compartment', layout_kwargs=None):
    """Line plot of column ``col`` of the global ``signature`` table against 'hour_int'.

    col: column of ``signature`` plotted on the y axis.
    colname: display name used in the title and y-axis label (defaults to ``col``).
    color, symbol: ``signature`` columns mapped to line colour and marker symbol.
    layout_kwargs: optional extra keyword arguments for ``fig.update_layout``.

    Returns the plotly figure.
    """
    colname = colname or col
    fig = px.line(signature, x='hour_int', y=col, color=color, symbol=symbol)
    fig.update_layout(title=f'Per Subtype {colname} Timecourse')
    fig.update_yaxes(title=f'{colname}')
    if layout_kwargs:
        fig.update_layout(**layout_kwargs)
    return fig
# CDKN2A signature: positive cells as a percentage of each subtype
plot_sig('ex:CDKN2A,-LMNB1,-TOP2A_percent_subtype_positive', 'CDKN2A,-LMNB1,-TOP2A Percent of Subtype', layout_kwargs=dict(height=800, width=1200))

In [None]:
# CDKN2A signature: positive cells as a percentage of all cells at that hour
plot_sig('ex:CDKN2A,-LMNB1,-TOP2A_percent_all_positive', 'CDKN2A,-LMNB1,-TOP2A Percent of All Cells', layout_kwargs=dict(height=800, width=1200))

In [None]:
# CDKN1A signature: positive cells as a percentage of all cells at that hour
plot_sig('ex:CDKN1A,-LMNB1,-TOP2A_percent_all_positive', 'CDKN1A,-LMNB1,-TOP2A Percent of All Cells', layout_kwargs=dict(height=800, width=1200))

## Figure 1

In [None]:
# NOTE(review): this definition has the same body as plot_sig, is not called in this
# cell (the call below uses plot_sig), and is replaced by the plot_sig_facet defined
# in the next cell.
def plot_sig_facet(col, colname=None, color='subtype', symbol='compartment', layout_kwargs=None):
    """Line plot of ``signature[col]`` against 'hour_int'; returns the plotly figure."""
    if not colname:
        colname = col
    fig = px.line(
        signature,
        x='hour_int',
        y=col,
        color=color,
        symbol=symbol,
    )
    fig.update_layout(dict(
        title=f'Per Subtype {colname} Timecourse',
    ))
    fig.update_yaxes(dict(
        title=f'{colname}',
    ))
    if layout_kwargs:
        fig.update_layout(**layout_kwargs)
    return fig
plot_sig('ex:CDKN2A,-LMNB1,-TOP2A_percent_all_positive', 'CDKN2A,-LMNB1,-TOP2A positive cells per subtype<br>(as percentage of all cells)', layout_kwargs=dict(height=800, width=1200, font=dict(family='arial')))


In [None]:
def plot_sig_facet(col, colname=None, color='cellgroup', layout_kwargs=None):
    """Grouped bar chart of ``col`` summed per time-point and coarse cell group.

    Adds a 'cellgroup' column to the global ``signature`` table (side effect), sums
    the table per ('hour', 'hour_idx', 'hour_int', 'cellgroup') and plots ``col``
    against 'hour_idx', labelling the x ticks with the values of the global ``hours``.

    col: column of the summed table plotted on the y axis.
    colname: display name for the title and y-axis label (defaults to ``col``).
    color: column mapped to bar colour.
    layout_kwargs: optional extra keyword arguments for ``fig.update_layout``.

    Returns (figure, summed DataFrame).
    """
    colname = colname or col

    groups = {
        'neuron': [
            'SST', 'NP', 'PEP1',
            'NF2', 'NF1', 'NF3',
            'p_cLTMR2', 'cLTMR1', 'PEP2',
        ],
        'glia': [
            'Schwann', 'Repair schwann', 'Satglia',
        ],
        'other': [
            'B cell', 'Endothelial', 'Fibroblast',
            'Macrophage', 'Neutrophil', 'Repair fibroblast',
            'Pericyte',
        ],
    }
    # reverse lookup subtype -> group name; every subtype is listed in exactly one group
    group_of = {subtype: name for name, members in groups.items() for subtype in members}

    # subtypes outside every group get None
    signature['cellgroup'] = [group_of.get(x) for x in signature['subtype']]

    grouped = signature.groupby(['hour', 'hour_idx', 'hour_int', 'cellgroup']).sum()
    # expose the group keys as columns while keeping them as the index too
    sums = grouped.reset_index()
    sums.index = grouped.index

    fig = px.bar(
        sums,
        x='hour_idx',
        y=col,
        color=color,
        color_discrete_sequence=px.colors.qualitative.Dark24,
        barmode='group',
    )
    fig.update_layout(title=f'Per Subtype {colname} Timecourse')
    fig.update_yaxes(title=f'{colname}')
    # bars are positioned by hour rank; show the actual hour values as tick labels
    fig.update_xaxes(
        title='hours',
        tickmode='array',
        tickvals=list(range(len(hours))),
        ticktext=[f'{h}' for h in hours],
    )
    if layout_kwargs:
        fig.update_layout(**layout_kwargs)
    return fig, sums
# Figure 1: CDKN2A signature positivity per cell group (percent of all cells at each hour)
fig, sums = plot_sig_facet(
    'ex:CDKN2A,-LMNB1,-TOP2A_percent_all_positive', 
    'CDKN2A,-LMNB1,-TOP2A positive cells per subtype<br>(as percentage of all cells)', 
    layout_kwargs=dict(
        height=600, 
        width=1200, 
        font=dict(family='arial'),
        margin=dict(t=100),
        #yaxis2_title=None,
    ),
)
fig.show()


In [None]:
# the summed table behind the figure
sums

In [None]:
# export the figure data next to the other outputs
sums.to_excel(os.path.join(output_dir, 'sen_signatures_data.xlsx'))



In [None]:
# have scanpy save figures into output_dir as 300-dpi PNGs
sc.settings.figdir = output_dir
sc.set_figure_params(dpi_save=300, format='png')

In [None]:
# NOTE(review): `plot_gs_genes` and `qder` are not defined in the visible notebook.
plot_gs_genes(
    adata, 
    'label',
    qder, 
    {
        'senmayo': 'SenMayo_genelist', 
        #'sensig': 'sensig_g1', 
        #'sasp_factors': 'sasp_review_all',
    }, 
    add_genes=['CDKN1A', 'CDKN2A', 'ATF3', 'IL6', 'IL1B'],
)

In [None]:
# same plot with dot-size options, saved as 'renthal_dotplot.png'
plot_gs_genes(
    adata, 
    'label',
    qder, 
    {
        'senmayo': 'SenMayo_genelist', 
        #'sensig': 'sensig_g1', 
        #'sasp_factors': 'sasp_review_all',
    }, 
    add_genes=['CDKN1A', 'CDKN2A', 'ATF3', 'IL6', 'IL1B'],
    smallest_dot=8.,
    dot_min=.0,
    save='renthal_dotplot.png',
)

In [None]:
# same call with return_de=True; the two returned values (gk, des) feed
# detable_from_gs_genes below
gk, des = plot_gs_genes(
    adata, 
    'label',
    qder, 
    {
        'senmayo': 'SenMayo_genelist', 
        #'sensig': 'sensig_g1', 
        #'sasp_factors': 'sasp_review_all',
    }, 
    add_genes=['CDKN1A', 'CDKN2A', 'ATF3', 'IL6', 'IL1B'],
    smallest_dot=8.,
    dot_min=.0,
    return_de=True
)

In [None]:
def detable_from_gs_genes(gk, des):
    """Flatten per-cell-group DE tables into one long table for the plotted genes.

    gk: mapping of gene-group name -> iterable of gene names.
    des: mapping of cell-group name -> DataFrame indexed by gene with columns
        'log2fc', 'approx-ranksums-fdr-p' and 'is-de'.

    Returns a DataFrame with one row per (gene, cell group), indexed by gene
    (index name 'index'), with columns 'gene', 'gene_group', 'cell_group',
    'log2fc', 'ranksums-fdr-p' and 'is-de'.
    """
    records = [
        [
            gene,
            gene,
            gene_group,
            cell_group,
            table.loc[gene]['log2fc'],
            table.loc[gene]['approx-ranksums-fdr-p'],
            table.loc[gene]['is-de'],
        ]
        for gene_group, genes in gk.items()
        for gene in genes
        for cell_group, table in des.items()
    ]
    columns = ['index', 'gene', 'gene_group', 'cell_group', 'log2fc', 'ranksums-fdr-p', 'is-de']
    return pd.DataFrame(records, columns=columns).set_index('index')

# build the long-format table behind the dot plot and export it to Excel
dpdf = detable_from_gs_genes(gk, des)
dpdf.to_excel(os.path.join(output_dir, 'renthal_dotplot.xlsx'))
dpdf
        
    

In [None]:
# NOTE(review): `adata_log_scale_type` is not defined in the visible notebook, and this
# call passes different arguments from the plot_score_heatmap call earlier in the
# notebook - confirm which signature the current plotting module expects.
fig = plot_score_heatmap(
    adata_log_scale_type, 
    'senmayo_de_sc_score', 
    norm_to_zero=False, 
    title='SenMayo DE Genes Score (neuronal)<br>Normalized to Subtype', 
    xname='Neuronal Subtype',
)
fig.update_layout(font=dict(family='arial', size=34))
# export the heatmap as both SVG and PNG
pio.write_image(fig, os.path.join(output_dir, 'senmayo_neuron_heatmap.svg'), scale=6, width=1080, height=1080)
pio.write_image(fig, os.path.join(output_dir, 'senmayo_neuron_heatmap.png'), scale=6, width=1080, height=1080)

fig.show()

In [None]:
# same call with return_df=True to get the table behind the heatmap
df = plot_score_heatmap(
    adata_log_scale_type, 
    'senmayo_de_sc_score', 
    norm_to_zero=False, 
    title='SenMayo DE Genes Score (neuronal)<br>Normalized to Subtype', 
    xname='Neuronal Subtype',
    return_df=True
)


df

In [None]:
# NOTE(review): 'label_subtype' is added to adata_neuron / adata_non_neuron above, not to
# `adata`, in the visible notebook.
list(adata.obs['label_subtype'].unique())

In [None]:
# Per-subtype significance of the SenMayo score at each time-point versus that
# subtype's own Naive cells (rank-sum test and t-test), exported to Excel.
# Uses `sps` (scipy.stats as imported at the top of the notebook); the bare name
# `scipy` is not bound by that import.
rows = []
for subtype in adata.obs['subtype'].unique():
    adata_s = adata[adata.obs['subtype'] == subtype]
    timepoints = list(adata_s.obs['label_subtype'].unique())
    # named naive_label so the `naive` filter dict built for the contrasts is not overwritten
    naive_label = [t for t in timepoints if t.endswith('Naive')][0]
    others = [t for t in timepoints if t != naive_label]
    adata_n = adata_s[adata_s.obs['label_subtype'] == naive_label]
    scores_n = adata_n.obs['senmayo_de_sc_score']
    for o in others:
        adata_o = adata_s[adata_s.obs['label_subtype'] == o]
        scores_o = adata_o.obs['senmayo_de_sc_score']
        # only the p-values are reported
        _, p = sps.ranksums(scores_o, scores_n)
        ttp = sps.ttest_ind(scores_o, scores_n).pvalue
        rows.append([f'{o}:{naive_label}', subtype, o, naive_label, p, ttp])
heatmap_stats = pd.DataFrame(rows, columns=['contrast', 'subtype', 'foreground', 'background', 'ranksums_p', 'ttest_p'])
heatmap_stats.set_index('contrast', inplace=True)
heatmap_stats = heatmap_stats.sort_values(by=['foreground'])
heatmap_stats.to_excel(os.path.join(output_dir, 'renthal_heatmap_stats.xlsx'))
heatmap_stats
        
        
        

In [None]:

# NOTE(review): 'atf3_label' is added to adata_neuron above, not to `adata`, in the
# visible notebook.
als = list(adata.obs['atf3_label'].unique())
def sort_key_als(l):
    """Sort key for atf3 labels: Naive, then ATF3- by hour, then ATF3+ by hour.

    Returns (0, 0) for 'Naive', (1, hour) for labels containing 'ATF3-' and
    (2, hour) for every other label, where hour is the integer in '_<digits>h_'.

    Raises ValueError if a non-Naive label has no '_<digits>h_' field.
    """
    # imported locally because `re` is not imported at the top of the notebook
    import re

    if l == 'Naive':
        return (0, 0)

    # raw string: '\d' in a plain string literal is an invalid escape sequence
    match = re.match(r'.*?_(\d+)h_.*$', l)
    if match is None:
        raise ValueError(f'cannot parse hour from atf3 label: {l!r}')
    h = int(match.group(1))
    if 'ATF3-' in l:
        return (1, h)
    else:
        return (2, h)

# order the labels Naive, ATF3- by hour, ATF3+ by hour
als = list(sorted(als, key=sort_key_als))
    
# NOTE(review): `plot_gs_genes` and `qder_atf3` are not defined in the visible notebook.
plot_gs_genes(
    adata, 
    'atf3_label',
    qder_atf3, 
    {
        'senmayo': 'SenMayo_genelist', 
        #'sensig': 'sensig_g1', 
        #'sasp_factors': 'sasp_review_all',
    }, 
    add_genes=['CDKN1A', 'CDKN2A', 'ATF3', 'IL6', 'IL1B'],
    smallest_dot=8.,
    dot_min=.0,
    categories_order=als,
    save='renthal_dotplot_atf3_plusminus.png',
)