In [None]:
import getpass
import sys
import os
import os.path
home = os.environ["HOME"]
sys.path.append(f"{home}/bioinformatics/lib")
sys.path.append(f"{home}/bioinformatics/notebooks/analysis/chronic_pain/publishable/src/lib")

# actual libraries
import re
import logging
import numpy as np
import pandas as pd
import scanpy as sc
import scipy.stats as sps
from anndata import AnnData
import anndata
from collections import defaultdict, OrderedDict
import plotly.express.colors as pxcolors
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots

# local to this analysis
import de
import plotting
import scoring
import signatures
import util

# need to stop using
from rubedo.platform.client import dev_instance
from rubedo.platform.target_discovery.filters import create_binary_annotation

# for development
from importlib import reload


# Configure root logging with a timestamp prefix and create this notebook's
# own logger at INFO level.
FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(format=FORMAT)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# scanpy configuration: verbosity level 3, print scanpy's header, and set the
# figure parameters (dpi=120, dpi_save=480).
sc.settings.verbosity = 3
sc.logging.print_header()
sc.settings.set_figure_params(dpi=120, dpi_save=480)

In [None]:
# dataset identifier; the loading cell uses it as both the directory name and
# the .h5ad file stem under ../../build/datasets
dataset = 'GSE155622'

# output locations (relative paths)
figure_dir = '../../build/figures'
sc.settings.figdir = figure_dir  # point scanpy's figure directory setting at figure_dir
supplement_dir = '../../build/supplement'  # the Excel outputs below are written here

# 'cell_type' labels that are treated as neurons. Passed to
# util.adata_filter_mask / util.adata_filter below; presumably the dict maps an
# obs column name to its accepted values -- confirm against util.
neuron_filter = {
    'cell_type': [
        'Mrgprd/Gm7271',
        'Mrgpra3/Mrgprb4',
        'Cldn9',
        'Mrgprd/Lpar3',
        'S100b/Smr2',
        'S100b/Ntrk3/Gfra1',
        'Zcchc12/Sstr2',
        'S100b/Prokr2',
        'Th/Fam19a4',
        'Zcchc12/Dcn',
        'S100b/Baiap2l1',
        'Mrgpra3',
        'Zcchc12/Trpm8',
        'Nppb',
        'S100b/Wnt7a',
        'Zcchc12/Rxfp1',
        'Atf3/Mrgprd',
        'Atf3/Gfra3/Gal',
        'Atf3/S100b/Gal',
        'Atf3/Fam19a4',
        'Neuron',
    ],
}
# 'cell_type' labels for non-neuronal cells. Several spelling variants of the
# same type are listed (e.g. 'Red_blood_cell', 'Red Blood Cell', 'RBC').
# NOTE(review): not referenced by any cell visible in this notebook section.
non_neuron_filter = {
    'cell_type': [
        'Fibroblast',
        'Immune',
        'Red_blood_cell',
        'Satellite',
        'Schwann',
        'VEC',
        'VECC',
        'VSMC',
        'Immune Cell',
        'Red Blood Cell',
        'Satellite Cell',
        'Schwann Cell',
        'RBC'
    ],
}

# config for DEG (differentially expressed gene) selection:
# passed to de.flag_de as log2fc_thresh, i.e. abs(log2fc) must be greater than this
l2fc_thresh = .6


In [None]:
# Load the dataset from the local build directory.
# The commented-out lines are an alternative databroker-based loading path,
# kept for reference; note they use a `variant` name that is not defined in
# the visible cells.
#client = dev_instance()
#broker = client.get_databroker(dataset)
#adata = broker.load_variant(variant)
adata = sc.read_h5ad(os.path.join('../../build/datasets', dataset, f'{dataset}.h5ad'))
adata

In [None]:
# list the author-supplied condition labels before they are cleaned up
adata.obs['condition'].unique()

In [None]:
# clean up the author supplied condition column a little bit

# If this cell is re-run after 'condition' has been overwritten (a later cell
# stores the original labels in 'old_condition'), restore the original labels
# first so the parsing below always sees the author-supplied values.
if 'old_condition' in adata.obs:
    adata.obs['condition'] = adata.obs['old_condition']
def get_hour(x):
    """Parse the timepoint out of a condition label and return it in hours.

    The first run of digits that is immediately followed by 'h' or 'd' is
    used: '<n>h' yields n and '<n>d' yields n * 24. A label without such a
    token returns 0.
    """
    # raw string: '\d' inside a plain string literal is an invalid escape sequence
    m = re.match(r'.*?(\d+)([hd]).*', x)
    if m is None:
        return 0
    value, unit = int(m.group(1)), m.group(2)
    return value if unit == 'h' else value * 24

def h_or_d(h):
    """Format a number of hours as a zero-padded label.

    Values of 24 or more are rendered as whole days ('NNd', fractional days
    truncated); smaller values are rendered as hours ('NNh').
    """
    if h >= 24:
        days = int(h / 24)
        return f'{days:02}d'
    return f'{h:02}h'
        
# per-cell numeric timepoint (hours) and its zero-padded hour/day label
adata.obs['hour'] = list(map(get_hour, adata.obs['condition']))
adata.obs['hourday'] = list(map(h_or_d, adata.obs['hour']))



In [None]:
# keep the old condition annotation, but put in the cleaned up one.
# Only back up once: when this cell is re-run, 'condition' already holds the
# cleaned labels, and copying it again would overwrite the original annotation.
if 'old_condition' not in adata.obs:
    adata.obs['old_condition'] = adata.obs['condition']
# cleaned labels are 'SNI_<hourday>'; labels without a parsed timepoint have
# hour 0 and therefore become 'SNI_00h'
adata.obs['condition'] = [f'SNI_{hd}' for hd in adata.obs['hourday']]
adata.obs['condition'].unique()

In [None]:
# just for confirmation, produce umap of dataset
# Work on a copy so that `adata` itself is left untouched by the steps below,
# whose return values are ignored (they are applied to adata_scaled directly).
# NOTE(review): log1p is applied directly to the loaded matrix -- confirm the
# h5ad holds values that are not already log-transformed.
adata_scaled = adata.copy()
sc.pp.log1p(adata_scaled)
sc.pp.scale(adata_scaled, zero_center=False)
sc.pp.pca(adata_scaled)
sc.pp.neighbors(adata_scaled)
sc.tl.umap(adata_scaled)

In [None]:
# plot a few umaps for confirmation of annotations:
# first, colored by the 'cell_type' obs column
sc.pl.umap(adata_scaled, color='cell_type')

In [None]:
# add a 'compartment' annotation to clearly denote neurons from non-neurons using author supplied cell types included
# with the dataset
# Cells matching neuron_filter are labelled 'neuron'; every other cell is
# labelled 'non-neuron' (non_neuron_filter is not consulted here).
# The mask is computed on adata_scaled and also written to adata; this relies
# on adata_scaled being a copy of adata with the same cell order.
neuron_mask = util.adata_filter_mask(adata_scaled, neuron_filter)
compartment = ['neuron' if m else 'non-neuron' for m in neuron_mask]
adata.obs['compartment'] = compartment
adata_scaled.obs['compartment'] = compartment

In [None]:
# umap colored by the neuron / non-neuron 'compartment' annotation
sc.pl.umap(adata_scaled, color='compartment')

In [None]:
# umap colored by the cleaned-up 'condition' (timepoint) labels
sc.pl.umap(adata_scaled, color='condition')

In [None]:
# run leiden clustering on the scaled copy and color the umap by the
# resulting 'leiden' annotation
sc.tl.leiden(adata_scaled)
sc.pl.umap(adata_scaled, color='leiden')


In [None]:
# restrict the (unscaled) dataset to the cell types listed in neuron_filter;
# all differential expression below is run on this subset
adata_neuron = util.adata_filter(adata, neuron_filter)
adata_neuron

In [None]:
# conditions present in the neuron subset, sorted (sorted() already returns a list)
sorted(adata_neuron.obs['condition'].unique())

In [None]:
# timepoints (in hours) present in the neuron subset, sorted (sorted() already returns a list)
sorted(adata_neuron.obs['hour'].unique())

In [None]:
# generate contrasts of each model timepoint against the Naive control
# (the condition whose parsed hour is 0)
hour_condition_map = dict(zip(adata_neuron.obs['hour'], adata_neuron.obs['condition']))

# Every contrast uses the hour-0 condition as its background. Fail early with a
# clear message instead of silently building contrasts against a condition of None.
if 0 not in hour_condition_map:
    raise ValueError(
        "no hour-0 (naive) cells found in adata_neuron; "
        "cannot build contrasts against the control"
    )

naive = hour_condition_map[0]
contrasts = {}
condition_order = []  # conditions in increasing time order; reused to order plot categories
for hour in sorted(adata_neuron.obs['hour'].unique()):
    condition = hour_condition_map[hour]
    condition_order.append(condition)
    if hour == 0:
        # the control itself is not contrasted against anything
        continue
    # contrast name is '<foreground>:<background>'; each side is an obs filter
    contrast_name = f'{condition}:{naive}'
    fg_filter = {'condition': condition}
    bg_filter = {'condition': naive}
    contrasts[contrast_name] = (fg_filter, bg_filter)
contrasts


In [None]:
# run differential expression for every contrast on the neuron subset using the
# 'ranksums' test; the result is indexed by contrast name ('<condition>:<naive>')
des_neuron = de.differential_expression(adata_neuron, contrasts, tests=['ranksums'])
# show one table as an example (this key assumes a 2-day timepoint exists in the data)
des_neuron['SNI_02d:SNI_00h']

In [None]:
# use the flag_de function to add a boolean 'is-de' column to each DataFrame
# log2fc_thresh: abs(log2fc) must be greater than this
# p_column: which computed statistic should be used as a significance threshold
# p_thresh: the value of the p_column must be <= p_thresh
# The return value is ignored; the 'is-de' column is read from des_neuron below,
# so flag_de presumably modifies the tables in place -- confirm in de.flag_de.
de.flag_de(des_neuron, log2fc_thresh=l2fc_thresh, p_column='ranksums-fdr-p', p_thresh=.05)

# show an example differential expression table, restricted to the flagged genes
det = des_neuron['SNI_02d:SNI_00h']
det[det['is-de']]

In [None]:
# plot differentially expressed genes from included gene sets
# shows only genes that are differentially expressed in at least one of the differential expression tables
# The gene sets and the explicit gene list here are the same ones passed to
# de.summarize_de_genes for the accompanying statistics table; keep them in sync.
# Categories are ordered by increasing timepoint via condition_order.
plotting.plot_de_genes(
    adata_neuron, 
    'condition', 
    des_neuron, 
    genesets={'senmayo': 'senmayo_mouse', 'sasp_review': 'sasp_review_mouse'}, 
    genes=['Cdkn1a', 'Cdkn2a', 'Atf3', 'Il6', 'Il1b'],
    sort_genes_by='expr',
    smallest_dot=8.,
    dot_min=.0,
    categories_order=condition_order,
    save='wang_senescence.png',
)

In [None]:
# output also the stats for the above plot
# (same gene sets and gene list as the plot_de_genes call)
de_summary = de.summarize_de_genes(
    des_neuron,
    genesets={'senmayo': 'senmayo_mouse', 'sasp_review': 'sasp_review_mouse'},
    genes=['Cdkn1a', 'Cdkn2a', 'Atf3', 'Il6', 'Il1b'],
)
# make sure the output directory exists; to_excel does not create it
os.makedirs(supplement_dir, exist_ok=True)
de_summary.to_excel(os.path.join(supplement_dir, 'wang_senescence.xlsx'))
de_summary

In [None]:
# output the full differential expression table of every neuron timepoint
# contrast, one worksheet per contrast
os.makedirs(supplement_dir, exist_ok=True)  # ExcelWriter does not create the directory
with pd.ExcelWriter(os.path.join(supplement_dir, 'wang_differential_expression.xlsx')) as writer:
    # NOTE: the loop variable must not be called `de` -- that would rebind the
    # imported `de` module and break any later de.* call in this kernel.
    for contrast_name, de_table in des_neuron.items():
        # ':' is not allowed in Excel sheet names
        sheet_name = contrast_name.replace(':', '_vs_')
        de_table.to_excel(writer, sheet_name=sheet_name)