In [222]:
import scanpy as sc
import os
import warnings
import anndata
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt
from scanpy.metrics.specificity.plot import marker_genes_distribution, one_v_max_genelist
import numpy as np
from scanpy.metrics.specificity.get_data import get_average_celltype_counts


In [223]:
# init functions
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

def find_max_celltype(gene,adata,partition_key="CellType"):
    get_average_celltype_counts(adata , partition_key = partition_key)
    gene_index = int(np.where(adata.var.index == gene)[0])
    max_celltype_index = np.argmax(adata.varm['ave_celltype_counts_CellType'][gene_index,:])
    max_celltype = list(adata.obs['CellType'].cat.categories)[max_celltype_index]
    return(max_celltype)
def show_spec_gene(change):
    output_gene.clear_output()
    with output_gene :
        max_celltype = find_max_celltype(gene=change.new,adata=adata,partition_key="CellType")
        print('Specificity prediction : ' + str(specs.loc[change.new,'spe_type']) )
        print('Most represented celltype, predicted from specificity : ' + max_celltype )

        print('\n Discovair marker information :\n' + str(df_markers_aw.loc[change.new]))
        
        marker_genes_distribution(adata=adata,
                                  gene_list=[change.new],
                                  celltype=max_celltype,
                                  partition_key='CellType')
        one_v_max_genelist(adata=adata,
                           gene_list=[change.new],
                           partition_key='CellType')
        sc.pl.umap(adata=adata, color=change.new, color_map = 'jet')
def show_spec_distrib(change):
    output_marker.clear_output()
    with output_marker :
        spec_markers=markers[change.new].dropna()
        found_markers=spec_markers[spec_markers.isin(adata.var_names)]
        not_found=spec_markers[~spec_markers.isin(adata.var_names)]
        if not not_found.empty:
            print('Gene(s) not found : ' + ','.join(not_found))
        print(specs.loc[found_markers,'spe_type'])
        marker_genes_distribution(adata=adata,
                                  gene_list=spec_markers,
                                  celltype=change.new,
                                  partition_key='CellType')
        one_v_max_genelist(adata=adata,
                           gene_list=spec_markers,
                           partition_key='CellType')
        sc.pl.umap(adata=adata, color=found_markers, color_map = 'jet')

## Load data HCA Barbry

In [224]:
DATA_PATH = '/mnt/d/cellmetrics/'

In [225]:
count_file = 'HCA_Barbry_Grch38_Raw_filter_Norm.h5ad'
adata = anndata.read_h5ad(DATA_PATH + count_file)

In [226]:
adata.obs['CellType'].cat.categories

Index(['AT1', 'AT2', 'B cells', 'Basal', 'Brush cells', 'Cycling Basal',
       'Dendritic', 'Deuterosomal', 'Endothelial', 'Fibroblast', 'Ionocyte',
       'LT/NK', 'Macrophage', 'Mast cells', 'Monocyte', 'Multiciliated',
       'Multiciliated N', 'PNEC', 'Pericyte', 'Plasma cells', 'Precursor',
       'SMG Goblet', 'Secretory', 'Secretory N', 'Serous', 'Smooth muscle',
       'Suprabasal', 'Suprabasal N'],
      dtype='object')

In [227]:
specs = pd.read_csv(DATA_PATH + 'genes_spec_pred_V1', index_col = 'gene')

## Load marker tables

In [228]:
df_markers_aw = pd.read_csv(DATA_PATH + 'SCRINSHOT_Airwaywall_probeset.csv', index_col = 'gene')
df_markers_aw = df_markers_aw[df_markers_aw['selection'] == True]
df_markers_aw.drop('Unnamed: 0',1)

Unnamed: 0_level_0,selection_ranking,selection,AW/Par,pca_gene,tree_gene,list_marker,marker
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
HLA-DRA,1,True,shared,True,"Myeloid,Mast cells,Pericytes,Deuterosomal,Muco...",none,Myeloid
AREG,1,True,shared,True,"Club,Mast cells",none,Mast cells
IFITM1,1,True,shared,True,"(Vascular) Endothelial cell,Lymphatic EC",none,(Vascular) Endothelial cell
CST3,1,True,shared,True,"Lymphocyte,Myeloid,Chondrocytes",none,"Chondrocytes,Myeloid"
S100A4,1,True,shared,True,"(Vascular) Endothelial cell,Basal,Myeloid",none,Myeloid
S100A9,1,True,shared,True,Myeloid,none,Myeloid
VIM,1,True,shared,True,"Basal,Club,Lymphatic EC",none,Lymphatic EC
MT1A,1,True,shared,True,Pericytes,none,Pericytes
TPSAB1,1,True,shared,True,Mast cells,none,Mast cells
IGFBP7,1,True,shared,True,"Lymphatic EC,Myeloid,Pericytes,Duct SMG,Goblet",none,"Lymphatic EC,Pericytes"


In [229]:
df_markers_par = pd.read_csv(DATA_PATH + 'SCRINSHOT_Parenchyma_probeset.csv', index_col = 'gene')
df_markers_par = df_markers_par[df_markers_par['selection'] == True]
df_markers_par.drop('Unnamed: 0',1)

Unnamed: 0_level_0,selection_ranking,selection,AW/Par,pca_gene,tree_gene,list_marker,marker
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
IL7R,1,True,shared,True,"(Vascular) Endothelial cell,Cappillary (G) end...",none,Cappillary (G) endothelial cell
S100A4,1,True,shared,True,"(Vascular) Endothelial cell,Basal,Myeloid,Capp...",none,Myeloid
S100A9,1,True,shared,True,Myeloid,none,Myeloid
MT1A,1,True,shared,True,Pericytes,none,Pericytes
IFITM1,1,True,shared,True,"(Vascular) Endothelial cell,Lymphatic EC",none,(Vascular) Endothelial cell
CXCR4,1,True,shared,True,"Lymphocyte,Mast cells",none,Lymphocyte
HLA-DRA,1,True,shared,True,"Myeloid,Mast cells,Pericytes",none,Myeloid
FOS,1,True,shared,True,Basal,none,Basal
TIMP1,1,True,shared,True,"Adventitial Fibroblast,Mesothelial cell",none,"Adventitial Fibroblast,Mesothelial cell"
VIM,1,True,shared,True,"Basal,Club,Lymphatic EC,AT1,Cappillary (G) end...",none,Lymphatic EC


# Display Prediction on marker from Airway wall

Specificity prediction are in 5 categories : equirep low multirep high unique

In [230]:
combobox_genes = widgets.Combobox(
    options=list(df_markers_aw.index),
    placeholder='Type a gene',
    description='Gene:',
    ensure_option=True,
    disabled=False,
)
output_gene=widgets.Output()
combobox_genes.observe(show_spec_gene,names='value')
display(combobox_genes)
display(output_gene)

Combobox(value='', description='Gene:', ensure_option=True, options=('HLA-DRA', 'AREG', 'IFITM1', 'CST3', 'S10…

Output()

# Display Prediction on marker from Parenchyma

In [231]:
combobox_genes = widgets.Combobox(
    options=list(df_markers_par.index),
    placeholder='Type a gene',
    description='Gene:',
    ensure_option=True,
    disabled=False,
)
output_gene=widgets.Output()
combobox_genes.observe(show_spec_gene,names='value')
display(combobox_genes)
display(output_gene)

Combobox(value='', description='Gene:', ensure_option=True, options=('IL7R', 'S100A4', 'S100A9', 'MT1A', 'IFIT…

Output()

### Save specificity prediction in a table

In [232]:
# Find airway wall marker gene not in our dataset
print("Marker genes not present in dataset :",df_markers_aw.index.difference(specs.index))

Marker genes not present in dataset : Index(['ACAN', 'MT-ATP8', 'PLD5', 'RPS17', 'RPS29'], dtype='object', name='gene')


In [233]:
list_markers = [x for x in df_markers_aw.index if x in specs.index]
df_markers_aw.loc[list_markers,"Specificity_Barbry"] = specs.loc[list_markers,"spe_type"]
list_celltype = [find_max_celltype(gene,adata,partition_key="CellType") for gene in list_markers]
df_markers_aw.loc[list_markers,"Cell_type_Spe_Barbry"] = list_celltype

In [234]:
# Find parenchyma marker gene not in our dataset
print("Marker genes not present in dataset :",df_markers_par.index.difference(specs.index))

Marker genes not present in dataset : Index(['MT-ATP8', 'NAPSA', 'RPS17', 'RPS29'], dtype='object', name='gene')


In [235]:
list_markers = [x for x in df_markers_par.index if x in specs.index]
df_markers_par.loc[list_markers,"Specificity_Barbry"] = specs.loc[list_markers,"spe_type"]
list_celltype = [find_max_celltype(gene,adata,partition_key="CellType") for gene in list_markers]
df_markers_par.loc[list_markers,"Cell_type_Spe_Barbry"] = list_celltype

## Load data HCA Meyer

In [236]:
df_meyer = pd.read_csv(DATA_PATH + 'Gene_Meyer_Barbry_comparison.txt', sep='\t', index_col = 'Genes')

In [249]:
list_markers = [x for x in df_markers_aw.index if x in df_meyer.index]
list_spe = list(df_meyer.loc[list_markers,"spe_type_Meyer"])
df_markers_aw.drop('Unnamed: 0',1)
df_markers_aw.loc[list_markers,"Spqdqzecificity_Meyer"] = list_spe
#df_markers_aw.loc[list_markers,"Ceqzdll_type_Spe"] = df_meyer.loc[list_markers,"CellType"]
df_markers_aw.to_csv(DATA_PATH + 'SCRINSHOT_Airwaywall_probeset_specificity.csv')

ValueError: Must have equal len keys and value when setting with an iterable

In [219]:
list_markers = [x for x in df_markers_par.index if x in df_meyer.index]
#df_markers_par.loc[list_markers,"Specificity_Meyer"] = df_meyer.loc[list_markers,"spe_type_Meyer"]
#df_markers_par.loc[list_markers,"Cell_type_Spe"] = df_meyer.loc[list_markers,"CellType"]
df_markers_par.to_csv(DATA_PATH + 'SCRINSHOT_Parenchyma_probeset_specificity.csv')