# init

In [None]:
import scanpy as sc
import scvi
import pandas as pd
import math
import tqdm as notebook_tqdm
import celltypist
from celltypist import models
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns
import gc
from IPython.display import display, Image
scvi.settings.seed = 0

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
def build_violin(sample, rows=1, cols=3, size=(15, 5), suptitle=None,
                 metrics=['total_counts','n_genes_by_counts','pct_counts_mt'],
                 threshold_line=True, tline_coords=[[40_000], [200, 6_000] , [20]] ):
    """Draw one violin plot per metric of ``sample`` on a shared figure.

    Parameters
    ----------
    sample
        Object handed to ``sc.pl.violin``. ``sample.uns["sample_name"]`` is
        used as the figure title when ``suptitle`` is None.
    rows, cols
        Subplot grid shape passed to ``matplotlib.pyplot.subplots``.
    size
        Figure size, passed as ``figsize``.
    suptitle
        Figure title; when None the stored sample name is used instead.
    metrics
        Metric names, plotted one per axis in order. Plotting stops at the
        shorter of ``metrics`` and the number of axes.
    threshold_line
        When true, dashed red horizontal lines are drawn at the thresholds.
    tline_coords
        One entry per metric: a scalar or a sequence of y positions.

    Returns
    -------
    tuple
        ``(fig, axes)`` exactly as returned by ``matplotlib.pyplot.subplots``.
    """
    fig, axes = matplotlib.pyplot.subplots(rows, cols, figsize=size)

    # An explicit suptitle wins; otherwise fall back to the stored sample name.
    if suptitle is None:
        fig.suptitle(f'{sample.uns["sample_name"]}')
    else:
        fig.suptitle(f'{suptitle}')

    # Flatten so that a single Axes (1x1) or a 2-D grid is walked like a row.
    flat_axes = np.atleast_1d(axes).ravel()

    for i, (ax, metric) in enumerate(zip(flat_axes, metrics)):

        ax.grid(lw=0.4, ls='--', alpha=0.4)
        ax.minorticks_on()

        sc.pl.violin(
            sample, [metric],
            jitter=0.4, multi_panel=False,
            ax=ax, show=False)

        # Visualise the future filtering thresholds. Every coordinate is
        # passed to axhline as a scalar, never as a list.
        if threshold_line:
            for cord in np.atleast_1d(tline_coords[i]):
                ax.axhline(cord, color='red', ls='--', lw=0.5, label='threshold')

    return fig, axes

Let's create a dictionary with the following structure `dict[key] = value` --> `dict[gene_symbol] = gene_id`: 
* It will be used to create a query.

In [None]:
# Feature table of sample H001; the first column of the file becomes the index
# (presumably the gene ID - confirm against the file).
names_df = pd.read_csv('./GSE190856_RAW/H001/features.tsv.gz', sep = '\t', header=None, index_col=0)
names_df.columns = ['gene_symbols', 'Gene Expression']

# Map gene symbol -> index value in one vectorised pass. When a symbol occurs
# more than once, the last row wins.
names_dict = dict(zip(names_df['gene_symbols'], names_df.index))
    

In [None]:
names_df

In [None]:
names_dict['Zbp1']


In [None]:
# names_dict has been built from the table, and names_df is not referenced
# again in the visible part of the notebook; drop it and reclaim the memory.
del(names_df)
gc.collect()

In [None]:
def setup_ax(ax, sample, ann_fontsize, add_numbers=False, marker_size=10, cols=7):
    """Replace the legend of ``ax`` with a frameless multi-column legend.

    One square marker is drawn per category of
    ``sample.obs['celltypist_cell_label_coarse']``, coloured with the matching
    entry of ``sample.uns['celltypist_cell_label_coarse_colors']``.

    Parameters
    ----------
    ax
        Axes whose legend is rebuilt.
    sample
        Object providing the categories (``.obs``) and their colours (``.uns``).
    ann_fontsize
        Font size of the legend entries.
    add_numbers
        When true, each label is prefixed with its 1-based position.
    marker_size
        Size of the square legend markers.
    cols
        Number of legend columns.

    Returns
    -------
    The labels shown in the legend: a list of strings when ``add_numbers`` is
    true, otherwise the categories index itself.
    """
    # Remove the automatically created legend, if the axis has one.
    if ax.legend_ is not None:
        ax.legend_.remove()

    categories = sample.obs['celltypist_cell_label_coarse'].cat.categories
    if add_numbers:
        labels = [f'№{index} : {label}' for index, label in enumerate(categories, start=1)]
    else:
        labels = categories

    colors = sample.uns['celltypist_cell_label_coarse_colors']

    # One marker-only handle per (label, colour) pair. lw=0 hides the
    # connecting line so that only the square marker is visible.
    handles = [
        matplotlib.lines.Line2D([0], [0], marker='s', color=color, lw=0,
                                label=label, markerfacecolor=color,
                                markersize=marker_size)
        for label, color in zip(labels, colors)
    ]

    ax.legend(
        handles=handles,
        frameon=False,          # no background box
        ncols=cols,
        fontsize=ann_fontsize,
        title='',
    )

    return labels

In [None]:
def __axes_handler__(axes, row, column, ylabel=None, ylabsize=None):
    """Switch one panel's axis back on and set the y label of its row.

    Parameters
    ----------
    axes
        2-D indexable collection of Axes (``axes[row][column]``).
    row, column
        Position of the panel whose axis is re-enabled.
    ylabel
        Text for the y label of ``axes[row][0]``. With None the label is
        cleared and nothing else is changed on that axis.
    ylabsize
        Font size of the y label.
    """
    panel = axes[row][column]
    panel.axis('on')
    # tick_params expects booleans. The strings 'off'/'on' are both truthy,
    # so 'off' would switch the ticks on instead of hiding them.
    panel.tick_params(top=False, bottom=False, left=False, right=False,
                      labelleft=True, labelbottom=False)

    first_in_row = axes[row][0]
    if ylabel is None:
        first_in_row.set_ylabel('')
    else:
        first_in_row.set_ylabel(ylabel, rotation=90, fontsize=ylabsize)
        first_in_row.set_xlabel('')
        first_in_row.set(frame_on=False)

In [None]:
def plot_custom_UMAP(anndata, samples, color, title,
                     ylab, xlab, ann_fontsize=10, marker_size=10,
                     annatation_cols=7, an_x=2, an_y=1.3, add_numbers=False,
                     nrows=4, ncols=4, vmax=6.8,
                     vmin=None, size=30, wspace=0,
                     titlesize=20, xlabsize=20,
                     ylabsize=20, figsize=(10, 10),
                     GeneName2EnsambleID=names_dict,
                     loc_legend = 'right margin',
                     cmap=sns.blend_palette(['#d3d3d3','red'], as_cmap=True)):
    """Plot a grid of UMAPs: one row per colour key, one column per sample.

    Parameters
    ----------
    anndata
        Combined object. Each panel shows the subset where
        ``anndata.obs.Sample == str(sample)``.
    samples
        Sample labels, one per column (at most ``ncols`` are used).
    color
        A key or a list of keys for annotations of observations/cells or
        variables/genes, e.g. ``'ann1'`` or ``['ann1', 'ann2']``; one row per
        key (at most ``nrows`` are used). Each key is looked up in
        ``GeneName2EnsambleID`` and used unchanged when it is not found.
    title
        Column titles, indexed by column and written above the first row.
    ylab
        Row labels, indexed by row and written on the first column.
    xlab
        Indexed by column and set as the title of the panels of the second row.
    ann_fontsize, marker_size, annatation_cols, add_numbers
        Forwarded to ``setup_ax`` when the legend is rebuilt.
    an_x, an_y
        Anchor of the rebuilt legend (``set_bbox_to_anchor``).
    nrows, ncols
        Number of rows and columns of the figure.
    vmax, vmin
        Colour-map limits passed to ``sc.pl.umap``.
    size
        Point size passed to ``sc.pl.umap``.
    wspace
        Horizontal distance between axes.
    titlesize, xlabsize, ylabsize
        Font sizes of the column titles, second-row titles and row labels.
    figsize
        Figure size.
    GeneName2EnsambleID
        Mapping used to translate colour keys (gene symbol -> gene ID).
    loc_legend
        ``legend_loc`` for the first panel. After that panel's legend has
        been rebuilt, every later panel is drawn with ``legend_loc=None``.
    cmap
        Colour map passed to ``sc.pl.umap``.

    Returns
    -------
    tuple
        ``(fig, axes, labels)``. ``labels`` is what ``setup_ax`` returned, or
        ``''`` when the legend was never rebuilt. ``axes`` is always 2-D.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(color, str):
        color = [color]

    # squeeze=False keeps axes 2-D, so axes[row][column] also works for a
    # single row or a single column.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize,
                             layout=None, squeeze=False)
    plt.subplots_adjust(wspace=wspace)
    labels = ''
    colorbar_loc = None

    for key, row in zip(color, range(nrows)):

        # The same key is plotted for every sample of the row.
        target = [GeneName2EnsambleID.get(key, key)]

        for sample, column in zip(samples, range(ncols)):

            sc.pl.umap(anndata[anndata.obs.Sample == str(sample)], use_raw=None,
                       color=target, cmap=cmap, frameon=False,
                       size=size, title='', vmax=vmax, vmin=vmin, colorbar_loc=colorbar_loc,
                       ax=axes[row][column], show=False, legend_loc=loc_legend, )

            if column == 0:
                __axes_handler__(axes, row, column, ylabel=ylab[row], ylabsize=ylabsize)

            # The value chosen here applies to the NEXT panel: after the
            # second-to-last column it becomes 'right', so the last column
            # of the grid is the one drawn with a colour bar.
            if column == ncols - 2:
                colorbar_loc = 'right'
            else:
                colorbar_loc = None

            if row == 0:
                axes[0][column].set_title(title[column], fontsize=titlesize)

                # Rebuild the legend once, on the very first panel, then
                # disable legends for all remaining panels.
                if loc_legend is not None:
                    labels = setup_ax(axes[0][0], anndata, ann_fontsize, cols=annatation_cols,
                                      marker_size=marker_size, add_numbers=add_numbers)
                    axes[0][0].legend_.set_bbox_to_anchor((an_x, an_y))
                    loc_legend = None
            if row == 1:
                axes[1][column].set_title(xlab[column], fontsize=xlabsize)

    return fig, axes, labels
    

In [None]:
def build_scatter(y, samples, ytline_coords=None, rows=1, cols=4, size=(15, 5), suptitle=None,
                     x='total_counts', threshold_line=True, xtline_coords=[40_000], c=0):
    """Draw one ``sc.pl.scatter`` panel per sample, optionally with thresholds.

    Parameters
    ----------
    y
        Name of the quantity on the y axis.
    samples
        List of samples; each needs ``uns['sample_name']`` for its panel title.
    ytline_coords
        List with one or two y thresholds. None or any other length draws no
        horizontal line.
    rows, cols
        Subplot grid shape passed to ``matplotlib.pyplot.subplots``.
    size
        Figure size, passed as ``figsize``.
    suptitle
        Optional figure title; nothing is set when None.
    x
        Name of the quantity on the x axis.
    threshold_line
        When true, draw the horizontal thresholds and a vertical line at
        ``xtline_coords[0]``.
    xtline_coords
        List whose first element is the x threshold.
    c
        Starting value of the panel counter. The panel drawn while the
        counter is 0 keeps its y label and gets the legend; later panels have
        their y label cleared.

    Returns
    -------
    tuple
        ``(fig, axes)`` exactly as returned by ``matplotlib.pyplot.subplots``.
    """
    fig, axes = matplotlib.pyplot.subplots(rows, cols, figsize=size)

    if suptitle is not None:
        fig.suptitle(suptitle)

    # None means "no horizontal thresholds"; len(None) would raise TypeError.
    y_thresholds = [] if ytline_coords is None else ytline_coords

    # Flatten so that a single Axes or a 2-D grid is walked like a row.
    for sample, ax in zip(samples, np.atleast_1d(axes).ravel()):

        ax.grid(lw=0.4, ls='--', alpha=0.4)
        ax.minorticks_on()

        sc.pl.scatter(sample, x=x, y=y, ax=ax, show=False, title=sample.uns['sample_name'])

        if threshold_line:
            if len(y_thresholds) == 2:
                # Only the second line carries a label, so the legend shows
                # a single entry for the pair.
                ax.axhline(y_thresholds[0], color='red', ls='--', lw=0.5)
                ax.axhline(y_thresholds[1], color='red', ls='--', lw=0.5, label='upper and lower tr. y')
            elif len(y_thresholds) == 1:
                ax.axhline(y_thresholds[0], color='red', ls='--', lw=0.5, label='upper tr. y')

            ax.axvline(xtline_coords[0], color='orange', ls='--', lw=0.5, label='upper tr. x')

        if c > 0:
            ax.set_ylabel('')
        elif c == 0 and threshold_line:
            ax.legend()

        c += 1

    return fig, axes

In [None]:
models.download_models(force_update=True, model=["Adult_Mouse_Gut.pkl"])
mmouse = models.Model.load(model="Adult_Mouse_Gut.pkl")

# Write the "gene ID,gene symbol" table before converting, so that this cell
# also works on a fresh checkout where IDs2symbols.csv does not exist yet.
with open('IDs2symbols.csv', 'w') as f:
    for symbol, gene_id in names_dict.items():
        f.write(f'{gene_id},{symbol}\n')

mmouse.convert('IDs2symbols.csv')

WT Metadata add

In [None]:
# Display names and short variable names of the four wild-type samples,
# in time-point order.
WT_NAMES = [
    'WT steady state (H003)',
    'WT 3 days later (H004)',
    'WT 7 days later (H007)',
    'WT 21 days later (H009)',
]

WT_VARIABLES = ['WT_SS', 'WT_3DL', 'WT_7DL', 'WT_21DL']

# Hard-coded per-sample summaries. The keys match the entries written to
# `.uns` further down; the numbers were presumably recorded from an earlier
# run - verify after changing the filters.
WT_SS_METADATA = dict(
    sample_name='WT steady state (H003)', var_name='WT_SS',
    cell_count=4707, gene_count=55450,
    filtered_cell_count=4106, filtered_gene_count=25569,
)

WT_3DL_METADATA = dict(
    sample_name='WT 3 days later (H004)', var_name='WT_3DL',
    cell_count=12046, gene_count=55450,
    filtered_cell_count=11086, filtered_gene_count=27268,
)

WT_7DL_METADATA = dict(
    sample_name='WT 7 days later (H007)', var_name='WT_7DL',
    cell_count=10809, gene_count=55450,
    filtered_cell_count=10148, filtered_gene_count=26595,
)

WT_21DL_METADATA = dict(
    sample_name='WT 21 days later (H009)', var_name='WT_21DL',
    cell_count=9234, gene_count=55450,
    filtered_cell_count=8807, filtered_gene_count=26944,
)

WT_METADATA = [WT_SS_METADATA, WT_3DL_METADATA, WT_7DL_METADATA, WT_21DL_METADATA]

PAIR Metadata add

In [None]:
# Display names and short variable names of the six samples of the pairwise
# comparison: three Trem2 KO samples followed by three WT littermate controls.
P_NAMES = [
    'Trem2 KO\nsteady state (H005)',
    'Trem2 KO\n3 d. post CLP (H006)',
    'Trem2 KO\n7 d. post CLP (H008)',
    'WT littermate control\nsteady state (H001)',
    'WT littermate control\n3 d. post CLP (H002)',
    'WT littermate control\n7 d. post CLP (H011)',
]

P_VARIABLES = [
    'Trem2_KO_SS', 'Trem2_KO_3DL', 'Trem2_KO_7DL',
    'WT_LC_SS', 'WT_LC_3DL', 'WT_LC_7DL',
]

# Hard-coded per-sample summaries. The numbers were presumably recorded from
# an earlier run - verify after changing the filters.
Trem2_KO_SS_METADATA = dict(
    sample_name='Trem2 KO\nsteady state (H005)', var_name='Trem2_KO_SS',
    cell_count=6982, gene_count=55450,
    filtered_cell_count=6325, filtered_gene_count=25695,
)

Trem2_KO_3DL_METADATA = dict(
    sample_name='Trem2 KO\n3 d. post CLP (H006)', var_name='Trem2_KO_3DL',
    cell_count=8627, gene_count=55450,
    filtered_cell_count=7565, filtered_gene_count=27275,
)

Trem2_KO_7DL_METADATA = dict(
    sample_name='Trem2 KO\n7 d. post CLP (H008)', var_name='Trem2_KO_7DL',
    cell_count=11635, gene_count=55450,
    filtered_cell_count=11046, filtered_gene_count=27469,
)

WT_LC_SS_METADATA = dict(
    sample_name='WT littermate control\nsteady state (H001)', var_name='WT_LC_SS',
    cell_count=9273, gene_count=55450,
    filtered_cell_count=8651, filtered_gene_count=27842,
)

WT_LC_3DL_METADATA = dict(
    sample_name='WT littermate control\n3 d. post CLP (H002)', var_name='WT_LC_3DL',
    cell_count=7210, gene_count=55450,
    filtered_cell_count=5484, filtered_gene_count=26314,
)

WT_LC_7DL_METADATA = dict(
    sample_name='WT littermate control\n7 d. post CLP (H011)', var_name='WT_LC_7DL',
    cell_count=10689, gene_count=55450,
    filtered_cell_count=9855, filtered_gene_count=26850,
)

PAIR_METADATA = [
    Trem2_KO_SS_METADATA, Trem2_KO_3DL_METADATA, Trem2_KO_7DL_METADATA,
    WT_LC_SS_METADATA, WT_LC_3DL_METADATA, WT_LC_7DL_METADATA,
]

In [None]:
PAIR_METADATA

# PART1: WT

## Importing wild-type mouse samples (Trem2+)

* There are 4 time points:
    * Steady state (SS)       --     H003
    * 3 days later (3DL)      --     H004
    * 7 days later (7DL)      --     H007
    * 21 days later (21DL)    --     H009

### WT SS

<font color=red>! **SS mice have half as many cells in the sample.**</font>

* 5 thousands vs 10 thousands cells

In [None]:
# WT steady state (H003): read the 10x matrix with gene IDs as variable names.
WT_SS = sc.read_10x_mtx('./GSE190856_RAW/H003/', var_names='gene_ids', cache=True)
WT_SS.var_names_make_unique()

WT_SS

In [None]:
WT_SS.var

### WT 3DL

In [None]:
# WT 3 days later (H004): read the 10x matrix with gene IDs as variable names.
WT_3DL = sc.read_10x_mtx('./GSE190856_RAW/H004/', var_names='gene_ids', cache=True)
WT_3DL.var_names_make_unique()

WT_3DL

### WT 7DL

In [None]:
# WT 7 days later (H007): read the 10x matrix with gene IDs as variable names.
WT_7DL = sc.read_10x_mtx('./GSE190856_RAW/H007/', var_names='gene_ids', cache=True)
WT_7DL.var_names_make_unique()

WT_7DL

### WT 21DL

In [None]:
# WT 21 days later (H009): read the 10x matrix with gene IDs as variable names.
WT_21DL = sc.read_10x_mtx('./GSE190856_RAW/H009/', var_names='gene_ids', cache=True)
WT_21DL.var_names_make_unique()

WT_21DL

Let's combine all 4 objects into one list:

In [None]:
# Order matches WT_NAMES / WT_VARIABLES (SS, 3DL, 7DL, 21DL); later cells zip these lists together.
WT_SAMPELS = [WT_SS, WT_3DL, WT_7DL, WT_21DL]

Let's add the names of the samples and number of cells to the anndata objects.

In [None]:
# Store display name, short name and the unfiltered matrix dimensions in .uns.
for obj, name, var in zip(WT_SAMPELS, WT_NAMES, WT_VARIABLES):
    obj.uns.update(
        sample_name=name,
        var_name=var,
        cell_count=obj.n_obs,
        gene_count=obj.n_vars,
    )

# Metadata of the last sample handled by the loop.
obj.uns

In [None]:
# Inspect the metadata through the sample list: in a top-to-bottom run no
# variable named `sample` exists yet at this point of the notebook.
WT_SAMPELS[0].uns

## An additional block. The genes with the highest expression.

In [None]:
sc.pl.highest_expr_genes(WT_SS, n_top=20, gene_symbols='gene_symbols')

In [None]:
sc.pl.highest_expr_genes(WT_3DL, n_top=20, gene_symbols='gene_symbols')

In [None]:
sc.pl.highest_expr_genes(WT_7DL, n_top=20, gene_symbols='gene_symbols')

In [None]:
sc.pl.highest_expr_genes(WT_21DL, n_top=20, gene_symbols='gene_symbols')

## Let's filter the data

Filtering that was applied in the original [article](https://www.nature.com/articles/s42255-022-00715-5):
```
All genes expressed in fewer than one cell were removed. Cells expressing fewer than 200 and more than 6,000 genes, unique molecular identifier counts more than 40,000 and the percent of mitochondrial DNA (mtDNA) gene expression more than 20% were excluded. Mitochondrial genes were excluded from the expression matrix.
```

We will first calculate the quality metrics for each of the samples.

In [None]:
# Flag mitochondrial genes (gene symbol starts with 'mt-') and compute QC
# metrics in place, with 'mt' as the QC variable. The columns used by the
# plots and filters below (total_counts, n_genes_by_counts, pct_counts_mt)
# are presumably created by this call.
for sample in WT_SAMPELS:
    sample.var['mt'] = sample.var['gene_symbols'].str.startswith('mt-')
    sc.pp.calculate_qc_metrics(sample, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

In [None]:
WT_SS.obs

### WT samples before filtering: violin plots with `total_counts`, `n_genes_by_counts` and `pct_counts_mt`.

In [None]:
# Bar chart of the unfiltered cell count stored in each sample's .uns.
cell_counts = [sample.uns['cell_count'] for sample in WT_SAMPELS]

plt.bar(WT_VARIABLES, cell_counts,
        color=['blue', 'red', 'green', 'orange'], alpha=0.5)
plt.grid(lw=0.4, ls='--', alpha=0.4, axis='y')
plt.minorticks_on()
plt.title('Samples')
plt.xlabel('Samples')
plt.ylabel('number of cells')

# Print every count slightly below the top of its bar.
for x, y in zip(WT_VARIABLES, cell_counts):
    plt.text(x, y-500, str(y), ha='center')

In [None]:
for sample in WT_SAMPELS:
    build_violin(sample)

In [None]:
build_scatter(y='pct_counts_mt', samples=WT_SAMPELS, ytline_coords=[20])

In [None]:
build_scatter(y='n_genes_by_counts', samples=WT_SAMPELS, ytline_coords=[6_000, 200])

### filtering WT samples

In [None]:
WT_SAMPELS[0].var.mt.value_counts()

In [None]:
for i, sample in enumerate(WT_SAMPELS):

    # cells expressing fewer than 200 or more than 6_000 genes are removed
    sc.pp.filter_cells(sample, min_genes=200)
    sc.pp.filter_cells(sample, max_genes=6_000)

    # cells containing more than 40_000 counts are removed
    sc.pp.filter_cells(sample, max_counts=40_000)

    # genes expressed in less than one cell are removed
    sc.pp.filter_genes(sample, min_cells=1)

    # Keep cells with less than 20 % mitochondrial counts and drop the
    # mitochondrial genes themselves. The explicit copy makes the list hold
    # a real object instead of a view that would only be materialised
    # implicitly by the first write to .uns.
    keep_cells = sample.obs['pct_counts_mt'] < 20
    keep_genes = ~sample.var['mt'].astype(bool)
    filtered = sample[keep_cells, keep_genes].copy()

    # number of cells / genes that survived the filtering
    filtered.uns['filtered_cell_count'] = filtered.n_obs
    filtered.uns['filtered_gene_count'] = filtered.n_vars

    WT_SAMPELS[i] = filtered

In [None]:
WT_SAMPELS[0].var.mt.value_counts()

In [None]:
# Bar chart of the cell count that remains after filtering.
filtered_cell_counts = [sample.uns['filtered_cell_count'] for sample in WT_SAMPELS]

plt.bar(WT_VARIABLES, filtered_cell_counts,
        color=['blue', 'red', 'green', 'orange'], alpha=0.5)
plt.grid(lw=0.4, ls='--', alpha=0.4, axis='y')
plt.minorticks_on()
plt.title('Filtred samples')
plt.xlabel('Samples')
plt.ylabel('number of cells')

# Print every count slightly below the top of its bar.
for x, y in zip(WT_VARIABLES, filtered_cell_counts):
    plt.text(x, y-500, str(y), ha='center')

In [None]:
# Bar chart of the gene count that remains after filtering.
filtered_gene_counts = [sample.uns['filtered_gene_count'] for sample in WT_SAMPELS]

plt.bar(WT_VARIABLES, filtered_gene_counts,
        color=['blue', 'red', 'green', 'orange'], alpha=0.5)
plt.grid(lw=0.4, ls='--', alpha=0.4, axis='y')
plt.minorticks_on()
plt.title('Filtred samples')
plt.xlabel('Samples')
plt.ylabel('number of genes')

# Print every count slightly below the top of its bar.
for x, y in zip(WT_VARIABLES, filtered_gene_counts):
    plt.text(x, y-1500, str(y), ha='center')

In [None]:
for sample in WT_SAMPELS:
    build_violin(sample, threshold_line=False)

In [None]:
build_scatter(y='pct_counts_mt', samples=WT_SAMPELS, threshold_line=False)

In [None]:
build_scatter(y='n_genes_by_counts', samples=WT_SAMPELS, threshold_line=False)

## Let's perform preliminary data processing

In the article normalization was carried out as follows:
```
To visualize the data, we used the Seurat package for further analysis. 

First, we used the Log Normalizer method of the Normalization function of the Seurat package to assess the expression value of genes. Second, we performed principal-component analysis on the normalized expression matrix with highly variable genes identified by FindVariableGenes function. Based on the top ten principal components, we obtained the unsupervised cell cluster result by weighted shared nearest neighbor graph-based clustering method. To detect cluster-specific genes, we identified the marker genes by the bimod (Likelihood-ratio test) of the FindAllMarkers function in Seurat. Compared to other clusters, the genes whose expression was more than 25% of the cells and average log (fold change) >0.26 in the target cluster, were defined as marker genes. Cell types were defined based on known markers. Cells expressing non-immune cell markers were excluded.
```

### Preparing combined data for plot UMAP visualisation

We want to get a good UMAP visualization, so it is advisable to combine the samples.
* We will use `scvi` tool.

In [None]:
# Merge every filtered WT sample into one object. The sample of origin is
# recorded under the "Sample" key, which the UMAP helper reads from .obs.
WT_COMBINED = WT_SAMPELS[0].concatenate(*WT_SAMPELS[1:], batch_key="Sample")

The combined object consists of 34147 cells.

We can define a sample using the `batch_key` field.

In [None]:
WT_COMBINED.obs.head(2)

In [None]:
WT_COMBINED.obs.tail(2)

`scvi` needs raw counts, let's save them on the new layer.

In [None]:
WT_COMBINED.layers['counts'] = WT_COMBINED.X.copy()

In [None]:
WT_COMBINED

### Normalization, log-transformation, scaling and simple annotation of the samples

We are going to use celltypist, so let's create separate objects for it.

In [None]:
# Independent copies of the filtered samples, taken before the
# normalisation applied to WT_SAMPELS in the next cell.
WT_CELLTYPIST = [adata.copy() for adata in WT_SAMPELS]

Standard data transformation.

In [None]:
# In-place normalise -> log1p -> scale (values clipped at 10) for each sample.
for adata in WT_SAMPELS:
    sc.pp.normalize_total(adata, target_sum=1e4)  # 1e4 is Seurat default param
    sc.pp.log1p(adata)
    sc.pp.scale(adata, max_value=10)

#### scVI transformation

... and for combined object:

In [None]:
# Normalise .X to 1e4 counts per cell, then log1p. The raw counts were saved
# to layers['counts'] beforehand and are what scVI is pointed at below.
sc.pp.normalize_total(WT_COMBINED, target_sum=1e4)
sc.pp.log1p(WT_COMBINED)

In [None]:
# Selecting highly variable genes is not required here: the dataset is small, and ZBP1 has to be taken into account.
# sc.pp.highly_variable_genes(WT_COMBINED, n_top_genes=1.5e4, subset=True, layer='counts', flavor='seurat_v3', batch_key='Sample')

We will indicate the features that scvi will use. Note that we saved the raw counts in the layer 'counts'; `categorical_covariate_keys` indicates the samples and `continuous_covariate_keys` indicates additional continuous features.

We will ignore the WARNINGS below, because CUDA actually works.

In [None]:
# Register WT_COMBINED with scVI: counts are read from the 'counts' layer,
# 'Sample' is passed as a categorical covariate, and the two QC columns
# are passed as continuous covariates.
scvi.model.SCVI.setup_anndata(WT_COMBINED, layer='counts', 
                             categorical_covariate_keys=['Sample'],
                             continuous_covariate_keys=['pct_counts_mt', 'total_counts'])

Let's initialize new scvi model based on `WT_COMBINED` object. 

In [None]:
model = scvi.model.SCVI(WT_COMBINED)

Let's print some models description.

In [None]:
model

... and train the model. (~ 13-14 minutes on RTX 3060)

In [None]:
model.train()

We save latent representation in `latent` variable.

In [None]:
latent = model.get_latent_representation()

In [None]:
latent

To add latent representation in `.obsm['X_scVI']` attribute of `WT_COMBINED` anndata object. 

In [None]:
WT_COMBINED.obsm['X_scVI'] = latent

we save scVI normalized gene expression additionally.

In [None]:
WT_COMBINED.layers['scVI_normalized'] = model.get_normalized_expression(library_size=1e4)

Scaling data. We need to use scaling because all genes should be equally significant.

In [None]:
sc.pp.scale(WT_COMBINED, max_value=10)

When we want to use `sc.pp.neighbors()` with scvi data, we need to specify the `use_rep` parameter.

In [None]:
# Neighbour graph built on the scVI latent representation (cosine metric,
# 18 neighbours), followed by the UMAP embedding.
sc.pp.neighbors(WT_COMBINED, use_rep='X_scVI', metric='cosine', n_neighbors=18)
sc.tl.umap(WT_COMBINED, min_dist=0.3)

Clustering can be skipped before splitting into subpopulations.

In [None]:
sc.tl.leiden(WT_COMBINED, resolution=0.5)

#### To prepare the celltypist annotation

We will use celltypist model for automatic cell types annotation.

We use a mouse gut model, since no immune model has been created for the mouse.

(there are many immune cells in the mouse gut, suitable for approximate classification)

In [None]:
# Download (force_update=True) and load the mouse gut model again. The same
# two calls already run in the setup section at the top of the notebook;
# reloading here yields a model that has not been converted yet.
models.download_models(force_update=True, model=["Adult_Mouse_Gut.pkl"])
mmouse = models.Model.load(model="Adult_Mouse_Gut.pkl")

We use the Ensembl identifiers as indexes, so we need to prepare a file linking them to the gene symbols. By default, the model uses gene symbols.

In [None]:
# One "gene_id,symbol" line per entry of names_dict (symbol -> gene ID).
with open('IDs2symbols.csv', 'w') as f:
    f.writelines(f'{gene_id},{symbol}\n' for symbol, gene_id in names_dict.items())

In [None]:
mmouse.convert('IDs2symbols.csv')

#### *Annotation of combine sample

Create a copy of the anndata object containing the scvi result.
* We will use 'counts' layer because it contains raw data.

In [None]:
WT_COMBINED_CellTypist = WT_COMBINED.copy()
# Take a private copy of the raw counts. Without it, .X would be the very
# same matrix object as WT_COMBINED.layers["counts"], and the in-place
# normalisation applied to this object next could reach the stored raw counts.
WT_COMBINED_CellTypist.X = WT_COMBINED.layers["counts"].copy()

`WT_COMBINED_CellTypist.X` raw data below:

In [None]:
# Slice the 50 rows first and densify only those, instead of converting the
# whole matrix to a dense array just to print a small window.
print(WT_COMBINED_CellTypist.X[100:150].toarray())

Let's prepare the data: `normalize_per_cell` is an obligatory step before using celltypist.

* **False WARNING**. The reason is the presence of `log1p` in `.uns`.
* We don't use scaling because the celltypist does it automatically.

In [None]:
# Normalise every cell to 1e4 counts and log1p-transform the celltypist copy.
# NOTE(review): normalize_per_cell appears to be a legacy scanpy function;
# sc.pp.normalize_total(..., target_sum=1e4) looks like the current
# equivalent - confirm before switching.
sc.pp.normalize_per_cell(WT_COMBINED_CellTypist, counts_per_cell_after=1e4)
sc.pp.log1p(WT_COMBINED_CellTypist)

To predict cell types and save result in `.obs` attribute.
* conf_score == confidence score
* majority_voting == cell type

In [None]:
# Annotate with the mouse gut model (majority voting enabled), then copy the
# majority-voting label and the confidence score onto WT_COMBINED.obs.
predictions = celltypist.annotate(WT_COMBINED_CellTypist, model=mmouse, majority_voting=True)
predictions_adata = predictions.to_adata()
WT_COMBINED.obs["celltypist_cell_label_coarse"] = predictions_adata.obs['majority_voting']
WT_COMBINED.obs["celltypist_conf_score_coarse"] = predictions_adata.obs["conf_score"]

#### *Single samples

we will also perform a special processing of a object-copy for celltypist.

In [None]:
# Per-cell normalisation to 1e4 counts followed by log1p, on every copy.
for adata in WT_CELLTYPIST:
    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
    sc.pp.log1p(adata)
    # No sc.pp.scale here: unnecessary, celltypist performs the scaling itself.

We should compute the neighborhood graph of observations ourselves, so that we can set custom parameters. Otherwise, celltypist does it by itself.

In [None]:
# PCA with 40 components, then a cosine neighbour graph with 18 neighbours
# on those components; both steps use a fixed random seed.
for adata in WT_CELLTYPIST:
    sc.tl.pca(adata, n_comps=40, random_state=110701)
    sc.pp.neighbors(adata, metric='cosine', n_pcs=40, n_neighbors=18, random_state=110701)

Predict the cell types and add the predictions to the `WT_SAMPELS` anndata objects.

In [None]:
# Annotate each celltypist copy and write the labels back onto the matching
# entry of WT_SAMPELS (both lists share the same order).
for celltypist_adata, target_adata in zip(WT_CELLTYPIST, WT_SAMPELS):
    print(celltypist_adata.uns['sample_name'])

    predictions = celltypist.annotate(celltypist_adata, model=mmouse, majority_voting=True)
    predictions_adata = predictions.to_adata()
    target_adata.obs["celltypist_cell_label_coarse"] = predictions_adata.obs['majority_voting']
    target_adata.obs["celltypist_conf_score_coarse"] = predictions_adata.obs["conf_score"]

    print('#'*10, end='\n')

# RAM opt

In [None]:
del(WT_COMBINED_CellTypist)
gc.collect()

In [None]:
# Drop the raw-count layer to save memory. Earlier cells that read
# WT_COMBINED.layers['counts'] can no longer be re-run after this point.
del WT_COMBINED.layers['counts']
gc.collect()

In [None]:
"""
import time
t0 = time.time()
fn = './proc_data/WT_COMBINED.h5ad'
WT_COMBINED.write_h5ad(fn, compression='gzip')
print('%.1f seconds passed'%(time.time()-t0))
"""

# PART2: Trem2 -/-

## Importing mouse samples (Trem2-)

* There are 3 time points:
    * Steady state (SS)       --     H005
    * 3 days later (3DL)      --     H006
    * 7 days later (7DL)      --     H008

T SS

In [None]:
# Trem2 -/- steady state (H005): read the 10x matrix with gene IDs as variable names.
T_SS = sc.read_10x_mtx('./GSE190856_RAW/H005/', var_names='gene_ids', cache=True)
T_SS.var_names_make_unique()

T_SS

T 3DL

In [None]:
# Trem2 -/- 3 days later (H006): read the 10x matrix with gene IDs as variable names.
T_3DL = sc.read_10x_mtx('./GSE190856_RAW/H006/', var_names='gene_ids', cache=True)
T_3DL.var_names_make_unique()

T_3DL

T 7DL

In [None]:
# Trem2 -/- 7 days later (H008): read the 10x matrix with gene IDs as variable names.
T_7DL = sc.read_10x_mtx('./GSE190856_RAW/H008/', var_names='gene_ids', cache=True)
T_7DL.var_names_make_unique()

T_7DL

Let's combine all 3 objects into one list:

In [None]:
# Order matches T_NAMES / T_VARIABLES defined in the next cell (SS, 3DL, 7DL).
T_SAMPELS = [T_SS, T_3DL, T_7DL]

Let's add the names of the samples and number of cells to the anndata objects.

In [None]:
# Display names and short variable names of the three Trem2 -/- samples.
T_NAMES = [
    'Trem2 -/- steady state (H005)',
    'Trem2 -/- 3 days later (H006)',
    'Trem2 -/- 7 days later (H008)',
]

T_VARIABLES = ['T_SS', 'T_3DL', 'T_7DL']

# Store display name, short name and the unfiltered matrix dimensions in .uns.
for obj, name, var in zip(T_SAMPELS, T_NAMES, T_VARIABLES):
    obj.uns.update(
        sample_name=name,
        var_name=var,
        cell_count=obj.n_obs,
        gene_count=obj.n_vars,
    )

# Metadata of the last sample handled by the loop.
obj.uns

## Let's filter the data

We will first calculate the quality metrics for each of the samples.

In [None]:
# Flag mitochondrial genes (gene symbol starts with 'mt-') and compute QC
# metrics in place, with 'mt' as the QC variable. The columns used by the
# plots and filters below (total_counts, n_genes_by_counts, pct_counts_mt)
# are presumably created by this call.
for sample in T_SAMPELS:
    sample.var['mt'] = sample.var['gene_symbols'].str.startswith('mt-')
    sc.pp.calculate_qc_metrics(sample, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

In [None]:
T_SS.obs

### Trem2 samples before filtering: violin plots with `total_counts`, `n_genes_by_counts` and `pct_counts_mt`.

In [None]:
# Bar chart of the unfiltered cell count stored in each sample's .uns.
cell_counts = [sample.uns['cell_count'] for sample in T_SAMPELS]

plt.bar(T_VARIABLES, cell_counts,
        color=['blue', 'red', 'green'], alpha=0.5)
plt.grid(lw=0.4, ls='--', alpha=0.4, axis='y')
plt.minorticks_on()
plt.title('Samples')
plt.xlabel('Samples')
plt.ylabel('number of cells')

# Print every count slightly below the top of its bar.
for x, y in zip(T_VARIABLES, cell_counts):
    plt.text(x, y-500, str(y), ha='center')

In [None]:
for sample in T_SAMPELS:
    build_violin(sample)

In [None]:
build_scatter(y='pct_counts_mt', cols=3, ytline_coords=[20], samples=T_SAMPELS)

In [None]:
build_scatter(y='n_genes_by_counts', cols=3, ytline_coords=[6_000, 200], samples=T_SAMPELS)

### filtering Trem2 -/- samples

In [None]:
for i, sample in enumerate(T_SAMPELS):

    # cells expressing fewer than 200 or more than 6_000 genes are removed
    sc.pp.filter_cells(sample, min_genes=200)
    sc.pp.filter_cells(sample, max_genes=6_000)

    # cells containing more than 40_000 counts are removed
    sc.pp.filter_cells(sample, max_counts=40_000)

    # genes expressed in less than one cell are removed
    sc.pp.filter_genes(sample, min_cells=1)

    # Keep cells with less than 20 % mitochondrial counts and drop the
    # mitochondrial genes themselves. The explicit copy makes the list hold
    # a real object instead of a view that would only be materialised
    # implicitly by the first write to .uns.
    keep_cells = sample.obs['pct_counts_mt'] < 20
    keep_genes = ~sample.var['mt'].astype(bool)
    filtered = sample[keep_cells, keep_genes].copy()

    # number of cells / genes that survived the filtering
    filtered.uns['filtered_cell_count'] = filtered.n_obs
    filtered.uns['filtered_gene_count'] = filtered.n_vars

    T_SAMPELS[i] = filtered

In [None]:
# Bar chart of the cell count that remains after filtering.
filtered_cell_counts = [sample.uns['filtered_cell_count'] for sample in T_SAMPELS]

plt.bar(T_VARIABLES, filtered_cell_counts,
        color=['blue', 'red', 'green'], alpha=0.5)
plt.grid(lw=0.4, ls='--', alpha=0.4, axis='y')
plt.minorticks_on()
plt.title('Filtred samples')
plt.xlabel('Samples')
plt.ylabel('number of cells')

# Print every count slightly below the top of its bar.
for x, y in zip(T_VARIABLES, filtered_cell_counts):
    plt.text(x, y-500, str(y), ha='center')

In [None]:
# Bar chart of the gene count that remains after filtering.
filtered_gene_counts = [sample.uns['filtered_gene_count'] for sample in T_SAMPELS]

plt.bar(T_VARIABLES, filtered_gene_counts,
        color=['blue', 'red', 'green'], alpha=0.5)
plt.grid(lw=0.4, ls='--', alpha=0.4, axis='y')
plt.minorticks_on()
plt.title('Filtred samples')
plt.xlabel('Samples')
plt.ylabel('number of genes')

# Print every count slightly below the top of its bar.
for x, y in zip(T_VARIABLES, filtered_gene_counts):
    plt.text(x, y-1500, str(y), ha='center')

In [None]:
for sample in T_SAMPELS:
    build_violin(sample, threshold_line=False)

In [None]:
build_scatter(y='pct_counts_mt', cols=3, threshold_line=False, samples=T_SAMPELS)

In [None]:
build_scatter(y='n_genes_by_counts', cols=3, threshold_line=False, samples=T_SAMPELS)

## Let's perform preliminary data processing

### Preparing combined data for plot UMAP visualisation

We want to get a good UMAP visualization, so it is advisable to combine the samples.
* We will use `scvi` tool.

In [None]:
# Merge every filtered Trem2 -/- sample into one object; the sample of
# origin is recorded under the "Sample" key.
T_COMBINED = T_SAMPELS[0].concatenate(*T_SAMPELS[1:], batch_key="Sample")

In [None]:
T_COMBINED

The combined object consists of 24936 cells.

`scvi` needs raw counts, let's save them on the new layer.

In [None]:
T_COMBINED.layers['counts'] = T_COMBINED.X.copy()

### Normalization, log-transformation, scaling and simple annotation of the samples

We are going to use celltypist, so let's create separate objects for it.

In [None]:
# Independent copies of the filtered samples, taken before the
# normalisation applied to T_SAMPELS in the next cell.
T_CELLTYPIST = [adata.copy() for adata in T_SAMPELS]

Standard data transformation.

In [None]:
# In-place normalise -> log1p -> scale (values clipped at 10) for each sample.
for adata in T_SAMPELS:
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)
    sc.pp.scale(adata, max_value=10)

#### scVI transformation

... and for combined object:

In [None]:
# Normalise .X to 1e4 counts per cell, then log1p. The raw counts were saved
# to layers['counts'] beforehand and are what scVI is pointed at below.
sc.pp.normalize_total(T_COMBINED, target_sum=1e4)
sc.pp.log1p(T_COMBINED)

In [None]:
# Register T_COMBINED with scVI: counts are read from the 'counts' layer,
# 'Sample' is passed as a categorical covariate, and the two QC columns
# are passed as continuous covariates.
scvi.model.SCVI.setup_anndata(T_COMBINED, layer='counts', 
                             categorical_covariate_keys=['Sample'],
                             continuous_covariate_keys=['pct_counts_mt', 'total_counts'])

In [None]:
model = scvi.model.SCVI(T_COMBINED)

In [None]:
model.train()

In [None]:
latent_T = model.get_latent_representation()

In [None]:
T_COMBINED.obsm['X_scVI'] = latent_T

In [None]:
T_COMBINED.layers['scVI_normalized'] = model.get_normalized_expression(library_size=1e4)

In [None]:
sc.pp.scale(T_COMBINED, max_value=10)

In [None]:
# Neighbour graph built on the scVI latent representation (cosine metric,
# 18 neighbours), followed by the UMAP embedding.
sc.pp.neighbors(T_COMBINED, use_rep='X_scVI', metric='cosine', n_neighbors=18)
sc.tl.umap(T_COMBINED, min_dist=0.3)

In [None]:
sc.tl.leiden(T_COMBINED, resolution=0.5)

#### To prepare the celltypist annotation

In [None]:
# CellTypist gets its own object so that its preprocessing does not touch T_COMBINED.
T_COMBINED_CellTypist = T_COMBINED.copy()
# Copy the raw counts into `.X`. Assigning the layer object itself would make
# `.X` share memory with `T_COMBINED.layers["counts"]`, so any in-place
# preprocessing of the CellTypist object could overwrite the stored raw counts.
T_COMBINED_CellTypist.X = T_COMBINED.layers["counts"].copy()

In [None]:
# Normalise every cell to 10,000 counts and log1p-transform `.X` of the CellTypist copy.
# NOTE(review): `normalize_per_cell` is the legacy scanpy API (newer releases
# recommend `normalize_total`) - confirm it is available in the installed version.
sc.pp.normalize_per_cell(T_COMBINED_CellTypist, counts_per_cell_after=1e4)
sc.pp.log1p(T_COMBINED_CellTypist)

In [None]:
# Annotate the cells with the CellTypist model `mmouse` (not defined in this part of
# the notebook) using majority voting, then copy the voted label and the confidence
# score into `.obs` of the combined object.
predictions = celltypist.annotate(T_COMBINED_CellTypist, model=mmouse, majority_voting=True)
predictions_adata = predictions.to_adata()
T_COMBINED.obs["celltypist_cell_label_coarse"] = predictions_adata.obs['majority_voting']
T_COMBINED.obs["celltypist_conf_score_coarse"] = predictions_adata.obs["conf_score"]

# PART 3: paired analysis of Trem2 (-/-) & WT littermate control (lc)

## Data import

In [None]:
Trem2_KO_SS = sc.read_10x_mtx(
    './GSE190856_RAW/H005/',       
    var_names='gene_ids',      
    cache=True)

Trem2_KO_SS.var_names_make_unique()

In [None]:
Trem2_KO_3DL = sc.read_10x_mtx(
    './GSE190856_RAW/H006/',  
    var_names='gene_ids',      
    cache=True)

Trem2_KO_3DL.var_names_make_unique()

In [None]:
Trem2_KO_7DL = sc.read_10x_mtx(
    './GSE190856_RAW/H008/',  
    var_names='gene_ids',      
    cache=True)

Trem2_KO_7DL.var_names_make_unique()

In [None]:
WT_LC_SS = sc.read_10x_mtx(
    './GSE190856_RAW/H001/',  
    var_names='gene_ids',      
    cache=True)

WT_LC_SS.var_names_make_unique()

In [None]:
WT_LC_3DL = sc.read_10x_mtx(
    './GSE190856_RAW/H002/',  
    var_names='gene_ids',      
    cache=True)

WT_LC_3DL.var_names_make_unique()

In [None]:
WT_LC_7DL = sc.read_10x_mtx(
    './GSE190856_RAW/H011/',  
    var_names='gene_ids',      
    cache=True)

WT_LC_7DL.var_names_make_unique()

Add metadata

In [None]:
# The three parallel lists below share one order: knock-out samples first, then
# their wild-type littermate controls, each at steady state / 3 days / 7 days.
PAIR_SAMPELS = [
    Trem2_KO_SS, Trem2_KO_3DL, Trem2_KO_7DL,
    WT_LC_SS, WT_LC_3DL, WT_LC_7DL,
]

# Human-readable names.
P_NAMES = [
    'Trem2 -/- steady state (H005)',
    'Trem2 -/- 3 days later (H006)',
    'Trem2 -/- 7 days later (H008)',
    'WT littermate control\nsteady state (H001)',
    'WT littermate control\n3 days later (H002)',
    'WT littermate control\n7 days later (H011)',
]

# Short identifiers (the notebook variable names).
P_VARIABLES = [
    'Trem2_KO_SS', 'Trem2_KO_3DL', 'Trem2_KO_7DL',
    'WT_LC_SS', 'WT_LC_3DL', 'WT_LC_7DL',
]

# Store the names and the raw (pre-filtering) dimensions in `.uns` of every sample.
for obj, name, var in zip(PAIR_SAMPELS, P_NAMES, P_VARIABLES):
    obj.uns.update({
        'sample_name': name,
        'var_name': var,
        'cell_count': obj.n_obs,
        'gene_count': obj.n_vars,
    })

# Display the metadata of the last sample as a quick sanity check.
obj.uns

## Let's filter the data

In [None]:
# Flag mitochondrial genes (symbols starting with 'mt-') and compute the QC metrics in
# place; `qc_vars=['mt']` yields the `pct_counts_mt` column used for filtering later on.
for sample in PAIR_SAMPELS:
    sample.var['mt'] = sample.var['gene_symbols'].str.startswith('mt-')
    sc.pp.calculate_qc_metrics(sample, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

### Trem2 (-/-) and WT littermate control samples before filtering: violin plots with `total_counts`, `n_genes_by_counts` and `pct_counts_mt`.

In [None]:
# Bar chart of the raw (pre-filtering) number of cells per sample.
plt.bar(P_VARIABLES, [sample.uns['cell_count'] for sample in PAIR_SAMPELS],
        color=['blue', 'red', 'green'], alpha=0.5)

plt.grid(lw=0.4, ls='--', alpha=0.4, axis='y')
plt.minorticks_on()

plt.title('Samples')
plt.xlabel('Samples')
plt.ylabel('number of cells')

# Print the exact count just below the top of each bar.
for x, y in zip(P_VARIABLES, (sample.uns['cell_count'] for sample in PAIR_SAMPELS)):
    plt.text(x, y - 500, str(y), ha='center')

plt.xticks(rotation=90)

In [None]:
for sample in PAIR_SAMPELS:
    build_violin(sample)

In [None]:
build_scatter(y='pct_counts_mt', cols=3, ytline_coords=[20], samples=PAIR_SAMPELS[0:3])

In [None]:
build_scatter(y='pct_counts_mt', cols=3, ytline_coords=[20], samples=PAIR_SAMPELS[3:6])

In [None]:
build_scatter(y='n_genes_by_counts', cols=3, ytline_coords=[6_000, 200], samples=PAIR_SAMPELS[:3])

In [None]:
build_scatter(y='n_genes_by_counts', cols=3, ytline_coords=[6_000, 200], samples=PAIR_SAMPELS[3:])

### filtering samples

In [None]:
# Quality filtering of every sample. The scanpy filters modify `sample` in place;
# the mitochondrial filters are applied by slicing, and the slice is turned into an
# independent AnnData with an explicit `.copy()` so that the `.uns` writes below do
# not act on a view (which would rely on anndata's implicit view-to-copy conversion).
for i, sample in enumerate(PAIR_SAMPELS):

    # cells expressing fewer than 200 or more than 6_000 genes are removed
    sc.pp.filter_cells(sample, min_genes=200)
    sc.pp.filter_cells(sample, max_genes=6_000)

    # cells containing more than 40_000 counts are removed
    sc.pp.filter_cells(sample, max_counts=40_000)

    # genes that are not detected in any cell are removed
    sc.pp.filter_genes(sample, min_cells=1)

    # keep cells with less than 20 % mitochondrial counts ...
    keep_cells = sample.obs['pct_counts_mt'] < 20
    # ... and drop the mitochondrial genes themselves
    keep_genes = ~sample.var['mt'].astype(bool)

    filtered = sample[keep_cells, :][:, keep_genes].copy()

    # record the post-filtering dimensions (cells / genes)
    filtered.uns['filtered_cell_count'] = filtered.n_obs
    filtered.uns['filtered_gene_count'] = filtered.n_vars

    PAIR_SAMPELS[i] = filtered

In [None]:
# Bar chart of the number of cells per sample that survived filtering.
plt.bar(P_VARIABLES, [sample.uns['filtered_cell_count'] for sample in PAIR_SAMPELS],
        color=['blue', 'red', 'green'], alpha=0.5)

plt.grid(lw=0.4, ls='--', alpha=0.4, axis='y')
plt.minorticks_on()

plt.title('Filtred samples')
plt.xlabel('Samples')
plt.ylabel('number of cells')

# Print the exact count just below the top of each bar.
for x, y in zip(P_VARIABLES, (sample.uns['filtered_cell_count'] for sample in PAIR_SAMPELS)):
    plt.text(x, y - 500, str(y), ha='center')

plt.xticks(rotation=90)

In [None]:
# Bar chart of the number of genes per sample that survived filtering.
plt.bar(P_VARIABLES, [sample.uns['filtered_gene_count'] for sample in PAIR_SAMPELS],
        color=['blue', 'red', 'green'], alpha=0.5)

plt.grid(lw=0.4, ls='--', alpha=0.4, axis='y')
plt.minorticks_on()

plt.title('Filtred samples')
plt.xlabel('Samples')
plt.ylabel('number of genes')

# Print the exact count just below the top of each bar.
for x, y in zip(P_VARIABLES, (sample.uns['filtered_gene_count'] for sample in PAIR_SAMPELS)):
    plt.text(x, y - 1500, str(y), ha='center')

plt.xticks(rotation=90)

In [None]:
for sample in PAIR_SAMPELS:
    build_violin(sample, threshold_line=False)

In [None]:
build_scatter(y='pct_counts_mt', cols=3, ytline_coords=[20], samples=PAIR_SAMPELS[0:3], threshold_line=False)

In [None]:
build_scatter(y='pct_counts_mt', cols=3, ytline_coords=[20], samples=PAIR_SAMPELS[3:], threshold_line=False)

In [None]:
build_scatter(y='n_genes_by_counts', cols=3, samples=PAIR_SAMPELS[:3], threshold_line=False)

In [None]:
build_scatter(y='n_genes_by_counts', cols=3, samples=PAIR_SAMPELS[3:], threshold_line=False)

## Let's perform preliminary data processing

### Preparing combined data for UMAP visualisation

We want to get a good UMAP visualization, so it is advisable to combine the samples.
* We will use `scvi` tool.

In [None]:
# Merge all samples of the KO / WT-littermate group into one AnnData object; the
# source sample of every cell is recorded under the `Sample` batch key.
PAIR_COMBINED =  PAIR_SAMPELS[0].concatenate(*PAIR_SAMPELS[1:], batch_key="Sample")
# Display a summary of the combined object.
PAIR_COMBINED

`scvi` needs raw counts, let's save them on the new layer.

In [None]:
PAIR_COMBINED.layers['counts'] = PAIR_COMBINED.X.copy()

### Normalization, log-transformation, scaling and simple annotation of the samples

We are going to use celltypist, so let's create separate copies of the samples for it.

In [None]:
# Independent copies of the samples, taken before the in-place
# normalisation / scaling of PAIR_SAMPELS that follows.
PAIR_CELLTYPIST = [sample.copy() for sample in PAIR_SAMPELS]

Standard data transformation.

In [None]:
# Per-sample preprocessing, applied in place: scale every cell to 10,000
# counts, log1p-transform, then scale with values clipped at 10.
for sample in PAIR_SAMPELS:
    sc.pp.normalize_total(sample, target_sum=1e4)
    sc.pp.log1p(sample)
    sc.pp.scale(sample, max_value=10)

#### scVI transformation

... and for combined object:

In [None]:
# Library-size normalise (10,000 counts per cell) and log1p-transform `.X` of the
# combined object; the raw counts were stored beforehand in `layers['counts']`.
sc.pp.normalize_total(PAIR_COMBINED, target_sum=1e4)
sc.pp.log1p(PAIR_COMBINED)

In [None]:
# Register the data with scVI: the model reads the `counts` layer, with `Sample` as a
# categorical covariate and `pct_counts_mt` / `total_counts` as continuous covariates.
scvi.model.SCVI.setup_anndata(PAIR_COMBINED, layer='counts', 
                             categorical_covariate_keys=['Sample'],
                             continuous_covariate_keys=['pct_counts_mt', 'total_counts'])

In [None]:
model = scvi.model.SCVI(PAIR_COMBINED)

In [None]:
model.train()

In [None]:
latent_PAIR = model.get_latent_representation()

In [None]:
PAIR_COMBINED.obsm['X_scVI'] = latent_PAIR

In [None]:
PAIR_COMBINED.layers['scVI_normalized'] = model.get_normalized_expression(library_size=1e4)

In [None]:
sc.pp.scale(PAIR_COMBINED, max_value=10)

In [None]:
# Build the neighbour graph on the scVI latent representation (`X_scVI`, cosine
# metric, 18 neighbours) and compute the UMAP embedding from that graph.
sc.pp.neighbors(PAIR_COMBINED, use_rep='X_scVI', metric='cosine', n_neighbors=18)
sc.tl.umap(PAIR_COMBINED, min_dist=0.3)

In [None]:
sc.tl.leiden(PAIR_COMBINED, resolution=0.5)

#### To prepare the celltypist annotation

In [None]:
# CellTypist gets its own object so that its preprocessing does not touch PAIR_COMBINED.
PAIR_COMBINED_CellTypist = PAIR_COMBINED.copy()
# Copy the raw counts into `.X`. Assigning the layer object itself would make
# `.X` share memory with `PAIR_COMBINED.layers["counts"]`, so any in-place
# preprocessing of the CellTypist object could overwrite the stored raw counts
# (which are later written to disk together with PAIR_COMBINED).
PAIR_COMBINED_CellTypist.X = PAIR_COMBINED.layers["counts"].copy()

In [None]:
# Normalise every cell to 10,000 counts and log1p-transform `.X` of the CellTypist copy.
# NOTE(review): `normalize_per_cell` is the legacy scanpy API (newer releases
# recommend `normalize_total`) - confirm it is available in the installed version.
sc.pp.normalize_per_cell(PAIR_COMBINED_CellTypist, counts_per_cell_after=1e4)
sc.pp.log1p(PAIR_COMBINED_CellTypist)

In [None]:
# Annotate the cells with the CellTypist model `mmouse` (not defined in this part of
# the notebook) using majority voting, then copy the voted label and the confidence
# score into `.obs` of the combined object.
predictions = celltypist.annotate(PAIR_COMBINED_CellTypist, model=mmouse, majority_voting=True)
predictions_adata = predictions.to_adata()
PAIR_COMBINED.obs["celltypist_cell_label_coarse"] = predictions_adata.obs['majority_voting']
PAIR_COMBINED.obs["celltypist_conf_score_coarse"] = predictions_adata.obs["conf_score"]

In [None]:
import time

# Persist the annotated combined object (gzip-compressed h5ad) and report how
# long the write took.
fn = './proc_data/PAIR_COMBINED.h5ad'
t0 = time.time()
PAIR_COMBINED.write_h5ad(fn, compression='gzip')
print(f'{time.time() - t0:.1f} seconds passed')

In [None]:
del(PAIR_COMBINED_CellTypist)
gc.collect()

In [None]:
del PAIR_COMBINED.layers['counts']
gc.collect()

# Dimension reduction, clustering and visualization for all samples

Synchronize the colors of different samples groups

In [None]:
# Cell types predicted by CellTypist in each of the three sample groups.
T_CellTypes_pred_set = {
    'Ccr7 DC', 'Cd206 Mac', 'CD8+ T cell', 'DC (CD103+ CD11-b)', 'Fibroblast',
    'ILC2', 'ILC3', 'Inflammatory Monocytes', 'Ly6c2 Mono', 'Mast cell',
    'Monocytes', 'NK cell', 'Naive B cells', 'Neutrophil', 'Plasma cell',
    'Resting CD4+ T cells', 'Stromal cell (DN)', 'capillary Aqp7+', 'eMBC',
}

WT_CellTypes_pred_set = {
    'Cd206 Mac', 'DC', 'DC (CD103+ CD11-b)', 'Fibroblast', 'ILC2', 'ILC3',
    'Inflammatory Monocytes', 'Mast cell', 'NK cell', 'Naive B cells',
    'Neutrophil', 'Plasma cell', 'Resting CD4+ T cells', 'capillary Aqp7+',
}

PAIR_CellTypes_pred_set = {
    'Ccr7 DC', 'Cd206 Mac', 'DC (CD103+ CD11-b)', 'Fibroblast', 'ILC2',
    'ILC3', 'Inflammatory Monocytes', 'Ly6c2 Mono', 'Mast cell',
    'Monocytes', 'NK cell', 'Naive B cells', 'Neutrophil', 'Plasma cell',
    'Resting CD4+ T cells', 'Stromal cell (DN)', 'capillary Aqp7+', 'eMBC',
    'lymphatic',
}

# Colour pool: the 20 hex colours of seaborn's 'tab20' palette plus one extra colour.
s = sns.color_palette('tab20').as_hex()
s.extend(['#bdb76b'])

# Every cell type that occurs in at least one of the groups.
ALL_CELLS_PRED = T_CellTypes_pred_set | WT_CellTypes_pred_set | PAIR_CellTypes_pred_set

In [None]:
'''
a = PAIR_COMBINED.obs.celltypist_cell_label_coarse.value_counts()
b = WT_COMBINED.obs.celltypist_cell_label_coarse.value_counts()
c = T_COMBINED.obs.celltypist_cell_label_coarse.value_counts()
'''

In [None]:
'''
f = pd.concat([a,b,c], axis=1)
f = f.sum(axis=1).sort_values(ascending=False)
ALL_CELLS_PRED = f
ALL_CELLS_PRED
'''

In [None]:
#ALL_CELLS_PRED.index

In [None]:
# Fixed, explicit order of all predicted cell types (replaces the unordered set built
# earlier). The order matters: colours are assigned to the types by position when
# CELL_COLOR_DICT is built.
# NOTE(review): presumably sorted by total cell count across the three datasets, as in
# the disabled cells above - confirm.
ALL_CELLS_PRED = ['Cd206 Mac', 'capillary Aqp7+', 'Inflammatory Monocytes', 'Neutrophil',
                'Fibroblast', 'Naive B cells', 'NK cell', 'Ly6c2 Mono',
                'DC (CD103+ CD11-b)', 'Resting CD4+ T cells', 'ILC2', 'Monocytes',
                'eMBC', 'Mast cell', 'Stromal cell (DN)', 'ILC3', 'DC', 'CD8+ T cell',
                'lymphatic', 'Ccr7 DC', 'Plasma cell']
    
    

In [None]:
# Map every predicted cell type to one fixed hex colour so that all figures share
# the same colour code. Pairing is positional: i-th cell type -> i-th palette colour.
# Fail loudly if the palette is too short; zipping would otherwise silently leave
# the trailing cell types without a colour and cause a KeyError much later.
if len(s) < len(ALL_CELLS_PRED):
    raise ValueError(
        f'colour palette has {len(s)} entries but {len(ALL_CELLS_PRED)} cell types need a colour')

CELL_COLOR_DICT = dict(zip(ALL_CELLS_PRED, s))

In [None]:
CELL_COLOR_DICT

## WT

In [None]:
WT_COMBINED = sc.read_h5ad('./proc_data/WT_COMBINED.h5ad')

In [None]:
'''
sc.pp.neighbors(WT_COMBINED, metric='cosine', n_neighbors=18, use_rep='X_scVI')
sc.tl.umap(WT_COMBINED, min_dist=0.3)
'''

In [None]:
# Give WT_COMBINED the shared colour code: one colour per category of the
# CellTypist label, in category order, as scanpy expects in `.uns`.
new_colors = [
    CELL_COLOR_DICT[key]
    for key in WT_COMBINED.obs.celltypist_cell_label_coarse.cat.categories
]

WT_COMBINED.uns['celltypist_cell_label_coarse_colors'] = new_colors

#### Visualization of single samples (hide)

If you do not combine the samples, the UMAPs will turn out to be too different and it will be difficult to perceive them.

In [None]:
# Per-sample embedding (no batch integration): PCA -> neighbour graph -> UMAP, each
# with the same fixed `random_state`, followed by a plot coloured by the CellTypist
# label and by Zbp1 (looked up through its gene id in `names_dict`).
for i in range(len(WT_SAMPELS)):
    
    sc.tl.pca(WT_SAMPELS[i], n_comps=40, random_state=110701)
    sc.pp.neighbors(WT_SAMPELS[i], metric='cosine', n_pcs=40, random_state=110701, n_neighbors=18)
    sc.tl.umap(WT_SAMPELS[i], min_dist=0.5, random_state=110701)
    
    # one figure per sample, two panels side by side
    sc.pl.umap(
    WT_SAMPELS[i],
    color=["celltypist_cell_label_coarse", names_dict['Zbp1']],
    size=20,
    frameon=False,
    sort_order=False,
    wspace=1)

#### custom cmap

In [None]:
# RGBA value of a pink tone (channels scaled by 1/256, alpha = 1).
pink = np.array([248/256, 24/256, 148/256, 1])
# Light-to-red sequential colormap. The stray trailing '.' that followed this call
# was a SyntaxError and kept the whole cell from running.
hex_pallete2 = sns.light_palette('red', as_cmap=True)
hex_pallete2

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# 'SeuratLike' colormap: light grey for the lowest values, then increasingly
# saturated reds, ending in dark red. Each RGBA stop is repeated so that it
# takes up a proportional share of the colour list (5 + 33 + 5*38 + 19 entries).
cm = LinearSegmentedColormap.from_list(
        'SeuratLike',
        [(0.83, 0.83, 0.83, 0.5)] * 5
        + [(1, 0.75, 0.75, 0.6)] * 33
        + [(1, 0.6, 0.6, 0.7)] * 38
        + [(1, 0.4, 0.4, 0.7)] * 38
        + [(1, 0.3, 0.3, 0.8)] * 38
        + [(1, 0.2, 0.2, 0.8)] * 38
        + [(1, 0.15, 0.15, 0.9)] * 38
        + [(0.55, 0, 0, 1)] * 19,
        N=25)
#*[(1, 0, 0, 1) for i in range(0,250)]],
cm

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# Simpler variant of the 'SeuratLike' colormap: eight evenly weighted RGBA stops
# (light grey -> dark red), built with N = 7.
# This assignment replaces the `cm` defined in the previous cell.
cm = LinearSegmentedColormap.from_list(
        'SeuratLike', 
        [(0.83, 0.83, 0.83, 0.5),
         (1, 0.75, 0.75, 0.6),
         (1, 0.6, 0.6, 0.7),
         (1, 0.4, 0.4, 0.7),
         (1, 0.3, 0.3, 0.8),
         (1, 0.2, 0.2, 0.8),
         (1, 0.15, 0.15, 0.9),
         (0.55, 0, 0, 1)],
        N = 7)
#*[(1, 0, 0, 1) for i in range(0,250)]], 
cm

In [None]:
hex_pallete = sns.light_palette('red').as_hex()
hex_pallete[0] = "#A5A5A5"
hex_pallete[-1] = "#8B0000"

In [None]:
sns.color_palette(hex_pallete, as_cmap=True)

In [None]:
sns.blend_palette(['#A5A5A5','red'], n_colors=6)

In [None]:
sns.blend_palette(['#A5A5A5','#8B0000'])

In [None]:
sns.color_palette(hex_pallete)

In [None]:
sc.pl.umap(WT_COMBINED[WT_COMBINED.obs.Sample == '2'],
           color=[names_dict['Zbp1']], title='',
           frameon=False, size=30, cmap=cm, vmin=0, vmax=6.8, show=False)

In [None]:
sc.pl.umap(WT_COMBINED[WT_COMBINED.obs.Sample == '2'],
           color=[names_dict['Zbp1']], title='',
           frameon=False, size=30, cmap=sns.blend_palette(['#d3d3d3','red'], as_cmap=True),vmin=0, vmax=6.8, show=False)

In [None]:
sc.pl.umap(WT_COMBINED[WT_COMBINED.obs.Sample == '2'],
           color=[names_dict['Zbp1']], title='',
           frameon=False, size=30, cmap=sns.blend_palette(['#d3d3d3','red'], as_cmap=True),vmin=0, vmax=6.8, show=False)

#### WT-SAMPLES UMAPs: results

In [None]:
# Overview figure of the aggregated WT data: CellTypist confidence score (left)
# and CellTypist labels (right) on the same UMAP embedding.
fig, axes = plt.subplots(1,2,figsize=(20,10))
plt.subplots_adjust(wspace=0)

sc.pl.umap(WT_COMBINED, color=["celltypist_cell_label_coarse"], ax=axes[1],
           show=False, size=12, frameon=False)

axes[1].set_title('Aggregated datasets:\nCelltypist automatic annotation', fontsize=14)

# show=False here as well: otherwise scanpy may render the figure at this point
# (depending on its autoshow setting), i.e. before the title below is applied.
sc.pl.umap(WT_COMBINED, color=["celltypist_conf_score_coarse"], ax=axes[0],
           show=False, size=12, frameon=False)

axes[0].set_title('Aggregated datasets:\nCelltypist confidence score', fontsize=14)
fig.savefig('WT_agg.pdf', format='pdf', bbox_inches='tight')

In [None]:
# 30 30
# axes[ctype][1].legend_.set_bbox_to_anchor((x,y))

# Grid of UMAPs for the four WT samples (one column each): CellTypist labels plus
# Zbp1, Ripk3 and Mlkl expression. Row labels show the gene symbol with its gene id,
# column titles the sample name, x-labels the number of cells left after filtering.
fig, axes = plot_custom_UMAP(
    anndata=WT_COMBINED, samples=[0,1,2,3],
    ylabsize=10, xlabsize=10, figsize=(10, 10),
    size=10, vmax=10, titlesize=10,
    annatation_cols=5, an_x=4.5, an_y=1.6,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types',
          *(f'{gene}\n({names_dict[gene]})' for gene in ('Zbp1', 'Ripk3', 'Mlkl'))],
    title=[WT_METADATA[idx]['sample_name'] for idx in range(4)],
    xlab=[f"{WT_METADATA[idx]['filtered_cell_count']} cells" for idx in range(4)],
    color=['celltypist_cell_label_coarse', 'Zbp1', 'Ripk3', 'Mlkl'])

fig.savefig('WT_Zbp1_Ripk3_Mlkl.pdf', format='pdf')

In [None]:
del WT_COMBINED
gc.collect()

---------------

## Trem2 (-/-)

In [None]:
# Give T_COMBINED the shared colour code: one colour per category of the
# CellTypist label, in category order, as scanpy expects in `.uns`.
new_colors = [
    CELL_COLOR_DICT[key]
    for key in T_COMBINED.obs.celltypist_cell_label_coarse.cat.categories
]
T_COMBINED.uns['celltypist_cell_label_coarse_colors'] = new_colors

In [None]:
# Overview figure of the aggregated Trem2 -/- data: CellTypist confidence score
# (left) and CellTypist labels (right) on the same UMAP embedding.
fig, axes = plt.subplots(1,2,figsize=(20,10))
plt.subplots_adjust(wspace=0)

sc.pl.umap(T_COMBINED, color=["celltypist_cell_label_coarse"], ax=axes[1],
           show=False, size=12, frameon=False)

axes[1].set_title('Aggregated datasets Trem2 -/-:\nCelltypist automatic annotation', fontsize=14)

# show=False here as well: otherwise scanpy may render the figure at this point
# (depending on its autoshow setting), i.e. before the title below is applied.
sc.pl.umap(T_COMBINED, color=["celltypist_conf_score_coarse"], ax=axes[0],
           show=False, size=12, frameon=False)

axes[0].set_title('Aggregated datasets Trem2 -/-:\nCelltypist confidence score', fontsize=14)
fig.savefig('T_agg.pdf', format='pdf', bbox_inches='tight')

In [None]:
# Figure for the three Trem2 -/- samples (one column per sample):
# top row = CellTypist labels, bottom row = Zbp1 expression.
# Changes versus the previous version of this cell:
#   * the "<AXES>" / "<\AXES>" / "<TITLE>" string literals used as section markers
#     are now real comments (the closing markers contained invalid escape sequences);
#   * tick_params receives booleans; the strings 'off' / 'on' are not interpreted
#     by current matplotlib and are simply truthy.
fig, axes = plt.subplots(2, 3, figsize=(30,15), layout=None)
plt.subplots_adjust(wspace=0) # spacing between the axes

ctype=0     # row index of the cell-type panels
zbp1=1      # row index of the Zbp1 panels
vmax=6.5    # upper limit of the Zbp1 colour scale

cmap='rocket_r' #Reds

# NOTE: `s` is also the name of the colour-palette list used to build
# CELL_COLOR_DICT earlier in the notebook; this assignment overwrites it.
s = 30      # marker size

# anchor of the legend that is attached to the middle cell-type panel
x = 2
y = 1.3

# ---------------- sample '0' ----------------

sc.pl.umap(T_COMBINED[T_COMBINED.obs.Sample == '0'],
           color=[names_dict['Zbp1']], cmap=cmap,
           frameon=False, size=s, title='', vmax=vmax,
           ax=axes[zbp1][0], show=False)

# axes: re-enable the axis so that the y-label (row caption) is drawn,
# but keep ticks and frame hidden
axes[zbp1][0].axis('on')
axes[zbp1][0].tick_params(top=False, bottom=False, left=False, right=False,
               labelleft=True, labelbottom=False)

axes[zbp1][0].set_ylabel('Zbp1 (ENSMUSG00000027514)', rotation=90, fontsize=20)
axes[zbp1][0].set_xlabel('')
axes[zbp1][0].set(frame_on=False)

sc.pl.umap(T_COMBINED[T_COMBINED.obs.Sample == '0'],
           color=['celltypist_cell_label_coarse'],
           frameon=False, size=s,
           ax=axes[ctype][0], show=False, legend_loc=None)

# axes: the x-label carries the number of cells left after filtering
axes[ctype][0].axis('on')
axes[ctype][0].tick_params(top=False, bottom=False, left=False, right=False,
               labelleft=True, labelbottom=False)

axes[ctype][0].set_ylabel('')
axes[ctype][0].set_xlabel(f'{T_SAMPELS[0].uns["filtered_cell_count"]} cells', fontsize=14)
axes[ctype][0].set(frame_on=False)

# title
axes[ctype][0].set_title(f"{T_SS.uns['sample_name']}", fontsize = 20)

# ---------------- sample '1' ----------------

sc.pl.umap(T_COMBINED[T_COMBINED.obs.Sample == '1'],
           color=[names_dict['Zbp1']], title='',
           frameon=False, size=s, cmap=cmap, vmax=vmax,
          ax=axes[zbp1][1], show=False)

# the only cell-type panel drawn with a legend; it is repositioned below
sc.pl.umap(T_COMBINED[T_COMBINED.obs.Sample == '1'],
           color=['celltypist_cell_label_coarse'],
           frameon=False, size=s, cmap=cmap,
          ax=axes[ctype][1], show=False)

# title
axes[ctype][1].set_title(f"{T_3DL.uns['sample_name']}", fontsize = 20)

setup_ax(axes[ctype][1], T_COMBINED, cols=9)
axes[ctype][1].legend_.set_bbox_to_anchor((x,y))

# axes
axes[ctype][1].axis('on')
axes[ctype][1].tick_params(top=False, bottom=False, left=False, right=False,
               labelleft=True, labelbottom=False)

axes[ctype][1].set_ylabel('')
axes[ctype][1].set_xlabel(f'{T_SAMPELS[1].uns["filtered_cell_count"]} cells', fontsize=14)
axes[ctype][1].set(frame_on=False)

# ---------------- sample '2' ----------------

sc.pl.umap(T_COMBINED[T_COMBINED.obs.Sample == '2'],
           color=[names_dict['Zbp1']], title='',
           frameon=False, size=s, cmap=cmap, vmax=vmax,
          ax=axes[zbp1][2], show=False)

sc.pl.umap(T_COMBINED[T_COMBINED.obs.Sample == '2'],
           color=['celltypist_cell_label_coarse'],
           frameon=False, size=s,
          ax=axes[ctype][2], show=False, legend_loc=None)

# title
axes[ctype][2].set_title(f"{T_7DL.uns['sample_name']}", fontsize = 20)

# axes
axes[ctype][2].axis('on')
axes[ctype][2].tick_params(top=False, bottom=False, left=False, right=False,
               labelleft=True, labelbottom=False)

axes[ctype][2].set_ylabel('')
axes[ctype][2].set_xlabel(f'{T_SAMPELS[2].uns["filtered_cell_count"]} cells', fontsize=14)
axes[ctype][2].set(frame_on=False)

fig.savefig('T.pdf', format='pdf')

## PAIR: KO & WT_LC

In [None]:
PAIR_COMBINED = sc.read_h5ad('./proc_data/PAIR_COMBINED.h5ad')

In [None]:
# Give PAIR_COMBINED the shared colour code: one colour per category of the
# CellTypist label, in category order, as scanpy expects in `.uns`.
new_colors = [
    CELL_COLOR_DICT[key]
    for key in PAIR_COMBINED.obs.celltypist_cell_label_coarse.cat.categories
]

PAIR_COMBINED.uns['celltypist_cell_label_coarse_colors'] = new_colors

In [None]:
# Overview figure of the aggregated Trem2 KO / WT littermate data: CellTypist
# confidence score (left) and CellTypist labels (right) on the same UMAP embedding.
fig, axes = plt.subplots(1,2,figsize=(20,10))
plt.subplots_adjust(wspace=0)

sc.pl.umap(PAIR_COMBINED, color=["celltypist_cell_label_coarse"], ax=axes[1],
           show=False, size=12, frameon=False)

axes[1].set_title('Aggregated datasets Trem2 KO & WT littermate control:\nCelltypist automatic annotation', fontsize=14)

# show=False here as well: otherwise scanpy may render the figure at this point
# (depending on its autoshow setting), i.e. before the title below is applied.
sc.pl.umap(PAIR_COMBINED, color=["celltypist_conf_score_coarse"], ax=axes[0],
           show=False, size=12, frameon=False)

axes[0].set_title('Aggregated datasets Trem2 KO & WT littermate control:\nCelltypist confidence score', fontsize=14)
fig.savefig('PAIR_agg.pdf', format='pdf', bbox_inches='tight')

#### ZBP1-dep necroptosis

In [None]:
# Grid of UMAPs for the six KO / WT-littermate samples (one column each):
# CellTypist labels plus Zbp1, Ripk3 and Mlkl expression.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0,1,2,3,4,5],
    ylabsize=14, xlabsize=11, figsize=(15, 10), add_numbers=True,
    size=10, vmax=10, titlesize=12, ncols=6,
    annatation_cols=5, an_x=6, an_y=1.8, loc_legend='right margin',
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Zbp1', 'Ripk3', 'Mlkl'],
    title=[PAIR_METADATA[idx]['sample_name'] for idx in range(6)],
    xlab=[f"{PAIR_METADATA[idx]['filtered_cell_count']} cells" for idx in range(6)],
    color=['celltypist_cell_label_coarse', 'Zbp1', 'Ripk3', 'Mlkl'])


# add numbers on picture

# Manual (dx, dy) nudges for the numbers that would otherwise sit badly on the
# cluster centroid. Every label gets an explicit pair: previously '18' and '6'
# set only the y-offset and silently reused the x-offset of the preceding label.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

umap_coords = PAIR_COMBINED.obsm["X_umap"]

for legend_txt in labels:
    # legend entries are split at the first ':' into "<number part>:<cell type>";
    # the first character of the number part is dropped
    number_part, type_part = legend_txt.split(':', 1)
    label = number_part.rstrip()[1:]
    cell_type = type_part.strip()   # not named `type`, which would shadow the builtin

    # automatic position: centroid of all cells of this type in UMAP space
    is_type = (PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type).to_numpy()
    location_cells = umap_coords[is_type]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # manual correction (defaults to no shift)
    add_x, add_y = label_offsets.get(label, (0, 0))

    # numbers are written onto the cell-type panels of columns 5 and 2
    axes[0][5].text(x + add_x, y + add_y, label, size=8, fontweight='bold')
    axes[0][2].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# dashed vertical line along the left edge of column 3 in every row
for i in range(len(axes)):
    sep_ax = axes[i][3]
    x_left = sep_ax.get_xlim()[0]
    y_low, y_high = sep_ax.get_ylim()
    sep_ax.plot([x_left, x_left], [y_low, y_high],
                color='black', ls='--',
                lw=1, alpha = 0.5)


fig.savefig('./report_TREM2_KO_dataset/Zbp1-dep.pdf', bbox_inches='tight', format='pdf')
fig.savefig('./tmp/Zbp1-dep.png', bbox_inches='tight', format='png')
display(Image('./tmp/Zbp1-dep.png'))

#### external apoptosis

In [None]:
# Grid of UMAPs for the six KO / WT-littermate samples (one column each):
# CellTypist labels plus Irak3, Pik3r5, Fas, Fadd, Casp8, Diablo and Tradd expression.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0,1,2,3,4,5],
    ylabsize=14, xlabsize=11, figsize=(15, 20),
    add_numbers=True, loc_legend='right margin',
    size=10, vmax=10, titlesize=12, ncols=6, nrows=8,
    annatation_cols=1, an_x=7.6, an_y=1.35,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Irak3', 'Pik3r5', 'Fas', 'Fadd',
          'Casp8', 'Diablo', 'Tradd'],
    title=[PAIR_METADATA[idx]['sample_name'] for idx in range(6)],
    xlab=[f"{PAIR_METADATA[idx]['filtered_cell_count']} cells" for idx in range(6)],
    color=['celltypist_cell_label_coarse',
           'Irak3', 'Pik3r5', 'Fas', 'Fadd',
           'Casp8', 'Diablo', 'Tradd'])

# add numbers on picture

# Manual (dx, dy) nudges for the numbers that would otherwise sit badly on the
# cluster centroid. Every label gets an explicit pair: previously '18' and '6'
# set only the y-offset and silently reused the x-offset of the preceding label.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

umap_coords = PAIR_COMBINED.obsm["X_umap"]

for legend_txt in labels:
    # legend entries are split at the first ':' into "<number part>:<cell type>";
    # the first character of the number part is dropped
    number_part, type_part = legend_txt.split(':', 1)
    label = number_part.rstrip()[1:]
    cell_type = type_part.strip()   # not named `type`, which would shadow the builtin

    # automatic position: centroid of all cells of this type in UMAP space
    is_type = (PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type).to_numpy()
    location_cells = umap_coords[is_type]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # manual correction (defaults to no shift)
    add_x, add_y = label_offsets.get(label, (0, 0))

    # numbers are written onto the cell-type panels of columns 5 and 2
    axes[0][5].text(x + add_x, y + add_y, label, size=8, fontweight='bold')
    axes[0][2].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# dashed vertical line along the left edge of column 3 in every row
for i in range(len(axes)):
    sep_ax = axes[i][3]
    x_left = sep_ax.get_xlim()[0]
    y_low, y_high = sep_ax.get_ylim()
    sep_ax.plot([x_left, x_left], [y_low, y_high],
                color='black', ls='--',
                lw=1, alpha = 0.5)


fig.savefig('./report_TREM2_KO_dataset/external_apoptosis.pdf', bbox_inches='tight', format='pdf')
fig.savefig('./tmp/external_apoptosis.png', bbox_inches='tight', format='png')
display(Image('./tmp/external_apoptosis.png'))

#### anti-apoptotic factors (extrinsic)

In [None]:
# Grid of UMAPs for the six KO / WT-littermate samples (one column each):
# CellTypist labels plus Xiap, Cflar, Birc2 and Birc3 expression.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0,1,2,3,4,5],
    ylabsize=14, xlabsize=11, figsize=(15, 15),
    add_numbers=True, loc_legend='right margin',
    size=10, vmax=10, titlesize=12, ncols=6, nrows=5,
    annatation_cols=1, an_x=7.5, an_y=1.3,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Xiap', 'Cflar', 'Birc2', 'Birc3'],
    title=[PAIR_METADATA[idx]['sample_name'] for idx in range(6)],
    xlab=[f"{PAIR_METADATA[idx]['filtered_cell_count']} cells" for idx in range(6)],
    color=['celltypist_cell_label_coarse',
           'Xiap', 'Cflar', 'Birc2', 'Birc3'])

# add numbers on picture

# Manual (dx, dy) nudges for the numbers that would otherwise sit badly on the
# cluster centroid. Every label gets an explicit pair: previously '18' and '6'
# set only the y-offset and silently reused the x-offset of the preceding label.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

umap_coords = PAIR_COMBINED.obsm["X_umap"]

for legend_txt in labels:
    # legend entries are split at the first ':' into "<number part>:<cell type>";
    # the first character of the number part is dropped
    number_part, type_part = legend_txt.split(':', 1)
    label = number_part.rstrip()[1:]
    cell_type = type_part.strip()   # not named `type`, which would shadow the builtin

    # automatic position: centroid of all cells of this type in UMAP space
    is_type = (PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type).to_numpy()
    location_cells = umap_coords[is_type]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # manual correction (defaults to no shift)
    add_x, add_y = label_offsets.get(label, (0, 0))

    # numbers are written onto the cell-type panels of columns 5 and 2
    axes[0][5].text(x + add_x, y + add_y, label, size=8, fontweight='bold')
    axes[0][2].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# dashed vertical line along the left edge of column 3 in every row
for i in range(len(axes)):
    sep_ax = axes[i][3]
    x_left = sep_ax.get_xlim()[0]
    y_low, y_high = sep_ax.get_ylim()
    sep_ax.plot([x_left, x_left], [y_low, y_high],
                color='black', ls='--',
                lw=1, alpha = 0.5)


fig.savefig('./report_TREM2_KO_dataset/external_anti-apoptosis.pdf', bbox_inches='tight', format='pdf')
fig.savefig('./tmp/external_anti-apoptosis.pdf.png', bbox_inches='tight', format='png')
display(Image('./tmp/external_anti-apoptosis.pdf.png'))

#### internal apoptosis

In [None]:
# Grid of UMAPs for the six KO / WT-littermate samples (one column each):
# CellTypist labels plus Bax, Bak1 and Bok expression.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0,1,2,3,4,5],
    ylabsize=12, xlabsize=11, figsize=(15, 10),
    add_numbers=True, loc_legend='right margin',
    size=10, vmax=10, titlesize=11, ncols=6, nrows=4,
    annatation_cols=5, an_x=6.1, an_y=1.8,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Bax', 'Bak1', 'Bok'],
    title=[PAIR_METADATA[idx]['sample_name'] for idx in range(6)],
    xlab=[f"{PAIR_METADATA[idx]['filtered_cell_count']} cells" for idx in range(6)],
    color=['celltypist_cell_label_coarse',
           'Bax', 'Bak1', 'Bok'])

# add numbers on picture

# Manual (dx, dy) nudges for the numbers that would otherwise sit badly on the
# cluster centroid. Every label gets an explicit pair: previously '18' and '6'
# set only the y-offset and silently reused the x-offset of the preceding label.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

umap_coords = PAIR_COMBINED.obsm["X_umap"]

for legend_txt in labels:
    # legend entries are split at the first ':' into "<number part>:<cell type>";
    # the first character of the number part is dropped
    number_part, type_part = legend_txt.split(':', 1)
    label = number_part.rstrip()[1:]
    cell_type = type_part.strip()   # not named `type`, which would shadow the builtin

    # automatic position: centroid of all cells of this type in UMAP space
    is_type = (PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type).to_numpy()
    location_cells = umap_coords[is_type]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # manual correction (defaults to no shift)
    add_x, add_y = label_offsets.get(label, (0, 0))

    # numbers are written onto the cell-type panels of columns 5 and 2
    axes[0][5].text(x + add_x, y + add_y, label, size=8, fontweight='bold')
    axes[0][2].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# dashed vertical line along the left edge of column 3 in every row
for i in range(len(axes)):
    sep_ax = axes[i][3]
    x_left = sep_ax.get_xlim()[0]
    y_low, y_high = sep_ax.get_ylim()
    sep_ax.plot([x_left, x_left], [y_low, y_high],
                color='black', ls='--',
                lw=1, alpha = 0.5)


fig.savefig('./report_TREM2_KO_dataset/internal_apoptosis.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/internal_apoptosis.png', bbox_inches='tight', format='png')
#display(Image('./tmp/internal_apoptosis.png'))

#### anti-apoptotic factors (intrinsic)

**The mouse has no gene named `Bcl2a1`!** (see the `Bcl2*` symbols printed by the next cell)

In [None]:
# Print every gene symbol known to names_dict that contains the substring 'Bcl2'.
for gene_symbol in filter(lambda symbol: 'Bcl2' in symbol, names_dict):
    print(gene_symbol)

In [None]:
# Anti-apoptotic factors (intrinsic pathway): UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=14, xlabsize=11, figsize=(15, 15),
    add_numbers=True, loc_legend='right margin',
    size=10, vmax=10, titlesize=11, ncols=6, nrows=6,
    annatation_cols=1, an_x=7.5, an_y=1.3,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Bcl2', 'Bcl2l1', 'Bcl2l2', 'Bag1', 'Mcl1'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse',
           'Bcl2', 'Bcl2l1', 'Bcl2l2', 'Bag1', 'Mcl1'])

# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/internal_anti-apoptosis.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/internal_anti-apoptosis.png', bbox_inches='tight', format='png')
#display(Image('./tmp/internal_anti-apoptosis.png'))

### pro-survival

In [None]:
# Pro-survival genes: UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=14, xlabsize=11, figsize=(15, 15),
    add_numbers=True, loc_legend='right margin',
    size=10, vmax=10, titlesize=11, ncols=6, nrows=6,
    annatation_cols=1, an_x=7.5, an_y=1.3,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Myd88', 'Irak1', 'Irak2', 'Traf6', 'Map3k7'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse',
           'Myd88', 'Irak1', 'Irak2', 'Traf6', 'Map3k7'])

# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/pro_survival.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/pro_survival.png', bbox_inches='tight', format='png')
#display(Image('./tmp/pro_survival.png'))

### cytotoxic NK

In [None]:
# Display the gene ID stored for the symbol 'Prf1' (names_dict maps gene_symbol -> gene_id);
# the subscript raises KeyError if the symbol is missing.
names_dict['Prf1']

In [None]:
# Print every gene symbol known to names_dict that contains the substring 'Gzm'.
for gene_symbol in filter(lambda symbol: 'Gzm' in symbol, names_dict):
    print(gene_symbol)

In [None]:
# Cytotoxic NK markers: UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=14, xlabsize=11, figsize=(15, 10), add_numbers=True,
    size=10, vmax=10, titlesize=12, ncols=6,
    annatation_cols=5, an_x=6, an_y=1.8, loc_legend='right margin',
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Gzma', 'Gzmb', 'Prf1'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse',
           'Gzma', 'Gzmb', 'Prf1'])


# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/cytotoxic_NK.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/cytotoxic_NK.png', bbox_inches='tight', format='png')
#display(Image('./tmp/cytotoxic_NK.png'))

### netotic cell death

In [None]:
# Look up the gene ID of 'Elane' with .get(): it yields None (no cell output) when the
# symbol is missing, whereas a plain subscript raises KeyError and stops "Run All".
names_dict.get('Elane') # <-- not present in dataset

In [None]:
# NETotic cell death marker (Mpo): UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=14, xlabsize=11, figsize=(15, 5), add_numbers=True,
    size=10, vmax=10, titlesize=12, ncols=6, nrows=2,
    annatation_cols=5, an_x=6, an_y=1.8, loc_legend='right margin',
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Mpo'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse', 'Mpo'])


# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)

fig.savefig('./report_TREM2_KO_dataset/netotic_cell_death.pdf', bbox_inches='tight', format='pdf')

### Autophagy

In [None]:
# Print every gene symbol known to names_dict that contains the substring 'Ifna'.
for gene_symbol in filter(lambda symbol: 'Ifna' in symbol, names_dict):
    print(gene_symbol)

`IFNA17` and `IFNA8` are not present.

In [None]:
# Autophagy genes: UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=12, xlabsize=11, figsize=(15, 8),
    add_numbers=True, loc_legend='right margin',
    size=10, vmax=10, titlesize=11, ncols=6, nrows=3,
    annatation_cols=5, an_x=6.1, an_y=1.8,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Atg12', 'Gabarapl1'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse',
           'Atg12', 'Gabarapl1'])

# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/autophagy.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/autophagy.png', bbox_inches='tight', format='png')
#display(Image('./tmp/autophagy.png'))

### Pyroptosis

In [None]:
# Pyroptosis genes: UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order; the row for
# the colour key 'Gsdme' is labelled 'Gsdme (~Dfna5)'.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=12, xlabsize=11, figsize=(15, 10),
    add_numbers=True, loc_legend='right margin',
    size=10, vmax=10, titlesize=11, ncols=6, nrows=4,
    annatation_cols=5, an_x=6.1, an_y=1.8,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Gsdmd', 'Casp1', 'Gsdme (~Dfna5)'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse',
           'Gsdmd', 'Casp1', 'Gsdme'])

# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/pyroptosis.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/pyroptosis.png', bbox_inches='tight', format='png')
#display(Image('./tmp/pyroptosis.png'))

### Entotic Cell Death

In [None]:
# Entotic cell death genes: UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
# NOTE(review): in the entosis literature "MAL" usually denotes the SRF co-factor
# MRTF-A (mouse gene Mrtfa, formerly Mkl1), while the mouse symbol `Mal` is a
# different gene - confirm which one is intended here.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=14, xlabsize=11, figsize=(20, 25),
    add_numbers=True, loc_legend='right margin',
    size=10, vmax=10, titlesize=12, ncols=6, nrows=10,
    annatation_cols=1, an_x=7.6, an_y=1.35,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types',
          'Cdh1', 'Ctnna1', 'Rhoa', 'Rock1', 'Rock2',
          'Diaph1', 'Mal', 'Srf', 'Ezr'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse',
           'Cdh1', 'Ctnna1', 'Rhoa', 'Rock1', 'Rock2',
           'Diaph1', 'Mal', 'Srf', 'Ezr'])

# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Draw the border between groups: a dashed vertical line along the left edge of
# column 3 in every row, separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/Entotic_Cell_Death.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/Entotic_Cell_Death.png', bbox_inches='tight', format='png')
#display(Image('./tmp/Entotic_Cell_Death.png'))

### Parthanatos

In [None]:
# Parthanatos genes: UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=14, xlabsize=11, figsize=(15, 10), add_numbers=True,
    size=10, vmax=10, titlesize=12, ncols=6,
    annatation_cols=5, an_x=6, an_y=1.8, loc_legend='right margin',
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Parp1', 'Aifm1', 'Mif'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse',
           'Parp1', 'Aifm1', 'Mif'])


# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/Parthanatos.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/Parthanatos.png', bbox_inches='tight', format='png')
#display(Image('./tmp/Parthanatos.png'))

### Autosis

In [None]:
# Autosis genes: UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=14, xlabsize=11, figsize=(15, 10), add_numbers=True,
    size=10, vmax=10, titlesize=12, ncols=6,
    annatation_cols=5, an_x=6, an_y=1.8, loc_legend='right margin',
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Ulk1', 'Becn1', 'Pik3r4'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse',
           'Ulk1', 'Becn1', 'Pik3r4'])


# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/Autosis.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/Autosis.png', bbox_inches='tight', format='png')
#display(Image('./tmp/Autosis.png'))

### Ferroptosis

In [None]:
# Ferroptosis marker (Gpx4): UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=14, xlabsize=11, figsize=(15, 5), add_numbers=True,
    size=10, vmax=10, titlesize=12, ncols=6, nrows=2,
    annatation_cols=5, an_x=6, an_y=1.8, loc_legend='right margin',
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Gpx4'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse', 'Gpx4'])


# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)

fig.savefig('./report_TREM2_KO_dataset/Ferroptosis.pdf', bbox_inches='tight', format='pdf')

### Lysosome-dependent Cell Death

In [None]:
# Lysosome-dependent cell death genes: UMAP grid with one column per sample.
# Row labels (ylab) and colour keys (color) are listed in the same order.
# plot_custom_UMAP is defined elsewhere in this notebook; its three return values are
# the figure, the grid of axes and the legend entries that are parsed below.
fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=[0, 1, 2, 3, 4, 5],
    ylabsize=14, xlabsize=11, figsize=(15, 10), add_numbers=True,
    size=10, vmax=10, titlesize=12, ncols=6,
    annatation_cols=5, an_x=6, an_y=1.8, loc_legend='right margin',
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Ctsb', 'Ctsd', 'Ctsl'],
    title=[PAIR_METADATA[k]['sample_name'] for k in range(6)],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in range(6)],
    color=['celltypist_cell_label_coarse',
           'Ctsb', 'Ctsd', 'Ctsl'])


# Add the legend numbers to the picture.

# Hand-tuned (dx, dy) shifts for individual labels; every label that is not listed
# here is drawn exactly at the centroid of its cells.
label_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Each entry is parsed as '<number>:<cell type>'. The first character of the number
    # part is dropped (presumably a leading pad character - confirm against
    # plot_custom_UMAP).
    legend_parts = legend_txt.split(':')
    label = legend_parts[0].rstrip()[1:]
    cell_type = legend_parts[1].strip()  # not named `type`: that would shadow the builtin

    # Automatic placement: centroid of the cell type in UMAP coordinates.
    is_cell_type = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    location_cells = PAIR_COMBINED[is_cell_type, :].obsm["X_umap"]
    x = location_cells[:, 0].mean()
    y = location_cells[:, 1].mean()

    # Manual correction. Both offsets are always assigned here, so a label can never
    # inherit the x-shift left over from the previous iteration.
    add_x, add_y = label_offsets.get(label, (0, 0))

    # The numbers are written on the first row only, in columns 5 and 2.
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row, visually
# separating columns 0-2 from columns 3-5.
for row_axes in axes:
    border_ax = row_axes[3]
    x_left = border_ax.get_xlim()[0]
    y_bottom, y_top = border_ax.get_ylim()
    border_ax.plot([x_left, x_left], [y_bottom, y_top],
                   color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/Lysosome_dep.pdf', bbox_inches='tight', format='pdf')
#fig.savefig('./tmp/Lysosome_dep.png', bbox_inches='tight', format='png')
#display(Image('./tmp/Lysosome_dep.png'))

### Immunogenic Cell Death

In [None]:
# Print every gene symbol (the keys of names_dict) that contains "Ifnb".
for gene_symbol in filter(lambda symbol: 'Ifnb' in symbol, names_dict):
    print(gene_symbol)

**Note:** `Ifnb1` is not present in the single-cell dataset.

In [None]:
# Immunogenic cell death figure: cell-type annotation plus Calr, Hmgb1 and
# Anxa1 for samples 0-5.
# NOTE(review): presumably one row per `color` entry and one column per
# sample -- confirm against plot_custom_UMAP.
sample_ids = [0, 1, 2, 3, 4, 5]

fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=sample_ids,
    ylabsize=14, xlabsize=11, figsize=(15, 10), add_numbers=True,
    size=10, vmax=10, titlesize=12, ncols=6,
    annatation_cols=5, an_x=6, an_y=1.8, loc_legend='right margin',
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Calr', 'Hmgb1', 'Anxa1'],
    # Column titles and x-labels are built from the per-sample metadata.
    title=[PAIR_METADATA[k]['sample_name'] for k in sample_ids],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in sample_ids],
    color=['celltypist_cell_label_coarse', 'Calr', 'Hmgb1', 'Anxa1'])


# Write each legend number onto the cell-type UMAP panels.

# Hand-tuned (dx, dy) text offsets keyed by legend number. Labels 18 and 6
# only move vertically; their dx is spelled out as 0 so it can never inherit
# the horizontal offset of whichever label was processed before them.
manual_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Legend entries look like "<prefix>: <cell type>"; split on the first
    # colon only, so a cell-type name containing ':' stays intact.
    number_part, cell_type = legend_txt.split(':', 1)
    # NOTE(review): the first character of the prefix is dropped -- presumably
    # a marker/space emitted by plot_custom_UMAP; confirm against that helper.
    label = number_part.rstrip()[1:]
    # `cell_type` (not `type`) so the builtin is not shadowed notebook-wide.
    cell_type = cell_type.strip()

    # Default position: mean UMAP coordinate of all cells with this label.
    in_cluster = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    umap_xy = PAIR_COMBINED[in_cluster, :].obsm["X_umap"]
    x = umap_xy[:, 0].mean()
    y = umap_xy[:, 1].mean()

    # Manual correction; labels without an entry stay at the mean position.
    add_x, add_y = manual_offsets.get(label, (0, 0))
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row.
for row_axes in axes:
    divider_ax = row_axes[3]
    left_edge = divider_ax.get_xlim()[0]
    bottom, top = divider_ax.get_ylim()
    divider_ax.plot([left_edge, left_edge], [bottom, top],
                    color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/Immunogenic.pdf', bbox_inches='tight', format='pdf')
# Optional PNG preview (disabled):
#fig.savefig('./tmp/Immunogenic.png', bbox_inches='tight', format='png')
#display(Image('./tmp/Immunogenic.png'))

### Senescence

In [None]:
# Senescence figure: cell-type annotation plus Cdkn1a and Cdkn2a for
# samples 0-5.
# NOTE(review): presumably one row per `color` entry and one column per
# sample -- confirm against plot_custom_UMAP.
sample_ids = [0, 1, 2, 3, 4, 5]

fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=sample_ids,
    ylabsize=12, xlabsize=11, figsize=(15, 8),
    add_numbers=True, loc_legend='right margin',
    size=10, vmax=10, titlesize=11, ncols=6, nrows=3,
    annatation_cols=5, an_x=6.1, an_y=1.8,
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Cdkn1a', 'Cdkn2a'],
    # Column titles and x-labels are built from the per-sample metadata.
    title=[PAIR_METADATA[k]['sample_name'] for k in sample_ids],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in sample_ids],
    color=['celltypist_cell_label_coarse', 'Cdkn1a', 'Cdkn2a'])

# Write each legend number onto the cell-type UMAP panels.

# Hand-tuned (dx, dy) text offsets keyed by legend number. Labels 18 and 6
# only move vertically; their dx is spelled out as 0 so it can never inherit
# the horizontal offset of whichever label was processed before them.
manual_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Legend entries look like "<prefix>: <cell type>"; split on the first
    # colon only, so a cell-type name containing ':' stays intact.
    number_part, cell_type = legend_txt.split(':', 1)
    # NOTE(review): the first character of the prefix is dropped -- presumably
    # a marker/space emitted by plot_custom_UMAP; confirm against that helper.
    label = number_part.rstrip()[1:]
    # `cell_type` (not `type`) so the builtin is not shadowed notebook-wide.
    cell_type = cell_type.strip()

    # Default position: mean UMAP coordinate of all cells with this label.
    in_cluster = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    umap_xy = PAIR_COMBINED[in_cluster, :].obsm["X_umap"]
    x = umap_xy[:, 0].mean()
    y = umap_xy[:, 1].mean()

    # Manual correction; labels without an entry stay at the mean position.
    add_x, add_y = manual_offsets.get(label, (0, 0))
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row.
for row_axes in axes:
    divider_ax = row_axes[3]
    left_edge = divider_ax.get_xlim()[0]
    bottom, top = divider_ax.get_ylim()
    divider_ax.plot([left_edge, left_edge], [bottom, top],
                    color='black', ls='--', lw=1, alpha=0.5)


fig.savefig('./report_TREM2_KO_dataset/Senescence.pdf', bbox_inches='tight', format='pdf')
# Optional PNG preview (disabled):
#fig.savefig('./tmp/Senescence.png', bbox_inches='tight', format='png')
#display(Image('./tmp/Senescence.png'))

### Proliferation

In [None]:
# Proliferation figure: cell-type annotation plus Mki67 for samples 0-5.
# NOTE(review): presumably one row per `color` entry and one column per
# sample -- confirm against plot_custom_UMAP.
sample_ids = [0, 1, 2, 3, 4, 5]

fig, axes, labels = plot_custom_UMAP(
    anndata=PAIR_COMBINED, samples=sample_ids,
    ylabsize=14, xlabsize=11, figsize=(15, 5), add_numbers=True,
    size=10, vmax=10, titlesize=12, ncols=6, nrows=2,
    annatation_cols=5, an_x=6, an_y=1.8, loc_legend='right margin',
    ann_fontsize=10, marker_size=10,
    ylab=['Cell Types', 'Mki67'],
    # Column titles and x-labels are built from the per-sample metadata.
    title=[PAIR_METADATA[k]['sample_name'] for k in sample_ids],
    xlab=[f"{PAIR_METADATA[k]['filtered_cell_count']} cells" for k in sample_ids],
    color=['celltypist_cell_label_coarse', 'Mki67'])


# Write each legend number onto the cell-type UMAP panels.

# Hand-tuned (dx, dy) text offsets keyed by legend number. Labels 18 and 6
# only move vertically; their dx is spelled out as 0 so it can never inherit
# the horizontal offset of whichever label was processed before them.
manual_offsets = {
    '11': (-1.5, 0.8),
    '18': (0, -1),
    '14': (-1.2, 3.4),
    '15': (-1.2, -0.5),
    '9': (-1.2, -3.5),
    '6': (0, -1),
}

for legend_txt in labels:
    # Legend entries look like "<prefix>: <cell type>"; split on the first
    # colon only, so a cell-type name containing ':' stays intact.
    number_part, cell_type = legend_txt.split(':', 1)
    # NOTE(review): the first character of the prefix is dropped -- presumably
    # a marker/space emitted by plot_custom_UMAP; confirm against that helper.
    label = number_part.rstrip()[1:]
    # `cell_type` (not `type`) so the builtin is not shadowed notebook-wide.
    cell_type = cell_type.strip()

    # Default position: mean UMAP coordinate of all cells with this label.
    in_cluster = PAIR_COMBINED.obs.celltypist_cell_label_coarse == cell_type
    umap_xy = PAIR_COMBINED[in_cluster, :].obsm["X_umap"]
    x = umap_xy[:, 0].mean()
    y = umap_xy[:, 1].mean()

    # Manual correction; labels without an entry stay at the mean position.
    add_x, add_y = manual_offsets.get(label, (0, 0))
    for col in (5, 2):
        axes[0][col].text(x + add_x, y + add_y, label, size=8, fontweight='bold')

# Dashed vertical line along the left edge of column 3 in every row.
for row_axes in axes:
    divider_ax = row_axes[3]
    left_edge = divider_ax.get_xlim()[0]
    bottom, top = divider_ax.get_ylim()
    divider_ax.plot([left_edge, left_edge], [bottom, top],
                    color='black', ls='--', lw=1, alpha=0.5)

fig.savefig('./report_TREM2_KO_dataset/Proliferation.pdf', bbox_inches='tight', format='pdf')

# Trem2, Ripk3 and Mlkl expression in samples 2 and 5

In [None]:
# UMAP of the cells with obs.Sample == '2', colored by the gene id that
# names_dict maps 'Trem2' to.
in_sample_2 = PAIR_COMBINED.obs.Sample == '2'
sc.pl.umap(
    PAIR_COMBINED[in_sample_2],
    color=[names_dict['Trem2']],
    frameon=False,
    size=s,
    cmap=cmap,
    vmax=vmax,
)

In [None]:
# UMAP of the cells with obs.Sample == '5', colored by the gene id that
# names_dict maps 'Trem2' to.
in_sample_5 = PAIR_COMBINED.obs.Sample == '5'
sc.pl.umap(
    PAIR_COMBINED[in_sample_5],
    color=[names_dict['Trem2']],
    frameon=False,
    size=s,
    cmap=cmap,
    vmax=vmax,
)

In [None]:
# UMAP of the cells with obs.Sample == '5', colored by the gene id that
# names_dict maps 'Ripk3' to.
in_sample_5 = PAIR_COMBINED.obs.Sample == '5'
sc.pl.umap(
    PAIR_COMBINED[in_sample_5],
    color=[names_dict['Ripk3']],
    frameon=False,
    size=s,
    cmap=cmap,
    vmax=vmax,
)

In [None]:
# UMAP of the cells with obs.Sample == '5', colored by the gene id that
# names_dict maps 'Mlkl' to.
in_sample_5 = PAIR_COMBINED.obs.Sample == '5'
sc.pl.umap(
    PAIR_COMBINED[in_sample_5],
    color=[names_dict['Mlkl']],
    frameon=False,
    size=s,
    cmap=cmap,
    vmax=vmax,
)

In [None]:
# UMAP of the cells with obs.Sample == '2', colored by the gene id that
# names_dict maps 'Ripk3' to.
in_sample_2 = PAIR_COMBINED.obs.Sample == '2'
sc.pl.umap(
    PAIR_COMBINED[in_sample_2],
    color=[names_dict['Ripk3']],
    frameon=False,
    size=s,
    cmap=cmap,
    vmax=vmax,
)

In [None]:
# UMAP of the cells with obs.Sample == '2', colored by the gene id that
# names_dict maps 'Mlkl' to.
in_sample_2 = PAIR_COMBINED.obs.Sample == '2'
sc.pl.umap(
    PAIR_COMBINED[in_sample_2],
    color=[names_dict['Mlkl']],
    frameon=False,
    size=s,
    cmap=cmap,
    vmax=vmax,
)