In [None]:
import scanpy as sc
import scanpy.external as sce
import numpy as np
import pandas as pd
import warnings, scipy.sparse as sp, matplotlib, matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.pyplot import rc_context
from collections import Counter
import matplotlib.font_manager
import openpyxl
import pyreadr
import rpy2
import os
os.environ['R_HOME'] = '/Library/Frameworks/R.framework/Resources/'
os.environ['R_USER'] = '/Library/Frameworks/R.framework/Resources'
import anndata2ri
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects
#import magic
from scipy import sparse
from sklearn.neighbors import NearestNeighbors
#import seaborn as sns
#import palantir
import loompy
#import feather
import re
#from scipy.sparse import csgraph

# Font handling for exported figures: type-42 fonts keep text editable in PDF/PS output.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
})
matplotlib.rc('font', size=14)
import matplotlib.lines as lines

# Let pandas render up to 200 rows before truncating a table.
pd.set_option('display.max_rows', 200)

# scanpy figure defaults, logging level and version banner.
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='Spectral_r',
    vector_friendly=True,
    transparent=True,
)
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()

In [None]:
# Install pyppeteer with the same interpreter that runs this kernel
# (sys.executable), so the package lands in the kernel's environment.
# NOTE(review): the version is not pinned, and nothing in the visible part of
# the notebook imports pyppeteer -- confirm this cell is still needed.
import sys
!{sys.executable} -m  pip install pyppeteer





In [None]:
# Preset categorical palette (23 entries) used for cluster / sample colouring.
# NOTE(review): '#B5EFB5' appears twice (10th and 21st entry), so two categories
# can end up with the same colour -- confirm whether that is intended.
user_defined_palette = [
    '#F6222E', '#16FF32', '#3283FE', '#FEAF16', '#BDCDFF', '#3B00FB',
    '#1CFFCE', '#C075A6', '#F8A19F', '#B5EFB5', '#FBE426', '#C4451C',
    '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', '#1CBE4F', '#1C8356',
    '#0e452b', '#AA0DFE', '#B5EFB5', '#325A9B', '#90AD1C',
]

# Sequential colormap for marker expression: pale lilac -> saturated blue.
_marker_stops = ["#E6E6FF", "#CCCCFF", "#B2B2FF", "#9999FF", "#6666FF", "#3333FF", "#0000FF"]
user_defined_cmap_markers = LinearSegmentedColormap.from_list('mycmap', _marker_stops)

# Diverging colormap for differential expression: blue -> pale -> green.
_deg_stops = [
    "#0000FF", "#3333FF", "#6666FF", "#9999FF", "#B2B2FF", "#CCCCFF", "#E6E6FF",
    "#E6FFE6", "#CCFFCC", "#B2FFB2", "#99FF99", "#66FF66", "#33FF33", "#00FF00",
]
user_defined_cmap_degs = LinearSegmentedColormap.from_list('mycmap', _deg_stops)

In [None]:
%matplotlib inline 

## Perform quality control and clean-up samples

### Load cellranger output files

In [None]:
from pathlib import Path

adatas_list=[]
names_list=[]

tenexdir = '/Users/alemarquis/Desktop//CD45/CD45new/'
# Materialise the matches so the discovery order can be inspected. The order is
# deliberately left untouched: the sample keys handed to sc.concat in the next
# cell are written in exactly this order.
h5_path = list(Path(tenexdir).glob('**/**/**/**/filtered_feature_bc_matrix.h5'))
if not h5_path:
    raise FileNotFoundError(
        'No filtered_feature_bc_matrix.h5 found below ' + tenexdir
    )

for position, path in enumerate(h5_path):
    tmp_adata = sc.read_10x_h5(path)
    # 10x gene symbols can repeat; make them unique before concatenating.
    tmp_adata.var_names_make_unique()
    # Report which file sits at which position and its (cells, genes) shape, so
    # the file -> sample key pairing can be checked by eye.
    print(position, path, tmp_adata.shape)
    adatas_list.append(tmp_adata)

In [None]:
# One key per loaded matrix, in the order the files were discovered.
sample_keys = [
    'mo18_CD45pos1_d4', "mo18_CD45pos1_d1", "mo18_CD45pos2_d7", "mo18_CD45pos1_d7",
    "mo02_CD45pos1_d4", "mo02_CD45pos2_d1", "mo02_CD45pos1_d1", "mo02_CD45pos2_d4",
    "mo02_CD45pos1_d0", "mo02_CD45pos2_d0", "mo02_CD45pos1_d7", "mo02_CD45pos2_d7",
    "mo18_CD45pos2_d0", "mo18_CD45pos2_d1", "mo18_CD45pos3_d1", "mo18_CD45pos1_d0",
    "mo18_CD45pos2_d4", "mo18_CD45pos3_d4",
]

# The keys are paired with the matrices purely by position, so a count mismatch
# means at least one sample would be mislabelled -- stop early instead.
if len(sample_keys) != len(adatas_list):
    raise ValueError(
        'Expected %d count matrices (one per sample key) but loaded %d'
        % (len(sample_keys), len(adatas_list))
    )

adata = sc.concat(
    adatas_list,
    join='outer',          # keep the union of genes across samples
    label='sample',        # obs column that receives the key of each matrix
    keys=sample_keys,
    index_unique='@',      # barcodes become '<barcode>@<sample key>'
)

In [None]:
# Second, independent concatenation of the same matrices with the same keys;
# the result is a separate object from `adata`.
adata2 = sc.concat(
    adatas_list,
    keys=[
        'mo18_CD45pos1_d4', "mo18_CD45pos1_d1", "mo18_CD45pos2_d7", "mo18_CD45pos1_d7",
        "mo02_CD45pos1_d4", "mo02_CD45pos2_d1", "mo02_CD45pos1_d1", "mo02_CD45pos2_d4",
        "mo02_CD45pos1_d0", "mo02_CD45pos2_d0", "mo02_CD45pos1_d7", "mo02_CD45pos2_d7",
        "mo18_CD45pos2_d0", "mo18_CD45pos2_d1", "mo18_CD45pos3_d1", "mo18_CD45pos1_d0",
        "mo18_CD45pos2_d4", "mo18_CD45pos3_d4",
    ],  # order must match the order of the AnnData objects in adatas_list
    label='sample',
    join='outer',
    index_unique='@',
)

In [None]:
adata.raw = adata # keep a copy of the raw adata 
# Randomise the order of cells so that no sample is systematically drawn on top
# of the others in scatter plots.
np.random.seed(42) 
index_list = np.arange(adata.shape[0])
np.random.shuffle(index_list)
# Indexing returns a view; copy it so the layer/obs assignments in the following
# cells modify a real object rather than triggering an implicit copy of a view.
adata = adata[index_list].copy()

In [None]:
adata.shape

In [None]:
# Keep an untouched copy of the count matrix in a layer; adata.X itself is
# normalised and log-transformed further down.
adata.layers['raw_data'] = adata.X.copy()

In [None]:
# Seeded generator; it is re-created right before each PCA call further down.
rng = np.random.RandomState(42) # set seed 

In [None]:
# Derive age group and time point from the sample name; anything that matches
# none of the tags is labelled 'error' so it stands out in plots.
stage_label_by_tag = {'mo02': '02mo', 'mo18': '18mo'}
day_tags = ('d0', 'd1', 'd4', 'd7')

adata.obs['stage'] = [
    next((label for tag, label in stage_label_by_tag.items() if tag in sample_name), 'error')
    for sample_name in adata.obs['sample']
]
adata.obs['day'] = [
    next((tag for tag in day_tags if tag in sample_name), 'error')
    for sample_name in adata.obs['sample']
]

In [None]:
# Fixed colours for the 'stage' (2 entries) and 'day' (4 entries) annotations.
# NOTE(review): scanpy pairs these with the categories by position -- presumably
# 02mo, 18mo and d0, d1, d4, d7; confirm against the rendered legends.
adata.uns['stage_colors'] =  [ '#76D6FF','#FF8072'] # ['#F5B4AE', '#8FD6D9']
adata.uns['day_colors'] = ['#0080FF', '#FFA500',  '#FF00FF', '#00D6D8']

In [None]:
adata

### Calculate quality control metrics and perform standard data clean-up

In [None]:
# Compute scanpy's standard QC metrics and store them on the object (inplace=True).
sc.pp.calculate_qc_metrics(adata, inplace=True)
# Keep the library size measured before any filtering/normalisation under its
# own name, plus its log10 for plotting.
adata.obs['original_total_counts'] = adata.obs['total_counts']
adata.obs['log10_original_total_counts'] = np.log10(adata.obs['original_total_counts'])

In [None]:
# mitochondrial genes
adata.var['mt'] = adata.var_names.str.startswith(('MT-', 'mt-'))
# ribosomal genes
# NOTE(review): the 'Gm' prefix also flags every predicted gene named Gm*, not
# only ribosomal-protein pseudogenes -- confirm this is intended.
adata.var['ribo'] = adata.var_names.str.startswith(('RPS', 'RPL', 'Rps', 'Rpl', 'Gm'))
# hemoglobin genes. str.startswith matches literally, so a regex anchor ('^Hb')
# would never match any gene name; use the bare prefixes instead.
# NOTE(review): the bare prefix also matches non-globin genes such as Hbegf,
# Hbp1 and Hbs1l; these flags drive the gene removal further down, so re-check
# the downstream cluster numbering after this fix.
adata.var['hb'] = adata.var_names.str.startswith(('Hb', 'HB'))

# For each cell, the fraction of its counts that falls into each gene class.
# np.asarray(...).ravel() turns the (n_cells, 1) result of a sparse row sum into
# a flat vector that fits an obs column.
counts_per_cell = np.asarray(adata.X.sum(axis=1)).ravel()
for var_flag, obs_column in (('mt', 'mito_frac'), ('ribo', 'ribo_frac'), ('hb', 'hb_frac')):
    class_mask = adata.var[var_flag].to_numpy(dtype=bool)
    class_counts = np.asarray(adata[:, class_mask].X.sum(axis=1)).ravel()
    adata.obs[obs_column] = class_counts / counts_per_cell

#### Identify doublet cells

In [None]:
# Score doublets with Scrublet using a manually chosen score threshold.
# NOTE(review): all samples are scored together on the pooled object; Scrublet
# is usually run per capture -- confirm whether batch_key='sample' should be set.
sc.external.pp.scrublet(adata, threshold=0.35, random_state=42) # choose threshold manually

In [None]:
# check manual threshold
sc.external.pl.scrublet_score_distribution(adata)

#### Remove genes detected in fewer than 4 cells

In [None]:
# remove genes detected in fewer than 4 cells (min_cells=4), not only all-zero genes
sc.pp.filter_genes(adata, min_cells=4)

#### Normalize for each cell's library size

In [None]:
# Scale every cell to a total of 10,000 counts.
# NOTE(review): normalize_per_cell is the legacy scanpy API; normalize_total is
# its replacement but may not be a drop-in swap -- verify before changing.
sc.pp.normalize_per_cell(adata, counts_per_cell_after=10**4)

#### Log-transform counts

In [None]:
sc.pp.log1p(adata)

### Select subset of principal components 

In [None]:
# Flag the 4000 most variable genes.
# NOTE(review): flavor='seurat_v3' is documented to expect raw counts, but adata.X
# has already been normalised and log-transformed above. The counts are still
# available in layers['raw_data'] (layer='raw_data'). Changing this alters the
# clustering, so the hard-coded cluster ids used later must be re-checked.
sc.pp.highly_variable_genes(adata, n_top_genes=4000, n_bins=20, flavor='seurat_v3')

In [None]:
# Fresh seeded generator so the PCA result does not depend on earlier draws.
rng = np.random.RandomState(42)
# 50 principal components computed on the highly variable genes only.
sc.tl.pca(adata, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
def observe_variance(anndata_object):
    """Plot per-PC and cumulative explained variance of a computed PCA.

    Parameters
    ----------
    anndata_object
        Object whose ``.uns['pca']['variance_ratio']`` holds the fraction of
        variance explained by each principal component (filled by ``sc.tl.pca``).

    Returns
    -------
    None
        The two-panel figure is shown as a side effect. (The previous version
        returned the ``plt.show`` function itself without ever calling it.)
    """
    variance_ratio = np.asarray(anndata_object.uns['pca']['variance_ratio'])
    pc_index = range(len(variance_ratio))  # 0-based component index

    fig, (ax_per_pc, ax_cumulative) = plt.subplots(1, 2, figsize=(10, 5))

    # variance per principal component
    ax_per_pc.scatter(pc_index, variance_ratio, s=4)
    ax_per_pc.set_xlabel('PC')
    ax_per_pc.set_ylabel('Fraction of variance explained\n')
    ax_per_pc.set_title('Fraction of variance explained per PC\n')

    # cumulative variance explained
    ax_cumulative.scatter(pc_index, np.cumsum(variance_ratio), s=4)
    ax_cumulative.set_xlabel('PC')
    ax_cumulative.set_ylabel('Cumulative fraction of variance\nexplained')
    ax_cumulative.set_title('Cumulative fraction of variance\nexplained by PCs')

    fig.tight_layout()
    plt.show()
observe_variance(adata)

In [None]:
adata.uns['pca']

In [None]:
adata.uns['pca']['variance_ratio']

In [None]:
# Cumulative % variance explained; the red line marks the 30-PC cut-off.
cumulative_variance_pct = np.cumsum(adata.uns['pca']['variance_ratio']) * 100
plt.plot(range(len(adata.uns['pca']['variance_ratio'])), cumulative_variance_pct, '.-')
plt.axvline(30, color='r')
plt.xlabel('Principal Component', fontsize=14)
plt.ylabel('% Variance Explained', fontsize=14)

In [None]:
# Neighbourhood graph on the first 30 PCs (the cut-off marked in the plot above).
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30, random_state=42)

In [None]:
sc.tl.umap(adata, min_dist=0.1)

In [None]:

# Display order for samples: young (mo02) before old (mo18), each by time point d0 -> d7
new_order = ["mo02_CD45pos1_d0", "mo02_CD45pos2_d0", "mo02_CD45pos1_d1","mo02_CD45pos2_d1",
             "mo02_CD45pos1_d4","mo02_CD45pos2_d4",  "mo02_CD45pos1_d7", "mo02_CD45pos2_d7",
             "mo18_CD45pos1_d0","mo18_CD45pos2_d0","mo18_CD45pos1_d1", "mo18_CD45pos2_d1","mo18_CD45pos3_d1",  
             'mo18_CD45pos1_d4', "mo18_CD45pos2_d4", "mo18_CD45pos3_d4" ,"mo18_CD45pos1_d7",  "mo18_CD45pos2_d7",  ]

# Apply that order to the 'sample' column as an ordered categorical
adata.obs['sample'] = pd.Categorical(adata.obs['sample'], categories=new_order, ordered=True)


#### Sample metadata

In [None]:
# UMAP coloured by sample metadata; written to disk through scanpy's `save` suffix.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    adata,
    color=['stage', 'day', 'sample'],
    use_raw=False,
    color_map='Spectral_r',
    size=15,
    ncols=15,
    wspace=0.2,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
    save='_metadata_S1.pdf',
)

#### QC metrics

In [None]:
# UMAP coloured by the per-cell QC metrics.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    adata,
    color=['log10_original_total_counts', 'n_genes_by_counts', 'ribo_frac', 'mito_frac'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.2,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
    save='_QCmetrics_S1.pdf',
)

In [None]:
# QC metric distributions, one violin per sample.
sc.pl.violin(
    adata,
    ['n_genes_by_counts', 'log10_original_total_counts', 'ribo_frac', 'mito_frac'],
    groupby='sample',
    palette=user_defined_palette,
    jitter=0.4,
    rotation=90,
)

#### Potential contaminant populations

In [None]:
# UMAP coloured by the Scrublet doublet score.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    adata,
    color=['doublet_score'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=4,
    wspace=0.1,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
    save='_contaminants_S1.pdf',
)

In [None]:
# UMAP coloured by age group.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    adata, 
    color=[ "stage"], 
    palette=user_defined_palette,  
    color_map='Spectral_r',
    use_raw=False,
    ncols=4,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False,
    # Own file name: this cell previously reused '_contaminants_S1.pdf' and so
    # overwrote the doublet-score figure saved by the cell above.
    save='_stage_S1.pdf'
)

In [None]:
# Second name for the full, unfiltered object (an alias, not a copy). `adata` is
# rebound to the filtered subset later, while `adata_total` keeps pointing at
# this object -- so the leiden columns added below are visible through both names.
adata_total = adata

### Run unsupervised clustering analysis (Leiden)

In [None]:
# Leiden clustering at several resolutions; each run is stored in
# adata.obs under 'leiden_<resolution>'.
for resolution_parameter in (0.6, 0.8, 1.0, 1.2):
    sc.tl.leiden(
        adata,
        key_added=f'leiden_{resolution_parameter}',
        resolution=resolution_parameter,
        random_state=42,
    )

We can visualize the clustering to see which clusters match with the cells that we would like to filter out. Inspect the list of QC metrics and canonical markers to make your choice.

In [None]:
# Side-by-side comparison of the four clustering resolutions.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    adata,
    color=['leiden_0.6', 'leiden_0.8', 'leiden_1.0', 'leiden_1.2'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.7,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# Clusters at resolution 1.2, legend beside the plot.
sc.pl.umap(
    adata,
    color=['leiden_1.2'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.7,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

# Same clusters labelled on the embedding, next to the mitochondrial fraction.
sc.pl.umap(
    adata,
    color=['leiden_1.2', 'mito_frac'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.7,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
    legend_loc="on data",
)

In [None]:
# Wilcoxon marker ranking per leiden_1.2 cluster on adata.X (use_raw=False),
# then the top 25 genes of each cluster with independent y axes.
sc.tl.rank_genes_groups(adata, 'leiden_1.2', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False) 

In [None]:
# QC metric distributions, one violin per leiden_1.2 cluster.
sc.pl.violin(
    adata,
    ['n_genes_by_counts', 'log10_original_total_counts', 'ribo_frac', 'mito_frac'],
    groupby='leiden_1.2',
    palette=user_defined_palette,
    jitter=0.4,
    rotation=90,
)

### Filter out bad quality cells by cluster

In [None]:
# Clusters judged low quality from the plots above (ids refer to leiden_1.2).
clusters_to_remove = ['10', '19', '25']  # "12", "24","27", "30"]
# One boolean per cell: True when the cell's cluster is not on the removal list.
cluster_filter = [cluster not in clusters_to_remove for cluster in adata.obs['leiden_1.2']]
print(f'Total number of cells pre-filtering: {adata.shape[0]}')
print(f'Number of cells to keep after filtering: {sum(cluster_filter)}')
adata_filtered = adata[cluster_filter]

In [None]:
# Remaining clusters after dropping the low-quality ones.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    adata_filtered,
    color=['leiden_1.2'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.5,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# QC metrics per remaining cluster.
sc.pl.violin(
    adata_filtered,
    ['n_genes_by_counts', 'log10_original_total_counts', 'ribo_frac', 'mito_frac'],
    groupby='leiden_1.2',
    palette=user_defined_palette,
    jitter=0.4,
    rotation=90,
)

In [None]:
# keep cells whose mitochondrial fraction is below 5%
adata_filtered = adata_filtered[adata_filtered.obs['mito_frac'] <0.05, :]

# keep cells whose ribosomal fraction is above 10% (the threshold in the code is 0.1, not 0.05)
adata_filtered = adata_filtered[adata_filtered.obs['ribo_frac'] > 0.1, :]

In [None]:
# Ribosomal / mitochondrial fractions per sample after thresholding.
sc.pl.violin(
    adata_filtered,
    ['ribo_frac', 'mito_frac'],
    groupby='sample',
    palette=user_defined_palette,
    jitter=0.4,
    rotation=90,
)

In [None]:
# Filtered cells coloured by sample and by ribosomal fraction.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    adata_filtered,
    color=['sample', 'ribo_frac'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=1,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# Same per-sample violin plot as above, drawn again after the UMAP check.
sc.pl.violin(
    adata_filtered,
    ['ribo_frac', 'mito_frac'],
    groupby='sample',
    palette=user_defined_palette,
    jitter=0.4,
    rotation=90,
)

In [None]:
# Cell counts before and after the quality filters.
print(f"Original cell number {adata.n_obs:d}")
print(f"Remaining cells {adata_filtered.n_obs:d}")


In [None]:
# From here on `adata` refers to the filtered subset; the unfiltered object
# remains reachable as `adata_total`.
adata = adata_filtered

In [None]:
adata.shape

#### Remove ribosomal protein and hemoglobin genes

In [None]:
# Drop every gene flagged as ribosomal or hemoglobin in a single step.
genes_to_drop = adata.var['ribo'].to_numpy(dtype=bool) | adata.var['hb'].to_numpy(dtype=bool)
# Copy so that `adata` is a real object rather than a view of a view; the next
# section writes to adata.obs.
adata = adata[:, ~genes_to_drop].copy()
adata.shape

### Filter out doublets and cell contaminants

In [None]:
# Cast the boolean call to a categorical so it is drawn with discrete colours.
adata.obs['predicted_doublet'] = adata.obs['predicted_doublet'].astype('category')
sc.pl.umap(
    adata,
    color=['predicted_doublet', 'doublet_score'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.1,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# Doublet score on the unfiltered object, for comparison with the filtered one.
# (The copy-pasted re-cast of adata.obs['predicted_doublet'] was dropped: it
# touched `adata`, not the object plotted here, and repeated the previous cell.)
sc.pl.umap(
    adata_total, 
    color=['doublet_score'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
# Keep only cells Scrublet did not call as doublets. Copy the subset so the obs
# column added in the following cells is written to a real object, not a view.
adata = adata[adata.obs['predicted_doublet'] == False].copy()

In [None]:
# Check after doublet removal: only non-doublet cells should remain.
sc.pl.umap(
    adata,
    color=['predicted_doublet', 'doublet_score'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.1,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
adata.obs['keep_cell'] = '0.yes'

In [None]:
# Series indexed by the barcodes that survived all filters; only its index is
# used below to flag those cells on the unfiltered object.
keep_cells = pd.concat([adata.obs['keep_cell']])

In [None]:
adata_total.obs['keep_cell'] = '1.no'

In [None]:
# Flag surviving cells on the unfiltered object. A single .loc assignment is
# used instead of chained indexing (obs['keep_cell'][mask] = ...), which writes
# to a temporary and is a silent no-op under pandas copy-on-write.
kept_mask = adata_total.obs.index.isin(keep_cells.index)
adata_total.obs.loc[kept_mask, 'keep_cell'] = '0.yes'

In [None]:
# Kept ('0.yes', blue) versus discarded ('1.no', grey) cells on the full embedding.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    adata_total,
    color=['keep_cell'],
    use_raw=False,
    palette=['blue', '#d3d3d3'],
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.1,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
    save='_keepcells.pdf',
)

In [None]:
crosstb = pd.crosstab(adata_total.obs['sample'], adata_total.obs['keep_cell'])

In [None]:
# Stacked bar chart: kept vs. discarded cells per sample.
with rc_context({'figure.figsize': (8, 3)}):
    ax = crosstb.plot(kind="bar", stacked=True, edgecolor = "black", width=0.8,  color=['blue', '#d3d3d3'])
    ax.grid(False) 
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    # The legend sits outside the axes; bbox_inches='tight' keeps it (and the
    # rotated tick labels) inside the saved page instead of being clipped.
    plt.savefig('barplot_keepcells_S1.pdf', bbox_inches='tight')

### Reanalyze young data after removal of cells

In [None]:
adata.obs['stage']

In [None]:
# Young (2-month) cells only. Copy the subset: the HVG, PCA, neighbour and
# clustering steps below all write to the object, which would otherwise turn
# the view into a copy implicitly.
young = adata[adata.obs['stage']=="02mo"].copy()

In [None]:
# Re-select the 2000 most variable genes within the young subset.
# NOTE(review): flavor='seurat_v3' is documented to expect raw counts, while
# young.X is log-normalised here; the counts are in layers['raw_data'].
sc.pp.highly_variable_genes(young, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(young, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(young)


In [None]:
# Cumulative % variance explained for the young subset; red line = 30-PC cut-off.
young_cumulative_variance_pct = np.cumsum(young.uns['pca']['variance_ratio']) * 100
plt.plot(range(len(young.uns['pca']['variance_ratio'])), young_cumulative_variance_pct, '.-')
plt.axvline(30, color='r')
plt.xlabel('Principal Component', fontsize=14)
plt.ylabel('% Variance Explained', fontsize=14)

In [None]:
sc.pp.neighbors(young, n_neighbors=50,n_pcs=30)

In [None]:
sc.tl.umap(young, min_dist=0.6)

In [None]:
# Marker genes plus stage/day on the young-only embedding.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    young,
    color=[
        "Sox4", "Rorc", "Ncr1", "Klrk1", "Cxcr6",
        'Cd8b1', "Cd8a", "Cd4", 'Tnfrsf4', "Foxp3",
        "H2-Aa", "Clec9a", "Xcr1", "Sirpa", "Ccr7",
        "Fscn1", 'Cd79a', 'Ms4a1', "Xbp1", "Igkc",
        "Msrb1", "stage", 'day',
    ],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.1,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# Leiden clustering of the young subset at coarse resolutions; results go to
# young.obs under 'leiden_<resolution>'.
for resolution_parameter in (0.1, 0.2, 0.3, 0.4, 0.5):
    sc.tl.leiden(
        young,
        key_added=f'leiden_{resolution_parameter}',
        resolution=resolution_parameter,
        random_state=42,
    )

In [None]:
# Compare the five clustering resolutions on the young subset.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    young,
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5'],
)

In [None]:
# Resolution 0.2 next to metadata and two T-cell markers.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    young,
    color=['leiden_0.2', "day", "stage", "Foxp3", "Cd4"],
)

In [None]:
sc.tl.rank_genes_groups(young, 'leiden_0.2', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(young, n_genes=25, sharey=False)  

In [None]:
# Resolution 0.4 with cluster ids printed on the embedding; these ids are the
# ones used for the cell-type annotation below.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    young,
    color=['leiden_0.4'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=5,
    ncols=5,
    wspace=0.7,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
    legend_loc="on data",
)

In [None]:
# Manual annotation: leiden_0.4 cluster ids grouped by assigned cell type.
leiden_0_4_clusters_by_cell_type = {
    'T cells': ('1', '3', '4'),
    'B cells': ('9',),
    'NKT and ILTCK': ('0', '2', '8'),
    'NK cells': ('7',),
    'DCs and Macrophages': ('10', '12', '13'),
    'DN/DPs': ('6', '11'),
    'ILC': ('5',),
}
# Invert to cluster id -> cell type for a direct lookup.
cell_type_by_leiden_0_4 = {
    cluster_id: cell_type
    for cell_type, cluster_ids in leiden_0_4_clusters_by_cell_type.items()
    for cluster_id in cluster_ids
}
# Any cluster id missing from the table is labelled 'ERROR'.
young.obs['cell_type'] = [
    cell_type_by_leiden_0_4.get(cluster_id, 'ERROR') for cluster_id in young.obs['leiden_0.4']
]

In [None]:
# Annotation overview with labels on the embedding; continuous colours capped at 2.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    young,
    color=['leiden_0.1', 'cell_type', 'stage', 'day', "Foxp3"],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    vmax=2,
    size=15,
    ncols=5,
    wspace=0.1,
    add_outline=True,
    outline_width=[0.3, 0.05],
    frameon=False,
    sort_order=False,
    legend_loc="on data",
)

In [None]:
# Cell-type annotation alone, legend beside the plot.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    young,
    color=['cell_type'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    vmax=2,
    size=15,
    ncols=5,
    wspace=0.1,
    add_outline=True,
    outline_width=[0.3, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# Ribosomal / mitochondrial fractions per annotated cell type.
sc.pl.violin(
    young,
    ['ribo_frac', 'mito_frac'],
    groupby='cell_type',
    palette=user_defined_palette,
    jitter=0.4,
    rotation=90,
)

In [None]:
# Same overview as plotted earlier in this section (identical arguments).
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    young,
    color=['leiden_0.1', 'cell_type', 'stage', 'day', "Foxp3"],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    vmax=2,
    size=15,
    ncols=5,
    wspace=0.1,
    add_outline=True,
    outline_width=[0.3, 0.05],
    frameon=False,
    sort_order=False,
    legend_loc="on data",
)

In [None]:
sc.tl.rank_genes_groups(young, 'cell_type', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(young, n_genes=25, sharey=False)  

In [None]:
# Marker panel on the annotated young subset.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    young,
    color=[
        "Sox4", "Rorc", "Ncr1", "Klrk1", "Cxcr6",
        'Cd8b1', "Cd4", 'Tnfrsf4', "Foxp3", "H2-Aa",
        "Clec9a", "Xcr1", "Sirpa", "Ccr7", "Fscn1",
        'Cd79a', 'Ms4a1', "Xbp1", "Igkc", "Msrb1",
        'Fcer1g',
    ],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    size=15,
    ncols=5,
    wspace=0.1,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# NOTE(review): this dictionary is never read -- the next cell assigns `genes`
# again before anything uses it. Two groups are empty lists. Candidate for removal.
genes = {'T-cells': ['Cd3e', 'Cd8a', 'Cd4', 'Il7r'], 
         'NKT and invariant cells':['Gzmb'],
         'NK cells':['Ncr1','Nkg7',	'Klrd1',],
         'ILC':[],
          'DN/DPs':[],
         'B-cells': ['Ms4a1', 'Cd19'], 
         'Plasmacells': ['Ppbp'], 
         'NKT and invariant T cells': ['Nkg7'], 
         'Dendritic-cells': ['Cst3', 'Fcer1a'],
         'Eosinophils':['Tyrobp']}

In [None]:
# Marker gene table used for the matrix plot below: group label -> gene symbols.
genes = {
    'B cells_n': [
        'Cst3', 'Cd79b', 'Ms4a1', 'H2-DMb2', 'Bank1', 'Ebf1', 'Ly6d', 'Mzb1', 'Igkc',
        'Cd74', 'Napsa', 'Ighm', 'H2-Eb1', 'H2-Aa', 'H2-Ab1', 'Iglc3', 'Iglc2', 'Lyn',
        'Ly86', 'Pkig', 'Plac8', 'Blnk', 'Syk', 'Cd37', 'Siglecg',
    ],
    'DCs_n': [
        'Arpp21', 'Cd74', 'H2-Aa', 'Atox1', 'H2-Eb1', 'Spi1', 'Ifi30', 'Tyrobp', 'Psap',
        'H2-Ab1', 'Tmsb4x', 'Ftl1', 'Syngr2', 'Cxcl16', 'Aif1', 'Ctsh', 'Ctsz', 'Actg1',
        'Pkib', 'Tbc1d8', 'Atpif1', 'Flt3', 'Skap2', 'Fmnl2', 'Clic4',
    ],
    'DN/DPs_n': [
        'Msrb1', 'Dntt', 'Sox4', 'Tcf7', 'Endou', 'Trbc2', 'Themis', 'Satb1', 'Ccr9',
        'Rhoh', 'Cyb5a', 'Cd8b1', 'Hmgb1', 'H3f3a', 'Aqp11', 'Ramp1', 'Ap3s1', 'Cux1',
        'Mier1', 'Edem1', 'Cd8a', 'Tcf12', 'Desi1', '2610307P16Rik', 'Trbc1',
    ],
    'EOS_n': [
        'Tmem176a', 'Tyrobp', 'Fcer1g', 'Ifitm3', 'Ftl1', 'Srgn', 'Il1b', 'Isg15', 'Fth1',
        'S100a9', 'Rtp4', 'Slfn4', 'S100a8', 'Hdc', 'Csf3r', 'Acod1', 'Lst1', 'Rsad2',
        'Ifitm2', 'Ifit3', 'Ifit1', 'Mxd1', 'Cebpb', 'Isg20', 'Txn1',
    ],
    'ILC_n': [
        'Fcer1g', 'Tmem176b', 'Ramp1', 'Il23r', 'Il1r1', 'Emb', 'Ikzf3', 'Ckb', 'Igf1r',
        'Lmo4', 'Pxdc1', 'Blk', 'St6galnac3', 'S100a4', 'Cxcr6', 'Il7r', 'Furin', 'Icos',
        'Tcrg-C1', 'Rora', 'Zbtb16', 'Selenop', 'Serpinb1a', 'Avpi1', 'Il18r1',
    ],
    'NK cells_n': [
        'Il12rb2', 'Tyrobp', 'Ncr1', 'Klre1', 'Klrb1c', 'Gzma', 'Xcl1', 'AW112010', 'Anxa2',
        'Nkg7', 'Car2', 'Irf8', 'Klrk1', 'Klrd1', 'Prf1', 'Il2rb', 'Txk', 'Ccl5',
        'Ccl4', 'Myl6', 'Klri2', 'Clnk', 'Serpinb9', 'Gem', 'Ptprc',
    ],
    'NKT and invariant cells_n': [
        'Tox', 'Tmsb10', 'Ly6c2', 'Ctsw', 'Sh3bgrl3', 'Gzmb', 'Klrk1', 'Id2', 'Il2rb',
        'Nkg7', 'Klrd1', 'Dennd4a', 'Satb1', 'Cxcr6', 'Klra9', 'Dusp2', 'Gimap4', 'Vps37b',
        'Chn2', 'Pitpnc1', 'Xcl1', 'Klrb1c', 'Cd7', 'Inpp4b', 'Zfp36l2',
    ],
    'T cells_n': [
        'Igkc', 'Ctla4', 'Themis', 'Emb', 'Prkca', 'Fam169b', 'Tnfrsf4', 'Fyb', 'Cd8b1',
        'Trbc2', 'Sntb1', 'Itga4', 'Lat', 'Cd3d', 'Tnfsf8', 'Cd8a', 'Shisa5', 'Ikzf2',
        'Ms4a6b', 'Itgav', 'Fyn', 'Cd2', 'Gzmk', 'Trps1', 'Smc4',
    ],
    'plasmacells_n': [
        'Jchain', 'Xbp1', 'Txndc5', 'Mzb1', 'Iglc2', 'Eaf2', 'Derl3', 'Iglv1', 'Pdia4',
        'Iglc3', 'Creld2', 'Herpud1', 'Serp1', 'Ssr4', 'Ckap4', 'Fkbp2', 'Hsp90b1', 'Prdx4',
        'Sec11c', 'Edem2', 'Edem1', 'Iglc1', 'Pou2af1',
    ],
}

In [None]:
# Matrix plot of the marker table per annotated cell type.
sc.set_figure_params(scanpy=True, fontsize = 14)
# show=False hands back the plot's axes (indexed by name below) instead of drawing
# immediately; standard_scale='var' rescales per gene.
ac = sc.pl.matrixplot(young, genes, groupby = 'cell_type', show = False, standard_scale = 'var')
ac['mainplot_ax'].set_xlabel('Genes')
# NOTE(review): rows are the 'cell_type' annotation, so 'Cell types' may be the
# more accurate label -- confirm.
ac['mainplot_ax'].set_ylabel('Clusters')

In [None]:
# Top 25 ranked genes per group from the last rank_genes_groups run on `young`;
# one column per group, named '<group>_n'.
result = young.uns['rank_genes_groups']
groups = result['names'].dtype.names
top_gene_columns = {}
for group in groups:
    top_gene_columns[f'{group}_n'] = result['names'][group]
pd.DataFrame(top_gene_columns).head(25)

## T cells young

In [None]:
# T cells of the young subset. Copy the subset: filter_genes, HVG selection and
# PCA below modify the object in place, which a view would only support through
# an implicit copy.
T = young[young.obs['cell_type'].isin(['T cells'])].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(T, min_cells=1)

In [None]:
# Re-select the 2000 most variable genes within the T-cell subset.
# NOTE(review): flavor='seurat_v3' is documented to expect raw counts, while T.X
# is log-normalised here; the counts are in layers['raw_data'].
sc.pp.highly_variable_genes(T, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(T, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
# `observe_variance` is already defined in the PCA section of the full dataset
# earlier in this notebook; the identical second definition that used to live
# here only shadowed it, so the helper is simply reused.
observe_variance(T)

In [None]:
# Cumulative % variance explained for the T-cell subset; red line = 30-PC cut-off.
T_cumulative_variance_pct = np.cumsum(T.uns['pca']['variance_ratio']) * 100
plt.plot(range(len(T.uns['pca']['variance_ratio'])), T_cumulative_variance_pct, '.-')
plt.axvline(30, color='r')
plt.xlabel('Principal Component', fontsize=14)
plt.ylabel('% Variance Explained', fontsize=14)

In [None]:
# Neighbourhood graph on the first 30 PCs of the T-cell subset, then its UMAP.
sc.pp.neighbors(T, n_neighbors=30, n_pcs=30)
sc.tl.umap(T, min_dist=0.5)

### T cells clustering and annotation


In [None]:
# Leiden clustering of the T cells from resolution 0.1 to 1.0; results go to
# T.obs under 'leiden_<resolution>'.
for resolution_parameter in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0):
    sc.tl.leiden(
        T,
        key_added=f'leiden_{resolution_parameter}',
        resolution=resolution_parameter,
        random_state=42,
    )

In [None]:
# All ten clustering resolutions of the T-cell subset.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    T,
    color=[
        'leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5',
        'leiden_0.6', 'leiden_0.7', 'leiden_0.8', 'leiden_0.9', 'leiden_1.0',
    ],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    ncols=5,
    wspace=0.7,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# Resolution 0.4 next to day and lineage markers; these cluster ids feed the
# subset annotation below.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    T,
    color=['leiden_0.4', 'day', 'Foxp3', 'Cd4', "Cd8a", "Gzmk", 'Fcer1g'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    ncols=5,
    wspace=0.7,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# Manual annotation of T-cell subsets from leiden_0.4 cluster ids.
t_subset_by_leiden_0_4 = {
    '0': 'Tregs',
    '2': 'Tregs',
    '3': 'CD4',
    '6': 'Naive CD4',
    '5': 'GZMK+ CD8',
    '1': 'Naive CD8',
    '4': 'Naive CD8',
}
# Any cluster id missing from the table is labelled 'ERROR'.
T.obs['cell_type_subset'] = [
    t_subset_by_leiden_0_4.get(cluster_id, 'ERROR') for cluster_id in T.obs['leiden_0.4']
]

In [None]:

# Display order of the T-cell subsets in plots and legends
new_order = ['Tregs', 'CD4','GZMK+ CD8','Naive CD4', 'Naive CD8', ]

# Assign the new order to the cell_type_subset column.
# NOTE(review): 'ERROR' is not among the categories, so any cell carrying that
# fallback label would presumably become NaN here -- confirm none exist.
T.obs['cell_type_subset'] = pd.Categorical(T.obs['cell_type_subset'], categories=new_order, ordered=True)


In [None]:
# Annotated T-cell subsets next to their defining markers.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    T,
    color=['cell_type_subset', 'Cd8a', "Cd4", 'Foxp3', "Cd40lg", "Gzmk"],
    use_raw=False,
    palette=user_defined_palette,
    ncols=5,
    wspace=0.3,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# Annotated T-cell subsets next to the time point.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    T,
    color=['cell_type_subset', 'day'],
    use_raw=False,
    palette=user_defined_palette,
    ncols=5,
    wspace=0.3,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    sort_order=False,
)

In [None]:
# Full marker panel on the T-cell embedding.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    T, 
    # "Foxp3" used to be listed twice, which drew the same panel two times; the
    # trailing duplicate is removed.
    color=['leiden_0.2', "day", "stage", "Ly6a", 'Cd4', 'Cd40lg', "Icos", 'Cd8a', "Cd8b1", "Ccr7",
           "Stat1", 'Lef1', 'Foxp1', "Sell", 'Foxp3', 'Ikzf2', 'Ctla4', 'Gzmk', "Nkg7", "Ccl5"],
    palette=user_defined_palette,  

    use_raw=False,
    ncols=5,
    wspace = 0.3,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
sc.tl.rank_genes_groups(T, 'cell_type_subset', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(T, n_genes=25, sharey=False) 

In [None]:
# Marker genes per T-cell subset for the dot plot and matrix plot below.
marker_genes_dict = dict([
    ('Tregs', ['Foxp3', 'Ikzf2', 'Ctla4']),
    ('CD4', ["Ly6a", 'Cd4', 'Cd40lg', "Icos"]),
    ('GZMK+ CD8', ['Gzmk', "Nkg7", "Ccl5"]),
    ('CD8', ['Cd8a', "Cd8b1"]),
    ('Naive CD8', ["Lef1", "Sell", "Ccr7"]),
    ('Naive CD4', ["Bach2"]),
])

In [None]:
# Dot plot of the marker genes per T-cell subset.
# NOTE(review): dendrogram=True presumably reorders the subsets by hierarchical
# clustering rather than keeping the categorical order set above -- confirm
# this is the intended row order.
sc.pl.dotplot(T, marker_genes_dict, 'cell_type_subset', dendrogram=True,log=True)


In [None]:
sc.pl.matrixplot(T, marker_genes_dict, 'cell_type_subset', dendrogram=False, cmap='Blues', standard_scale='var', colorbar_title='column scaled\nexpression')


In [None]:
# Marker genes per T-cell subset for the heatmaps below (insertion order is the
# column order of the heatmap).
genes = {
    'Tregs': ['Foxp3', 'Ikzf2', 'Ctla4'],
    'CD4': ["Ly6a", 'Cd4', 'Cd40lg', "Icos"],
    'GZMK+ CD8': ['Gzmk', "Nkg7", "Ccl5", 'Cd8a', "Cd8b1", "Ccr7"],
    'Naive CD8': ["Lef1", "Sell"],
    'Naive CD4': ["Bach2"],
}

# Flatten the per-subset lists into one ordered list of gene symbols.
genes_unlist = []
for subset_genes in genes.values():
    genes_unlist.extend(subset_genes)
print(genes_unlist)

In [None]:
# Get the gene indices
# Get the gene names from the marker table defined above instead of repeating
# the same 16 symbols by hand in two places.
gene_names = list(genes_unlist)

# Fail with a readable message if a marker was filtered out of T.
missing_genes = [gene_name for gene_name in gene_names if gene_name not in T.var_names]
if missing_genes:
    raise KeyError('Marker genes not present in T.var_names: ' + ', '.join(missing_genes))

# Get the gene indices
gene_ids = [T.var_names.get_loc(gene_name) for gene_name in gene_names]

# Select the columns of interest from the expression matrix
selected_data_sparse = T.X[:, gene_ids]

# Convert to a dense array; .toarray() only exists on sparse matrices, so a
# matrix that is already dense is passed through unchanged.
if sparse.issparse(selected_data_sparse):
    selected_data_dense = selected_data_sparse.toarray()
else:
    selected_data_dense = np.asarray(selected_data_sparse)
print(selected_data_dense.shape)

# Create a data matrix with only genes of interest 
data_matrix = pd.DataFrame(selected_data_dense, index=T.obs_names, columns=gene_names)

In [None]:
data_matrix

In [None]:

# Add cluster labels to compute average
data_matrix['cell_type_subset'] = T.obs['cell_type_subset']

In [None]:
# Compute the average for each cluster
# Mean expression of every marker gene within each T-cell subset (rows = subsets, columns = genes).
avg_matrix = data_matrix.groupby(['cell_type_subset']).mean()

In [None]:
# Create a heatmap
import seaborn as sns
g = sns.clustermap(avg_matrix, col_cluster = False, standard_scale = 1, cbar_pos=(1.05, .3, .03, .4), 
                   linewidth = 0.02, figsize = (6, 6))
g.ax_heatmap.set_xlabel('Genes')
g.ax_heatmap.set_ylabel('Leiden Clusters')

# To save figure:
# g.savefig('/path/to/save/plot/clustermap.png', dpi = 150, bbox_inches = 'tight')

In [None]:
genes

In [None]:
color_set = ['#f6222e','#ff34ff','#060047','#ffbaba','#002FA7',    ]
celltype_color_map = dict(zip(genes.keys(), color_set))

In [None]:
celltype_color_map

In [None]:
# One color per gene, inherited from the label the gene is listed under.
# A gene listed under several labels keeps the color of the last one.
genes_color_map = {
    gene: celltype_color_map[label]
    for label, label_markers in genes.items()
    for gene in label_markers
}

genes_color_map

In [None]:
# Get the gene ids
gene_ids = [T.var_names.get_loc(j) for j in genes_color_map.keys()]

# Check the shape of the selected data
selected_data_sparse = T.X[:, gene_ids]

# Convert the sparse matrix to a dense array
selected_data_dense = selected_data_sparse.toarray()
print(selected_data_dense.shape)

# Create a data matrix with only genes of interest 
data_matrix = pd.DataFrame(selected_data_dense, index = T.obs_names, columns = genes_color_map.keys())

# Add cluster labels to compute average
data_matrix['cell_type_subset'] = T.obs['cell_type_subset']

In [None]:
# Compute the average for each cluster
avg_matrix = data_matrix.groupby(['cell_type_subset']).mean()

In [None]:
# Assuming your data is in avg_matrix
# You can create a transposed version of avg_matrix to switch x and y axes
#avg_matrix = np.transpose(avg_matrix)

In [None]:
import seaborn as sns

# Assuming avg_matrix, genes_color_map, and celltype_color_map are defined

# Create a heatmap
g = sns.clustermap(avg_matrix,row_cluster=False, col_cluster=False, standard_scale=1, cbar_pos=(1.05, .3, .03, .4), 
                   linewidth=0.5, figsize=(6, 3.5),  
                   col_colors=[color_use for color_use in genes_color_map.values()], row_colors=None)

# Set labels for axes
g.ax_heatmap.set_xlabel('Genes')
g.ax_heatmap.set_ylabel('celltype')

# Draw the legend bar for the classes                 
for label in celltype_color_map.keys():
    g.ax_col_dendrogram.bar(0, 0, color=celltype_color_map[label], label=label, linewidth=0)

# Add legend to the dendrogram
g.ax_col_dendrogram.legend(ncol=1, bbox_to_anchor=(0, 0.75))

# To save figure:
g.savefig('CD45new/spectra/clustermap.pdf', dpi = 6000, bbox_inches = 'tight')

In [None]:
T.uns['cell_type_subset_colors']=['#f6222e', '#002FA7', '#060047','#b57edc', '#ffbaba',]

In [None]:
sc.pl.umap(T, color='cell_type_subset') 

In [None]:
sc.pl.violin(T, [ 'ribo_frac', 'mito_frac', ],  
             palette=user_defined_palette,  jitter=0.4, groupby = 'cell_type_subset', rotation= 90)

In [None]:
sc.set_figure_params(dpi=150, dpi_save=300, vector_friendly=True, transparent=True)
sc.pl.umap(
    T, 
    color='cell_type_subset', 
    use_raw=False,
    ncols=5,
    frameon=False,
    add_outline=False,
    sort_order = False,
)

In [None]:
# Create a heatmap of the per-group mean expression (no row/column clustering),
# with a color strip over the columns giving each gene's label color.
a = sns.clustermap(avg_matrix, col_cluster = False,row_cluster = False, standard_scale = 1, cbar_pos=(1.05, .3, .03, .4), 
                   linewidth = 0.1, figsize = (6, 6), 
                   col_colors = list(genes_color_map.values()))

# Decorate THIS figure. The cell previously assigned the new clustermap to `a`
# but then set labels and legend on `g` (the earlier figure), so the figure
# created here was left unlabeled.
a.ax_heatmap.set_xlabel('Genes')
a.ax_heatmap.set_ylabel('PhenoGraph Clusters')

# Draw the legend bar for the classes: zero-size bars exist only to register
# one legend entry per label color.
for label in celltype_color_map.keys():
    a.ax_col_dendrogram.bar(0, 0, color=celltype_color_map[label],
                            label=label, linewidth=0)
a.ax_col_dendrogram.legend(ncol=1, bbox_to_anchor = (0, 0.75))

# To save figure:
# g.savefig('path/to/save/plot/clustermap.png', dpi = 150, bbox_inches = 'tight')

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    T, 
    color=[ 'cell_type_subset','stage', 'day',], 
    ncols=6,
    outline_width=[0.6, 0.05],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.4,
    use_raw=False,
    add_outline=True
)

In [None]:
colors = T.uns['cell_type_subset_colors']

tmp = pd.crosstab(T.obs['day'],T.obs["cell_type_subset"], normalize='index', )
tmp.plot.area(stacked=True, color=colors).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)
plt.title("2 month-old")


In [None]:
Tregs= T[T.obs["cell_type_subset"]=="Tregs"]

In [None]:
sc.tl.rank_genes_groups(Tregs, 'day', groups=['d7'], reference='d0', method='wilcoxon', use_raw=False)
result = Tregs.uns['rank_genes_groups']

In [None]:
sc.pl.rank_genes_groups(Tregs, n_genes=30, sharey=False)


In [None]:
#path_to_h5ad = '/Users/alemarquis/Desktop/CD45/CD45new/CD45pos_02mo18mo_SLTBId147_T.h5ad'
#T.write_h5ad(path_to_h5ad)

In [None]:
#T=sc.read_h5ad('/Users/xleana/Desktop/CD45/CD45new/CD45pos_02mo18mo_SLTBId147_T.h5ad')
#T.uns['log1p']['base']=None


In [None]:
TyoungD0=T[T.obs["day"]=="d0"]

In [None]:
TyoungD1=T[T.obs["day"]=="d1"]

In [None]:
TyoungD4=T[T.obs["day"]=="d4"]

In [None]:
TyoungD7=T[T.obs["day"]=="d7"]

In [None]:
# Wilcoxon marker test per cell_type_subset on the day-0 cells.
# `layer` (singular) is the rank_genes_groups argument that selects the matrix;
# the previous `layers=` keyword is not a parameter of the function and was
# absorbed by **kwds, so the layer was not applied. use_raw=False is explicit
# because scanpy refuses to combine a layer with use_raw=True.
# NOTE(review): assumes TyoungD0.layers contains 'norm_counts' — confirm.
sc.tl.rank_genes_groups(TyoungD0, groupby='cell_type_subset', method='wilcoxon',
                        layer='norm_counts', use_raw=False)
sc.pl.rank_genes_groups_dotplot(TyoungD0, n_genes=50, dendrogram=False)


In [None]:
# Wilcoxon marker test per cell_type_subset on the day-1 cells.
# `layer` (singular) is the rank_genes_groups argument that selects the matrix;
# the previous `layers=` keyword is not a parameter of the function and was
# absorbed by **kwds, so the layer was not applied. use_raw=False is explicit
# because scanpy refuses to combine a layer with use_raw=True.
# NOTE(review): assumes TyoungD1.layers contains 'norm_counts' — confirm.
sc.tl.rank_genes_groups(TyoungD1, groupby='cell_type_subset', method='wilcoxon',
                        layer='norm_counts', use_raw=False)
sc.pl.rank_genes_groups_dotplot(TyoungD1, n_genes=50, dendrogram=False)


In [None]:
# Wilcoxon marker test per cell_type_subset on the day-4 cells.
# `layer` (singular) is the rank_genes_groups argument that selects the matrix;
# the previous `layers=` keyword is not a parameter of the function and was
# absorbed by **kwds, so the layer was not applied. use_raw=False is explicit
# because scanpy refuses to combine a layer with use_raw=True.
# NOTE(review): assumes TyoungD4.layers contains 'norm_counts' — confirm.
sc.tl.rank_genes_groups(TyoungD4, groupby='cell_type_subset', method='wilcoxon',
                        layer='norm_counts', use_raw=False)
sc.pl.rank_genes_groups_dotplot(TyoungD4, n_genes=50, dendrogram=False)


In [None]:
# Wilcoxon marker test per cell_type_subset on the day-7 cells.
# `layer` (singular) is the rank_genes_groups argument that selects the matrix;
# the previous `layers=` keyword is not a parameter of the function and was
# absorbed by **kwds, so the layer was not applied. use_raw=False is explicit
# because scanpy refuses to combine a layer with use_raw=True.
# NOTE(review): assumes TyoungD7.layers contains 'norm_counts' — confirm.
sc.tl.rank_genes_groups(TyoungD7, groupby='cell_type_subset', method='wilcoxon',
                        layer='norm_counts', use_raw=False)
sc.pl.rank_genes_groups_dotplot(TyoungD7, n_genes=50, dendrogram=False)


In [None]:
# Export the rank_genes_groups result of TyoungD0 as one wide CSV:
# for every group, a column per statistic named '<group>_<suffix>'.
result = TyoungD0.uns['rank_genes_groups']
groups = result['names'].dtype.names
# Explicit suffix per statistic. The previous `key[:1]` suffix mapped both
# 'pvals_adj' and 'pvals' to '_p', so the adjusted p-values were overwritten
# by the raw ones and never reached the file. Existing column names
# ('_n', '_l', '_p' = raw p-value) are unchanged; '_padj' is new.
column_suffix = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'padj', 'pvals': 'p'}
df = pd.DataFrame(
    {group + '_' + column_suffix[key]: result[key][group]
     for group in groups for key in column_suffix})
df.to_csv('/Users/alemarquis/Desktop/Tyoung/TyoungD0.csv')

In [None]:
# Export the rank_genes_groups result of TyoungD1 as one wide CSV:
# for every group, a column per statistic named '<group>_<suffix>'.
result = TyoungD1.uns['rank_genes_groups']
groups = result['names'].dtype.names
# Explicit suffix per statistic. The previous `key[:1]` suffix mapped both
# 'pvals_adj' and 'pvals' to '_p', so the adjusted p-values were overwritten
# by the raw ones and never reached the file. Existing column names
# ('_n', '_l', '_p' = raw p-value) are unchanged; '_padj' is new.
column_suffix = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'padj', 'pvals': 'p'}
df = pd.DataFrame(
    {group + '_' + column_suffix[key]: result[key][group]
     for group in groups for key in column_suffix})
df.to_csv('/Users/alemarquis/Desktop/Tyoung/TyoungD1.csv')

In [None]:
# Export the rank_genes_groups result of TyoungD4 as one wide CSV:
# for every group, a column per statistic named '<group>_<suffix>'.
result = TyoungD4.uns['rank_genes_groups']
groups = result['names'].dtype.names
# Explicit suffix per statistic. The previous `key[:1]` suffix mapped both
# 'pvals_adj' and 'pvals' to '_p', so the adjusted p-values were overwritten
# by the raw ones and never reached the file. Existing column names
# ('_n', '_l', '_p' = raw p-value) are unchanged; '_padj' is new.
column_suffix = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'padj', 'pvals': 'p'}
df = pd.DataFrame(
    {group + '_' + column_suffix[key]: result[key][group]
     for group in groups for key in column_suffix})
df.to_csv('/Users/alemarquis/Desktop/Tyoung/TyoungD4.csv')

In [None]:
# Export the rank_genes_groups result of TyoungD7 as one wide CSV:
# for every group, a column per statistic named '<group>_<suffix>'.
result = TyoungD7.uns['rank_genes_groups']
groups = result['names'].dtype.names
# Explicit suffix per statistic. The previous `key[:1]` suffix mapped both
# 'pvals_adj' and 'pvals' to '_p', so the adjusted p-values were overwritten
# by the raw ones and never reached the file. Existing column names
# ('_n', '_l', '_p' = raw p-value) are unchanged; '_padj' is new.
column_suffix = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'padj', 'pvals': 'p'}
df = pd.DataFrame(
    {group + '_' + column_suffix[key]: result[key][group]
     for group in groups for key in column_suffix})
df.to_csv('/Users/alemarquis/Desktop/Tyoung/TyoungD7.csv')

In [None]:
sc.pl.umap(young,color="cell_type")

## NKT cells

In [None]:
# Take an explicit copy: the following cells modify NKT in place (gene
# filtering, HVG selection, PCA), which should not go through a view of `young`.
NKT = young[young.obs['cell_type'].isin(['NKT and ILTCK'])].copy()


In [None]:
# Drop genes detected in fewer than 4 cells (min_cells=4) — stricter than
# removing only all-zero columns
sc.pp.filter_genes(NKT, min_cells=4)

In [None]:
sc.pp.highly_variable_genes(NKT, n_top_genes=5000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(NKT, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(NKT)

In [None]:
# Cumulative % variance explained by the NKT PCA. This cell previously read
# T.uns['pca'] (the T-cell object), a copy-paste leftover in the NKT section.
# The red line is drawn at x=30, the n_pcs passed to sc.pp.neighbors below.
nkt_variance_ratio = NKT.uns['pca']['variance_ratio']
plt.plot(range(len(nkt_variance_ratio)), np.cumsum(nkt_variance_ratio) * 100, '.-')
plt.axvline(30, color = 'r')
plt.xlabel('Principal Component', fontsize = 14)
plt.ylabel('% Variance Explained', fontsize = 14)

In [None]:
sc.pp.neighbors(NKT, n_neighbors=30, n_pcs=30)
sc.tl.umap(NKT, min_dist=0.9)

In [None]:
for resolution_parameter in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
    sc.tl.leiden(NKT, resolution=resolution_parameter, random_state=42, 
                        key_added='leiden_'+str(resolution_parameter))

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    NKT, 
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    NKT, 
    color=['leiden_0.1',"stage","Trac","Trdc","Cd8a","Cd4"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=6,
    size=15,
    wspace = 0.2,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    NKT, 
    color=['leiden_0.1','leiden_0.3', 'stage', 'day', 'sample', 'day',"Il4",
           "Rorc","Il17a","Icos","Ncr1","Cxcr6",'Fcer1g',], 
    ncols=6,
    use_raw=False,
    outline_width=[0.6, 0.05],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.5,
    add_outline=True
)

In [None]:
# Label cells from their leiden_0.1 cluster: cluster '2' is 'Invariant T',
# every other cluster falls back to 'NKT'.
nkt_label_by_cluster = {'2': 'Invariant T'}
NKT.obs['cell_type_subset'] = [
    nkt_label_by_cluster.get(cluster, 'NKT') for cluster in NKT.obs['leiden_0.1']
]

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    NKT, 
    color=['leiden_0.3','cell_type_subset',"day"], 
    ncols=6,
    use_raw=False,
    outline_width=[0.6, 0.05],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.5,
    add_outline=True
)

## B cells

In [None]:
# Take an explicit copy: the following cells modify B in place (gene
# filtering, HVG selection, PCA), which should not go through a view of `young`.
B = young[young.obs['cell_type'].isin(['B cells'])].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(B, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(B, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(B, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(B)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(B, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sc.pp.neighbors(B, n_neighbors=15)
sc.tl.umap(B)

In [None]:
for resolution_parameter in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
    sc.tl.leiden(B, resolution=resolution_parameter, random_state=42, 
                        key_added='leiden_'+str(resolution_parameter))

In [None]:
B

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    B, 
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0',"day","Cd3e"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
sc.tl.rank_genes_groups(B, 'leiden_0.1', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(B, n_genes=25, sharey=False) 

In [None]:
# Label cells from their leiden_0.1 cluster; any cluster not listed here is
# tagged 'Error' so it stands out downstream.
b_label_by_cluster = {
    '0': 'B cells',
    '1': 'B cells',
    '2': 'Plasma cells',
    '3': 'Plasma cells',
}
B.obs['cell_type_subset'] = [
    b_label_by_cluster.get(cluster, 'Error') for cluster in B.obs['leiden_0.1']
]

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    B, 
    color=['cell_type_subset',"Igha","Ighm" ], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    young, 
    color=['cell_type' ], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
tmp = pd.crosstab(B.obs['day'],B.obs['cell_type_subset'], normalize='index')
tmp.plot.area(stacked=True).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)

## DCs

In [None]:
# Take an explicit copy: the following cells modify DC in place (gene
# filtering, HVG selection, PCA), which should not go through a view of `young`.
DC = young[young.obs['cell_type'].isin(['DCs and Macrophages'])].copy()

In [None]:
# Drop genes detected in fewer than 4 cells (min_cells=4) — stricter than
# removing only all-zero columns
sc.pp.filter_genes(DC, min_cells=4)

In [None]:
sc.pp.highly_variable_genes(DC, n_top_genes=3000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(DC, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(DC)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(DC, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sc.pp.neighbors(DC, n_neighbors=30, n_pcs=30)
sc.tl.umap(DC,min_dist=0.5)

In [None]:
for resolution_parameter in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
    sc.tl.leiden(DC, resolution=resolution_parameter, random_state=42, 
                        key_added='leiden_'+str(resolution_parameter))

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    DC, 
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    DC, 
    color=["H2-Aa","Clec9a","Xcr1","Sirpa","Ccr7","Fscn1","Msrb1","Siglech","Csf1r","Zbtb46","Mertk","Spic","Timd4",
          "Vcam1","Mafb","Lyz2","leiden_0.4","day"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
# Label cells from their leiden_0.4 cluster; any cluster not listed here is
# tagged 'ERROR' so it stands out downstream.
dc_label_by_cluster = {
    '0': 'cDC1',
    '1': 'cDC1',
    '4': 'cDC1',
    '3': 'cDC2',
    '2': 'CCR7+ cDC',
    '6': 'p-DCs',
    '5': 'Macrophages',
}
DC.obs['cell_type_subset'] = [
    dc_label_by_cluster.get(cluster, 'ERROR') for cluster in DC.obs['leiden_0.4']
]

In [None]:
sc.pl.umap(DC,color=['leiden_0.4','cell_type_subset',"Ccr7"],)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    DC, 
    color=["Clec9a","Xcr1","Irf8","Clec10a",
           "Sirpa","Ccr7","Fscn1",'Cd79a', 'Ms4a1', "Xbp1","Igkc","Msrb1","Cd4","Rorc","Il22",
          "Gata3","Rorc","Pxdc1","Ahr"], 
    ncols=6,
    outline_width=[0.6, 0.05],
    size=100,
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.3,
    add_outline=True
)

#SIRPA DC2
#CCR7+ DC2

In [None]:
# NOTE(review): four colors are listed, but the annotation cell above assigns
# five DC subset labels (plus an 'ERROR' fallback) — confirm the palette length
# matches the categories actually present, otherwise it may not be applied.
DC.uns['cell_type_subset_colors'] = ["#F1BB7B", "#FD6467", "#5B1A18", "#D67236"]

In [None]:
DC

In [None]:
DC.uns['Dendritic cell subsets_colors'] = [  "#39312F", "#D67236","#AA9486", "#EAD3BF","#B6854D", ]

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    DC, 
    color=['cell_type_subset'], 
    ncols=6,
    outline_width=[0.08, 0.06],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.3,
    add_outline=True,
)

In [None]:
ccr7DCs=DC[DC.obs['cell_type_subset'].isin(['CCR7+ cDC'])]

In [None]:
sc.tl.rank_genes_groups(ccr7DCs, 'day', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(ccr7DCs, n_genes=25, sharey=False) 

In [None]:
tmp = pd.crosstab(DC.obs['day'],DC.obs['cell_type_subset'], normalize='index')
tmp.plot.area(stacked=True).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)

## ILCs

In [None]:
sc.pl.umap(young)

In [None]:
# ILC, NK and DN/DP cells are analysed together. Take an explicit copy: the
# following cells modify ILC in place (gene filtering, HVG selection, PCA),
# which should not go through a view of `young`.
ILC = young[young.obs['cell_type'].isin(['ILC',"NK cells", "DN/DPs"])].copy()
#'DN/DPs',"NK cells"

In [None]:
sc.pl.umap(ILC)

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(ILC, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(ILC, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(ILC, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(ILC)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(ILC, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sc.pp.neighbors(ILC, n_neighbors=30, n_pcs=30)
sc.tl.umap(ILC,min_dist=0.5)

In [None]:
for resolution_parameter in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
    sc.tl.leiden(ILC, resolution=resolution_parameter, random_state=42, 
                        key_added='leiden_'+str(resolution_parameter))

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    ILC, 
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    ILC, 
    color=["Cd4","Cd8a","Sox4","day","Ncr1","cell_type","Rorc","Ccr6","stage","Eomes","Cd4","Foxp3","Il23r"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
# Label cells from their leiden_0.1 cluster; any cluster not listed here is
# tagged 'ERROR' so it stands out downstream.
ilc_label_by_cluster = {
    '2': 'ILC3',
    '4': 'ILC2',
    '1': 'DN',
    '3': 'DP',
    '0': 'NK',
}
ILC.obs['cell_type_subset'] = [
    ilc_label_by_cluster.get(cluster, 'ERROR') for cluster in ILC.obs['leiden_0.1']
]

In [None]:
sc.pl.umap(ILC, color=["leiden_0.1","cell_type",'cell_type_subset',"day"])

In [None]:
sc.tl.rank_genes_groups(ILC, 'leiden_0.4', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(ILC, n_genes=25, sharey=False) 

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    ILC, 
    color=['leiden_0.4','cell_type_subset','stage','day'], 
    ncols=6,
    outline_width=[0.08, 0.06],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.3,
    add_outline=True
)

In [None]:
# NOTE(review): four colors are listed, but the annotation cell above assigns
# five subset labels (ILC3, ILC2, DN, DP, NK, plus an 'ERROR' fallback) —
# confirm the palette length matches the categories actually present.
ILC.uns['cell_type_subset_colors'] = ["#F1BB7B", "#FD6467", "#5B1A18", "#D67236"]

In [None]:
tmp = pd.crosstab(ILC.obs['day'],ILC.obs['cell_type_subset'], normalize='index')
tmp.plot.area(stacked=True).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)

In [None]:
young

In [None]:
sc.pl.umap(young, color='cell_type')

In [None]:
annotated_subsets = pd.concat([T.obs['cell_type_subset'], DC.obs['cell_type_subset'], 
                               NKT.obs['cell_type_subset'], B.obs['cell_type_subset'],
                                ILC.obs['cell_type_subset']]
                              )

In [None]:
young.obs['cell_type_subset']=''

In [None]:
# Write the subset labels back onto `young` with a single .loc assignment; the
# right-hand Series is aligned on the obs index. The previous chained form
# (obs[col][mask] = ...) assigns through an intermediate Series, which pandas
# warns about and which does not update the frame under copy-on-write.
is_annotated = young.obs.index.isin(annotated_subsets.index)
young.obs.loc[is_annotated, 'cell_type_subset'] = annotated_subsets

In [None]:
young

In [None]:
young=young[young.obs['cell_type_subset']!='']

In [None]:
sc.pl.umap(young,color='cell_type_subset')

In [None]:
young.uns['cell_type_subset_colors']=['#f6222e','#002FA7','#b57edc','#060047', '#ffbaba',#'#3283fe',
                                      '#006fa6','#809693', '#bec1d4', '#F2BE22',
                                      '#FFA200', '#BFDB38', '#00DFA2',
       '#1F8A70', 
                                      '#DD8D29', '#5a0007', '#46ACC8', '#ffff00', '#B40F20', 
      '#4fc601', '#964B00'
                ]




In [None]:

# Define the new order for the categories
new_order = [ 'Tregs', 'Naive CD4','CD4','GZMK+ CD8', 'Naive CD8', #'CD8',  
             'Invariant T', 'DN',"DP",'NKT', 'NK','ILC2',"ILC3", 'B cells', 'Plasma cells','cDC1', 'CCR7+ cDC' ,'cDC2', 'p-DCs', 'Macrophages',]

In [None]:

# Assign the new order to the cell_type_subset column
young.obs['cell_type_subset'] = pd.Categorical(young.obs['cell_type_subset'], categories=new_order, ordered=True)


In [None]:
sc.set_figure_params(dpi=300, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(young, color=['cell_type_subset'], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.3,
                     outline_width=[0.6, 0.01], 
                     size=5,  
                     frameon=False, 
                     add_outline=False, 
                     sort_order = False, )



In [None]:
sc.pl.umap(young, color=['day'], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.3,
                     outline_width=[0.6, 0.01], 
                     size=5,  
                     frameon=False, 
                     add_outline=False, 
                     sort_order = False)

In [None]:
sc.pl.violin(young, [ 'ribo_frac', 'mito_frac', ],  
             palette=user_defined_palette,  jitter=0.6, groupby = 'cell_type_subset', rotation= 90)

In [None]:
list(young.obs['cell_type_subset'].unique())


In [None]:
# NOTE(review): this palette is written to `adata`, while the surrounding cells
# inspect and plot `young` — confirm `adata` is the intended target.
adata.uns['cell_type_subset_colors']=['#f6222e','#bdcdff','#E90064','#3283fe', '#060047',
                                      '#006fa6','#a30059', '#ffdbe5', '#F2BE22',
                                      '#0000a6', '#D4ADFC', '#00DFA2',
       '#1F8A70', '#BFDB38',
                                      '#DD8D29', '#5a0007', '#46ACC8', '#E58601', '#B40F20', 
      '#4fc601',
                ]


In [None]:
sc.pl.umap(young, color=['cell_type_subset',"stage","day"], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.7,
                     outline_width=[0.6, 0.05], 
                     size=15,  
                     frameon=False, 
                     add_outline=True, 

                     sort_order = False)

In [None]:


# Stacked-area plot of subset proportions per day for the 2-month-old cells.
colors = young.uns['cell_type_subset_colors']

# Restrict to stage "02mo" once and take BOTH crosstab axes from that subset,
# instead of pairing a subset 'day' column with the full-length
# 'cell_type_subset' column and relying on implicit index alignment.
young_02mo_obs = young[young.obs['stage']=="02mo"].obs
tmp = pd.crosstab(young_02mo_obs['day'], young_02mo_obs["cell_type_subset"], normalize='index', )
tmp.plot.area(stacked=True, color=colors).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)
plt.title("2 month-old")


In [None]:
#path_to_h5ad = '/Users/alemarquis/Desktop/CD45/CD45new/youngnew.h5ad'

In [None]:
#young.write(path_to_h5ad)

In [None]:
#young= sc.read('/Users/xleana/Desktop/CD45/CD45new/young.h5ad')

In [None]:
sc.pl.umap(young, color="cell_type_subset")

In [None]:
young = young[young.obs['cell_type_subset'] != 'CD8']


In [None]:
TNFgenes = ['Tnfrsf1a', 'Tnfrsf1b', 'Tnfrsf10a', 'Tnfrsf10b', 'Tnfrsf10c', 'Tnfrsf10d', 'Tnfrsf11a', 'Tnfrsf11b', 'Tnfrsf12a', 'Tnfrsf13b', 'Tnfrsf13c', 'Tnfrsf14', 'Tnfrsf17', 'Tnfrsf18', 'Tnfrsf19', 'Tnfrsf21', 'Tnfrsf22', 'Tnfrsf23', 'Tnfrsf25', 'Tnfrsf4', 'Tnfrsf8', 'Tnfrsf9']



In [None]:
sc.pl.dotplot(young[young.obs["day"]=='d0'], ['Il1r1',"Il1rl1",'Il6ra',"Il18r1","Il18rap",  'Il6st',"Il18rap",'Tnfrsf1a', 'Tnfrsf1b',  'Tnfrsf11a', 'Tnfrsf11b', 'Tnfrsf12a', 'Tnfrsf13b', 'Tnfrsf13c', 'Tnfrsf14', 'Tnfrsf17', 'Tnfrsf18', 'Tnfrsf19', 'Tnfrsf21', 'Tnfrsf22', 'Tnfrsf23', 'Tnfrsf25', 'Tnfrsf4', 'Tnfrsf8', 'Tnfrsf9', ], ['cell_type_subset',], dendrogram=False,standard_scale='var', swap_axes=False)


In [None]:
sc.pl.dotplot(young[young.obs["day"]=='d0'], ['Il1r1',"Il1rl1",'Il6ra',"Il18r1","Il18rap" ], ['cell_type_subset',], dendrogram=False,standard_scale='var', swap_axes=False)


In [None]:
sc.pl.dotplot(young[young.obs["day"]=='d1'], ['Il1r1',"Il1rl1","Il18r1","Il18rap",'Il6ra',  'Il6st',"Il18rap",'Tnfrsf1a', 'Tnfrsf1b',  'Tnfrsf11a', 'Tnfrsf11b', 'Tnfrsf12a', 'Tnfrsf13b', 'Tnfrsf13c', 'Tnfrsf14', 'Tnfrsf17', 'Tnfrsf18', 'Tnfrsf19', 'Tnfrsf21', 'Tnfrsf22', 'Tnfrsf23', 'Tnfrsf25', 'Tnfrsf4', 'Tnfrsf8', 'Tnfrsf9',], ['cell_type_subset',], dendrogram=False,standard_scale='var', swap_axes=True)


In [None]:
sc.pl.dotplot(young[young.obs["day"]=='d4'], ["Il18r1","Il18rap"], ['cell_type_subset',], dendrogram=False,standard_scale='var', swap_axes=True)


In [None]:
sc.pl.dotplot(young[young.obs["day"]=='d7'], ["Il18r1","Il18rap"], ['cell_type_subset',], dendrogram=False,standard_scale='var', swap_axes=True)


In [None]:
Tregs= young[young.obs["cell_type_subset"]=='Tregs']

In [None]:
Treggenes= {'Treg activation': ['Foxp3','Il2ra', 'Cd81','Tnfrsf4',  'Ctla4','Ikzf2','Ikzf4','Rora', 'Tnfrsf18']}
Regeneration= { 'Regeneration': [ 'Areg', 'Tff1','Penk',]}
Cellstability= {'Stability': ['Zfp36l1','Cish','Sdc4',"Klrg1"]}

In [None]:
sc.pl.matrixplot(Tregs[Tregs.obs['stage']=="02mo"],Treggenes , 'day', dendrogram=False,  standard_scale='var', swap_axes=True,title="2 mo old")


In [None]:
sc.pl.matrixplot(Tregs[Tregs.obs['stage']=="02mo"], Regeneration , 'day', dendrogram=False,   swap_axes=True,title="2 mo old")


In [None]:
sc.pl.matrixplot(Tregs[Tregs.obs['stage']=="02mo"], Cellstability , 'day', dendrogram=False,   swap_axes=True,title="2 mo old",colorbar_title='column scaled\nexpression',)


### Reanalyze the old-only (18 mo) data after removal of low-quality cells

In [None]:
# Take an explicit copy: the following cells modify `old` in place (HVG
# selection, PCA, neighbors, clustering), which should not go through a view
# of `adata`.
old = adata[adata.obs["stage"]=="18mo"].copy()

In [None]:
old

In [None]:
sc.pp.highly_variable_genes(old, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(old, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(old)


In [None]:
plt.plot(range(len(old.uns['pca']['variance_ratio'])), np.cumsum(old.uns['pca']['variance_ratio']) * 100, '.-')
plt.axvline(30, color = 'r')
plt.xlabel('Principal Component', fontsize = 14)
plt.ylabel('% Variance Explained', fontsize = 14)

In [None]:
sc.pp.neighbors(old, n_neighbors=50,n_pcs=30)

In [None]:
sc.tl.umap(old, min_dist=0.6)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    old, 
    color=["Sox4","Rorc","Ncr1","Klrk1","Cxcr6", 'Cd8b1',"Cd8a","Cd4",'Tnfrsf4',"Foxp3","H2-Aa","Clec9a","Xcr1",
           "Sirpa","Ccr7","Fscn1",'Cd79a', 'Ms4a1', "Xbp1","Igkc","Msrb1","stage", 'day'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
for resolution_parameter in [ 0.1, 0.2, 0.3, 0.4,0.5, 0.6, 0.7, 0.8, 0.9, 1.0,]:
    sc.tl.leiden(old, resolution=resolution_parameter, random_state=42, 
                        key_added='leiden_'+str(resolution_parameter))

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
   old, 
    color=[ 'leiden_0.1', 'leiden_0.2', 'leiden_0.3','leiden_0.4','leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0',
         ], 
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    old, 
    color=[ 'leiden_0.2',"day","stage","Foxp3","Cd4"], 
)

In [None]:
sc.tl.rank_genes_groups(old, 'leiden_0.1', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(old, n_genes=25, sharey=False)  

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    old, 
    color=['leiden_0.2'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    size=5,
    wspace = 0.7,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False,
    legend_loc="on data"
)

In [None]:
# Label cells from their leiden_0.2 cluster; any cluster not listed here is
# tagged 'ERROR' so it stands out downstream.
old_label_by_cluster = {
    '1': 'T cells',
    '3': 'T cells',
    '2': 'B cells',
    '6': 'B cells',
    '0': 'NKT and invariant cells',
    '4': 'NKT and invariant cells',
    '8': 'NKT and invariant cells',
    '7': 'DCs and Macrophages',
    '5': 'ILC',
}
old.obs['cell_type'] = [
    old_label_by_cluster.get(cluster, 'ERROR') for cluster in old.obs['leiden_0.2']
]

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    old, 
    color=['leiden_0.1','cell_type','stage','day',"Foxp3"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.3, 0.05],
    size=15,
    vmax=2,
    frameon=False,
    add_outline=True,
    sort_order = False,
    legend_loc="on data"
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    old, 
    color=['cell_type'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.3, 0.05],
    size=15,
    vmax=2,
    frameon=False,
    add_outline=True,
    sort_order = False,
    legend_loc="on data"
)

In [None]:
sc.tl.rank_genes_groups(old, 'cell_type', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(old, n_genes=25, sharey=False)  

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    old, 
    color=["Sox4","Rorc","Ncr1","Klrk1","Cxcr6", 'Cd8b1',"Cd4",'Tnfrsf4',"Foxp3","H2-Aa","Clec9a","Xcr1",
           "Sirpa","Ccr7","Fscn1",'Cd79a', 'Ms4a1', "Xbp1","Igkc","Msrb1",'Fcer1g'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
genes = {'T-cells': ['Cd3e', 'Cd8a', 'Cd4', 'Il7r'], 
         'NKT and invariant cells':['Gzmb'],
         'NK cells':['Ncr1','Nkg7',	'Klrd1',],
         'ILC':[],
          'DN/DPs':[],
         'B-cells': ['Ms4a1', 'Cd19'], 
         'Plasmacells': ['Ppbp'], 
         'NKT and invariant T cells': ['Nkg7'], 
         'Dendritic-cells': ['Cst3', 'Fcer1a'],
         'Eosinophils':['Tyrobp']}

In [None]:
genes ={ 'B cells_n':	[	'Cst3',	'Cd79b',	'Ms4a1',	'H2-DMb2',	'Bank1',	'Ebf1',	'Ly6d',	'Mzb1',	'Igkc',	'Cd74',	'Napsa',	'Ighm',	'H2-Eb1',	'H2-Aa',	'H2-Ab1',	'Iglc3',	'Iglc2',	'Lyn',	'Ly86',	'Pkig',	'Plac8',	'Blnk',	'Syk',	'Cd37',	'Siglecg',	]	,
'DCs_n':	[	'Arpp21',	'Cd74',	'H2-Aa',	'Atox1',	'H2-Eb1',	'Spi1',	'Ifi30',	'Tyrobp',	'Psap',	'H2-Ab1',	'Tmsb4x',	'Ftl1',	'Syngr2',	'Cxcl16',	'Aif1',	'Ctsh',	'Ctsz',	'Actg1',	'Pkib',	'Tbc1d8',	'Atpif1',	'Flt3',	'Skap2',	'Fmnl2',	'Clic4',	]	,
'DN/DPs_n':	[	'Msrb1',	'Dntt',	'Sox4',	'Tcf7',	'Endou',	'Trbc2',	'Themis',	'Satb1',	'Ccr9',	'Rhoh',	'Cyb5a',	'Cd8b1',	'Hmgb1',	'H3f3a',	'Aqp11',	'Ramp1',	'Ap3s1',	'Cux1',	'Mier1',	'Edem1',	'Cd8a',	'Tcf12',	'Desi1',	'2610307P16Rik',	'Trbc1',	]	,
'EOS_n':	[	'Tmem176a',	'Tyrobp',	'Fcer1g',	'Ifitm3',	'Ftl1',	'Srgn',	'Il1b',	'Isg15',	'Fth1',	'S100a9',	'Rtp4',	'Slfn4',	'S100a8',	'Hdc',	'Csf3r',	'Acod1',	'Lst1',	'Rsad2',	'Ifitm2',	'Ifit3',	'Ifit1',	'Mxd1',	'Cebpb',	'Isg20',	'Txn1',	]	,
'ILC_n':	[	'Fcer1g',	'Tmem176b',	'Ramp1',	'Il23r',	'Il1r1',	'Emb',	'Ikzf3',	'Ckb',	'Igf1r',	'Lmo4',	'Pxdc1',	'Blk',	'St6galnac3',	'S100a4',	'Cxcr6',	'Il7r',	'Furin',	'Icos',	'Tcrg-C1',	'Rora',	'Zbtb16',	'Selenop',	'Serpinb1a',	'Avpi1',	'Il18r1',	]	,
'NK cells_n':	[	'Il12rb2',	'Tyrobp',	'Ncr1',	'Klre1',	'Klrb1c',	'Gzma',	'Xcl1',	'AW112010',	'Anxa2',	'Nkg7',	'Car2',	'Irf8',	'Klrk1',	'Klrd1',	'Prf1',	'Il2rb',	'Txk',	'Ccl5',	'Ccl4',	'Myl6',	'Klri2',	'Clnk',	'Serpinb9',	'Gem',	'Ptprc',	]	,
'NKT and invariant cells_n':	[	'Tox',	'Tmsb10',	'Ly6c2',	'Ctsw',	'Sh3bgrl3',	'Gzmb',	'Klrk1',	'Id2',	'Il2rb',	'Nkg7',	'Klrd1',	'Dennd4a',	'Satb1',	'Cxcr6',	'Klra9',	'Dusp2',	'Gimap4',	'Vps37b',	'Chn2',	'Pitpnc1',	'Xcl1',	'Klrb1c',	'Cd7',	'Inpp4b',	'Zfp36l2',	]	,
'T cells_n':	[	'Igkc',	'Ctla4',	'Themis',	'Emb',	'Prkca',	'Fam169b',	'Tnfrsf4',	'Fyb',	'Cd8b1',	'Trbc2',	'Sntb1',	'Itga4',	'Lat',	'Cd3d',	'Tnfsf8',	'Cd8a',	'Shisa5',	'Ikzf2',	'Ms4a6b',	'Itgav',	'Fyn',	'Cd2',	'Gzmk',	'Trps1',	'Smc4',	]	,
'plasmacells_n':	[		'Jchain',	'Xbp1',	'Txndc5',	'Mzb1',	'Iglc2',	'Eaf2',	'Derl3',	'Iglv1',	'Pdia4',	'Iglc3',	'Creld2',	'Herpud1',	'Serp1',	'Ssr4',	'Ckap4',	'Fkbp2',	'Hsp90b1',	'Prdx4',	'Sec11c',	'Edem2',	'Edem1',	'Iglc1',	'Pou2af1']}

In [None]:
sc.set_figure_params(scanpy=True, fontsize = 14)
ac = sc.pl.matrixplot(old, genes, groupby = 'cell_type', show = False, standard_scale = 'var')
ac['mainplot_ax'].set_xlabel('Genes')
ac['mainplot_ax'].set_ylabel('Clusters')

In [None]:
result = old.uns['rank_genes_groups']
groups = result['names'].dtype.names
pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names']}).head(25)

## T cells old

In [None]:
# Take an explicit copy: the following cells modify Told in place (gene
# filtering, HVG selection, PCA), which should not go through a view of `old`.
Told = old[old.obs['cell_type'].isin(['T cells'])].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(Told, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(Told, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(Told, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
def observe_variance(anndata_object):
    """Plot per-PC and cumulative explained variance of a stored PCA.

    Reads ``anndata_object.uns['pca']['variance_ratio']`` and draws two
    side-by-side scatter panels: the fraction of variance explained by each
    principal component (left) and its cumulative sum (right).

    Parameters
    ----------
    anndata_object
        Object exposing ``uns['pca']['variance_ratio']`` (written by
        ``sc.tl.pca`` in this notebook).

    Returns
    -------
    None
        The figure is rendered via ``plt.show()``. Returning the un-called
        ``plt.show`` function would only print a function repr as cell output.
    """
    variance_ratio = anndata_object.uns['pca']['variance_ratio']
    pc_index = range(len(variance_ratio))

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

    # variance per principal component
    ax1.scatter(pc_index, variance_ratio, s=4)
    ax1.set_xlabel('PC')
    ax1.set_ylabel('Fraction of variance explained\n')
    ax1.set_title('Fraction of variance explained per PC\n')

    # cumulative variance explained
    ax2.scatter(pc_index, np.cumsum(variance_ratio), s=4)
    ax2.set_xlabel('PC')
    ax2.set_ylabel('Cumulative fraction of variance\nexplained')
    ax2.set_title('Cumulative fraction of variance\nexplained by PCs')

    fig.tight_layout()
    plt.show()
observe_variance(Told)

In [None]:
plt.plot(range(len(Told.uns['pca']['variance_ratio'])), np.cumsum(Told.uns['pca']['variance_ratio']) * 100, '.-')
plt.axvline(30, color = 'r',)
plt.xlabel('Principal Component', fontsize = 14)
plt.ylabel('% Variance Explained', fontsize = 14)

In [None]:
sc.pp.neighbors(Told, n_neighbors=30, n_pcs=30)
sc.tl.umap(Told, min_dist=0.5)

### T cells clustering and annotation


In [None]:
# Leiden clustering of the T-cell subset across resolutions 0.1-1.0;
# each run is stored in its own obs column named 'leiden_<resolution>'.
for resolution_parameter in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0):
    sc.tl.leiden(
        Told,
        resolution=resolution_parameter,
        random_state=42,
        key_added=f'leiden_{resolution_parameter}',
    )

In [None]:
# Continue the T-cell Leiden sweep at finer resolutions 1.1-1.5
# (same seed, one 'leiden_<resolution>' obs column per run).
for resolution_parameter in (1.1, 1.2, 1.3, 1.4, 1.5):
    sc.tl.leiden(
        Told,
        resolution=resolution_parameter,
        random_state=42,
        key_added=f'leiden_{resolution_parameter}',
    )

In [None]:
sc.tl.rank_genes_groups(Told, 'leiden_0.6', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(Told, n_genes=25, sharey=False)  

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    Told, 
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    Told, 
    color=['leiden_0.4','day','Foxp3', 'Cd4',"Cd8a","Gzmk","Fcer1g","Sox4"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
# leiden_0.4 cluster id -> T-cell subset label.
# Any cluster id missing from the table is labelled 'ERROR' so it stands out.
t_subset_by_cluster = {
    '4': 'Tregs',
    '3': 'CD4',
    '5': 'CD8',
    '0': 'GZMK+ CD8',
    '1': 'GZMK+ CD8',
    '2': 'GZMK+ CD8',
    '6': 'GZMK+ CD8',
    '7': 'GZMK+ CD8',
    # '8': 'Naive CD4',
    '8': 'Naive CD8',
}
Told.obs['cell_type_subset'] = [
    t_subset_by_cluster.get(cluster, 'ERROR') for cluster in Told.obs['leiden_0.4']
]

In [None]:

# Define the new order for the categories
new_order = ['Tregs', 'CD4','GZMK+ CD8', 'CD8','Naive CD8' ]

# Assign the new order to the cell_type_subset column
Told.obs['cell_type_subset'] = pd.Categorical(Told.obs['cell_type_subset'], categories=new_order, ordered=True)


In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    Told, 
    color=['cell_type_subset','Cd8a',"Cd4",'Foxp3',"Cd40lg","Gzmk"] , 
    palette=user_defined_palette,  

    use_raw=False,
    ncols=5,
    wspace = 0.3,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    Told, 
    color=['cell_type_subset', 'day'] , 
    palette=user_defined_palette,  

    use_raw=False,
    ncols=5,
    wspace = 0.3,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    Told,
    color=['leiden_0.2',"day","stage","Ly6a",'Cd4', 'Cd40lg',"Icos",'Cd8a', "Cd8b1","Ccr7", "Stat1", 'Lef1','Foxp1',"Sell",'Foxp3', 'Ikzf2', 'Ctla4','Gzmk',"Nkg7","Ccl5","Foxp3","Ncr1"] , 
    palette=user_defined_palette,  

    use_raw=False,
    ncols=5,
    wspace = 0.3,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
sc.tl.rank_genes_groups(Told, 'cell_type_subset', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(Told, n_genes=25, sharey=False) 

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    Told, 
    color=[ 'cell_type_subset','stage', 'day',], 
    ncols=6,
    outline_width=[0.6, 0.05],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.4,
    use_raw=False,
    add_outline=True
)

In [None]:
# Stacked-area plot of T-cell subset fractions per day.
# normalize='index' makes every day (row) sum to 1.
colors = Told.uns['cell_type_subset_colors']

tmp = pd.crosstab(Told.obs['day'],Told.obs["cell_type_subset"], normalize='index', )
tmp.plot.area(stacked=True, color=colors).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)
# NOTE(review): Told is subset from `old`, and the analogous plot later in this
# section is titled "18 month-old" -- confirm that "2 month-old" is the intended title.
plt.title("2 month-old")


In [None]:
Tregsold= Told[Told.obs["cell_type_subset"]=="Tregs"]

In [None]:
sc.tl.rank_genes_groups(Tregsold, 'day', groups=['d7'], reference='d0', method='wilcoxon', use_raw=False)
result = Tregsold.uns['rank_genes_groups']

In [None]:
sc.pl.rank_genes_groups(Tregsold, n_genes=30, sharey=False)


In [None]:
# Per-cell table of UMAP coordinates plus metadata, consumed by the KDE density
# plots in the following cells. The 'stage' column is read from obs['stage']
# (not obs['day']) so that it does not simply duplicate the 'day' column.
df_temp = pd.DataFrame(
    {
        'umap_x': Told.obsm['X_umap'][:, 0],
        'umap_y': Told.obsm['X_umap'][:, 1],
        'stage': Told.obs['stage'],
        'day': Told.obs['day'],
    },
    index=Told.obs.index,
)




In [None]:
Told.obs["day"]

In [None]:
import seaborn as sns

# Two-panel figure: 2D density of T cells on the UMAP for day d0 (left) and d1 (right).
# Each entry is (day shown in the panel, marker size of the all-cells background scatter).
fig = plt.figure(figsize = (8*2, 6))
for panel_number, (day_label, point_size) in enumerate([('d0', 1), ('d1', 0)], start=1):
    ax = fig.add_subplot(1, 2, panel_number)
    # Background scatter of every cell keeps both panels on the same axis extent.
    sns.scatterplot(data = df_temp, x = 'umap_x', y = 'umap_y', s = point_size, ax = ax)
    cells_of_day = df_temp[df_temp['day'] == day_label]
    sns.kdeplot(data=cells_of_day, x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax = ax, cut = 4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(day_label, fontsize = 16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
import seaborn as sns

# Two-panel figure: 2D density of T cells on the UMAP for day d4 (left) and d7 (right).
# Each entry is (day shown in the panel, marker size of the all-cells background scatter).
fig = plt.figure(figsize = (8*2, 6))
for panel_number, (day_label, point_size) in enumerate([('d4', 1), ('d7', 0)], start=1):
    ax = fig.add_subplot(1, 2, panel_number)
    # Background scatter of every cell keeps both panels on the same axis extent.
    sns.scatterplot(data = df_temp, x = 'umap_x', y = 'umap_y', s = point_size, ax = ax)
    cells_of_day = df_temp[df_temp['day'] == day_label]
    sns.kdeplot(data=cells_of_day, x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax = ax, cut = 4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(day_label, fontsize = 16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
Told.uns['cell_type_subset_colors']=['#f6222e','#002FA7','#060047', '#3283fe','#ffbaba',]

In [None]:
colors = Told.uns['cell_type_subset_colors']

tmp = pd.crosstab(Told[Told.obs['stage']=="18mo"].obs['day'],Told.obs["cell_type_subset"], normalize='index', )
tmp.plot.area(stacked=True, color=colors).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)
plt.title("18 month-old")


In [None]:
sc.pl.umap(Told,color="cell_type_subset")

In [None]:
sc.pl.umap(Told, color=['cell_type_subset',"stage","day"], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.7,
                     outline_width=[0.6, 0.05], 
                     size=15,  
                     frameon=False, 
                     add_outline=True, 

                     sort_order = False)

In [None]:
sc.set_figure_params(dpi=300, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(Told, color=['cell_type_subset'], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.3,
                     outline_width=[0.6, 0.01], 
                     size=5,  
                     frameon=False, 
                     add_outline=False, 
                     sort_order = False, )



In [None]:
sc.pl.umap(Told, color=['cell_type_subset'], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.7,
                     outline_width=[0.6, 0.05], 
                     size=15,  
                     frameon=False, 
                     add_outline=True, 

                     sort_order = False)

In [None]:
# Dot plot of selected genes, grouped by T-cell subset and day.
# NOTE(review): this plots `T`, not `Told`. `T` is not defined anywhere in the
# visible part of this section, and the next cell holds the same call on `Told`
# commented out -- confirm which object is intended here.
sc.pl.dotplot(T,var_names=["Zfp36l1",'Tff1',"Penk","Areg","Capg","Il1r2","Cd83","Cd74","Ccr6","Rora","Nfkbia","Ctla4",'Rgs2',"Tnfrsf9","Tnfrsf18", 'Il18r1',"Ctla4"] , groupby=["cell_type_subset",'day'])

In [None]:
#sc.pl.dotplot(Told,var_names=["Zfp36l1",'Tff1',"Penk","Areg","Capg","Il1r2","Cd83","Cd74","Ccr6","Rora","Nfkbia","Ctla4",'Rgs2',"Tnfrsf9","Tnfrsf18", 'Il18r1',"Ctla4"] , groupby=["cell_type_subset",'day'])

In [None]:
#path_to_h5ad = '/Users/xleana/Desktop/CD45/CD45new/CD45pos_02mo18mo_SLTBId147_Told.h5ad'
#Told.write(path_to_h5ad)

In [None]:
#Told=sc.read_h5ad('/Users/xleana/Desktop/CD45/CD45new/CD45pos_02mo18mo_SLTBId147_Told.h5ad')
#Told.uns['log1p']['base']=None


In [None]:
ToldD0=Told[Told.obs["day"]=="d0"]

In [None]:
ToldD1=Told[Told.obs["day"]=="d1"]

In [None]:
ToldD4=Told[Told.obs["day"]=="d4"]

In [None]:
ToldD7=Told[Told.obs["day"]=="d7"]

In [None]:
# Marker genes per T-cell subset among day-0 cells (Wilcoxon rank-sum test).
# The keyword scanpy reads is `layer` (singular); a `layers=` keyword is absorbed
# by **kwds and has no effect on the Wilcoxon test, so the layer would not be used.
# use_raw=False is passed explicitly because scanpy rejects `layer` combined with
# use_raw=True. Assumes a 'norm_counts' layer exists on this object -- TODO confirm.
sc.tl.rank_genes_groups(ToldD0, groupby='cell_type_subset', method='wilcoxon',
                        layer='norm_counts', use_raw=False)
sc.pl.rank_genes_groups_dotplot(ToldD0, n_genes=50, dendrogram=False)


In [None]:
# Marker genes per T-cell subset among day-1 cells (Wilcoxon rank-sum test).
# The keyword scanpy reads is `layer` (singular); a `layers=` keyword is absorbed
# by **kwds and has no effect on the Wilcoxon test, so the layer would not be used.
# use_raw=False is passed explicitly because scanpy rejects `layer` combined with
# use_raw=True. Assumes a 'norm_counts' layer exists on this object -- TODO confirm.
sc.tl.rank_genes_groups(ToldD1, groupby='cell_type_subset', method='wilcoxon',
                        layer='norm_counts', use_raw=False)
sc.pl.rank_genes_groups_dotplot(ToldD1, n_genes=50, dendrogram=False)


In [None]:
# Marker genes per T-cell subset among day-4 cells (Wilcoxon rank-sum test).
# The keyword scanpy reads is `layer` (singular); a `layers=` keyword is absorbed
# by **kwds and has no effect on the Wilcoxon test, so the layer would not be used.
# use_raw=False is passed explicitly because scanpy rejects `layer` combined with
# use_raw=True. Assumes a 'norm_counts' layer exists on this object -- TODO confirm.
sc.tl.rank_genes_groups(ToldD4, groupby='cell_type_subset', method='wilcoxon',
                        layer='norm_counts', use_raw=False)
sc.pl.rank_genes_groups_dotplot(ToldD4, n_genes=50, dendrogram=False)


In [None]:
# Marker genes per T-cell subset among day-7 cells (Wilcoxon rank-sum test).
# The keyword scanpy reads is `layer` (singular); a `layers=` keyword is absorbed
# by **kwds and has no effect on the Wilcoxon test, so the layer would not be used.
# use_raw=False is passed explicitly because scanpy rejects `layer` combined with
# use_raw=True. Assumes a 'norm_counts' layer exists on this object -- TODO confirm.
sc.tl.rank_genes_groups(ToldD7, groupby='cell_type_subset', method='wilcoxon',
                        layer='norm_counts', use_raw=False)
sc.pl.rank_genes_groups_dotplot(ToldD7, n_genes=50, dendrogram=False)


In [None]:
# Export the day-0 marker table: one column per (group, statistic).
result = ToldD0.uns['rank_genes_groups']
groups = result['names'].dtype.names
# Each statistic gets its own column suffix. Abbreviating to the first letter
# would give 'pvals_adj' and 'pvals' the same '<group>_p' name, so one column
# silently replaces the other. 'n', 'l' and 'p' keep their existing meaning;
# adjusted p-values go to '<group>_padj'.
column_suffix = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'padj', 'pvals': 'p'}
df = pd.DataFrame(
    {f"{group}_{suffix}": result[key][group]
     for group in groups for key, suffix in column_suffix.items()})
# NOTE(review): absolute, user-specific output path in a 'Tyoung' folder although
# this table comes from the Told subset -- confirm the destination.
df.to_csv('/Users/alemarquis/Desktop/Tyoung/ToldD0.csv')

In [None]:
# Export the day-1 marker table: one column per (group, statistic).
result = ToldD1.uns['rank_genes_groups']
groups = result['names'].dtype.names
# Each statistic gets its own column suffix. Abbreviating to the first letter
# would give 'pvals_adj' and 'pvals' the same '<group>_p' name, so one column
# silently replaces the other. 'n', 'l' and 'p' keep their existing meaning;
# adjusted p-values go to '<group>_padj'.
column_suffix = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'padj', 'pvals': 'p'}
df = pd.DataFrame(
    {f"{group}_{suffix}": result[key][group]
     for group in groups for key, suffix in column_suffix.items()})
# NOTE(review): absolute, user-specific output path in a 'Tyoung' folder although
# this table comes from the Told subset -- confirm the destination.
df.to_csv('/Users/alemarquis/Desktop/Tyoung/ToldD1.csv')

In [None]:
# Export the day-4 marker table: one column per (group, statistic).
result = ToldD4.uns['rank_genes_groups']
groups = result['names'].dtype.names
# Each statistic gets its own column suffix. Abbreviating to the first letter
# would give 'pvals_adj' and 'pvals' the same '<group>_p' name, so one column
# silently replaces the other. 'n', 'l' and 'p' keep their existing meaning;
# adjusted p-values go to '<group>_padj'.
column_suffix = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'padj', 'pvals': 'p'}
df = pd.DataFrame(
    {f"{group}_{suffix}": result[key][group]
     for group in groups for key, suffix in column_suffix.items()})
# NOTE(review): absolute, user-specific output path in a 'Tyoung' folder although
# this table comes from the Told subset -- confirm the destination.
df.to_csv('/Users/alemarquis/Desktop/Tyoung/ToldD4.csv')

In [None]:
# Export the day-7 marker table: one column per (group, statistic).
result = ToldD7.uns['rank_genes_groups']
groups = result['names'].dtype.names
# Each statistic gets its own column suffix. Abbreviating to the first letter
# would give 'pvals_adj' and 'pvals' the same '<group>_p' name, so one column
# silently replaces the other. 'n', 'l' and 'p' keep their existing meaning;
# adjusted p-values go to '<group>_padj'.
column_suffix = {'names': 'n', 'logfoldchanges': 'l', 'pvals_adj': 'padj', 'pvals': 'p'}
df = pd.DataFrame(
    {f"{group}_{suffix}": result[key][group]
     for group in groups for key, suffix in column_suffix.items()})
# NOTE(review): absolute, user-specific output path in a 'Tyoung' folder although
# this table comes from the Told subset -- confirm the destination.
df.to_csv('/Users/alemarquis/Desktop/Tyoung/ToldD7.csv')

## NKT cells

In [None]:
NKTold = old[old.obs['cell_type'].isin(['NKT and invariant cells'])]


In [None]:
# Keep only genes detected in at least 4 cells (min_cells=4); this is stricter
# than dropping all-zero genes, which it also does.
sc.pp.filter_genes(NKTold, min_cells=4)

In [None]:
sc.pp.highly_variable_genes(NKTold, n_top_genes=5000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(NKTold, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(NKTold)

In [None]:
# Cumulative % variance explained by the NKT PCA. Both the x range and the
# cumulative sum are read from NKTold, so the curve describes this subset
# rather than another object's PCA. The red line marks PC 30, the n_pcs value
# passed to sc.pp.neighbors in the next cell.
nkt_variance_ratio = NKTold.uns['pca']['variance_ratio']
plt.plot(range(len(nkt_variance_ratio)), np.cumsum(nkt_variance_ratio) * 100, '.-')
plt.axvline(30, color = 'r')
plt.xlabel('Principal Component', fontsize = 14)
plt.ylabel('% Variance Explained', fontsize = 14)

In [None]:
sc.pp.neighbors(NKTold, n_neighbors=30, n_pcs=30)
sc.tl.umap(NKTold, min_dist=0.9)

In [None]:
for resolution_parameter in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
    sc.tl.leiden(NKTold, resolution=resolution_parameter, random_state=42, 
                        key_added='leiden_'+str(resolution_parameter))

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    NKTold, 
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)
sc.pl.umap(
    NKTold, 
    color=['leiden_0.1',"stage","Trac","Trdc","Cd8a","Cd4"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=6,
    size=15,
    wspace = 0.2,
    outline_width=[0.6, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    NKTold, 
    color=['leiden_0.1','leiden_0.3', 'stage', 'day', 'sample', "Il4",
           "Il17a","Icos","Ncr1","Cxcr6",'Fcer1g',], 
    ncols=6,
    use_raw=False,
    outline_width=[0.6, 0.05],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.5,
    add_outline=True
)

In [None]:
NKTold.obs['cell_type_subset'] = [  'NK' if (x=='3' ) else
                                #'NKT' if (x=='0' or x=='2' or x=='1'   )else
                               'NKT' for x in NKTold.obs['leiden_0.1']] 

In [None]:
sc.tl.rank_genes_groups(NKTold, 'leiden_0.3', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(NKTold, n_genes=25, sharey=False)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    NKTold, 
    color=['leiden_0.3','cell_type_subset',"day"], 
    ncols=6,
    use_raw=False,
    outline_width=[0.6, 0.05],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.5,
    add_outline=True
)

In [None]:
NKTold.uns['cell_type_subset_colors']=['#F2BE22','#006fa6']

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    NKTold, 
    color=['leiden_0.3','cell_type_subset',"day"], 
    ncols=6,
    use_raw=False,
    outline_width=[0.6, 0.05],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.5,
    add_outline=True
)

In [None]:
tmp = pd.crosstab(NKTold.obs['day'],NKTold.obs['cell_type_subset'], normalize='index')
tmp.plot.area(stacked=True).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)

## B cells

In [None]:
Bold = old[old.obs['cell_type'].isin(['B cells'])]

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(Bold, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(Bold, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(Bold, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(Bold)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(Bold, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sc.pp.neighbors(Bold, n_neighbors=15)
sc.tl.umap(Bold)

In [None]:
for resolution_parameter in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
    sc.tl.leiden(Bold, resolution=resolution_parameter, random_state=42, 
                        key_added='leiden_'+str(resolution_parameter))

In [None]:
Bold

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    Bold, 
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0',"day","Cd3e","Igha", "Ighm"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
sc.tl.rank_genes_groups(Bold, 'leiden_0.1', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(Bold, n_genes=25, sharey=False) 

In [None]:
# leiden_0.1 cluster id -> B-lineage label.
# Any cluster id missing from the table is labelled 'Error'.
b_subset_by_cluster = {
    '0': 'B cells',
    '1': 'B cells',
    '2': 'Plasma cells',
}
Bold.obs['cell_type_subset'] = [
    b_subset_by_cluster.get(cluster, 'Error') for cluster in Bold.obs['leiden_0.1']
]

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    Bold, 
    color=['cell_type_subset',"Igha","Ighm" ], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    old, 
    color=['cell_type' ], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
tmp = pd.crosstab(Bold.obs['day'],Bold.obs['cell_type_subset'], normalize='index')
tmp.plot.area(stacked=True).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)

## NK cells, ILC and EOS

In [None]:
NKILCold = old[old.obs['cell_type'].isin(['NK cells',"ILC","EOS"])]

In [None]:
tmp = pd.crosstab(NKILCold.obs['day'],NKILCold.obs['cell_type'], normalize='index')
tmp.plot.area(stacked=True).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)

## DCs

In [None]:
DCold = old[old.obs['cell_type'].isin(['DCs and Macrophages'])]

In [None]:
# Keep only genes detected in at least 4 cells (min_cells=4); this is stricter
# than dropping all-zero genes, which it also does.
sc.pp.filter_genes(DCold, min_cells=4)

In [None]:
sc.pp.highly_variable_genes(DCold, n_top_genes=3000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(DCold, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(DCold)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(DCold, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sc.pp.neighbors(DCold, n_neighbors=30, n_pcs=30)
sc.tl.umap(DCold,min_dist=0.5)

In [None]:
for resolution_parameter in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
    sc.tl.leiden(DCold, resolution=resolution_parameter, random_state=42, 
                        key_added='leiden_'+str(resolution_parameter))

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    DCold, 
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    DCold, 
    color=["H2-Aa","Clec9a","Irf8","Clec10a","Xcr1","Sirpa","Ccr7","Fscn1","Msrb1","Siglech","Csf1r","Zbtb46","Mertk","Spic","Timd4",
          "Vcam1","Mafb","Lyz2",'Ahr','Xbp1',"leiden_0.4","day"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
  
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
# leiden_0.4 cluster id -> dendritic-cell subset label.
# Any cluster id missing from the table is labelled 'ERROR' so it stands out.
dc_subset_by_cluster = {
    '0': 'cDC1',
    '2': 'cDC2',
    '1': 'CCR7+ cDC',
    '3': 'p-DCs',
    # '4': 'Macrophages',
}
DCold.obs['cell_type_subset'] = [
    dc_subset_by_cluster.get(cluster, 'ERROR') for cluster in DCold.obs['leiden_0.4']
]

In [None]:
sc.pl.umap(DCold,color=['leiden_0.4','cell_type_subset'],)

In [None]:
sc.pl.umap(DCold, color=["leiden_0.4","day","stage"])

In [None]:
sc.tl.rank_genes_groups(DCold, 'leiden_0.6', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(DCold, n_genes=25, sharey=False) 

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    DCold, 
    color=["Clec9a","Xcr1","Irf8","Clec10a",
           "Sirpa","Ccr7","Fscn1",'Cd79a', 'Ms4a1', "Xbp1","Igkc","Msrb1","Cd4","Rorc","Il22",
          "Gata3","Rorc","Pxdc1","Ahr"], 
    ncols=6,
    outline_width=[0.6, 0.05],
    size=100,
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.3,
    add_outline=True
)

#SIRPA DC2
#CCR7+ DC2

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    DCold, 
    color=['leiden_0.6','cell_type_subset','stage','day',"Ccr7"], 
    ncols=6,
    outline_width=[0.08, 0.06],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.3,
    add_outline=True
)

In [None]:
DCold.obs['Dendritic cell subsets'] =DCold.obs['cell_type_subset']

In [None]:
DCold.uns['cell_type_subset_colors'] = ["#F1BB7B", "#FD6467", "#5B1A18", "#D67236"]

In [None]:
DCold

In [None]:
DCold.uns['Dendritic cell subsets_colors'] = [  "#39312F", "#D67236","#AA9486", "#EAD3BF","#B6854D", ]

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    DCold, 
    color=['cell_type_subset'], 
    ncols=6,
    outline_width=[0.08, 0.06],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.3,
    add_outline=True,
)

In [None]:
tmp = pd.crosstab(DCold.obs['day'],DCold.obs['cell_type_subset'], normalize='index')
tmp.plot.area(stacked=True).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)

## ILCs

In [None]:
sc.pl.umap(old,color="cell_type")

In [None]:
ILCold = old[old.obs['cell_type'].isin(['ILC'])]
#'DN/DPs',"NK cells"

In [None]:
sc.pl.umap(ILCold)

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(ILCold, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(ILCold, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(ILCold, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(ILCold)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(ILCold, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sc.pp.neighbors(ILCold, n_neighbors=30, n_pcs=30)
sc.tl.umap(ILCold,min_dist=0.5)

In [None]:
for resolution_parameter in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
    sc.tl.leiden(ILCold, resolution=resolution_parameter, random_state=42, 
                        key_added='leiden_'+str(resolution_parameter))

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    ILCold, 
    color=['leiden_0.1', 'leiden_0.2', 'leiden_0.3', 'leiden_0.4', 'leiden_0.5', 
           'leiden_0.6', 'leiden_0.7', 'leiden_0.8','leiden_0.9', 'leiden_1.0'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.7,
    outline_width=[0.1, 0.05],
    frameon=False,
    add_outline=True,
    sort_order = False
)


In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    ILCold, 
    color=["Cd4","Cd8a","Sox4","day","Ncr1","cell_type","Rorc","Ccr6","stage","Eomes","Cd4","Foxp3","Il23r"], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
# Every ILC cell is labelled 'ILC3' regardless of its leiden_0.1 cluster
# (one label per entry of the leiden_0.1 column).
ILCold.obs['cell_type_subset'] = ['ILC3'] * len(ILCold.obs['leiden_0.1'])

In [None]:
sc.pl.umap(ILCold, color=["leiden_0.1","cell_type",'cell_type_subset',"day"])

In [None]:
sc.tl.rank_genes_groups(ILCold, 'leiden_0.1', method='wilcoxon', use_raw=False)
sc.pl.rank_genes_groups(ILCold, n_genes=25, sharey=False) 

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(
    ILCold, 
    color=['leiden_0.4','cell_type_subset','stage','day'], 
    ncols=6,
    outline_width=[0.08, 0.06],
    frameon=False,
    cmap='Spectral_r',
    wspace = 0.3,
    add_outline=True
)

In [None]:
ILCold.uns['cell_type_subset_colors'] = ["#F1BB7B", "#FD6467", "#5B1A18", "#D67236"]

In [None]:
tmp = pd.crosstab(ILCold.obs['day'],ILCold.obs['cell_type_subset'], normalize='index')
tmp.plot.area(stacked=True).legend(loc='center left', bbox_to_anchor=(1, 0.5),frameon=False)
plt.grid(False)

In [None]:
sc.pl.umap(DCold,color=["cell_type_subset", "Ccr7"])

In [None]:
annotated_subsets = pd.concat([Told.obs['cell_type_subset'], DCold.obs['cell_type_subset'], 
                               NKTold.obs['cell_type_subset'], Bold.obs['cell_type_subset'],
                                ILCold.obs['cell_type_subset']]
                              )

In [None]:
old.obs['cell_type_subset']=''

In [None]:
# Copy the per-lineage subset labels onto `old` for every annotated cell.
# A single .loc assignment (rows by mask, column by name) writes into old.obs
# directly and aligns `annotated_subsets` on the cell index. The chained form
# obs[col][mask] = ... can write to a temporary object instead: it raises
# SettingWithCopyWarning and has no effect under pandas copy-on-write.
is_annotated = old.obs.index.isin(annotated_subsets.index)
old.obs.loc[is_annotated, 'cell_type_subset'] = annotated_subsets

In [None]:
old=old[old.obs['cell_type_subset']!='']

In [None]:
old.uns['cell_type_subset_colors']=['#f6222e','#b57edc','#060047', '#ffbaba','#3283fe',
                                      '#F2BE22','#FFA200','#00DFA2','#1F8A70','#DD8D29',
                                    '#46ACC8', '#ffff00', '#B40F20', 
      '#4fc601', '#964B00',
                ]


In [None]:

# Define the new order for the categories
new_order = [ 'Tregs', #'Naive CD4',
             'CD4','GZMK+ CD8','Naive CD8', 'CD8', 'NKT', 'NK',"ILC3", 'B cells', 'Plasma cells','cDC1', 'CCR7+ cDC' ,'cDC2', 'p-DCs', 'Macrophages']

In [None]:

# Assign the new order to the cell_type_subset column
old.obs['cell_type_subset'] = pd.Categorical(old.obs['cell_type_subset'], categories=new_order, ordered=True)


In [None]:
sc.pl.umap(old, color=['cell_type_subset'], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.3,
                     outline_width=[0.6, 0.01], 
                     size=5,  
                     frameon=False, 
                     add_outline=False, 
                     sort_order = False)

In [None]:
sc.pl.umap(old, color=['stage', 'day','cell_type_subset'], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.3,
                     outline_width=[0.6, 0.01], 
                     size=5,  
                     frameon=False, 
                     add_outline=False, 
                     sort_order = False)

In [None]:
sc.pl.umap(old, color=['cell_type_subset'],
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.3,
                     outline_width=[0.6, 0.01], 
                     size=5,  
                     frameon=False, 
                     add_outline=False, 
                     sort_order = False)

In [None]:
adata

In [None]:
sc.pp.highly_variable_genes(adata, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(adata, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(adata)


In [None]:
plt.plot(range(len(adata.uns['pca']['variance_ratio'])), np.cumsum(adata.uns['pca']['variance_ratio']) * 100, '.-')
plt.axvline(30, color = 'r')
plt.xlabel('Principal Component', fontsize = 14)
plt.ylabel('% Variance Explained', fontsize = 14)

In [None]:
sc.pp.neighbors(adata, n_neighbors=50,n_pcs=30)

In [None]:
sc.tl.umap(adata, min_dist=0.6)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    adata, 
    color=["Sox4","Rorc","Ncr1","Klrk1","Cxcr6", 'Cd8b1',"Cd8a","Cd4",'Tnfrsf4',"Foxp3","H2-Aa","Clec9a","Xcr1",
           "Sirpa","Ccr7","Fscn1",'Cd79a', 'Ms4a1', "Xbp1","Igkc","Msrb1","stage", 'day'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
annotated_subsets = pd.concat([young.obs['cell_type_subset'], old.obs['cell_type_subset']] )

In [None]:
adata.obs['cell_type_subset']=''

In [None]:
# Copy the young/old subset labels onto `adata` for every annotated cell.
# A single .loc assignment (rows by mask, column by name) writes into adata.obs
# directly and aligns `annotated_subsets` on the cell index. The chained form
# obs[col][mask] = ... can write to a temporary object instead: it raises
# SettingWithCopyWarning and has no effect under pandas copy-on-write.
is_annotated = adata.obs.index.isin(annotated_subsets.index)
adata.obs.loc[is_annotated, 'cell_type_subset'] = annotated_subsets

In [None]:
adata=adata[adata.obs['cell_type_subset']!='']

In [None]:
sc.pl.umap(adata,color=["day",'cell_type_subset',])

In [None]:
# Transfer the coarse 'cell_type' labels from the young and old objects onto
# `adata`, keep only the cells that received a label, and plot the result.
annotated_subsets = pd.concat([young.obs['cell_type'], old.obs['cell_type']] )
adata.obs['cell_type']=''
# Single .loc assignment instead of chained obs[col][mask] = ...: the chained form
# may write to a temporary object and has no effect under pandas copy-on-write.
has_cell_type = adata.obs.index.isin(annotated_subsets.index)
adata.obs.loc[has_cell_type, 'cell_type'] = annotated_subsets
adata=adata[adata.obs['cell_type']!='']
sc.pl.umap(adata,color=["day",'cell_type',])

In [None]:
# Colours for obs['cell_type_subset'], in order. The list holds 22 entries.
subset_palette = [
    '#f6222e', '#002FA7', '#ff34ff', '#060047', '#ffbaba', '#3283fe',
    '#006fa6', '#809693', '#bec1d4', '#F2BE22',
    '#FFA200', '#D4ADFC', '#00DFA2',
    '#1F8A70', '#BFDB38',
    '#DD8D29', '#5a0007', '#46ACC8', '#ffff00', '#B40F20',
    '#4fc601', '#964B00',
]
adata.uns['cell_type_subset_colors'] = subset_palette




In [None]:
# Re-draw the UMAP after uns['cell_type_subset_colors'] was set.
sc.pl.umap(adata,color=["day",'cell_type_subset',])

In [None]:
# Same call as the previous cell (duplicate plot).
sc.pl.umap(adata,color=["day",'cell_type_subset',])

In [None]:

# Define the new order for the categories
# NOTE(review): three labels are spelled differently from the keys of marker_genes_dict further
# down ('NK' vs 'NK cells', 'Plasma cells' vs 'plasmacells', 'CCR7+ cDC' vs 'CCR7+cDC').
# Confirm which spelling obs['cell_type_subset'] really uses: pd.Categorical turns every value
# that is missing from this list into NaN.
new_order = [ 'Tregs', 'Naive CD4','CD4','GZMK+ CD8', 'Naive CD8', 'CD8',  'Invariant T', 'DN',"DP",'NKT', 'NK','ILC2',"ILC3", 'B cells', 'Plasma cells','cDC1', 'CCR7+ cDC' ,'cDC2', 'p-DCs', 'Macrophages']

In [None]:

# Assign the new order to the cell_type_subset column
# pd.Categorical silently converts every label that is not listed in `new_order` to NaN,
# so report such labels first instead of losing those cells unnoticed.
_labels_present = pd.unique(adata.obs['cell_type_subset'].dropna().astype(str))
_labels_dropped = sorted(set(_labels_present) - set(new_order))
if _labels_dropped:
    warnings.warn(
        "cell_type_subset labels missing from new_order will become NaN: "
        + ", ".join(_labels_dropped)
    )
adata.obs['cell_type_subset'] = pd.Categorical(adata.obs['cell_type_subset'], categories=new_order, ordered=True)


In [None]:
# Wilcoxon marker-gene ranking per cell_type_subset, computed on adata.X (use_raw=False).
sc.tl.rank_genes_groups(adata, 'cell_type_subset', method='wilcoxon', use_raw=False)
# Top 25 genes per group, each panel with its own y axis.
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)  

In [None]:
# Marker-gene UMAP panels on the annotated object, five per row (genes only, plus 'Fcer1g').
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
sc.pl.umap(
    adata, 
    color=["Sox4","Rorc","Ncr1","Klrk1","Cxcr6", 'Cd8b1',"Cd4",'Tnfrsf4',"Foxp3","H2-Aa","Clec9a","Xcr1",
           "Sirpa","Ccr7","Fscn1",'Cd79a', 'Ms4a1', "Xbp1","Igkc","Msrb1",'Fcer1g'], 
    palette=user_defined_palette,  
    color_map='Spectral_r', 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False
)

In [None]:
# Curated marker genes per annotated subset. Key order matters: later cells pair the keys
# positionally with the subset colours.
marker_genes_dict = {
    'Tregs':       ['Ikzf2', 'Foxp3', 'Ctla4', 'Itgav'],
    'Naive CD4':   ['Prkca', 'Fyb'],
    # previously also: 'Lef1', 'Igfbp4', 'Bach2', 'Npc2'
    'CD4':         ['Tnfsf8', 'Cd4', 'Tnfrsf4'],
    'GZMK+ CD8':   ['Ccl5', 'Gzmk', 'Nkg7', 'Cd8b1'],
    'Naive CD8':   ['Cd8b1', 'Cd8a', 'Igfbp4', 'Lef1'],
    'CD8':         ['Cd8b1', 'Cd8a'],
    'Invariant T': ['Fcer1g', 'Cd160'],
    'DN':          ['Ptma', 'Hmgb1', 'Stmn1', 'Dut'],
    'DP':          ['Ccr9', 'Themis', 'Sox4', 'Tcf7'],
    'NKT':         ['Il12rb2', 'Ctsw', 'Gzmb'],
    'NK cells':    ['Ncr1', 'Klre1'],
    'ILC2':        ['Furin', 'Itm2b', 'Rora', 'Il1rl1'],
    'ILC3':        ['Tmem176a', 'Tmem176b', 'Il23r', 'Ramp1'],
    'B cells':     ['Cd79a', 'Cd79b', 'Ms4a1', 'H2-DMb2'],
    'plasmacells': ['Igkc', 'Jchain', 'Txndc5', 'Mzb1'],
    'cDC1':        ['Cst3', 'Psap', 'Ppt1', 'Plbd1'],
    'CCR7+cDC':    ['Fscn1', 'Ccr7', 'Marcks', 'Tmem123', 'Tmcc3'],
    'cDC2':        ['Ifi30', 'H2-Ab1', 'Cd74', 'H2-Aa'],
    'p-DCs':       ['Tcf4', 'Grn', 'Pld4', 'Ctsb', 'Rnase6'],
    'Macrophages': ['Lyz2', 'Ctss', 'Gpx1', 'Lst1'],
    # 'EOS':       ['Msrb1', 'Tyrobp', 'Ifitm3'],
}


In [None]:
# Dot plot of the curated markers per subset; standard_scale='var' rescales each gene separately.
sc.pl.dotplot(adata, marker_genes_dict, 'cell_type_subset', dendrogram=False,standard_scale='var')


In [None]:
# Short lineage marker lists. `genes` is re-bound by the very next cell, so this
# definition is not consumed by any code visible in this section.
genes = {
    "T-cells": ["Cd3e", "Cd8a", "Cd4", "Il7r"],
    "NKT and invariant cells": ["Gzmb"],
    "NK cells": ["Ncr1", "Nkg7", "Klrd1"],
    "ILC": [],
    "DN/DPs": [],
    "B-cells": ["Ms4a1", "Cd19"],
    "Plasmacells": ["Ppbp"],
    "NKT and invariant T cells": ["Nkg7"],
    "Dendritic-cells": ["Cst3", "Fcer1a"],
    "Eosinophils": ["Tyrobp"],
}

In [None]:
# Longer per-lineage gene lists (keys carry an '_n' suffix).
# NOTE(review): `genes` is re-bound to marker_genes_dict a few cells further down, and the only
# cell that would plot this dict (the matrixplot cell below) is commented out — confirm whether
# this definition is still needed.
genes ={ 'B cells_n':	[	'Cst3',	'Cd79b',	'Ms4a1',	'H2-DMb2',	'Bank1',	'Ebf1',	'Ly6d',	'Mzb1',	'Igkc',	'Cd74',	'Napsa',	'Ighm',	'H2-Eb1',	'H2-Aa',	'H2-Ab1',	'Iglc3',	'Iglc2',	'Lyn',	'Ly86',	'Pkig',	'Plac8',	'Blnk',	'Syk',	'Cd37',	'Siglecg',	]	,
'DCs_n':	[	'Arpp21',	'Cd74',	'H2-Aa',	'Atox1',	'H2-Eb1',	'Spi1',	'Ifi30',	'Tyrobp',	'Psap',	'H2-Ab1',	'Tmsb4x',	'Ftl1',	'Syngr2',	'Cxcl16',	'Aif1',	'Ctsh',	'Ctsz',	'Actg1',	'Pkib',	'Tbc1d8',	'Atpif1',	'Flt3',	'Skap2',	'Fmnl2',	'Clic4',	]	,
'DN/DPs_n':	[	'Msrb1',	'Dntt',	'Sox4',	'Tcf7',	'Endou',	'Trbc2',	'Themis',	'Satb1',	'Ccr9',	'Rhoh',	'Cyb5a',	'Cd8b1',	'Hmgb1',	'H3f3a',	'Aqp11',	'Ramp1',	'Ap3s1',	'Cux1',	'Mier1',	'Edem1',	'Cd8a',	'Tcf12',	'Desi1',	'2610307P16Rik',	'Trbc1',	]	,
'EOS_n':	[	'Tmem176a',	'Tyrobp',	'Fcer1g',	'Ifitm3',	'Ftl1',	'Srgn',	'Il1b',	'Isg15',	'Fth1',	'S100a9',	'Rtp4',	'Slfn4',	'S100a8',	'Hdc',	'Csf3r',	'Acod1',	'Lst1',	'Rsad2',	'Ifitm2',	'Ifit3',	'Ifit1',	'Mxd1',	'Cebpb',	'Isg20',	'Txn1',	]	,
'ILC_n':	[	'Fcer1g',	'Tmem176b',	'Ramp1',	'Il23r',	'Il1r1',	'Emb',	'Ikzf3',	'Ckb',	'Igf1r',	'Lmo4',	'Pxdc1',	'Blk',	'St6galnac3',	'S100a4',	'Cxcr6',	'Il7r',	'Furin',	'Icos',	'Tcrg-C1',	'Rora',	'Zbtb16',	'Selenop',	'Serpinb1a',	'Avpi1',	'Il18r1',	]	,
'NK cells_n':	[	'Il12rb2',	'Tyrobp',	'Ncr1',	'Klre1',	'Klrb1c',	'Gzma',	'Xcl1',	'AW112010',	'Anxa2',	'Nkg7',	'Car2',	'Irf8',	'Klrk1',	'Klrd1',	'Prf1',	'Il2rb',	'Txk',	'Ccl5',	'Ccl4',	'Myl6',	'Klri2',	'Clnk',	'Serpinb9',	'Gem',	'Ptprc',	]	,
'NKT and invariant cells_n':	[	'Tox',	'Tmsb10',	'Ly6c2',	'Ctsw',	'Sh3bgrl3',	'Gzmb',	'Klrk1',	'Id2',	'Il2rb',	'Nkg7',	'Klrd1',	'Dennd4a',	'Satb1',	'Cxcr6',	'Klra9',	'Dusp2',	'Gimap4',	'Vps37b',	'Chn2',	'Pitpnc1',	'Xcl1',	'Klrb1c',	'Cd7',	'Inpp4b',	'Zfp36l2',	]	,
'T cells_n':	[	'Igkc',	'Ctla4',	'Themis',	'Emb',	'Prkca',	'Fam169b',	'Tnfrsf4',	'Fyb',	'Cd8b1',	'Trbc2',	'Sntb1',	'Itga4',	'Lat',	'Cd3d',	'Tnfsf8',	'Cd8a',	'Shisa5',	'Ikzf2',	'Ms4a6b',	'Itgav',	'Fyn',	'Cd2',	'Gzmk',	'Trps1',	'Smc4',	]	,
'plasmacells_n':	[		'Jchain',	'Xbp1',	'Txndc5',	'Mzb1',	'Iglc2',	'Eaf2',	'Derl3',	'Iglv1',	'Pdia4',	'Iglc3',	'Creld2',	'Herpud1',	'Serp1',	'Ssr4',	'Ckap4',	'Fkbp2',	'Hsp90b1',	'Prdx4',	'Sec11c',	'Edem2',	'Edem1',	'Iglc1',	'Pou2af1']}

In [None]:
#sc.set_figure_params(scanpy=True, fontsize = 14)
#ac = sc.pl.matrixplot(adata, genes, groupby = 'cell_type_subsets', show = False, standard_scale = 'var')
#ac['mainplot_ax'].set_xlabel('Genes')
#ac['mainplot_ax'].set_ylabel('Clusters')

In [None]:
# Table of the 25 top-ranked gene names per group; columns are named '<group>_n'.
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names
top_names_by_group = {}
for group_name in groups:
    top_names_by_group[group_name + '_' + 'names'[:1]] = result['names'][group_name]
pd.DataFrame(top_names_by_group).head(25)

In [None]:
# From here on `genes` refers to the curated marker dictionary (same object, not a copy).
genes = marker_genes_dict 

In [None]:
genes

In [None]:
adata.uns["cell_type_subset_colors"]

In [None]:
# Pair each marker-dict key with a colour by position. zip stops at the shorter input, so
# colours beyond the number of keys are ignored.
# NOTE(review): this reproduces the UMAP colours only if the key order of `genes` equals the
# category order of obs['cell_type_subset'] — confirm.
color_set = adata.uns["cell_type_subset_colors"]
celltype_color_map = dict(zip(genes.keys(), color_set))

In [None]:
celltype_color_map

In [None]:
# Map every marker gene to the colour of the group it is listed under.
# Some genes (e.g. 'Cd8b1', 'Cd8a') are listed under several groups. A dict keeps a key at the
# position of its first insertion, so such a gene stays inside the first group's block of
# columns. setdefault keeps the colour of that same first group, whereas plain assignment
# recoloured it with the last group that lists it.
genes_color_map = {}
for item, value in genes.items():
    for k in value:
        genes_color_map.setdefault(k, celltype_color_map[item])

genes_color_map

In [None]:
# Get the gene ids (column positions of the marker genes in adata.var_names)
marker_gene_names = list(genes_color_map.keys())
gene_ids = [adata.var_names.get_loc(j) for j in marker_gene_names]

# Slice the expression matrix down to the marker genes
selected_data_sparse = adata.X[:, gene_ids]

# adata.X can be a scipy sparse matrix or a plain ndarray; only the former has .toarray()
if sparse.issparse(selected_data_sparse):
    selected_data_dense = selected_data_sparse.toarray()
else:
    selected_data_dense = np.asarray(selected_data_sparse)
print(selected_data_dense.shape)

# Create a data matrix with only genes of interest (cells x marker genes)
data_matrix = pd.DataFrame(selected_data_dense, index = adata.obs_names, columns = marker_gene_names)

# Add cluster labels to compute average
data_matrix['cell_type_subset'] = adata.obs['cell_type_subset']

In [None]:
# Compute the average for each cluster
# (rows: cell_type_subset values, columns: marker genes)
avg_matrix = data_matrix.groupby(['cell_type_subset']).mean()

In [None]:
# Assuming your data is in avg_matrix
# You can create a transposed version of avg_matrix to switch x and y axes
#avg_matrix = np.transpose(avg_matrix)

In [None]:
# seaborn is imported only in later cells (the import in the first cell is commented out),
# so import it here to keep this cell runnable on a fresh kernel.
import seaborn as sns

sns.set_style("white")


In [None]:
import seaborn as sns

# Requires avg_matrix, genes_color_map and celltype_color_map from the cells above.

# Heatmap of the per-subset means without any clustering; standard_scale=1 rescales each
# column (gene). The colour strip above the columns shows each gene's group.
gene_strip_colors = list(genes_color_map.values())
g = sns.clustermap(
    avg_matrix,
    row_cluster=False,
    col_cluster=False,
    standard_scale=1,
    cbar_pos=(1.05, .3, .03, .4),
    linewidth=0.5,
    figsize=(30, 10),
    col_colors=gene_strip_colors,
    row_colors=None,
)

# Axis labels
g.ax_heatmap.set_xlabel('Genes')
g.ax_heatmap.set_ylabel('celltype')

# Zero-sized bars exist only to create one legend entry per group
for label, label_color in celltype_color_map.items():
    g.ax_col_dendrogram.bar(0, 0, color=label_color, label=label, linewidth=0)

# Legend goes on the (otherwise empty) column-dendrogram axes
g.ax_col_dendrogram.legend(ncol=1, bbox_to_anchor=(0, 0.75))

# To save figure:

In [None]:
# Marker panels on the `young` object, nine per row; the colour scale of each panel is clipped
# to the 5th-98th percentile. 'Igfbp4' and 'Lef1' occur twice in the list, so they are drawn twice.
sc.pl.umap(young, color=[ 'Ikzf2', 'Foxp3', 'Ctla4', 'Itgav','Lef1', 'Igfbp4', 'Bach2', 'Npc2','Ccl5', 'Gzmk', 'Nkg7', 'Cd8b1', 'Cd8a', 'Igfbp4', 'Lef1',
'Fcer1g', 'Cd160','Ptma', 'Hmgb1', 'Stmn1', 'Dut','Ccr9', 'Themis', 'Sox4', 'Tcf7','Il12rb2', 'Sh3bgrl3', 'Ctsw', 'Gzmb','Ncr1', 'Klre1',
 'Furin', 'Itm2b', 'Rora', 'Il1rl1','Tmem176a', 'Tmem176b', 'Il23r', 'Ramp1','Cd79a', 'Cd79b', 'Ms4a1', 'H2-DMb2','Igkc', 'Jchain', 'Txndc5', 'Mzb1',
'Cst3', 'Psap', 'Ppt1', 'Plbd1','Fscn1', 'Ccr7', 'Marcks', 'Tmem123', 'Tmcc3','Ifi30', 'H2-Ab1', 'Cd74', 'H2-Aa','Tcf4', 'Grn', 'Pld4', 'Ctsb', 'Rnase6',
'Lyz2', 'Ctss', 'Gpx1', 'Lst1','Msrb1', 'Tyrobp', 'Ifitm3'] , vmax='p98', vmin='p05',ncols=9)

## T cells

In [None]:
# All cells annotated as 'T cells'. .copy() makes Tall an independent AnnData rather than a
# view of `adata`, since the following cells modify it in place (gene filtering, HVG, PCA, UMAP).
Tall = adata[adata.obs['cell_type'].isin(['T cells'])].copy()

In [None]:
# Remove columns with all 0s
# (min_cells=1 keeps only genes detected in at least one of the T cells; Tall is edited in place)
sc.pp.filter_genes(Tall, min_cells=1)

In [None]:
# Re-select 2,000 highly variable genes within the T-cell subset.
# NOTE(review): scanpy documents flavor='seurat_v3' as expecting raw counts — confirm Tall.X holds counts.
sc.pp.highly_variable_genes(Tall, n_top_genes=2000, n_bins=20, flavor='seurat_v3')

In [None]:
# Fresh seeded RandomState, then a 50-component PCA of the T cells on their highly variable genes.
rng = np.random.RandomState(42)
sc.tl.pca(Tall, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
# Cumulative % variance explained for the T-cell PCA.
# Both the x and the y values come from Tall; the y values were previously read from `T.uns`,
# an object that is not the one analysed here.
# The x axis is 1-based so the red line at 30 corresponds to keeping exactly 30 PCs.
tall_variance_ratio = Tall.uns['pca']['variance_ratio']
plt.plot(np.arange(1, len(tall_variance_ratio) + 1), np.cumsum(tall_variance_ratio) * 100, '.-')
plt.axvline(30, color = 'r',)
plt.xlabel('Principal Component', fontsize = 14)
plt.ylabel('% Variance Explained', fontsize = 14)

In [None]:
# Neighbour graph (30 neighbours, first 30 PCs) and UMAP (min_dist=0.5) for the T cells only.
sc.pp.neighbors(Tall, n_neighbors=30, n_pcs=30)
sc.tl.umap(Tall, min_dist=0.5)

In [None]:
# T-cell UMAP by stage, day, subset and Tcf7; vmax=4 caps the colour scale
# (presumably only relevant for the continuous Tcf7 panel — verify).
sc.pl.umap(Tall, color=["stage","day","cell_type_subset",'Tcf7'],vmax=4)

In [None]:
# Six colours for the subsets shown on the T-cell UMAP.
# NOTE(review): assumes Tall has exactly six cell_type_subset categories, in this order — confirm.
Tall.uns['cell_type_subset_colors']=['#f6222e','#b57edc', '#002FA7', '#060047', '#ffbaba','#3283fe',]

In [None]:
# High-resolution (300 dpi) T-cell UMAP coloured by subset, without frame or point outlines.
sc.set_figure_params(dpi=300, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(Tall, color=['cell_type_subset'], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.3,
                     outline_width=[0.6, 0.01], 
                     size=5,  
                     frameon=False, 
                     add_outline=False, 
                     sort_order = False, )



In [None]:
# Same high-resolution T-cell UMAP, coloured by 'stage' instead of subset.
sc.set_figure_params(dpi=300, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

sc.pl.umap(Tall, color=['stage'], 
                     color_map='Spectral_r',
                     use_raw=False, 
         #  "Cd4","Cd8a",
                     ncols=4, 
                     wspace = 0.3,
                     outline_width=[0.6, 0.01], 
                     size=5,  
                     frameon=False, 
                     add_outline=False, 
                     sort_order = False, )



In [None]:
# One independent AnnData per time point. .copy() is needed because later cells write to
# these objects (e.g. Td0.layers['norm_counts'] = ...), which should not happen on a view of Tall.
Td0 = Tall[Tall.obs["day"] == "d0"].copy()
Td1 = Tall[Tall.obs["day"] == "d1"].copy()
Td4 = Tall[Tall.obs["day"] == "d4"].copy()
Td7 = Tall[Tall.obs["day"] == "d7"].copy()

In [None]:
# Per-cell table for the density plots of day 0: UMAP coordinates plus stage and day.
df_temp = pd.DataFrame({'umap_x': Td0.obsm['X_umap'][:, 0], 'umap_y': Td0.obsm['X_umap'][:, 1], 
                        'stage': Td0.obs['stage'], 'day': Td0.obs['day']}, index = Td0.obs.index)




In [None]:
Td0.obs["stage"]

In [None]:
import seaborn as sns

# Two side-by-side density maps on the UMAP, one per age group:
# (value of df_temp['stage'], panel title, marker size of the backdrop scatter)
kde_panel_specs = [('02mo', 'young', 1), ('18mo', 'old', 0)]

fig = plt.figure(figsize = (8*2, 6))
for panel_no, (stage_value, panel_title, backdrop_size) in enumerate(kde_panel_specs, start=1):
    ax = fig.add_subplot(1, 2, panel_no)
    # All cells are scattered first (size 0 in the second panel), presumably to give both
    # panels the same data extent; the KDE then uses only the cells of one stage.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=backdrop_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['stage'] == stage_value], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    # No frame around the panel
    for spine_side in ('right', 'top', 'left', 'bottom'):
        ax.spines[spine_side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# anndata2ri interconverts AnnData and Single Cell Experiment objects
anndata2ri.activate()
# Load the rpy2 IPython extension that provides the %%R cell magic used in the cells below.
%load_ext rpy2.ipython
#%reload_ext rpy2.ipython

In [None]:
# Keep a copy of the current X as layers['norm_counts'].
# NOTE(review): the layer name implies X is normalized at this point — confirm.
Td0.layers['norm_counts'] = Td0.X.copy()

In [None]:
# Slim AnnData handed to R for Milo (day 0): the 'norm_counts' matrix, four obs columns,
# the gene table, and the PCA / UMAP coordinates already computed on the T cells.
adata_milo = sc.AnnData(Td0.layers['norm_counts'].copy(), 
                        obs = Td0.obs[['stage', 'day', 'cell_type_subset',"sample"]], 
                        var = Td0.var)
adata_milo.obsm['X_pca'] = Td0.obsm['X_pca']
adata_milo.obsm['X_umap'] = Td0.obsm['X_umap']

In [None]:
%%R
# Load the R packages used by the Milo cells below.
library(igraph)

library(miloR)

In [None]:
%%R -i adata_milo
# -i pushes the Python object adata_milo into the R session; printing it shows what R received.
adata_milo

In [None]:
%%R 
# Wrap the imported object in a Milo object.
# NOTE(review): the variable is called myeloid_milo although adata_milo is built from the T-cell subset.
myeloid_milo <- Milo(adata_milo)
myeloid_milo

In [None]:
%%R 
# kNN graph for Milo: k = 30 neighbours on the first 30 dimensions of the "PCA" reduced dim.
myeloid_milo <- buildGraph(myeloid_milo, k=30, d=30, reduced.dim = "PCA")

In [None]:
# Design table for the DA test: the distinct (sample, stage, day) combinations, indexed by sample.
design_df = adata_milo.obs.loc[:, ['sample', "stage", "day"]].drop_duplicates()
design_df.index = design_df['sample']
design_df

In [None]:
%%R -i design_df -o DA_results_myeloid
# Milo differential-abundance test between stages. design_df comes in from Python (-i) and the
# result table DA_results_myeloid is handed back to Python (-o).
## Define neighbourhoods
myeloid_milo <- makeNhoods(myeloid_milo, prop = 0.1, k = 30, d=30, refined = TRUE, reduced_dims = "PCA")

## Count cells in neighbourhoods
myeloid_milo <- countCells(myeloid_milo, meta.data = data.frame(colData(myeloid_milo)), sample="sample")

## Calculate distances between cells in neighbourhoods
## for spatial FDR correction
myeloid_milo <- calcNhoodDistance(myeloid_milo, d=30, reduced.dim = "PCA")


## Test for differential abundance
DA_results_myeloid <- testNhoods(myeloid_milo, design = ~stage, design.df = design_df)


In [None]:
# First rows of the DA result table returned from R.
DA_results_myeloid.head()

In [None]:
# Volcano-style overview: one point per neighbourhood, logFC against -log10(SpatialFDR).
plt.plot(DA_results_myeloid.logFC, -np.log10(DA_results_myeloid.SpatialFDR), '.');
plt.xlabel("log-Fold Change");
plt.ylabel("- log10(Spatial FDR)")

In [None]:
%%R
# Build the neighbourhood graph that plotNhoodGraphDA draws in the cells below.
myeloid_milo <- buildNhoodGraph(myeloid_milo)

In [None]:
%%R 
# First rows of the DA table as seen from R.
head(DA_results_myeloid)

In [None]:
%%R -w 800 -h 600
# Neighbourhood graph on the UMAP layout, coloured by the SpatialFDR column;
# alpha = 1 is passed as the significance threshold.
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'SpatialFDR', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R -w 800 -h 600
# Same neighbourhood graph, coloured by the logFC column.
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'logFC', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R
myeloid_milo

In [None]:
%%R 
# Annotate each neighbourhood using the 'cell_type_subset' column of the cell metadata; the
# later cells read the resulting cell_type_subset and cell_type_subset_fraction columns.
DA_results_myeloid <- annotateNhoods(myeloid_milo, DA_results_myeloid, coldata_col = 'cell_type_subset')
head(DA_results_myeloid)

In [None]:
%%R
# Histogram of cell_type_subset_fraction over all neighbourhoods (50 bins).
library(ggplot2)
ggplot(DA_results_myeloid, aes(cell_type_subset_fraction)) + geom_histogram(bins=50)

In [None]:
%%R -o DA_results_myeloid
# Neighbourhoods with cell_type_subset_fraction below 0.8 get the label "Mixed" in a new
# `Celltypes` column; the annotated table is then exported back to Python (-o).
# NOTE(review): the plots that follow group by `cell_type_subset`, not by `Celltypes`, so the
# "Mixed" label is not used by them — confirm that this is intended.
DA_results_myeloid$Celltypes <- ifelse(DA_results_myeloid$cell_type_subset_fraction < 0.8, "Mixed", DA_results_myeloid$cell_type_subset)
head(DA_results_myeloid)

In [None]:
%%R
# Beeswarm of neighbourhood logFC grouped by annotated subset; alpha = 1 as significance threshold.
plotDAbeeswarm(DA_results_myeloid, group.by = "cell_type_subset", alpha = 1)

In [None]:
import matplotlib.colors as mcolors
import matplotlib.cm as cm


# One figure per significance measure: neighbourhood logFC (x) per annotated subset (y),
# points coloured by -log10(measure), with a colour bar for that scale.
for item in ['FDR', 'SpatialFDR', 'PValue']:
    fig = plt.figure(figsize = (8, 12))
    # Adds a 'log_<measure>' column to the result table (kept for later inspection)
    DA_results_myeloid['log_' + item] = -np.log10(DA_results_myeloid[item])
    ax = fig.add_subplot(1, 1, 1)
    plot = sns.stripplot(x='logFC', y="cell_type_subset", hue='log_' + item, data=DA_results_myeloid, size = 6, 
              palette='cividis', 
              jitter=0.2, edgecolor='none', ax = ax)
    # The colour bar below replaces the hue legend
    plot.get_legend().set_visible(False)
    #ax.set_xticklabels(ax.get_xticks(), fontsize = 18)
    #ax.set_yticklabels(ax.get_yticks(), fontsize = 18)
    ax.tick_params(axis='both', which='major', labelsize=18)
    ax.set_ylabel('T cell subsets', fontsize = 18)
    ax.set_xlabel('logFC', fontsize = 18)
    sns.despine()


    # Drawing the side color bar
    normalize = mcolors.Normalize(vmin=DA_results_myeloid['log_' + item].min(), 
                              vmax=DA_results_myeloid['log_' + item].max())
    colormap = cm.cividis

    scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
    scalarmappaple.set_array(DA_results_myeloid['log_' + item])
    # The mappable is not attached to any Axes, so name the Axes the colour bar takes its
    # space from; current matplotlib raises an error when it has to guess.
    cbar = fig.colorbar(scalarmappaple, ax=ax)
    # Only change the label size; re-setting the labels from get_yticks() would replace the
    # formatted tick text with raw float values.
    cbar.ax.tick_params(labelsize=18)
    cbar.ax.set_ylabel('-log10(' + item + ')',  labelpad = 20, rotation=90, fontsize = 18)
    ax.grid(False)
    #fig.savefig(outbase + 'milor_myeloid_swarmplot_colored_by_log_' + item + '.pdf', dpi = 300, 
                #bbox_inches = 'tight')

In [None]:
df_temp = pd.DataFrame({'umap_x': Td1.obsm['X_umap'][:, 0], 'umap_y': Td1.obsm['X_umap'][:, 1], 
                        'stage': Td1.obs['stage'], 'day': Td1.obs['day']}, index = Td1.obs.index)




In [None]:
import seaborn as sns

# Two side-by-side density maps on the UMAP, one per age group:
# (value of df_temp['stage'], panel title, marker size of the backdrop scatter)
kde_panel_specs = [('02mo', 'young', 1), ('18mo', 'old', 0)]

fig = plt.figure(figsize = (8*2, 6))
for panel_no, (stage_value, panel_title, backdrop_size) in enumerate(kde_panel_specs, start=1):
    ax = fig.add_subplot(1, 2, panel_no)
    # All cells are scattered first (size 0 in the second panel), presumably to give both
    # panels the same data extent; the KDE then uses only the cells of one stage.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=backdrop_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['stage'] == stage_value], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    # No frame around the panel
    for spine_side in ('right', 'top', 'left', 'bottom'):
        ax.spines[spine_side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# anndata2ri interconverts AnnData and Single Cell Experiment objects
anndata2ri.activate()
%load_ext rpy2.ipython
#%reload_ext rpy2.ipython

In [None]:
Td1.layers['norm_counts'] = Td1.X.copy()

In [None]:
# Slim AnnData handed to R for Milo (day 1): the 'norm_counts' matrix, four obs columns,
# the gene table, and the PCA / UMAP coordinates already computed on the T cells.
adata_milo = sc.AnnData(Td1.layers['norm_counts'].copy(), 
                        obs = Td1.obs[['stage', 'day', 'cell_type_subset',"sample"]], 
                        var = Td1.var)
adata_milo.obsm['X_pca'] = Td1.obsm['X_pca']
adata_milo.obsm['X_umap'] = Td1.obsm['X_umap']

In [None]:
%%R
library(igraph)

library(miloR)

In [None]:
%%R -i adata_milo
adata_milo

In [None]:
%%R 
myeloid_milo <- Milo(adata_milo)
myeloid_milo

In [None]:
%%R 
myeloid_milo <- buildGraph(myeloid_milo, k=30, d=30, reduced.dim = "PCA")

In [None]:
# Design table for the DA test: the distinct (sample, stage, day) combinations, indexed by sample.
design_df = adata_milo.obs.loc[:, ['sample', "stage", "day"]].drop_duplicates()
design_df.index = design_df['sample']
design_df

In [None]:
%%R -i design_df -o DA_results_myeloid
# Milo differential-abundance test between stages. design_df comes in from Python (-i) and the
# result table DA_results_myeloid is handed back to Python (-o), replacing the earlier table.
## Define neighbourhoods
myeloid_milo <- makeNhoods(myeloid_milo, prop = 0.1, k = 30, d=30, refined = TRUE, reduced_dims = "PCA")

## Count cells in neighbourhoods
myeloid_milo <- countCells(myeloid_milo, meta.data = data.frame(colData(myeloid_milo)), sample="sample")

## Calculate distances between cells in neighbourhoods
## for spatial FDR correction
myeloid_milo <- calcNhoodDistance(myeloid_milo, d=30, reduced.dim = "PCA")


## Test for differential abundance
DA_results_myeloid <- testNhoods(myeloid_milo, design = ~stage, design.df = design_df)


In [None]:
DA_results_myeloid.head()

In [None]:
plt.plot(DA_results_myeloid.logFC, -np.log10(DA_results_myeloid.SpatialFDR), '.');
plt.xlabel("log-Fold Change");
plt.ylabel("- log10(Spatial FDR)")

In [None]:
%%R
myeloid_milo <- buildNhoodGraph(myeloid_milo)

In [None]:
%%R 
head(DA_results_myeloid)

In [None]:
%%R -w 800 -h 600
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'SpatialFDR', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R -w 800 -h 600
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'logFC', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R
myeloid_milo

In [None]:
%%R 
DA_results_myeloid <- annotateNhoods(myeloid_milo, DA_results_myeloid, coldata_col = 'cell_type_subset')
head(DA_results_myeloid)

In [None]:
%%R
library(ggplot2)
ggplot(DA_results_myeloid, aes(cell_type_subset_fraction)) + geom_histogram(bins=50)

In [None]:
%%R -o DA_results_myeloid
# Neighbourhoods with cell_type_subset_fraction below 0.8 get the label "Mixed" in a new
# `Celltypes` column; the annotated table is then exported back to Python (-o).
# NOTE(review): the plots that follow group by `cell_type_subset`, not by `Celltypes`, so the
# "Mixed" label is not used by them — confirm that this is intended.
DA_results_myeloid$Celltypes <- ifelse(DA_results_myeloid$cell_type_subset_fraction < 0.8, "Mixed", DA_results_myeloid$cell_type_subset)
head(DA_results_myeloid)

In [None]:
%%R
plotDAbeeswarm(DA_results_myeloid, group.by = "cell_type_subset", alpha = 1)

In [None]:
import matplotlib.colors as mcolors
import matplotlib.cm as cm


# One figure per significance measure: neighbourhood logFC (x) per annotated subset (y),
# points coloured by -log10(measure), with a colour bar for that scale.
for item in ['FDR', 'SpatialFDR', 'PValue']:
    fig = plt.figure(figsize = (8, 12))
    # Adds a 'log_<measure>' column to the result table (kept for later inspection)
    DA_results_myeloid['log_' + item] = -np.log10(DA_results_myeloid[item])
    ax = fig.add_subplot(1, 1, 1)
    plot = sns.stripplot(x='logFC', y="cell_type_subset", hue='log_' + item, data=DA_results_myeloid, size = 6, 
              palette='cividis', 
              jitter=0.2, edgecolor='none', ax = ax)
    # The colour bar below replaces the hue legend
    plot.get_legend().set_visible(False)
    #ax.set_xticklabels(ax.get_xticks(), fontsize = 18)
    #ax.set_yticklabels(ax.get_yticks(), fontsize = 18)
    ax.tick_params(axis='both', which='major', labelsize=18)
    ax.set_ylabel('T cell subsets', fontsize = 18)
    ax.set_xlabel('logFC', fontsize = 18)
    sns.despine()


    # Drawing the side color bar
    normalize = mcolors.Normalize(vmin=DA_results_myeloid['log_' + item].min(), 
                              vmax=DA_results_myeloid['log_' + item].max())
    colormap = cm.cividis

    scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
    scalarmappaple.set_array(DA_results_myeloid['log_' + item])
    # The mappable is not attached to any Axes, so name the Axes the colour bar takes its
    # space from; current matplotlib raises an error when it has to guess.
    cbar = fig.colorbar(scalarmappaple, ax=ax)
    # Only change the label size; re-setting the labels from get_yticks() would replace the
    # formatted tick text with raw float values.
    cbar.ax.tick_params(labelsize=18)
    cbar.ax.set_ylabel('-log10(' + item + ')',  labelpad = 20, rotation=90, fontsize = 18)
    ax.grid(False)
    #fig.savefig(outbase + 'milor_myeloid_swarmplot_colored_by_log_' + item + '.pdf', dpi = 300, 
                #bbox_inches = 'tight')

In [None]:
df_temp = pd.DataFrame({'umap_x': Td4.obsm['X_umap'][:, 0], 'umap_y': Td4.obsm['X_umap'][:, 1], 
                        'stage': Td4.obs['stage'], 'day': Td4.obs['day']}, index = Td4.obs.index)




In [None]:
import seaborn as sns

# Two side-by-side density maps on the UMAP, one per age group:
# (value of df_temp['stage'], panel title, marker size of the backdrop scatter)
kde_panel_specs = [('02mo', 'young', 1), ('18mo', 'old', 0)]

fig = plt.figure(figsize = (8*2, 6))
for panel_no, (stage_value, panel_title, backdrop_size) in enumerate(kde_panel_specs, start=1):
    ax = fig.add_subplot(1, 2, panel_no)
    # All cells are scattered first (size 0 in the second panel), presumably to give both
    # panels the same data extent; the KDE then uses only the cells of one stage.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=backdrop_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['stage'] == stage_value], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    # No frame around the panel
    for spine_side in ('right', 'top', 'left', 'bottom'):
        ax.spines[spine_side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# anndata2ri interconverts AnnData and Single Cell Experiment objects
anndata2ri.activate()
%load_ext rpy2.ipython
#%reload_ext rpy2.ipython

In [None]:
Td4.layers['norm_counts'] = Td4.X.copy()

In [None]:
# Slim AnnData handed to R for Milo (day 4): the 'norm_counts' matrix, four obs columns,
# the gene table, and the PCA / UMAP coordinates already computed on the T cells.
adata_milo = sc.AnnData(Td4.layers['norm_counts'].copy(), 
                        obs = Td4.obs[['stage', 'day', 'cell_type_subset',"sample"]], 
                        var = Td4.var)
adata_milo.obsm['X_pca'] = Td4.obsm['X_pca']
adata_milo.obsm['X_umap'] = Td4.obsm['X_umap']

In [None]:
%%R
library(igraph)

library(miloR)

In [None]:
%%R -i adata_milo
adata_milo

In [None]:
%%R 
myeloid_milo <- Milo(adata_milo)
myeloid_milo

In [None]:
%%R 
myeloid_milo <- buildGraph(myeloid_milo, k=30, d=30, reduced.dim = "PCA")

In [None]:
# Design table for the DA test: the distinct (sample, stage, day) combinations, indexed by sample.
design_df = adata_milo.obs.loc[:, ['sample', "stage", "day"]].drop_duplicates()
design_df.index = design_df['sample']
design_df

In [None]:
%%R -i design_df -o DA_results_myeloid
# Milo differential-abundance test between stages. design_df comes in from Python (-i) and the
# result table DA_results_myeloid is handed back to Python (-o), replacing the earlier table.
## Define neighbourhoods
myeloid_milo <- makeNhoods(myeloid_milo, prop = 0.1, k = 30, d=30, refined = TRUE, reduced_dims = "PCA")

## Count cells in neighbourhoods
myeloid_milo <- countCells(myeloid_milo, meta.data = data.frame(colData(myeloid_milo)), sample="sample")

## Calculate distances between cells in neighbourhoods
## for spatial FDR correction
myeloid_milo <- calcNhoodDistance(myeloid_milo, d=30, reduced.dim = "PCA")


## Test for differential abundance
DA_results_myeloid <- testNhoods(myeloid_milo, design = ~stage, design.df = design_df)


In [None]:
DA_results_myeloid.head()

In [None]:
plt.plot(DA_results_myeloid.logFC, -np.log10(DA_results_myeloid.SpatialFDR), '.');
plt.xlabel("log-Fold Change");
plt.ylabel("- log10(Spatial FDR)")

In [None]:
%%R
myeloid_milo <- buildNhoodGraph(myeloid_milo)

In [None]:
%%R 
head(DA_results_myeloid)

In [None]:
%%R -w 800 -h 600
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'SpatialFDR', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R -w 800 -h 600
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'logFC', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R
myeloid_milo

In [None]:
%%R 
DA_results_myeloid <- annotateNhoods(myeloid_milo, DA_results_myeloid, coldata_col = 'cell_type_subset')
head(DA_results_myeloid)

In [None]:
%%R
library(ggplot2)
ggplot(DA_results_myeloid, aes(cell_type_subset_fraction)) + geom_histogram(bins=50)

In [None]:
%%R -o DA_results_myeloid
# Neighbourhoods with cell_type_subset_fraction below 0.8 get the label "Mixed" in a new
# `Celltypes` column; the annotated table is then exported back to Python (-o).
# NOTE(review): the plots that follow group by `cell_type_subset`, not by `Celltypes`, so the
# "Mixed" label is not used by them — confirm that this is intended.
DA_results_myeloid$Celltypes <- ifelse(DA_results_myeloid$cell_type_subset_fraction < 0.8, "Mixed", DA_results_myeloid$cell_type_subset)
head(DA_results_myeloid)

In [None]:
%%R
plotDAbeeswarm(DA_results_myeloid, group.by = "cell_type_subset", alpha = 1)

In [None]:
import matplotlib.colors as mcolors
import matplotlib.cm as cm


# One figure per significance measure: neighbourhood logFC (x) per annotated subset (y),
# points coloured by -log10(measure), with a colour bar for that scale.
for item in ['FDR', 'SpatialFDR', 'PValue']:
    fig = plt.figure(figsize = (8, 12))
    # Adds a 'log_<measure>' column to the result table (kept for later inspection)
    DA_results_myeloid['log_' + item] = -np.log10(DA_results_myeloid[item])
    ax = fig.add_subplot(1, 1, 1)
    plot = sns.stripplot(x='logFC', y="cell_type_subset", hue='log_' + item, data=DA_results_myeloid, size = 6, 
              palette='cividis', 
              jitter=0.2, edgecolor='none', ax = ax)
    # The colour bar below replaces the hue legend
    plot.get_legend().set_visible(False)
    #ax.set_xticklabels(ax.get_xticks(), fontsize = 18)
    #ax.set_yticklabels(ax.get_yticks(), fontsize = 18)
    ax.tick_params(axis='both', which='major', labelsize=18)
    ax.set_ylabel('T cell subsets', fontsize = 18)
    ax.set_xlabel('logFC', fontsize = 18)
    sns.despine()


    # Drawing the side color bar
    normalize = mcolors.Normalize(vmin=DA_results_myeloid['log_' + item].min(), 
                              vmax=DA_results_myeloid['log_' + item].max())
    colormap = cm.cividis

    scalarmappaple = cm.ScalarMappable(norm=normalize, cmap=colormap)
    scalarmappaple.set_array(DA_results_myeloid['log_' + item])
    # The mappable is not attached to any Axes, so name the Axes the colour bar takes its
    # space from; current matplotlib raises an error when it has to guess.
    cbar = fig.colorbar(scalarmappaple, ax=ax)
    # Only change the label size; re-setting the labels from get_yticks() would replace the
    # formatted tick text with raw float values.
    cbar.ax.tick_params(labelsize=18)
    cbar.ax.set_ylabel('-log10(' + item + ')',  labelpad = 20, rotation=90, fontsize = 18)
    ax.grid(False)
    #fig.savefig(outbase + 'milor_myeloid_swarmplot_colored_by_log_' + item + '.pdf', dpi = 300, 
                #bbox_inches = 'tight')

In [None]:
df_temp = pd.DataFrame({'umap_x': Td7.obsm['X_umap'][:, 0], 'umap_y': Td7.obsm['X_umap'][:, 1], 
                        'stage': Td7.obs['stage'], 'day': Td7.obs['day']}, index = Td7.obs.index)




In [None]:
Td7.obs["stage"]

In [None]:
import seaborn as sns

# Two side-by-side density maps on the UMAP, one per age group:
# (value of df_temp['stage'], panel title, marker size of the backdrop scatter)
kde_panel_specs = [('02mo', 'young', 1), ('18mo', 'old', 0)]

fig = plt.figure(figsize = (8*2, 6))
for panel_no, (stage_value, panel_title, backdrop_size) in enumerate(kde_panel_specs, start=1):
    ax = fig.add_subplot(1, 2, panel_no)
    # All cells are scattered first (size 0 in the second panel), presumably to give both
    # panels the same data extent; the KDE then uses only the cells of one stage.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=backdrop_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['stage'] == stage_value], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    # No frame around the panel
    for spine_side in ('right', 'top', 'left', 'bottom'):
        ax.spines[spine_side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# anndata2ri interconverts AnnData and Single Cell Experiment objects
anndata2ri.activate()
%load_ext rpy2.ipython
#%reload_ext rpy2.ipython

In [None]:
Td7.layers['norm_counts'] = Td7.X.copy()

In [None]:
# Slim AnnData handed to R for Milo (day 7): the 'norm_counts' matrix, four obs columns,
# the gene table, and the PCA / UMAP coordinates already computed on the T cells.
adata_milo = sc.AnnData(Td7.layers['norm_counts'].copy(), 
                        obs = Td7.obs[['stage', 'day', 'cell_type_subset',"sample"]], 
                        var = Td7.var)
adata_milo.obsm['X_pca'] = Td7.obsm['X_pca']
adata_milo.obsm['X_umap'] = Td7.obsm['X_umap']

In [None]:
%%R
library(igraph)

library(miloR)

In [None]:
%%R -i adata_milo
adata_milo

In [None]:
%%R 
myeloid_milo <- Milo(adata_milo)
myeloid_milo

In [None]:
%%R 
myeloid_milo <- buildGraph(myeloid_milo, k=30, d=30, reduced.dim = "PCA")

In [None]:
# Design table for the DA test: the distinct (sample, stage, day) combinations, indexed by sample.
design_df = adata_milo.obs.loc[:, ['sample', "stage", "day"]].drop_duplicates()
design_df.index = design_df['sample']
design_df

In [None]:
%%R -i design_df -o DA_results_myeloid
# Milo differential-abundance test between stages. design_df comes in from Python (-i) and the
# result table DA_results_myeloid is handed back to Python (-o), replacing the earlier table.
## Define neighbourhoods
myeloid_milo <- makeNhoods(myeloid_milo, prop = 0.1, k = 30, d=30, refined = TRUE, reduced_dims = "PCA")

## Count cells in neighbourhoods
myeloid_milo <- countCells(myeloid_milo, meta.data = data.frame(colData(myeloid_milo)), sample="sample")

## Calculate distances between cells in neighbourhoods
## for spatial FDR correction
myeloid_milo <- calcNhoodDistance(myeloid_milo, d=30, reduced.dim = "PCA")


## Test for differential abundance
DA_results_myeloid <- testNhoods(myeloid_milo, design = ~stage, design.df = design_df)


In [None]:
DA_results_myeloid.head()

In [None]:
plt.plot(DA_results_myeloid.logFC, -np.log10(DA_results_myeloid.SpatialFDR), '.');
plt.xlabel("log-Fold Change");
plt.ylabel("- log10(Spatial FDR)")

In [None]:
%%R
myeloid_milo <- buildNhoodGraph(myeloid_milo)

In [None]:
%%R 
head(DA_results_myeloid)

In [None]:
%%R -w 800 -h 600
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'SpatialFDR', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R -w 800 -h 600
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'logFC', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R
myeloid_milo

In [None]:
%%R 
DA_results_myeloid <- annotateNhoods(myeloid_milo, DA_results_myeloid, coldata_col = 'cell_type_subset')
head(DA_results_myeloid)

In [None]:
%%R
library(ggplot2)
ggplot(DA_results_myeloid, aes(cell_type_subset_fraction)) + geom_histogram(bins=50)

In [None]:
%%R -o DA_results_myeloid
# Neighbourhoods with cell_type_subset_fraction below 0.8 get the label "Mixed" in a new
# `Celltypes` column; the annotated table is then exported back to Python (-o).
# NOTE(review): the plots that follow group by `cell_type_subset`, not by `Celltypes`, so the
# "Mixed" label is not used by them — confirm that this is intended.
DA_results_myeloid$Celltypes <- ifelse(DA_results_myeloid$cell_type_subset_fraction < 0.8, "Mixed", DA_results_myeloid$cell_type_subset)
head(DA_results_myeloid)

In [None]:
%%R
plotDAbeeswarm(DA_results_myeloid, group.by = "cell_type_subset", alpha = 1)

In [None]:
DA_results_myeloid

In [None]:
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import seaborn as sns  # imported here so the cell does not depend on a later cell having run

# One strip plot per significance statistic: logFC of every neighbourhood, grouped by
# its cell_type_subset label and coloured by -log10(statistic).
for item in ['FDR', 'SpatialFDR', 'PValue']:
    log_col = 'log_' + item
    # Adds (or overwrites) a 'log_<statistic>' column on the shared results table.
    DA_results_myeloid[log_col] = -np.log10(DA_results_myeloid[item])

    # A single colour scale shared by the points and the colour bar. The upper end is
    # capped at 3, so every neighbourhood with -log10(value) >= 3 gets the top colour.
    normalize = mcolors.Normalize(vmin=DA_results_myeloid[log_col].min(), vmax=3)
    colormap = cm.cividis

    fig = plt.figure(figsize=(8, 12))
    ax = fig.add_subplot(1, 1, 1)
    # hue_norm ties the point colours to the same scale as the colour bar; without it
    # seaborn rescales to the data range and the bar (capped at 3) would not match.
    # NOTE(review): hue_norm on stripplot needs seaborn >= 0.12 — confirm the environment.
    sns.stripplot(x='logFC', y="cell_type_subset", hue=log_col, data=DA_results_myeloid, size=6,
                  palette='cividis', hue_norm=normalize,
                  jitter=0.2, edgecolor='none', ax=ax)
    # The colour bar replaces the hue legend; guard against seaborn not drawing one.
    legend = ax.get_legend()
    if legend is not None:
        legend.set_visible(False)
    ax.tick_params(axis='both', which='major', labelsize=18)
    # NOTE(review): the y axis shows cell_type_subset labels; confirm that
    # 'T cell subsets' is the intended axis title for this table.
    ax.set_ylabel('T cell subsets', fontsize=18)
    ax.set_xlabel('logFC', fontsize=18)
    sns.despine()

    # Side colour bar. `ax=ax` tells matplotlib which Axes to take the space from;
    # a free-standing ScalarMappable is not attached to any Axes.
    scalar_mappable = cm.ScalarMappable(norm=normalize, cmap=colormap)
    scalar_mappable.set_array(DA_results_myeloid[log_col])
    cbar = fig.colorbar(scalar_mappable, ax=ax)
    # Resize the existing tick labels instead of overwriting them with raw float reprs.
    cbar.ax.tick_params(labelsize=18)
    cbar.ax.set_ylabel('-log10(' + item + ')', labelpad=20, rotation=90, fontsize=18)
    ax.grid(False)
    #fig.savefig(outbase + 'milor_myeloid_swarmplot_colored_by_log_' + item + '.pdf', dpi = 300, 
                #bbox_inches = 'tight')

In [None]:
#import packages
import numpy as np
import json 
import scanpy as sc
from collections import OrderedDict
import scipy 
import pandas as pd
import matplotlib.pyplot as plt

#spectra imports 
import Spectra as spc
from Spectra import Spectra_util as spc_tl
from Spectra import K_est as kst
from Spectra import default_gene_sets

In [None]:
# Dot plot of selected interleukin-receptor genes per cell_type_subset,
# restricted to cells whose `day` is 'd0'; expression is scaled per gene.
receptor_genes = ['Il1r1', "Il1rl1", 'Il6ra', "Il18r1", "Il18rap"]
young_d0 = young[young.obs["day"] == 'd0']
sc.pl.dotplot(
    young_d0,
    receptor_genes,
    ['cell_type_subset'],
    dendrogram=False,
    standard_scale='var',
    swap_axes=False,
)


In [None]:
# Remove the cells labelled 'CD8' (the gene-set dictionary below has no 'CD8' key).
# .copy() turns the filtered view into a standalone AnnData: later cells write into
# young.var / young.obs / young.uns, which should not happen on a view of the parent.
young = young[young.obs['cell_type_subset'] != 'CD8'].copy()

In [None]:
# Gene-set dictionary handed to Spectra.
# Layout: {cell-type label (or "global"): {gene-set name: [gene symbols]}}.
# The outer keys are compared with the labels in young.obs['cell_type_subset'] by
# check_gene_set_dictionary, which also drops symbols that are not in young.var_names.
# NOTE(review): 'Tarpb1' and 'Ebnalbp2' in the MYC set look like misspellings of
# 'Tarbp1' / 'Ebna1bp2', and 'Matr3' is listed twice — confirm against the source lists.
gene_set_annotations = {
"global": {'all_GLU_metabolism':['Slc38a1','Gpt','Slc1a3','Slc7a11','Got2','Aldh18a1','Slc1a1','Slc1a5',
'Slc38a5','Got1','Slc25a13','Slc3a2','Slc25a18','Slc7a6','Slc6a13','Gpt2',
'Slc6a11','Slc7a7','Nags','Slc6a1','Slc1a6','Slc17a7','Slc32a1','Slc25a22',
'Glul','Gls2','Oplah','Acy1','Slc6a12','Slc1a2','Slc17a8','Glud2','Slc38a2',
'Slc38a3','Slc1a7','Gls','Slc25a12','Glud1','Slc38a4','Aldh4a1','Slc17a6'],
'all_fatty-acid_synthesis':['Elovl1','Hacd4','Acaca','Elovl2','Elovl6','Fads3','Hacd1','Tecrl',
'Elovl4','Fads1','Fads6','Fasn','Slc25a1','Fads2','Tecr','Aacs','Mcat',
'Elovl3','Hacd3','Hacd2','Scd5','Acacb','Scd','Elovl7','Hsd17b12','Elovl5'],
'all_NOTCH_signaling':['Heyl','Lfng','Dll1','Jag1','Ccnd1','Fzd1','Aph1a','Cul1','Dtx1','Notch1',
'Kat2a','Dtx2','Fbxw11','Hey1','Maml2','Arrb1','Tcf7l2','Fzd5','Psenen','Wnt5a',
'Hes1','Hey2','Rbx1','St3gal6','Ppard','Notch2','Skp1','Hes5','Psen2','Wnt2',
'Notch3','Sap30','Fzd7','Prkca','Dtx4'],
'global_AGING_GOBP_AGING':['Adra1b','Adra1a','Adra1d','Agtr1a','Agtr1b','Alpl','Comp','Edn1','Ednra','Ercc1','Sec63','Gna11','Gna12','Gna13','Gnaq','Hyal2','Enpp1','Pmp22','Slc1a2','Terc','Trp63','Wrn','Rnf165','Lncpint','Helt','Avpr1b','Nr5a1','Serp1','Ndufs6','Avpr1a','Arhgef12'],
'global_all_MYC_targets':['Nop16','Phb','Gcsh','Nhp2','Ppat','Fasn','Cad','Noc4l','Ncl','Ddx10','Odc1',
                          'Polr2h','Mgst1','Prps2','Adm','Slc39a6','Slc20a1','Shmt1','Tarpb1','Matr3','Psmg1',
                          'Ddx18','Bcat1','Mrto4','Mthfd1','Tsr1','Pno1','Mxi1','Rrp1b','Srm','Rsl1d1','Ak4',
                          'Rcc1','Cdk4','Matr3','Aimp2','Tuba4a','Ppif','Ebnalbp2','Gnl3','Apex1','Iars1',
                          'Ccnd2','Gpd1l','Ldha','Nop56','Fxn','Slpi','Nampt','Pa2g4','Nme1','Ctsc','Nolc1',
                          'Fbl','Uck2','Cebpz','Hspa9','Akap1','Ddx21','Socs3','Mettl1','Trap1','Fkbp4','C1qbp',
                          'Pycr1','Fabp5','Pold2','Tfrc','Paics','Hspd1','Cks2','Ranbp1','Slc19a1','Ndufaf4',
                          'Surf2','Plscr1','Asns','Grwd1','Slc16a1','Ppp1r14b','Hspe1','Ahcy','Emp1','Exosc7'],
'global_EGF':['Areg',"Tff1", 'Egf', 'Csnk2a1','Egfr', 'Elk1', 'Fos', 'Grb2', 'Hras', 'Jak1', 'Jun','Map2k1', 
              'Map2k4', 'Map3k1', 'Mapk3','Mapk8', 'Pik3ca', 'Pik3r1', 'Plcg1',
'Prkca', 'Prkcb', 'Raf1', 'Rasa1','Shc1', 'Sos1', 'Srf', 'Stat1','Stat3', 'Stat5a'],
'GOBP_ANIMAL_ORGAN_REGENERATION' :['Ace','Gfer','Apoa1','Apoa2','Apoh','Ccnd1','Cdk1','Cebpb','Egfr','Ezh1','Ezh2','Gata1','Gli1','Hmox1','Il6','Itpr1','Lif','Lifr','Pkm','Med1','Reg1','Cxcl5','Cxcl12','Aurka','Tgfb1','Vtn','Wnt1','Upf2','Sulf2'],
'global_all_glycolysis':['Gapdh','Gck','Pfkfb1','Eno3','Pgk1','Pgm2','Pkm','Pfkp','Eno4','Eno1','Aldoc',
'Eno2','Hk3','Pfkfb2','Pgm5','Tpi1','Aldoa','Aldob','Pklr','Pgk2','Pgm3','Pfkm',
'Pfkl','Hk2','Pgm1','Gpi','Hk1'],
'global_all_glutathione_metabolism':['Gss','Gpx1','Gpx6','Gstm1','Hagh','Gpx4','Gpx5','Prdx3','Esd','Glrx2',
'Gpx2','Prdx1','Glrx','Gpx3','Gclc','Gclm','Gpx7','Ggt1','Adh5','Cth',
'Gpx8','Gsr','Prdx2','Cbs'],
'global_all_IL6-JAK-STAT3_signaling':['Tyk2','Il18r1','Itga4','Csf2ra','Socs1','Cxcl11','Cd14','Ifnar1',
'Ifngr1','Ltb','Map3k8','Ebi3','Il1b','Cbl','Stat1','Pik3r5','Dntt',
'Stat3','Cntfr','Socs3','Reg1a','Tnfrsf12a','Cxcl3','Cd44','Cd38',
'Il4r','Csf2rb','Itgb3','Fas','Hmox1','Irf1','Inhbe','Pf4','Myd88',
'Grb2','Stam2','Acvrl1','Cxcl13','Tnfrsf1a','Ptpn11','Pla2g2a','Tgfb1',
'Ccr1','Cxcl9','Ltbr','Jun','Il3ra','Acvr1b','Osmr','Tnf','Tnfrsf1b',
'Hax1','Bak1','Il15ra','Cxcl1','Il12rb1','Lepr','Csf1','Tnfrsf21',
'Il1r1','Ccl7','Il13ra1','Pim1','Il2ra','Csf2','Il6','Irf9','Cd9',
'Il6st','Stat2','Il1r2','A2m','Cd36','Pdgfc','Tlr2','Crlf2','Il9r',
'Cxcl10','Il2rg','Ifngr2','Il17rb','Il17ra','Ptpn1','Il7','Il10rb'],
'global_all_autophagy-chaperone-mediated':['Eef1a2','Snrnp70','Snca','Eef1a1','Clu','Gfap','Hspa8',
'Hsp90aa1','Bag3','Lamp2','Plk3','Atp13a2','Stub1','Ctsa', 'Synpo2','Atg7'], },

# Cell-type specific sets: one 20-gene marker list per cell_type_subset label.
'Tregs':{'Tregs':['Ikzf2','Tnfrsf4','Ctla4','Itgav','Nrp1','Tnfrsf18','Zfp36l1','Tox','Ldlrad4','Sntb1','Rabgap1l','Ltb','Rora','Cd2','Prkca','Ctss','Fam169b','Gpx4','Bmyc','Nfkbia' ,],},
# The gene-set name previously carried a stray leading apostrophe ("'Naive CD4").
'Naive CD4':{"Naive CD4":['Lef1','Igfbp4','Bach2','Foxp1','Tpt1','Satb1','Fau','Klf2','Arhgap15','Npc2','Gimap6','Limd2','Dgka','Scml4','S1pr1','Rgs10','Grap2','Tgfbr2','Tcf7','Cytip' ,]},
'CD4':{"CD4":[ 'Prkca','Themis','Cd28','Tnfsf8','Fyb','Ly6a','Slamf6','Ifi27l2a','Cd4','Itgb1','Atxn1','Ms4a6b','Ctla4','Hivep2','Tnfrsf4','Grap2','Maml2','Itga4','Mndal','Emb'  ]},
'GZMK+ CD8':{"GZMK+ CD8":['Ccl5','Cd8b1','Cd8a','Nkg7','Themis','Gzmk','Ms4a4b','Atxn1','Gimap7','Grap2','Itga4','Fyn','Hcst','Ms4a6b','AW112010','Cst7','Ccl4','Cd3d','Fau','H2-Q7' ,],},
'Naive CD8':{'Naive CD8':['Cd8b1','Cd8a','Igfbp4','Lef1','Themis','Klf2','Dnajc15','Nme2','Fam241a','Grap2','Ccr7','Sell','Saraf','Rasgrp2','Naca','Coro1a','Cmah','Arhgdib','Rras2','Pdlim1' ,],},
'Invariant T':{'Invariant':['Cd7','Cd160','Anxa2','Fcer1g','Ikzf2','Pglyrp1','Ccl5','Xcl1','Klhdc2','Klrd1','Sidt1','Emb','Pfn1','Ripor2','Nkg7','Car2','Klrb1a','Nr4a2','Cotl1','Serf2' ,],},
'DN':{"DN":['Ptma','Hmgb1','Dut','Stmn1','H2afz','Dntt','Pclaf','Hmgb2','Endou','Anp32e','Arpp21','Anp32b','Ran','Tuba1b','Ppia','Gapdh','Hnrnpab','Rrm2','Snrpd1','1500009L16Rik' ,],},
'DP':{"DP":['Themis','Ccr9','Sox4','Tcf7','Satb1','Arpp21','Dntt','Rhoh','Trbc2','Endou','Cd27','Cd8b1','Cd247','Mier1','Tcf12','Lck','Lat','Ap3s1','Edem1','2610307P16Rik' ],},
'NK':{'NK cells':['Fcer1g','Tyrobp','Ncr1','Klre1','Ccl5','Xcl1','Car2','Klrb1c','AW112010','Klrk1','Gzma','Nkg7','Irf8','Anxa2','Prf1','Klrd1','Ccl4','Il2rb','Txk','Gem' ,],},
'NKT':{'NKT':['Nkg7','Gzmb','Ctsw','Sh3bgrl3','Id2','Il12rb2','Il2rb','Ly6c2','Gimap4','Chn2','Klrd1','Dennd4a','Xcl1','Cxcr6','Ctsd','Inpp4b','Dusp2','Tmsb10','Pitpnc1','AW112010' ,],},
'ILC2':{'ILC2':['Furin','Itm2b','Il1rl1','Rora','Nfkb1','Gadd45b','Gata3','Emb','Nfkbia','Nfkbiz','Ccdc184','Areg','Nav2','Ltb4r1','Nfkbid','Arg1','Samsn1','Srgn','Tcrg-C1','Plscr1' ,],},
'ILC3':{'ILC3':['Tmem176a','Il23r','Il1r1','Tmem176b','Ramp1','Emb','Lmo4','Furin','Rexo2','Pxdc1','Blk','Ikzf3','Icos','Cxcr6','Zbtb16','Serpinb1a','Ckb','Il7r','Selenop','S100a4' , ],},
'B cells':{'Bcells':['Cd79a','Cd79b','Ebf1','Ms4a1','Ly6d','H2-DMb2','Igkc','Bank1','Cd74','Iglc2','H2-Eb1','H2-Aa','H2-Ab1','Mzb1','Iglc3','Ighm','Ly86','Mef2c','Cd37','Napsa' ,],},
'cDC1':{'cDC1':['Cst3','Psap','Ppt1','Naaa','Plbd1','H2-DMb1','H2-Ab1','Wdfy4','H2-Aa','H2-Eb1','Cd74','Xcr1','Irf8','Alox5ap','Rab7b','Mpeg1','H2-DMa','Aif1','Ifi205','Pkib' , ],},
'Plasma cells':{'plasmacells':['Ighm','Jchain','Igkc','Mzb1','Txndc5','Iglc2','Iglc1','Xbp1','Herpud1','Iglc3','Pou2af1','Cd79a','Ssr4','Eaf2','Serp1','Fkbp2','Iglv1','Sec11c','Ly6d','Prg2' ,],},
'CCR7+ cDC': {'CCR7+ cDC':['Ifi30','Cd74','H2-Aa','H2-Ab1','H2-DMa','Ctsh','H2-Eb1','Ms4a6c','Spi1','Napsa','Alox5ap','Gpx1','H2-DMb1','Ctsz','Cst3','Syngr2','Gsn','Plbd1','Tyrobp','Ctss' , ],},
'cDC2':{'cDC2':['Fscn1','Tmcc3','Tbc1d8','Marcks','Tmem123','Cxcl16','Relb','Lrrk1','Cacnb3','Tbc1d4','Samsn1','Strip2','Marcksl1','Cd63','Rogdi','Anxa3','Basp1','Cst3','Etv6','Syngr2' ,],},
'p-DCs':{'p-DCs':['Siglech','Lair1','Tcf4','Pltp','Ctsl','Pld4','Grn','Rnase6','Ctsb','Ctsh','Irf8','Tyrobp','Atp1b1','Cd300c','Blnk','Eldr','Mpeg1','Kmo','Nucb2','Cyb561a3' ,]},
'Macrophages':{'Macrophages':['Lyz2','Ctss','Csf1r','Gpx1','Cybb','Ms4a6c','Tyrobp','Lst1','Spi1','Fcer1g','Tgfbi','Psap','Zeb2','Plac8','Ctsh','Ftl1','Pld4','Ifitm3','Ly86','Ctsz' , ],},

}

In [None]:
gene_set_annotations

In [None]:
# Alias only (same dict object, not a copy); `annotations` is reassigned to the
# filtered dictionary by the check_gene_set_dictionary cells below.
annotations=gene_set_annotations

In [None]:
def check_gene_set_dictionary(young, annotations, obs_key='cell_type_subset',global_key='global', return_dict = True):
    '''
    Validate a Spectra gene set annotation dictionary against an AnnData object.

    Prints a report that
      * compares the dictionary's cell type keys with the cell type labels of the
        data (plus the key used for global gene sets), and
      * lists every gene set that has fewer than 3 genes left once genes absent
        from the data have been removed.

    young: AnnData , data to use with Spectra (only .obs[obs_key] and .var_names are read)
    annotations: dict , gene set annotations dictionary to use with Spectra,
                 {cell type or global key: {gene set name: [gene names]}}
    obs_key: str , column name for cell type annotations in young.obs
    global_key: str , key for global gene sets in gene set annotation dictionary
    return_dict: bool , return filtered gene set annotation dictionary

    returns: dict , copy of `annotations` keeping only genes found in young.var_names
             (gene order preserved); None when return_dict is False

    '''
    # --- 1. cell type keys -------------------------------------------------
    # The data never carries the global key itself, so it is added before comparing.
    adata_labels = set(young.obs[obs_key]) | {global_key}
    annotation_labels = set(annotations.keys())
    missing_in_annotations = adata_labels - annotation_labels
    missing_in_adata = annotation_labels - adata_labels
    # Always defined, including when both sides differ but happen to have the same size.
    dict_keys_OK = not missing_in_annotations and not missing_in_adata

    if dict_keys_OK:
        print('Cell type labels in gene set annotation dictionary and AnnData object are identical')
    if missing_in_annotations:
        print('The following labels are missing in the gene set annotation dictionary:',missing_in_annotations)
    if missing_in_adata:
        print('The following labels are missing in the AnnData object:',missing_in_adata)

    # --- 2. gene filtering and minimum size --------------------------------
    known_genes = set(young.var_names)  # hashed once for fast membership tests
    n_too_small = 0
    annotations_new = {}
    for k,v in annotations.items():
        annotations_new[k] = {}
        for k2,v2 in v.items():
            kept_genes = [x for x in v2 if x in known_genes]
            annotations_new[k][k2] = kept_genes
            # Size is judged on what survives the filter, not on the input list.
            length = len(kept_genes)
            if length<3:
                print('gene set',k2,'for cell type',k,'is of length',length)
                n_too_small += 1

    if n_too_small > 0:
        print(n_too_small,'gene sets are too small. Gene sets must contain at least 3 genes')
    elif dict_keys_OK:
        print('Your gene set annotation dictionary is correctly formatted.')
    if return_dict:
        return annotations_new

In [None]:
# Define data paths / keys
#adata_path = '/Users/xleana/Desktop/Single_cell_course/spectra/data/sample_data.h5ad'  # where to find the gene expression AnnData object (unused)
obs_key = 'cell_type_subset' # column in adata.obs holding the cell type labels; NOTE(review): the visible calls below pass the literal string rather than this variable


In [None]:
# Filter the dictionary to genes present in `young` and print the label / size report.
annotations = check_gene_set_dictionary(young, annotations, obs_key='cell_type_subset',global_key='global')

In [None]:
# Same check, starting again from the unfiltered gene_set_annotations; this result
# replaces the `annotations` produced by the previous cell.
annotations = check_gene_set_dictionary(young, gene_set_annotations, obs_key='cell_type_subset',global_key='global')

In [None]:
# Flag the 4000 most variable genes.
# NOTE(review): scanpy documents flavor='seurat_v3' as expecting raw counts —
# confirm that young.X holds counts at this point.
sc.pp.highly_variable_genes(young, n_top_genes=4000, n_bins=20, flavor='seurat_v3')

In [None]:
#fit the model
# `annotations` is the dictionary returned by check_gene_set_dictionary, i.e. the gene
# sets restricted to genes that exist in young.var_names. It is passed instead of the
# raw `gene_set_annotations` so that the validated dictionary is the one the model sees.
model = spc.est_spectra(adata = young, gene_set_dictionary = annotations,
                        use_highly_variable = True, cell_type_key = 'cell_type_subset',
                        use_weights = True, lam = 0.1,
                        delta=0.001,kappa = 0.00001, rho = 0.00001,
                        use_cell_types = True, n_top_vals = 25,
                        label_factors = True, #whether to label the factors by their overlap coefficient with the input gene sets
                        overlap_threshold = 0.2, #minimum overlap coefficient that has to be surpassed to assign a label to a factor
                        num_epochs=10000 #We run the recommended 10,000 epochs
                       )

In [None]:
young.uns['SPECTRA_overlap'] 

In [None]:
# Visualize the per-cell scores of one Spectra factor on the UMAP.
factor_idx = 42
factor_of_interest = young.uns['SPECTRA_overlap'].index[factor_idx]
print('plotting factor:', factor_of_interest)

# Store the factor's cell scores in .obs under the factor label so scanpy can colour by it.
cell_scores = young.obsm['SPECTRA_cell_scores'][:, factor_idx].astype(float)
young.obs[factor_of_interest] = cell_scores
# The colour scale is capped at the 98th percentile of the scores.
score_cap = np.quantile(cell_scores, 0.98)
sc.pl.umap(young, color=factor_of_interest, s=30, vmax=score_cap)

In [None]:
# Switches the notebook-wide scanpy figure settings (viridis colour map,
# vector_friendly off); this affects every scanpy plot drawn afterwards.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
# Outlined UMAP of the factor selected in the previous cell; relies on
# `factor_of_interest` and `cell_scores` still holding that cell's values.
# NOTE(review): ncols / wspace presumably have no effect with a single colour key.
sc.pl.umap(
    young, 
    color=factor_of_interest, 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False,
    vmax=np.quantile(cell_scores,0.98)
)

In [None]:
# Visualize the per-cell scores of one Spectra factor on the UMAP.
# NOTE(review): an earlier comment here said the model was poorly fitted after 2 epochs,
# but the fitting cell in this notebook requests num_epochs=10000 — confirm which applies.
factor_idx = 43
factor_of_interest = young.uns['SPECTRA_overlap'].index[factor_idx]
print('plotting factor:', factor_of_interest)

# Store the factor's cell scores in .obs under the factor label so scanpy can colour by it.
cell_scores = young.obsm['SPECTRA_cell_scores'][:, factor_idx].astype(float)
young.obs[factor_of_interest] = cell_scores
# The colour scale is capped at the 98th percentile of the scores.
score_cap = np.quantile(cell_scores, 0.98)
sc.pl.umap(young, color=factor_of_interest, s=30, vmax=score_cap)

In [None]:
# Switches the notebook-wide scanpy figure settings (viridis colour map,
# vector_friendly off); this affects every scanpy plot drawn afterwards.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True)
# Outlined UMAP of the factor selected in the previous cell; relies on
# `factor_of_interest` and `cell_scores` still holding that cell's values.
# NOTE(review): ncols / wspace presumably have no effect with a single colour key.
sc.pl.umap(
    young, 
    color=factor_of_interest, 
    use_raw=False,
    ncols=5,
    wspace = 0.1,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order = False,
    vmax=np.quantile(cell_scores,0.98)
)

In [None]:
# Factor-by-gene weight table built from the Spectra results stored on `young`.
# Rows carry the factor labels from SPECTRA_overlap; columns are the genes flagged
# in young.var['spectra_vocab'].
index_labels = young.uns['SPECTRA_overlap'].index
vocab_genes = young.var.loc[young.var['spectra_vocab']].index
gene_weights = pd.DataFrame(
    young.uns['SPECTRA_factors'],
    index=index_labels,
    columns=vocab_genes,
)
gene_weights

In [None]:
# Transposed view of the weight table: genes as rows, factors as columns.
gene_weightsT=gene_weights.T

In [None]:
# NOTE(review): displays the whole table; consider gene_weightsT.head().
gene_weightsT

In [None]:
# Export the gene x factor weights.
# NOTE(review): absolute, user-specific path — this only works on the author's
# machine; consider a configurable output directory.
gene_weightsT.to_csv('/Users/alemarquis/Desktop/Manuscript_Tregs/Revisions2/Spectra.csv')

In [None]:
import magic

In [None]:
# MAGIC operator; configured in the next cell.
# NOTE(review): no random_state is passed — confirm whether results need to be seeded.
magic_op = magic.MAGIC()

In [None]:
magic_op.set_params(knn=5, t=4)

In [None]:
# Subset to cells whose `cell_type` is "T cells".
# NOTE(review): `adataT` is not used by any later cell in the visible notebook;
# the MAGIC calls below are run on the full `young` object — confirm intent.
adataT=young[young.obs["cell_type"]=="T cells"]

In [None]:
# Run MAGIC on `young`, returning values for the 200 genes with the largest weight
# in the factor column '42-X-Tregs-X-Tregs' of gene_weightsT.
adataCD4_magic = magic_op.fit_transform(young, genes=gene_weightsT[['42-X-Tregs-X-Tregs']].sort_values(by = '42-X-Tregs-X-Tregs', ascending = False)[:200].index)

In [None]:
young

In [None]:
# Reuse the cell_type_subset colours of `young` for plots of the MAGIC output.
adataCD4_magic.uns["cell_type_subset_colors"]=young.uns["cell_type_subset_colors"]

In [None]:
import scvelo as scv
# scvelo figure defaults; figures are saved as PDF.
scv.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True, format='pdf')

In [None]:
# Heatmap of the top-200 genes of factor '42-X-Tregs-X-Tregs', with cells ordered by
# the obs column of the same name (written by the factor-score cell above).
# NOTE(review): col_cluster is given a list rather than a boolean — confirm that
# column clustering is really intended. The commented-out `save` name mentions
# factor 46, not 42.
scv.pl.heatmap(adataCD4_magic, var_names=gene_weightsT[['42-X-Tregs-X-Tregs']].sort_values(by = '42-X-Tregs-X-Tregs', ascending = False)[:200].index,
               sortby='42-X-Tregs-X-Tregs', color_map='viridis', col_color=['cell_type_subset'], col_cluster= ['cell_type_subset'],             
               n_convolve=100, figsize=(5,5),yticklabels=False) #save='46-X-Tregs-X-Tregs_viridis.pdf')

In [None]:
# Same MAGIC run as for factor 42, now for the 200 top-weighted genes of
# factor column '43-X-Tregs-X-43'.
adataCD4_magic2 = magic_op.fit_transform(young, genes=gene_weightsT[['43-X-Tregs-X-43']].sort_values(by = '43-X-Tregs-X-43', ascending = False)[:200].index)

In [None]:
# Reuse the cell_type_subset colours of `young` for plots of the MAGIC output.
adataCD4_magic2.uns["cell_type_subset_colors"]=young.uns["cell_type_subset_colors"]

In [None]:
# Repeats the scvelo import / figure defaults from the factor-42 heatmap.
import scvelo as scv
scv.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True, format='pdf')

In [None]:
# Heatmap of the top-200 genes of factor '43-X-Tregs-X-43', cells ordered by the obs
# column of the same name.
# NOTE(review): col_cluster is given a list rather than a boolean, and the
# commented-out `save` name mentions factor 46 — confirm both.
scv.pl.heatmap(adataCD4_magic2, var_names=gene_weightsT[['43-X-Tregs-X-43']].sort_values(by = '43-X-Tregs-X-43', ascending = False)[:200].index,
               sortby='43-X-Tregs-X-43', color_map='viridis', col_color=['cell_type_subset'], col_cluster= ['cell_type_subset'],             
               n_convolve=100, figsize=(5,5),yticklabels=False) #save='46-X-Tregs-X-Tregs_viridis.pdf')

### MiloR 

In [None]:
# Plotting table for the density maps: UMAP coordinates plus the stage / day label
# of every cell in `adata`, indexed by cell barcode.
umap_coords = adata.obsm['X_umap']
df_temp = pd.DataFrame(
    {
        'umap_x': umap_coords[:, 0],
        'umap_y': umap_coords[:, 1],
        'stage': adata.obs['stage'],
        'day': adata.obs['day'],
    },
    index=adata.obs.index,
)




In [None]:
adata.obs["stage"]

In [None]:
import seaborn as sns

# Two side-by-side UMAP density maps, one per stage ('02mo' titled young, '18mo' titled old).
fig = plt.figure(figsize=(16, 6))
# (subplot position, stage value, panel title, scatter marker size)
panel_specs = [(1, '02mo', 'young', 1), (2, '18mo', 'old', 0)]
for panel_pos, panel_stage, panel_title, marker_size in panel_specs:
    ax = fig.add_subplot(1, 2, panel_pos)
    # Every cell of df_temp is scattered first (marker size 0 in the second panel),
    # then the density of the selected stage is drawn on top.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=marker_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['stage'] == panel_stage], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# Plotting table for the density maps: UMAP coordinates plus the stage / day label
# of every cell in `adata`, indexed by cell barcode.
umap_coords = adata.obsm['X_umap']
df_temp = pd.DataFrame(
    {
        'umap_x': umap_coords[:, 0],
        'umap_y': umap_coords[:, 1],
        'stage': adata.obs['stage'],
        'day': adata.obs['day'],
    },
    index=adata.obs.index,
)




In [None]:
df_temp

In [None]:
adata.obs["stage"]

In [None]:
import seaborn as sns

# Two side-by-side UMAP density maps comparing day 'd1' with day 'd4'.
fig = plt.figure(figsize=(16, 6))
# (subplot position, day value, panel title, scatter marker size)
panel_specs = [(1, 'd1', 'd1', 1), (2, 'd4', 'd4', 0)]
for panel_pos, panel_day, panel_title, marker_size in panel_specs:
    ax = fig.add_subplot(1, 2, panel_pos)
    # Every cell of df_temp is scattered first (marker size 0 in the second panel),
    # then the density of the selected day is drawn on top.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=marker_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['day'] == panel_day], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
import seaborn as sns

# Two side-by-side UMAP density maps comparing day 'd0' with day 'd7'.
fig = plt.figure(figsize=(16, 6))
# (subplot position, day value, panel title, scatter marker size)
panel_specs = [(1, 'd0', 'd0', 1), (2, 'd7', 'd7', 0)]
for panel_pos, panel_day, panel_title, marker_size in panel_specs:
    ax = fig.add_subplot(1, 2, panel_pos)
    # Every cell of df_temp is scattered first (marker size 0 in the second panel),
    # then the density of the selected day is drawn on top.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=marker_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['day'] == panel_day], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
adata.obs["stage"]

In [None]:
import seaborn as sns

# Two side-by-side UMAP density maps comparing day 'd0' with day 'd4'.
# The panels are selected on the `day` column, so each title is taken from the day
# value itself (they used to read 'young' / 'old', which describes `stage`, not `day`).
fig = plt.figure(figsize=(16, 6))
# (subplot position, day value, scatter marker size)
panel_specs = [(1, 'd0', 1), (2, 'd4', 0)]
for panel_pos, panel_day, marker_size in panel_specs:
    ax = fig.add_subplot(1, 2, panel_pos)
    # Every cell of df_temp is scattered first (marker size 0 in the second panel),
    # then the density of the selected day is drawn on top.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=marker_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['day'] == panel_day], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_day, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# Plotting table restricted to the cells whose `day` is "d0": UMAP coordinates plus
# stage / day labels, indexed by cell barcode. The subset is taken once and reused.
adata_day = adata[adata.obs["day"] == "d0"]
df_temp = pd.DataFrame(
    {
        'umap_x': adata_day.obsm['X_umap'][:, 0],
        'umap_y': adata_day.obsm['X_umap'][:, 1],
        'stage': adata_day.obs['stage'],
        'day': adata_day.obs['day'],
    },
    index=adata_day.obs.index,
)




In [None]:
import seaborn as sns

# Two side-by-side UMAP density maps, one per stage ('02mo' titled young, '18mo' titled old),
# drawn from whatever cells the current df_temp contains.
fig = plt.figure(figsize=(16, 6))
# (subplot position, stage value, panel title, scatter marker size)
panel_specs = [(1, '02mo', 'young', 1), (2, '18mo', 'old', 0)]
for panel_pos, panel_stage, panel_title, marker_size in panel_specs:
    ax = fig.add_subplot(1, 2, panel_pos)
    # Every cell of df_temp is scattered first (marker size 0 in the second panel),
    # then the density of the selected stage is drawn on top.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=marker_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['stage'] == panel_stage], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# Plotting table restricted to the cells whose `day` is "d1": UMAP coordinates plus
# stage / day labels, indexed by cell barcode. The subset is taken once and reused.
adata_day = adata[adata.obs["day"] == "d1"]
df_temp = pd.DataFrame(
    {
        'umap_x': adata_day.obsm['X_umap'][:, 0],
        'umap_y': adata_day.obsm['X_umap'][:, 1],
        'stage': adata_day.obs['stage'],
        'day': adata_day.obs['day'],
    },
    index=adata_day.obs.index,
)




In [None]:
import seaborn as sns

# Two side-by-side UMAP density maps, one per stage ('02mo' titled young, '18mo' titled old),
# drawn from whatever cells the current df_temp contains.
fig = plt.figure(figsize=(16, 6))
# (subplot position, stage value, panel title, scatter marker size)
panel_specs = [(1, '02mo', 'young', 1), (2, '18mo', 'old', 0)]
for panel_pos, panel_stage, panel_title, marker_size in panel_specs:
    ax = fig.add_subplot(1, 2, panel_pos)
    # Every cell of df_temp is scattered first (marker size 0 in the second panel),
    # then the density of the selected stage is drawn on top.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=marker_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['stage'] == panel_stage], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# Plotting table restricted to the cells whose `day` is "d4": UMAP coordinates plus
# stage / day labels, indexed by cell barcode. The subset is taken once and reused.
adata_day = adata[adata.obs["day"] == "d4"]
df_temp = pd.DataFrame(
    {
        'umap_x': adata_day.obsm['X_umap'][:, 0],
        'umap_y': adata_day.obsm['X_umap'][:, 1],
        'stage': adata_day.obs['stage'],
        'day': adata_day.obs['day'],
    },
    index=adata_day.obs.index,
)




In [None]:
import seaborn as sns

# Two side-by-side UMAP density maps, one per stage ('02mo' titled young, '18mo' titled old),
# drawn from whatever cells the current df_temp contains.
fig = plt.figure(figsize=(16, 6))
# (subplot position, stage value, panel title, scatter marker size)
panel_specs = [(1, '02mo', 'young', 1), (2, '18mo', 'old', 0)]
for panel_pos, panel_stage, panel_title, marker_size in panel_specs:
    ax = fig.add_subplot(1, 2, panel_pos)
    # Every cell of df_temp is scattered first (marker size 0 in the second panel),
    # then the density of the selected stage is drawn on top.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=marker_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['stage'] == panel_stage], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# Plotting table restricted to the cells whose `day` is "d7": UMAP coordinates plus
# stage / day labels, indexed by cell barcode. The subset is taken once and reused.
adata_day = adata[adata.obs["day"] == "d7"]
df_temp = pd.DataFrame(
    {
        'umap_x': adata_day.obsm['X_umap'][:, 0],
        'umap_y': adata_day.obsm['X_umap'][:, 1],
        'stage': adata_day.obs['stage'],
        'day': adata_day.obs['day'],
    },
    index=adata_day.obs.index,
)




In [None]:
import seaborn as sns

# Two side-by-side UMAP density maps, one per stage ('02mo' titled young, '18mo' titled old),
# drawn from whatever cells the current df_temp contains.
fig = plt.figure(figsize=(16, 6))
# (subplot position, stage value, panel title, scatter marker size)
panel_specs = [(1, '02mo', 'young', 1), (2, '18mo', 'old', 0)]
for panel_pos, panel_stage, panel_title, marker_size in panel_specs:
    ax = fig.add_subplot(1, 2, panel_pos)
    # Every cell of df_temp is scattered first (marker size 0 in the second panel),
    # then the density of the selected stage is drawn on top.
    sns.scatterplot(data=df_temp, x='umap_x', y='umap_y', s=marker_size, ax=ax)
    sns.kdeplot(data=df_temp[df_temp['stage'] == panel_stage], x="umap_x", y="umap_y",
                fill=True, thresh=0, levels=10, cmap="Purples", ax=ax, cut=4)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(panel_title, fontsize=16)
    ax.set_xlabel('UMAP-1')
    ax.set_ylabel('UMAP-2')
    for side in ('right', 'top', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

#fig.savefig(outbase + 'Ctrl_DT_kdeplot_endo.png', dpi = 150, bbox_inches = 'tight')

In [None]:
# anndata2ri interconverts AnnData and SingleCellExperiment objects
anndata2ri.activate()
# Load the rpy2 IPython extension, which provides the %%R cell magic used below.
# NOTE(review): %%R cells also appear earlier in this notebook — confirm the
# extension is loaded before the first of them on a fresh kernel.
%load_ext rpy2.ipython
#%reload_ext rpy2.ipython

In [None]:
# Snapshot the current adata.X into a layer.
# NOTE(review): the layer name says "norm_counts"; confirm adata.X is normalised here.
adata.layers['norm_counts'] = adata.X.copy()

In [None]:
# Slim AnnData handed to R: the expression matrix, the four obs columns used by the
# Milo workflow (stage, day, cell_type_subset, sample), and the PCA / UMAP embeddings.
adata_milo = sc.AnnData(adata.layers['norm_counts'].copy(), 
                        obs = adata.obs[['stage', 'day', 'cell_type_subset',"sample"]], 
                        var = adata.var)
adata_milo.obsm['X_pca'] = adata.obsm['X_pca']
adata_milo.obsm['X_umap'] = adata.obsm['X_umap']

In [None]:
%%R
# R packages for the differential-abundance workflow.
library(igraph)

library(miloR)

In [None]:
%%R -i adata_milo
# -i transfers the Python object `adata_milo` into R; printing it shows what arrived.
adata_milo

In [None]:
%%R 
# Wrap the transferred object in a Milo object.
myeloid_milo <- Milo(adata_milo)
myeloid_milo

In [None]:
%%R
myeloid_milo

In [None]:
%%R 
# KNN graph on the "PCA" reduced dimension (k = 30 neighbours, d = 30 dimensions).
myeloid_milo <- buildGraph(myeloid_milo, k=30, d=30, reduced.dim = "PCA")

In [None]:
adata_milo

In [None]:
# Design table for testNhoods: the distinct (sample, stage, day) combinations found
# in adata_milo.obs, indexed by sample (the `sample` column is kept as well).
design_df = (
    adata_milo.obs[['sample', "stage", "day"]]
    .drop_duplicates()
    .set_index('sample', drop=False)
)
design_df

In [None]:
%%R -i design_df -o DA_results_myeloid
# Differential-abundance (DA) test on `myeloid_milo`.
# -i pulls `design_df` in from Python; -o pushes `DA_results_myeloid` back to Python.
## Define neighbourhoods
# NOTE(review): neighbourhood sampling is presumably random — consider set.seed()
# beforehand for reproducible results; confirm.
myeloid_milo <- makeNhoods(myeloid_milo, prop = 0.1, k = 30, d=30, refined = TRUE, reduced_dims = "PCA")

## Count cells in neighbourhoods
# Cells are counted per value of the "sample" column of colData.
myeloid_milo <- countCells(myeloid_milo, meta.data = data.frame(colData(myeloid_milo)), sample="sample")

## Calculate distances between cells in neighbourhoods
## for spatial FDR correction
myeloid_milo <- calcNhoodDistance(myeloid_milo, d=30, reduced.dim = "PCA")


## Test for differential abundance
# The model only contains `stage`; the `day` column of design_df is not part of the design formula.
DA_results_myeloid <- testNhoods(myeloid_milo, design = ~stage, design.df = design_df)

In [None]:
DA_results_myeloid.head()

In [None]:
# Volcano-style overview of the DA test: effect size against -log10(Spatial FDR),
# one dot per neighbourhood.
log_fold_change = DA_results_myeloid["logFC"]
neg_log10_spatial_fdr = -np.log10(DA_results_myeloid["SpatialFDR"])
plt.plot(log_fold_change, neg_log10_spatial_fdr, '.')
plt.xlabel("log-Fold Change")
plt.ylabel("- log10(Spatial FDR)")

In [None]:
%%R
# Build the neighbourhood graph that the plotNhoodGraphDA calls below draw.
myeloid_milo <- buildNhoodGraph(myeloid_milo)

In [None]:
%%R 
# First rows of the DA table as seen from R.
head(DA_results_myeloid)

In [None]:
%%R -w 800 -h 600
# Neighbourhood graph on the UMAP layout, nodes coloured by the SpatialFDR column.
# NOTE(review): alpha is presumably the SpatialFDR cut-off for colouring; with 1
# no neighbourhood would be filtered out — confirm this is intended.
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'SpatialFDR', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R -w 800 -h 600
# Same graph as the previous cell, coloured by logFC instead of SpatialFDR.
alpha_val = 1
library(ggplot2)
p1 <- plotNhoodGraphDA(myeloid_milo, DA_results_myeloid, res_column = 'logFC', alpha=alpha_val, 
                 layout="UMAP", size_range = c(2, 8), node_stroke =0.8)
p1

In [None]:
%%R
# Print a summary of the Milo object.
myeloid_milo

In [None]:
%%R 
# Attach a `cell_type_subset` label to every neighbourhood; the companion column
# `cell_type_subset_fraction` is used by the next two cells.
DA_results_myeloid <- annotateNhoods(myeloid_milo, DA_results_myeloid, coldata_col = 'cell_type_subset')
head(DA_results_myeloid)

In [None]:
%%R
# Distribution of cell_type_subset_fraction, used to judge the 0.8 threshold applied below.
library(ggplot2)
ggplot(DA_results_myeloid, aes(cell_type_subset_fraction)) + geom_histogram(bins=50)

In [None]:
%%R -o DA_results_myeloid
# Neighbourhoods whose cell_type_subset_fraction is below 0.8 are labelled "Mixed"
# in a NEW column `Celltypes`; the annotated table is exported back to Python (-o).
# NOTE(review): the beeswarm below and the Python strip plots group by the original
# `cell_type_subset` column, so `Celltypes` is not used by them — confirm intent.
DA_results_myeloid$Celltypes <- ifelse(DA_results_myeloid$cell_type_subset_fraction < 0.8, "Mixed", DA_results_myeloid$cell_type_subset)
head(DA_results_myeloid)

In [None]:
%%R
# Beeswarm of the DA results, one row per cell_type_subset label.
plotDAbeeswarm(DA_results_myeloid, group.by = "cell_type_subset", alpha = 1)

In [None]:
import matplotlib.colors as mcolors
import matplotlib.cm as cm
# The seaborn import in the notebook's first cell is commented out, so bring `sns` into scope here.
import seaborn as sns


# One strip plot per significance measure: each point is a row of DA_results_myeloid,
# positioned by logFC within its cell_type_subset and coloured by -log10(measure).
# Side effect: adds the columns log_FDR, log_SpatialFDR and log_PValue to DA_results_myeloid.
for item in ['FDR', 'SpatialFDR', 'PValue']:
    log_col = 'log_' + item
    DA_results_myeloid[log_col] = -np.log10(DA_results_myeloid[item])

    fig = plt.figure(figsize=(8, 12))
    ax = fig.add_subplot(1, 1, 1)
    sns.stripplot(x='logFC', y="cell_type_subset", hue=log_col, data=DA_results_myeloid, size=6,
                  palette='cividis',
                  jitter=0.2, edgecolor='none', ax=ax)

    # The per-value hue legend is replaced by the continuous colour bar drawn below.
    # get_legend() returns None when no legend was created, so guard before hiding it.
    legend = ax.get_legend()
    if legend is not None:
        legend.set_visible(False)

    ax.tick_params(axis='both', which='major', labelsize=18)
    # These are myeloid neighbourhoods; the previous 'T cell subsets' label was a copy-paste leftover.
    ax.set_ylabel('Myeloid cell subsets', fontsize=18)
    ax.set_xlabel('logFC', fontsize=18)
    sns.despine(ax=ax)

    # Side colour bar spanning the observed range of the -log10 values.
    normalize = mcolors.Normalize(vmin=DA_results_myeloid[log_col].min(),
                                  vmax=DA_results_myeloid[log_col].max())
    colormap = cm.cividis
    scalar_mappable = cm.ScalarMappable(norm=normalize, cmap=colormap)
    scalar_mappable.set_array(DA_results_myeloid[log_col])

    # The mappable is not attached to any Axes, so tell Matplotlib explicitly which Axes
    # the colour bar should take its space from.
    cbar = fig.colorbar(scalar_mappable, ax=ax)
    # Resize the tick labels without overwriting them, so the default tick formatting is kept.
    cbar.ax.tick_params(labelsize=18)
    cbar.ax.set_ylabel('-log10(' + item + ')', labelpad=20, rotation=90, fontsize=18)
    ax.grid(False)
    #fig.savefig(outbase + 'milor_myeloid_swarmplot_colored_by_log_' + item + '.pdf', dpi = 300, 
                #bbox_inches = 'tight')

In [None]:
# UMAP of the full dataset, coloured by the cell_type_subset annotation.
sc.pl.umap(adata, color='cell_type_subset')

In [None]:
# Keep only the observations whose obs['day'] equals 'd0'.
adatad0 = adata[adata.obs['day'] == 'd0', :]

In [None]:
# UMAP of the d0 subset, coloured by stage.
sc.pl.umap(adatad0, color='stage')

In [None]:
# Keep only the observations whose obs['day'] equals 'd1', then plot their UMAP coloured by stage.
adatad1 = adata[adata.obs['day'] == 'd1', :]
sc.pl.umap(adatad1, color='stage')

In [None]:
# Keep only the observations whose obs['day'] equals 'd4', then plot their UMAP coloured by stage.
adatad4 = adata[adata.obs['day'] == 'd4', :]
sc.pl.umap(adatad4, color='stage')

In [None]:
# Keep only the observations whose obs['day'] equals 'd7', then plot their UMAP coloured by stage.
adatad7 = adata[adata.obs['day'] == 'd7', :]
sc.pl.umap(adatad7, color='stage')