# Combined analysis (mice)

In [None]:
#%matplotlib nbagg
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc
import pandas as pd
from os import listdir
from os.path import isfile, join
import re
import anndata
import seaborn as sns

# Embed fonts as Type 42 (TrueType) in PDF/PS output so text stays editable.
plt.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# scanpy verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.verbosity = 3
sc.logging.print_header()

from matplotlib.colors import LinearSegmentedColormap

# Colour ramp used for the marker dot plots: light grey -> thistle -> red -> dark red.
cmap = LinearSegmentedColormap.from_list(
    name='gene_cmap',
    colors=['lightgrey', 'thistle', 'red', 'darkred'],
)

sc.settings.set_figure_params(
    dpi=80,
    color_map='viridis',
    vector_friendly=False,
    dpi_save=300,
)

In [None]:
def _read_cached(path):
    """Load one processed .h5ad file through scanpy with cache=True."""
    return sc.read(path, cache=True)


# One processed AnnData file per study accession.
# EMTAB7365 - Farbehi 2019 Elife
Farbehi1 = _read_cached('/nfs01data1/Groups/Wangxiaonan/share/rev_MI/EMTAB7365/write/EMTAB7365_processed.h5ad')
# EMTAB7376 - Farbehi 2019 Elife
Farbehi2 = _read_cached('/nfs01data1/Groups/Wangxiaonan/share/rev_MI/EMTAB7376/write/EMTAB7376_processed.h5ad')
# EMTAB7895 - Forte 2020 CellReports
Forte = _read_cached('/nfs01data1/Groups/Wangxiaonan/share/rev_MI/EMTAB7895/write/EMTAB7895_processed.h5ad')
# EMTAB9816 - Tombor 2021 NC
Tombor1 = _read_cached('/nfs01data1/Groups/Wangxiaonan/share/rev_MI/EMTAB9816/write/EMTAB9816_processed.h5ad')
# EMTAB9817 - Tombor 2021 NC
Tombor2 = _read_cached('/nfs01data1/Groups/Wangxiaonan/share/rev_MI/EMTAB9817/write/EMTAB9817_processed.h5ad')
# GSE102048 - Kretzschmar 2018 PNAS
Kretzschmar = _read_cached('/nfs01data1/Groups/Wangxiaonan/share/rev_MI/GSE102048/write/GSE102048_processed.h5ad')
# GSE132146 - Ruiz Villalba 2020 Circulation
RuizVillalba = _read_cached('/nfs01data1/Groups/Wangxiaonan/share/rev_MI/GSE132146/write/GSE132146_processed.h5ad')
# GSE146285 - Molenaar 2021 CommBiology
Molenaar = _read_cached('/nfs01data1/Groups/Wangxiaonan/share/rev_MI/GSE146285/write/GSE146285_processed.h5ad')
# GSE153480 - Wang 2020 CellReports
Wang = _read_cached('/nfs01data1/Groups/Wangxiaonan/share/rev_MI/GSE153480/write/GSE153480_processed.h5ad')

In [None]:
def _from_raw(source):
    """Return a new AnnData whose X and var come from ``source.raw``.

    ``obs`` and ``obsm`` are carried over from ``source`` unchanged.
    """
    return anndata.AnnData(
        X=source.raw.X,
        obs=source.obs,
        var=source.raw.var,
        obsm=source.obsm,
    )


# Replace every dataset by its .raw-based counterpart.
Farbehi1 = _from_raw(Farbehi1)
Farbehi2 = _from_raw(Farbehi2)
Forte = _from_raw(Forte)
Tombor1 = _from_raw(Tombor1)
Tombor2 = _from_raw(Tombor2)
Kretzschmar = _from_raw(Kretzschmar)
RuizVillalba = _from_raw(RuizVillalba)
Molenaar = _from_raw(Molenaar)
Wang = _from_raw(Wang)

In [None]:
# Print "<name>(n_obs, n_vars)" for every dataset, in loading order.
for label, dataset in (
    ('Farbehi1', Farbehi1),
    ('Farbehi2', Farbehi2),
    ('Forte', Forte),
    ('Tombor1', Tombor1),
    ('Tombor2', Tombor2),
    ('Kretzschmar', Kretzschmar),
    ('RuizVillalba', RuizVillalba),
    ('Molenaar', Molenaar),
    ('Wang', Wang),
):
    print(label + str(dataset.shape))

In [None]:
# All datasets in one list; the order here is the order used when concatenating.
# Note: `adata` is a list at this point; it is rebound to a single AnnData
# further down, once the combined file is read back.
adata = [
    Farbehi1,
    Farbehi2,
    Forte,
    Tombor1,
    Tombor2,
    Kretzschmar,
    RuizVillalba,
    Molenaar,
    Wang,
]

In [None]:
# Strip the literal '+AC0' fragment from the first dataset's gene names
# (presumably a text-encoding artefact in that file - confirm upstream).
first_dataset = adata[0]
first_dataset.var_names = [name.replace('+AC0', '') for name in first_dataset.var_names]

In [None]:
# Display the first dataset's gene names to eyeball the result of the cleanup.
adata[0].var_names

In [None]:
# for i in range(len(adata)):
#     adata[i].var_names = [x[0].upper()+x[1:].lower() for x in adata[i].var_names] 

In [None]:
# Report each dataset's shape, then de-duplicate its gene names in place.
for dataset in adata:
    print(dataset.shape)
    dataset.var_names_make_unique()

In [None]:
# Combine all datasets into one object; join='outer' keeps the union of genes
# across the datasets. The first dataset acts as the receiver of the call.
head, *tail = adata
adata1 = head.concatenate(*tail, join='outer')

In [None]:
# Display (n_obs, n_vars) of the combined object.
adata1.shape

In [None]:
# Export the combined per-cell metadata table (obs) as CSV, relative to the
# current working directory.
adata1.obs.to_csv('adata_all.csv')

In [None]:
# Save the combined object to disk; it is read back at the start of the
# integration section.
adata1.write('./Analysis_yj/write/combined_org.h5ad')

# Integrate data

In [None]:
# Reload the combined object; from here on `adata` is a single AnnData, not
# the list of datasets it was bound to earlier.
adata = sc.read('./Analysis_yj/write/combined_org.h5ad',cache=True)

In [None]:
# Rebuild the object from its .raw slot (X and var from .raw; obs and obsm
# carried over).  Guarded: when no .raw was stored, `adata.raw` is None and
# dereferencing it raises AttributeError.  The combined file written earlier
# is concatenated from AnnData objects constructed without .raw, so it
# appears to have none - in that case the loaded object is used as it is.
if adata.raw is not None:
    adata = anndata.AnnData(X=adata.raw.X, obs=adata.obs, var=adata.raw.var, obsm=adata.obsm)

In [None]:
# List the available per-cell metadata columns (the 'Index' column is used as
# the batch key below).
print(adata.obs_keys())

In [None]:
# Harmony batch correction over obs['Index']; the corrected embedding is
# stored in obsm['X_pca_harmony_Index'].
# NOTE(review): no `basis` is passed and no PCA is computed on the combined
# object before this call, so the input embedding must already be present in
# obsm of the loaded file - confirm it is a joint PCA rather than per-dataset
# embeddings stacked together.
sc.external.pp.harmony_integrate(adata, key='Index', adjusted_basis = 'X_pca_harmony_Index')

In [None]:
# Sanity check: displays whether the Harmony output key exists in obsm.
'X_pca_harmony_Index' in adata.obsm

In [None]:
# Build the neighbourhood graph on the Harmony-corrected embedding.
sc.pp.neighbors(adata, use_rep = 'X_pca_harmony_Index')

In [None]:
# Compute the UMAP layout from the neighbourhood graph built above.
sc.tl.umap(adata)

In [None]:
# One UMAP panel per metadata column, to inspect how the studies mix after
# integration.
sc.pl.umap(adata, color= ['Condition', 'Day', 'Age', 'StudyID'], wspace=0.3)

In [None]:
# Checkpoint the integrated object (path is relative to the working directory).
adata.write('../../rev_MI/Analysis_mouse/WithCyclings/Outer/combined_outer.h5ad')

# Harmony - hvg all

In [None]:
# Keep the current full matrix reachable through .raw before the object is
# subset to highly variable genes and scaled below.
adata.raw = adata

In [None]:
# Inspect the smallest value in X (presumably to judge how the values were
# normalised before HVG selection - confirm).
np.min(adata.X)

In [None]:
# Inspect the largest value in X (companion to the minimum check above).
np.max(adata.X)

In [None]:
# Flag highly variable genes, evaluated per batch of obs['Index'];
# the result is written into adata.var (inplace=True).
sc.pp.highly_variable_genes(
    adata,
    min_mean=0.02,
    max_mean=3,
    min_disp=0.3,
    batch_key='Index',
    inplace=True,
)

In [None]:
# Diagnostic plot of the highly-variable-gene selection.
sc.pl.highly_variable_genes(adata)

In [None]:
# Number of genes flagged as highly variable.
print(np.sum(adata.var.highly_variable))

In [None]:
# Keep only the highly variable genes; .copy() turns the slice into an
# independent object. The full gene set remains available via adata.raw.
adata = adata[:, adata.var.highly_variable].copy()

In [None]:
# Scale each gene with scanpy's defaults (no max_value clipping is requested)
# so PCA is not dominated by high-variance genes.
sc.pp.scale(adata)
# PCA on the scaled HVG matrix using the ARPACK solver.
sc.tl.pca(adata, svd_solver='arpack')

In [None]:
# Variance explained per principal component, on a log scale.
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
# Harmony batch correction over obs['Index'], this time on the PCA computed
# above for the combined HVG matrix; output goes to
# obsm['X_pca_harmony_Index_combined'].
sc.external.pp.harmony_integrate(adata, key='Index', adjusted_basis = 'X_pca_harmony_Index_combined')

In [None]:
# Rebuild the neighbourhood graph on the new Harmony embedding.
sc.pp.neighbors(adata, use_rep = 'X_pca_harmony_Index_combined')

In [None]:
# Recompute the UMAP layout from the rebuilt neighbourhood graph.
sc.tl.umap(adata)

In [None]:
# Same metadata panels as before, now on the HVG-based integration.
sc.pl.umap(adata, color= ['Condition', 'Day', 'Age', 'StudyID'], wspace=0.3)

In [None]:
# Checkpoint the HVG-based integration; the following sections reload and
# overwrite this same file.
adata.write('../../rev_MI/Analysis_mouse/WithCyclings/Outer/combined_outer_overall.h5ad')

In [None]:
# Reload the checkpoint written above (lets the clustering start from here
# without re-running the integration).
adata = sc.read('../../rev_MI/Analysis_mouse/WithCyclings/Outer/combined_outer_overall.h5ad')

In [None]:
# Leiden clustering at resolution 0.5; labels are stored in obs['Leiden_v1'].
sc.tl.leiden(adata, resolution=0.5, key_added='Leiden_v1')

In [None]:
# UMAP coloured by cluster, with cluster labels drawn on the plot.
sc.pl.umap(adata, color=['Leiden_v1'], legend_loc='on data',legend_fontsize=8)

In [None]:
# Number of cells per Leiden cluster.
adata.obs.Leiden_v1.value_counts()

In [None]:
# UMAP coloured by obs['phase'] (presumably the cell-cycle phase assigned
# during per-dataset processing - confirm; it is not computed in this file).
sc.pl.umap(adata, color='phase')

In [None]:
# Marker genes per candidate cell type, assembled from per-lineage groups.
# The merge order below fixes the key order of `ct`, which is the order the
# groups are passed to the dot plots.
_endothelial_markers = {
    'Endo': ['Fabp4', 'Pecam1'],
    'EndoI': ['Ly6a', 'Sox17'],
    'EndoII': ['Sema3g', 'Efnb2', 'Sox17', 'Hey1'],
    'EndoIII': ['Nr2f2', 'Vwf'],
}
_smc_schwann_markers = {
    'SMC': ['Rgs5', 'Vtn', 'Kcnj8', 'Cspg4', 'Pdgfrb'],
    'Schwann': ['Plp1', 'Kcna1'],
}
_myeloid_markers = {
    'Gran': ['S100a8'],
    'MonoI': ['Plac8'],
    'MonoII': ['Saa3', 'Arg1'],
    'Macro': ['C1qa', 'Cd68', 'Itgam'],
    'DC': ['Cd74', 'Itgam'],
}
_lymphoid_markers = {
    'NK': ['Ms4a4b'],
    'T': ['Cd3e', 'Cd3d', 'Lef1'],
    'CD8T': ['Cd8a'],
}
_fibroblast_markers = {
    'FibroI': ['Col1a1', 'Gsn', 'Dcn'],
    'FibroII': ['Wif1', 'Dkk3'],
    'FibroIII': ['Mt2', 'Timp1'],
    'MyoF': ['Cthrc1', 'Acta2', 'Postn'],
}
_epi_cm_markers = {
    'Epi': ['Clu', 'Dmkn'],
    'CM': ['Myh6', 'Actc1'],
}

ct = {
    **_endothelial_markers,
    **_smc_schwann_markers,
    **_myeloid_markers,
    **_lymphoid_markers,
    **_fibroblast_markers,
    **_epi_cm_markers,
}

In [None]:
# Marker dot plot per Leiden cluster (genes on rows via swap_axes, expression
# standardised per gene), with a dendrogram; saved through scanpy's `save`.
sc.pl.dotplot(
    adata,
    groupby='Leiden_v1',
    var_names=ct,
    cmap=cmap,
    swap_axes=True,
    standard_scale='var',
    dendrogram=True,
    save='Mice_markers.pdf',
)

In [None]:
# Same dot plot without the dendrogram and without saving; clusters keep
# their own category order here.
sc.pl.dotplot(adata, groupby='Leiden_v1', var_names=ct, cmap=cmap, swap_axes=True, standard_scale='var')

In [None]:
# Differential expression per Leiden cluster (t-test); results are stored
# under the key 'DE_Leiden_v1'. n_genes is set to the number of genes in
# .raw, presumably so that every gene is reported rather than a top-N list.
sc.tl.rank_genes_groups(adata, groupby='Leiden_v1', n_genes=adata.raw.shape[1], method='t-test', key_added='DE_Leiden_v1')

In [None]:
# Overwrite the checkpoint so it now includes clustering and DE results.
adata.write('../../rev_MI/Analysis_mouse/WithCyclings/Outer/combined_outer_overall.h5ad')

# Define cell type

In [None]:
# Reload the clustered checkpoint before annotating cell types.
adata = sc.read('../../rev_MI/Analysis_mouse/WithCyclings/Outer/combined_outer_overall.h5ad')

In [None]:
# Manual annotation: which Leiden_v1 clusters belong to which cell type.
_clusters_by_cell_type = {
    'Fibro': (0, 1, 3, 4, 5, 12, 15, 21),
    'Myeloid': (2, 9, 13),
    'CM': (6, 14, 20),
    'Lympho': (7, 8),
    'SMC': (10,),
    'Cycling': (11,),
    'Endo': (16, 18),
    'Epi': (17,),
    'Schwann': (19,),
    'Unknown': (22, 23),
}

# Invert into the lookup used downstream: cluster id (as a string, the way
# the cluster labels are stored) -> cell type, ordered by cluster number.
tochange = {
    str(cluster): cell_type
    for cluster, cell_type in sorted(
        (cluster, cell_type)
        for cell_type, clusters in _clusters_by_cell_type.items()
        for cluster in clusters
    )
}


In [None]:
# Translate every cell's cluster label into its cell type. A cluster id that
# is missing from `tochange` raises KeyError, so unmapped clusters cannot
# slip through silently.
cluster_labels = adata.obs.Leiden_v1
adata.obs['CellType_v1'] = [tochange[cluster] for cluster in cluster_labels]

In [None]:
# Draw the annotated UMAP on an explicit 6x6 axes and save it as PNG.
fig, ax = plt.subplots(figsize=(6,6))
# show=False keeps the figure open so that it can be saved on the next line.
sc.pl.umap(adata, color=['CellType_v1'], legend_fontsize='small', legend_loc='on data', ax=ax, show=False, title='All Cells')
# Saved relative to the working directory; './figures' must already exist.
plt.savefig('./figures/Mice_All.png', bbox_inches = 'tight', dpi=300)

In [None]:
# Overwrite the checkpoint so it now includes the CellType_v1 annotation.
adata.write('../../rev_MI/Analysis_mouse/WithCyclings/Outer/combined_outer_overall.h5ad')

# Split myeloid

In [None]:
# Reload the annotated checkpoint before splitting out sub-populations.
adata = sc.read('../../rev_MI/Analysis_mouse/WithCyclings/Outer/combined_outer_overall.h5ad')

In [None]:
# Select the cells annotated as 'Myeloid' in obs['CellType_v1'].
# The obs column has to be indexed with square brackets (without them the
# line is a syntax error); .copy() makes the subset an independent AnnData
# instead of a view onto `adata`, as is done for the HVG subset earlier on.
myeloid = adata[adata.obs['CellType_v1'] == 'Myeloid'].copy()

In [None]:
# Display (n_cells, n_genes) of the myeloid subset.
myeloid.shape

In [None]:
# Save the myeloid subset.
# NOTE(review): this path is relative to './', whereas the other checkpoints
# use '../../rev_MI/Analysis_mouse/WithCyclings/Outer/' - confirm both resolve
# to the same directory from the notebook's working directory.
myeloid.write('./WithCyclings/Outer/Myeloid.h5ad')