In [None]:
from anndata import AnnData
import scanpy as sc
import squidpy as sq
import numba
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
from anndata import read_h5ad

In [None]:
# Switch matplotlib to interactive mode for this session.
plt.ion()
# Scanpy-wide figure defaults used by every sc.pl.* call below: 200 dpi, 10 pt font.
sc.settings.set_figure_params(dpi=200, fontsize=10)

If you already have `.h5ad` files for the individual weeks, you can concatenate them here. We first clustered each week individually and removed the artefacts, then merged the cleaned objects in this notebook.

In [None]:
# Per-week AnnData files, listed in ascending week order.
week_files = (
    '6week_adata.h5ad',
    '8.5week_adata.h5ad',
    '11week_adata.h5ad',
    '12week_adata.h5ad',
    '13week_adata.h5ad',
)
# Read them in that order and bind each object to its own name.
week6, week8_5, week11, week12, week13 = map(read_h5ad, week_files)

In [None]:
# Stack the five per-week objects into a single AnnData, week 6 first.
# NOTE(review): AnnData.concatenate is flagged as deprecated in recent anndata
# releases (anndata.concat is the suggested replacement, with different merge
# defaults) — confirm before upgrading anndata.
adata = week6.concatenate(week8_5,week11,week12,week13)

# Clustering

Batch correction with ComBat

In [None]:
# ComBat batch correction, using the 'age' column of adata.obs as the batch label.
# NOTE(review): assumes every per-week file already carries an 'age' column in
# .obs, since nothing in this notebook creates it — confirm.
sc.pp.combat(adata, key='age')

In [None]:
# PCA on the current adata.X with scanpy's default settings.
sc.tl.pca(adata)

In [None]:
# Neighbourhood graph with 10 neighbours per cell; used by the UMAP and Leiden steps below.
sc.pp.neighbors(adata,n_neighbors= 10)

In [None]:
# UMAP embedding of the neighbour graph. random_state is written out
# (0 is scanpy's default) so the seed is visible in the notebook.
sc.tl.umap(
    adata,
    min_dist=0.1,
    spread=0.3,
    negative_sample_rate=4,
    random_state=0,
)

In [None]:
# Leiden clustering at resolution 1; labels are written to adata.obs['leiden'].
# random_state is written out (0 is scanpy's default) so the seed is visible.
sc.tl.leiden(
    adata,
    resolution=1,
    random_state=0,
)

In [None]:
# UMAP coloured by Leiden cluster.
sc.pl.umap(adata, color='leiden')


In [None]:
# UMAP coloured by the 'age' column, to inspect how the weeks mix after batch correction.
sc.pl.umap(adata, color='age')


In [None]:
# Palette of 11 hex colours for the 'annotation' groups, stored under the
# '<obs column>_colors' key in adata.uns.
adata.uns['annotation_colors'] = [
    '#e6194b', '#4363d8', '#f58231', '#911eb4',
    '#46f0f0', '#f032e6', '#e6beff', '#bcf60c',
    '#fabebe', '#008080', '#a9a9a9',
]

In [None]:
# Preview of the annotated UMAP. adata.obs['annotation'] is only assigned in a
# later cell of this notebook, so on a fresh kernel it may not exist yet; skip
# the preview in that case instead of aborting "Run All" with a KeyError.
if 'annotation' in adata.obs:
    sc.pl.umap(adata, color='annotation')
else:
    print("Skipping annotation UMAP: adata.obs['annotation'] has not been assigned yet.")

In [None]:
# Dump the current expression matrix (cells x markers) to CSV, labelled with
# the cell and marker names.
t = adata.X
(
    pd.DataFrame(t, index=adata.obs_names, columns=adata.var_names)
    .to_csv('batchcorrected_combat.csv')
)

Plotting marker expressions in each cluster with a dot plot

In [None]:
# Dendrogram over the Leiden groups; the dot plot below is drawn with dendrogram=True.
sc.tl.dendrogram(adata, groupby='leiden')
# Store the cluster labels as plain strings.
adata.obs['leiden'] = adata.obs['leiden'].astype(str)
# Plot every variable in the object.
markers = adata.var_names
sc.pl.dotplot(
    adata,
    markers,
    groupby='leiden',
    dendrogram=True,
    size_title=None,
    save='batch_dot_plot_unannotated.pdf',
)

In [None]:
# resolution=1 annotations
# Leiden cluster ids grouped by the cell type assigned to them. This table is
# specific to the clustering produced above (38 clusters, '0'..'37').
clusters_by_cell_type = {
    'Endothelial': ['0', '1', '2', '10', '22'],
    'Mesenchymal': ['3', '8', '9', '12', '13', '17', '20', '21', '25', '26', '33', '35', '37'],
    'SOX9 high epit.': ['4', '18', '23', '34', '36'],
    'SOX2 high epit.': ['5', '11', '14', '30'],
    'ASM': ['6', '16', '27'],
    'Immune': ['7', '32'],
    'Ki67+ mes.': ['15'],
    'Lymp.endo.': ['19'],
    'Neuronal': ['24'],
    'Chondroblast': ['28'],
    'VSM': ['29', '31'],
}

# Flatten into the cluster-id -> cell-type lookup consumed by Series.map().
old_to_new = {
    cluster: cell_type
    for cell_type, clusters in clusters_by_cell_type.items()
    for cluster in clusters
}

# Series.map() leaves NaN for any id missing from the lookup, which would
# silently drop those cells from every downstream plot. Fail loudly instead
# if the clustering no longer matches the table.
unmapped = sorted(set(adata.obs['leiden'].astype(str)) - set(old_to_new))
if unmapped:
    raise ValueError(
        f"Leiden clusters without a cell-type annotation: {unmapped}. "
        "Update the mapping to match the current clustering."
    )

adata.obs['annotation'] = adata.obs['leiden'].map(old_to_new)

In [None]:
# UMAP coloured by the assigned cell-type annotation; save= also writes the figure to disk.
sc.pl.umap(adata, color='annotation', save='batchcorrected.pdf')

Percentage of cells assigned to each cell type

In [None]:
# Colour for each annotated cell type, keyed by name so that bars keep the
# right colour regardless of how the counts are ordered. These are the same
# hex codes that are stored in adata.uns['annotation_colors'].
cell_type_colors = {
    'Mesenchymal': '#e6beff',
    'Endothelial': '#f58231',
    'SOX2 high epit.': '#fabebe',
    'SOX9 high epit.': '#008080',
    'ASM': '#e6194b',
    'Immune': '#911eb4',
    'Ki67+ mes.': '#46f0f0',
    'Lymp.endo.': '#f032e6',
    'Neuronal': '#bcf60c',
    'VSM': '#a9a9a9',
    'Chondroblast': '#4363d8',
}

# Count cells per annotation directly from the data instead of pasting numbers
# in by hand; value_counts() returns the groups largest-first.
annotation_counts = adata.obs['annotation'].value_counts()
df = pd.DataFrame({
    'Cell Type': annotation_counts.index.astype(str),
    'Count': annotation_counts.to_numpy(),
})

# Calculate the total count
total_count = df['Count'].sum()

# Calculate percentages
df['Percentage'] = (df['Count'] / total_count) * 100

# One colour per bar, looked up by cell-type name (grey for any unexpected label).
colors = [cell_type_colors.get(cell_type, '#808080') for cell_type in df['Cell Type']]

# Bar plot of the percentage of cells in each cell type
fig, ax = plt.subplots(figsize=(12, 8))
bars = ax.bar(df['Cell Type'], df['Percentage'], color=colors)
ax.grid(False)
ax.set_xlabel('Cell Type')
ax.set_ylabel('Percentage')
ax.set_title('Percentage of Each Cell Type')
ax.tick_params(axis='x', labelrotation=45)  # Rotate labels to fit them better

# Save, then show the plot
fig.savefig('percentages of the cell types.pdf')
plt.show()

In [None]:
# Marker dot plot grouped by the assigned cell-type annotation.
adata.obs['leiden'] = adata.obs['leiden'].astype(str)
markers = adata.var_names
sc.pl.dotplot(
    adata,
    markers,
    groupby='annotation',
    dendrogram=True,
    size_title=None,
    save='batch_dotplot.pdf',
)

In [None]:
# sc.pl.embedding_density only draws densities that sc.tl.embedding_density has
# already stored in adata (obs column plus a '<key>_params' entry in uns), and
# nothing earlier in this notebook computes them. Compute them here when they
# are missing; the check avoids repeating the estimation on re-runs.
density_key = 'umap_density_age'
if density_key not in adata.obs or f'{density_key}_params' not in adata.uns:
    sc.tl.embedding_density(adata, basis='umap', groupby='age')

# Per-age density of cells on the UMAP embedding.
sc.pl.embedding_density(adata, groupby='age', save='batch_age_density.pdf')

In [None]:
# Save the merged AnnData object, with everything added to it above, to disk.
adata.write_h5ad('090524_batchcorr.h5ad')