In [None]:
# Import dependencies
%matplotlib inline
import os
import numpy as np
import scanpy as sc
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import anndata as ad
import warnings
import cellhint 

# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas chained-assignment and AnnData
# implicit-copy warnings, so mistakes of that kind will not be reported.
warnings.simplefilter("ignore")

matplotlib.rcParams['font.family'] = 'sans-serif'

# Initialize random seeds. Seeding only Python's `random` leaves NumPy's
# global generator unseeded, so both are seeded with the same value.
import random
SEED = 111
random.seed(SEED)
np.random.seed(SEED)

# Print date and time:
import datetime
e = datetime.datetime.now()
print(f"Current date and time = {e}")

# Machine-specific working directory; every relative path below is resolved
# against it.
# wdir = "/ceph/project/tendonhca/akurjan/analysis/"
wdir = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/"
os.chdir(wdir)

# folder structures
INPUT_FOLDERNAME = "adult/integration/results/"
RESULTS_FOLDERNAME = "adult/annotation/results/"
FIGURES_FOLDERNAME = "adult/annotation/figures/"

# exist_ok=True replaces the check-then-create pattern and is a no-op when the
# folder is already there.
for output_folder in (RESULTS_FOLDERNAME, FIGURES_FOLDERNAME):
    os.makedirs(output_folder, exist_ok=True)

# Set folder for saving figures into
sc.settings.figdir = FIGURES_FOLDERNAME

def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Write `fig` to `folder`/`fname` in SVG (vector) format.

    `fig` only has to provide a matplotlib-style `savefig(path, format=...)`
    method; nothing is returned.
    """
    destination = os.path.join(folder, fname)
    fig.savefig(destination, format='svg')

def plot_umaps(anndata, parameters: list, filename: str):
    """
    Draw one UMAP panel per entry of `parameters`, stacked vertically, and save it.

    Parameters
    ----------
    anndata
        Object handed unchanged to `sc.pl.umap`.
    parameters
        Names to colour by, one panel each. Must contain at least one entry.
    filename
        File name passed to `savesvg` (written to its default folder).

    Raises
    ------
    ValueError
        If `parameters` is empty.
    """
    n_plots = len(parameters)
    if n_plots == 0:
        raise ValueError("plot_umaps needs at least one parameter to colour by")

    # squeeze=False keeps `axs` two-dimensional even for a single panel; with
    # the default, one panel yields a bare Axes and indexing it fails.
    fig, axs = plt.subplots(n_plots, 1, figsize=(10, 4*n_plots), squeeze=False)
    for ax, param in zip(axs[:, 0], parameters):
        sc.pl.umap(anndata, color=param, ax=ax, show=False, frameon=False, s=2)
        ax.set_title(param)
    fig.tight_layout()
    savesvg(filename, fig)
    plt.show()
    
# Global scanpy settings: logging verbosity, a printout of package versions
# (useful as a provenance record), and default figure parameters.
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.set_figure_params(dpi=150, fontsize=10, dpi_save=600)

In [None]:
# Load the combined object from the integration results folder.
combined_path = os.path.join(INPUT_FOLDERNAME, 'adultdev_combined_scANVI.h5ad')
adata = sc.read_h5ad(combined_path)
adata.var_names_make_unique()
adata

In [None]:
print(adata.X[0:10,0:10])

In [None]:
# Replace the working object with the contents of `.raw`, then drop genes with
# fewer than 50 total counts.
# NOTE(review): this rebinds `adata`, so the cell is not safely re-runnable
# without reloading the file first.
adata = adata.raw.to_adata()
sc.pp.filter_genes(adata, min_counts=50, inplace=True)

In [None]:
print(adata.X[0:10,0:10])
adata.layers["counts"] = adata.X.copy()

In [None]:
sc.pp.normalize_total(adata, target_sum=None, inplace=True)

In [None]:
print(adata.X[0:10,0:10])

In [None]:
sc.pp.log1p(adata)
print(adata.X[0:10, 0:10])
adata.layers["log1p_norm"] = adata.X.copy()

In [None]:
sc.tl.leiden(adata, resolution=0.6, restrict_to=('group', ['Adult']))

In [None]:
sc.pl.umap(adata, color=["leiden_R", "tendon_status", "annotations_orig_full"], 
           legend_loc="on data", frameon=False, legend_fontsize=5)

In [None]:
# Collapse the listed Leiden clusters into a single 'AdultFibros' label.
fibroblast_clusters = ['Adult,13', 'Adult,2', 'Adult,1', 'Adult,9', 'Adult,19', 'Adult,14']
is_fibroblast_cluster = adata.obs['leiden_R'].isin(fibroblast_clusters)
adata.obs['leiden_R'] = np.where(
    is_fibroblast_cluster, 'AdultFibros', adata.obs['leiden_R']
)
sc.pl.umap(
    adata,
    color=["leiden_R", "annotations_orig_full"],
    legend_loc="on data",
    frameon=False,
    legend_fontsize=5,
)

In [None]:
adata.obs['leiden_fibros'] = adata.obs['leiden_R'].copy()

In [None]:
sc.tl.leiden(adata, resolution=0.1, restrict_to=('leiden_fibros', ['AdultFibros']), key_added='leiden_fibros')

In [None]:
sc.pl.umap(adata, color=["leiden_fibros", "annotations_orig_full"], 
           legend_loc="on data", frameon=False, legend_fontsize=5)

In [None]:
# Remove clusters with fewer than MIN_CELLS_PER_CLUSTER cells.
MIN_CELLS_PER_CLUSTER = 40

counts = adata.obs['leiden_fibros'].value_counts()
groups_to_remove = counts[counts < MIN_CELLS_PER_CLUSTER].index
# .copy() turns the subset into a standalone AnnData. Without it `adata` is a
# view, and the following cells (which add obs columns) write into that view.
adata = adata[~adata.obs['leiden_fibros'].isin(groups_to_remove)].copy()
print(adata.obs['leiden_fibros'].value_counts())

In [None]:
adata.obs['leiden_06'] = adata.obs['leiden_fibros'].copy()
adata.obs['annotations_new'] = adata.obs['annotations_orig_full'].copy()

In [None]:
# Work on plain strings so labels can be overwritten freely.
adata.obs['leiden_fibros'] = adata.obs['leiden_fibros'].astype(str)
adata.obs['annotations_new'] = adata.obs['annotations_new'].astype(str)

# Cells whose annotation does not start with 'Adult_' take that annotation as
# their label; the remaining cells keep their Leiden cluster.
mask = ~adata.obs['annotations_new'].str.startswith('Adult_')

# Assign through .loc on the DataFrame itself. Calling .update() on the Series
# returned by adata.obs[...] edits a temporary object under pandas
# Copy-on-Write and would leave adata.obs unchanged.
adata.obs.loc[mask, 'leiden_fibros'] = adata.obs.loc[mask, 'annotations_new']
print(adata.obs['leiden_fibros'].value_counts())

In [None]:
sc.pl.umap(adata, color=["leiden_fibros", "tendon_status", "annotations_orig_full"], 
           legend_loc="on data", frameon=False, legend_fontsize=6)

In [None]:
sc.tl.rank_genes_groups(adata, groupby='leiden_fibros', method='wilcoxon', key_added='wilcoxon_06', 
                        use_raw=False, layer='log1p_norm')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False, key='wilcoxon_06')

In [None]:
sc.pl.umap(adata, color=['leiden_fibros', 'annotations_orig_full'], legend_loc='on data', frameon=False, legend_fontsize=4)

In [None]:
# Marker panels for the tendon stem/progenitor cell (TSPC) groups.
canonical_TSPC = ['ENG', 'THY1', 'CD44', 'NES', 'MCAM']
sheath_TSPC = ['TPPP3', 'PDGFRA', 'ACTA2']
injury_TSPC = ['AXIN2', 'GLAST']

# Marker panel per foetal fibroblast population, plus the TSPC panels above.
foetalmarkers = {
'ABI3BP fibroblasts': ['ABI3BP', 'GAS2', 'SCX', 'MKX', 'SOX5', 'COL1A1', 'THBS2', 'TNMD', 'KERA', 'FMOD', 'ACAN', 'COMP', 'COL5A1', 'COL6A1', 'COL11A1', 'COL11A2', 'COL12A1', 'COL14A1'],
'COL6A6/COL3A1 fibroblasts': ['COL6A6', 'FNDC1', 'TSHZ2', 'PLAGL1', 'DCLK1', 'COL3A1', 'COL1A1', 'COL6A6', 'VCAN', 'FBN1', 'THBS2', 'COL5A1', 'COL6A1', 'COL11A1', 'COL11A2', 'COL12A1', 'COL14A1', 'TPPP3', 'PDGFRA', 'LUM'],
'FGF14 fibroblasts': ['FGF14', 'THBS4', 'COL1A1', 'COL3A1', 'COL4A1', 'COL5A1', 'COL6A1', 'COL11A1', 'COL11A2', 'COL12A1', 'EBF1', 'TSHZ2', 'DLK1', 'BMP5', 'SLIT3', 'FSTL5', 'CDH12', 'COL22A1'],
'NEGR1 fibroblasts': ['NEGR1', 'SCN7A', 'TNXB', 'LUM', 'VCAN', 'COL1A1', 'COL4A1', 'COL22A1', 'NOVA1', 'NAV3', 'SEMA5A'],
'canonical TSPCs': canonical_TSPC,
'sheath TSPCs': sheath_TSPC,
'injury TSPCs': injury_TSPC
}

all_genes_in_adata = set(adata.var_names)

# Keep only the markers present in the dataset; absent genes are dropped
# without any message.
foetalmarkers_filtered = {
    panel_name: [marker for marker in panel if marker in all_genes_in_adata]
    for panel_name, panel in foetalmarkers.items()
}

In [None]:
sc.tl.dendrogram(adata, groupby='leiden_fibros', use_rep='X_scANVI')

In [None]:
#sc.pp.scale(adata)
#print(adata.X[0:5,0:5])
#adata.layers['scaled'] = adata.X.copy()

In [None]:
name = "combineddata"

In [None]:
sc.pl.dotplot(adata, foetalmarkers_filtered['ABI3BP fibroblasts'], 'leiden_fibros', dendrogram=True,
              use_raw=False, layer="log1p_norm", 
              #vmin=-2, vmax=2, 
              cmap='Reds', figsize=(8,14),
              title='ABI3BP Foetal Fibroblast Markers',
              save=f'{name}_annotation_abi3bp.svg')

In [None]:
sc.pl.dotplot(adata, foetalmarkers_filtered['COL6A6/COL3A1 fibroblasts'], 'leiden_fibros', dendrogram=True,
              use_raw=False, layer="log1p_norm", #vmin=-2, vmax=2, 
              cmap='Reds', figsize=(8,14),
              title='COL3/COL6 Foetal Fibroblast Markers',
              save=f'{name}_annotation_col3col6.svg')

In [None]:
sc.pl.dotplot(adata, foetalmarkers_filtered['FGF14 fibroblasts'], 'leiden_fibros', dendrogram=True,
              use_raw=False, layer="log1p_norm", #vmin=-2, vmax=2, 
              cmap='Reds', figsize=(8,14),
             title='FGF14 Foetal Fibroblast Markers',
             save=f'{name}_annotation_fgf14.svg')

In [None]:
sc.pl.dotplot(adata, foetalmarkers_filtered['NEGR1 fibroblasts'], 'leiden_fibros', dendrogram=True,
              use_raw=False, layer="log1p_norm", #vmin=-2, vmax=2, 
              cmap='Reds', figsize=(8,14),
             title='NEGR1 Foetal Fibroblast Markers',
             save=f'{name}_annotation_negr1.svg')

In [None]:
sc.pl.dotplot(adata, foetalmarkers_filtered, 'leiden_fibros', dendrogram=True,
              use_raw=False, layer="log1p_norm", #vmin=-2, vmax=2, 
              cmap='Reds', figsize=(25,14),
              save=f'{name}_annotation_full.svg'
              )

In [None]:
sc.pl.dotplot(adata, foetalmarkers_filtered, 'leiden_fibros', dendrogram=True,
             use_raw=False, layer="log1p_norm", vmin=0, vmax=5, cmap='Reds', figsize=(25,14),
             save=f'{name}_annotation_full_normalised.svg'
             )

In [None]:
# Dot plot of all marker panels grouped by the prefix-stripped annotations.
# NOTE(review): the 'annotations_upd2' column is only created much further down
# (in the CellHint section), so this cell fails on a fresh top-to-bottom run.
sc.pl.dotplot(adata, foetalmarkers_filtered, 'annotations_upd2', dendrogram=True,
             use_raw=False, layer="log1p_norm", vmin=0, vmax=5, cmap='Reds', figsize=(25,14),
             save=f'{name}_annotation_upd_normalised.svg'
             )

In [None]:
# Flatten the per-group ranking arrays into one wide table: one column per
# (group, statistic) pair, named '<group>_<statistic>'.
result = adata.uns['wilcoxon_06']
groups = result['names'].dtype.names
stat_keys = ['names', 'scores', 'logfoldchanges', 'pvals', 'pvals_adj']

dge_columns = {}
for group_name in groups:
    for stat_key in stat_keys:
        dge_columns[group_name + '_' + stat_key] = result[stat_key][group_name]

df = pd.DataFrame(dge_columns)
dge_csv_path = os.path.join(RESULTS_FOLDERNAME, 'fulldata_wilcoxon_DGE_leiden_06adult_01fibros.csv')
df.to_csv(dge_csv_path)

In [None]:
df

In [None]:
adata.write(os.path.join(RESULTS_FOLDERNAME, 'adultdev_combined_scANVI.h5ad'))

In [None]:
data = pd.read_csv(os.path.join(RESULTS_FOLDERNAME, "Barcodes_and_celltypes_for_Alina.csv"), index_col='barcodes')
data

In [None]:
# Score every cell against each foetal fibroblast marker panel. The control
# gene set is sized to match the panel (ctrl_size = number of panel genes).
marker_score_names = {
    'ABI3BP fibroblasts': 'abi3bp_marker_score',
    'COL6A6/COL3A1 fibroblasts': 'col6a6/col3a1_marker_score',
    'FGF14 fibroblasts': 'fgf14_marker_score',
    'NEGR1 fibroblasts': 'negr1_marker_score',
}

for panel_name, score_column in marker_score_names.items():
    panel_genes = foetalmarkers_filtered[panel_name]
    sc.tl.score_genes(
        adata,
        gene_list=panel_genes,
        score_name=score_column,
        use_raw=False,
        ctrl_size=len(panel_genes),
        n_bins=50,
        random_state=1,
    )

In [None]:
sc.pl.umap(adata, color=['abi3bp_marker_score', 'col6a6/col3a1_marker_score', 'fgf14_marker_score', 'negr1_marker_score'], 
           frameon=False,
           sort_order=False,
           cmap="bwr", vmax="p95", vcenter=0,
           save=f'{name}_marker_scores_95perc.svg'
          )

In [None]:
sc.pl.umap(adata, color=['abi3bp_marker_score', 'col6a6/col3a1_marker_score', 'fgf14_marker_score', 'negr1_marker_score'], 
           frameon=False,
           sort_order=False,
           cmap="bwr", vmax="p99", vcenter=0,
           save=f'{name}_marker_scores_99perc.svg'
          )

In [None]:
adata.write(os.path.join(RESULTS_FOLDERNAME, 'adultdev_combined_scANVI.h5ad'))

In [None]:
del adata.raw

In [None]:
# Load the foetal object and keep the original scANVI labels for reference.
foetal = sc.read_h5ad('foetal/results/scVI/dev_scANVI.h5ad')
foetal.obs['C_scANVI_orig'] = foetal.obs['C_scANVI']

# Merge the two ABI3BP GAS2 sub-clusters into one label.
abi3bp_subclusters = ['ABI3BP GAS2 Fibroblasts 1', 'ABI3BP GAS2 Fibroblasts 2']
foetal.obs['C_scANVI'] = np.where(
    foetal.obs['C_scANVI'].isin(abi3bp_subclusters),
    'ABI3BP GAS2 Fibroblasts',
    foetal.obs['C_scANVI'],
)
foetal

In [None]:
sc.tl.rank_genes_groups(foetal, groupby='C_scANVI', method='wilcoxon', key_added='wilcoxon_scanvi', 
                        use_raw=False, layer='log1p_norm')
sc.pl.rank_genes_groups(foetal, n_genes=25, sharey=False, key='wilcoxon_scanvi')

In [None]:
# Collect the annotation categories whose name starts with 'Adult_'.
# NOTE(review): 'annotations_upd' is only created in a later cell (the one that
# reads the barcode/cell-type CSV), and `.cat` needs that column to be
# categorical, so this cell fails on a fresh top-to-bottom run.
categories = adata.obs['annotations_upd'].cat.categories
adult_categories = [cat for cat in categories if cat.startswith('Adult_')]
adult_categories

In [None]:
foetal_uns_dict = foetal.uns['wilcoxon_scanvi']
foetal_ct = foetal.obs['C_scANVI']

In [None]:
del foetal

In [None]:
dev_categories = [cat for cat in categories if cat not in adult_categories]
dev_categories

In [None]:
foetal_categories = ['ABI3BP GAS2 Fibroblasts',
 'COL3A1 PI16 Fibroblasts',
 'COL6A6 FNDC1 Fibroblasts',
 'Chondrocytes',
 'FGF14 THBS4 Fibroblasts',
 'Immune Cells',
 'NEGR1 SCN7A Fibroblasts',
 'Nervous System Cells',
 'Satellite Cells',
 'Skeletal Myocytes',
 'Smooth Myocytes',
 'lymEndothelial Cells',
 'vasEndothelial Cells']

In [None]:
sc.tl.rank_genes_groups(adata, groupby='annotations_upd', 
                        groups=foetal_categories,
                        method='wilcoxon', key_added='wilcoxon_dev', 
                        use_raw=False, layer='log1p_norm')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False, key='wilcoxon_dev')

In [None]:
result = adata.uns['wilcoxon_dev']
groups = result['names'].dtype.names
df = pd.DataFrame(
    {group + '_' + key: result[key][group]
    for group in groups 
    for key in ['names','scores','logfoldchanges', 'pvals', 'pvals_adj']})
df.to_csv(os.path.join(RESULTS_FOLDERNAME, 'devonly_wilcoxon_DGE.csv'))

In [None]:
#adata.uns['log1p']['base'] = None
sc.tl.rank_genes_groups(adata, groupby='annotations_upd', 
                        groups=adult_categories,
                        method='wilcoxon', key_added='wilcoxon_adult', 
                        use_raw=False, layer='log1p_norm')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False, key='wilcoxon_adult')

In [None]:
result = adata.uns['wilcoxon_adult']
groups = result['names'].dtype.names
df = pd.DataFrame(
    {group + '_' + key: result[key][group]
    for group in groups 
    for key in ['names','scores','logfoldchanges', 'pvals', 'pvals_adj']})
df.to_csv(os.path.join(RESULTS_FOLDERNAME, 'adultonly_wilcoxon_DGE.csv'))

In [None]:
# Top 50 ranked genes per foetal cell type, taken from the 'wilcoxon_dev' ranking.
foetal_markers_dict_2 = {
    cell_type: adata.uns['wilcoxon_dev']['names'][cell_type][0:50]
    for cell_type in foetal_categories
}

# Overlap between those foetal marker sets and the 'wilcoxon_adult' ranking.
cell_annotation_norm = sc.tl.marker_gene_overlap(adata, foetal_markers_dict_2, key='wilcoxon_adult', normalize='reference')

fig, ax = plt.subplots(figsize=(12,14))
sns.heatmap(cell_annotation_norm.T, linewidths=0.5, linecolor='white', cbar=True, annot=True, ax=ax)
ax.grid(False)
# Pass the Figure itself rather than the pyplot module, so the file written is
# this figure and not whichever figure pyplot currently treats as active.
savesvg('onefile_Foetal_vs_Adult_similaritymatrix_50degs.svg', fig)
plt.show()

In [None]:
g = sns.clustermap(cell_annotation_norm, figsize=(13, 8), 
                   linewidths=0.5, linecolor='white', cbar=True, annot=False,
                   cmap='Reds', 
                   cbar_kws={'label': 'Overlap Proportion', 'shrink': 0.9, 'use_gridspec': False, 'location': "left"})
g.cax.set_position([0.035, 0.43, .02, 0.2]) # [left, bottom, width, height]
g.cax.yaxis.set_ticks_position('right')
g.ax_heatmap.set_xlabel('Adult cell types')
g.ax_heatmap.set_ylabel('Foetal cell types') 
g.ax_heatmap.grid(False)  
g.cax.grid(False)  
savesvg('onefile_Foetal_vs_Adult_similaritymatrix_clustered_50degs.svg', g)
plt.show()

In [None]:
adata.X.max()

In [None]:
# Score every cell against each foetal cell type's top-gene set and remember
# the name of each score column that was written.
ms_list = []
for foetal_type in foetal_markers_dict_2:
    top_genes = foetal_markers_dict_2[foetal_type]
    score_column = f'{foetal_type}_ms'
    sc.tl.score_genes(
        adata,
        gene_list=top_genes,
        use_raw=False,
        score_name=score_column,
        ctrl_size=len(top_genes),
        n_bins=50,
        random_state=1,
    )
    ms_list.append(score_column)
ms_list

In [None]:
# Add 'grouptype' as an extra panel. Guarded so that re-running this cell does
# not append a duplicate entry.
if 'grouptype' not in ms_list:
    ms_list.append('grouptype')

In [None]:
sc.pl.umap(adata, color=ms_list,
           frameon=False,
           sort_order=False,
           cmap="bwr", vmax="p95", vcenter=0, ncols=5,
           save='_foetalct_marker_scores_95perc_50degs.svg'
          )

In [None]:
sc.pl.umap(adata, color=ms_list,
           frameon=False,
           sort_order=False,
           cmap="bwr", vmax="p99", vcenter=0, ncols=4,
           save='_foetalct_marker_scores_99perc_50degs4cols.svg'
          )

In [None]:
# Map every 'annotations_orig_full' category to the colour stored for it.
all_annotations = {}
if 'annotations_orig_full_colors' in adata.uns:
    cell_types = adata.obs['annotations_orig_full'].cat.categories
    colors = adata.uns['annotations_orig_full_colors']
    all_annotations = dict(zip(cell_types, colors))
else:
    # Without a stored palette everything below falls back to light gray.
    print("Color palette for 'annotations_orig_full' not found. Run a plot first.")


# Annotations that occur among 'Foetal' cells keep their colour; every other
# category is drawn in light gray.
group_annotations = adata.obs.loc[adata.obs['group'] == 'Foetal', 'annotations_orig_full']
unique_annotations = list(pd.unique(group_annotations))
highlighted_clusters = {annotation: all_annotations[annotation] for annotation in unique_annotations
                        if annotation in all_annotations}
unique_clusters = adata.obs['annotations_orig_full'].cat.categories
color_palette = [highlighted_clusters.get(cluster, 'lightgray') for cluster in unique_clusters]

In [None]:
sc.pl.umap(adata, color=['annotations_orig_full'], s=1,
           legend_loc='on data', legend_fontsize=3, palette=color_palette,
           frameon=False, save='_scANVI_foetalct_annotated.svg'
          )

In [None]:
# `ms_list2` is never assigned in this notebook; show the list of score
# columns that the scoring loop above builds.
ms_list

In [None]:
data = pd.DataFrame(adata.obs[['annotations_orig_full', 'age', 'tendon_status', 'ABI3BP GAS2 Fibroblasts_ms',
 'COL3A1 PI16 Fibroblasts_ms', 'COL6A6 FNDC1 Fibroblasts_ms', 'FGF14 THBS4 Fibroblasts_ms', 'NEGR1 SCN7A Fibroblasts_ms']])
data

In [None]:
data['annotations_orig_full'] = data['annotations_orig_full'].astype(str).str.strip()
data['tendon_status'] = data['tendon_status'].astype(str).str.strip()
data['age'] = data['age'].astype(str).str.strip()
print(data.dtypes)

In [None]:
data = data.sort_values(by=['annotations_orig_full', 'age', 'tendon_status'])

aggregated_data = data.groupby(['annotations_orig_full', 'age', 'tendon_status']).mean()
aggregated_data

In [None]:
ms_columns = ['ABI3BP GAS2 Fibroblasts_ms', 'COL3A1 PI16 Fibroblasts_ms', 
              'COL6A6 FNDC1 Fibroblasts_ms', 'FGF14 THBS4 Fibroblasts_ms', 
              'NEGR1 SCN7A Fibroblasts_ms']

# Create a dictionary to specify aggregation functions for each column
agg_funcs = {col: ['mean', 'max', 'min', 'count'] for col in ms_columns}

# Group by annotations, age, and tendon status, then aggregate
aggregated_data = data.groupby(['annotations_orig_full', 'age', 'tendon_status']).agg(agg_funcs)
aggregated_data

In [None]:
aggregated_data.loc['Adult_Quad_Osteoblasts']

In [None]:
aggregated_data.loc['Adult_Quad_Dividing fibroblasts / mural cells']

In [None]:
aggregated_data.to_csv(os.path.join(RESULTS_FOLDERNAME, 'ct_data_summaryofmsscores.csv'))

In [None]:
sc.pl.umap(adata, color='annotations_orig_full')

In [None]:
sc.pl.umap(adata, color=['C_scANVI'], legend_loc='on data',
          legend_fontsize=5, frameon=False)

In [None]:
adata

In [None]:
data = pd.read_csv(os.path.join(RESULTS_FOLDERNAME, "Barcodes_and_celltypes_for_Alina.csv"), index_col='barcodes')
data

In [None]:
data['cluster_id'] = 'Adult_Quad' +'_'+ data['cluster_id'].astype(str)
data

In [None]:
adata.obs['annotations_upd'] = adata.obs['annotations_orig_full'].copy()

barcode_to_cluster_id = data['cluster_id'].to_dict()

# Snapshot the current labels once as a plain dict, so the per-barcode fallback
# is a dict lookup instead of a DataFrame .loc call for every cell.
current_annotation = adata.obs['annotations_upd'].to_dict()

def update_annotation(barcode):
    """Return the cluster id supplied for `barcode`, or its current annotation if none was supplied."""
    if barcode in barcode_to_cluster_id:
        return barcode_to_cluster_id[barcode]
    return current_annotation[barcode]

# Apply the function to the annotations column
adata.obs['annotations_upd'] = adata.obs.index.map(update_annotation)

adata.obs['annotations_upd'].value_counts()

In [None]:
sc.pl.umap(adata, color=['annotations_upd'], legend_loc='right margin',
          legend_fontsize=4, frameon=False)

In [None]:
plt.figure(figsize=(30, 35))
sc.tl.dendrogram(adata, 'annotations_upd', use_rep='X_scANVI')
ax_list = sc.pl.correlation_matrix(adata, 'annotations_upd', cmap='PuOr_r', show=False)
for ax in ax_list:
    ax.grid(False)
plt.savefig(os.path.join(FIGURES_FOLDERNAME,'annotation_upd_correlation.svg'), bbox_inches='tight')
plt.show()

In [None]:
# Assign every cell a coarse lineage label derived from 'annotations_orig_full'.
adata.obs['annotations_lineages'] = adata.obs.annotations_orig_full.copy()

# Annotation label -> lineage label. Labels missing from this dictionary are
# not mapped by the `.map(lineages)` call below (pandas yields NaN for them).
# NOTE(review): 'Adult_Quad_Osteoclasts' is grouped under
# 'Mesenchymal: osteogenic'; osteoclasts are generally described as
# myeloid-derived, so confirm this grouping is intended.
lineages = {
    "MSC Precursors": "Mesenchymal: stem",
    "COL6A6 FSTL1 DCLK1 Progenitors": "Mesenchymal: tenogenic",
    "SOX5 CREB5 Chondrocyte Progenitors": "Mesenchymal: chondrogenic/tenogenic",
    "Embryonic Chondrocytes": "Mesenchymal: chondrogenic",
    "RUNX2 THBS2 COL11A1 Progenitors": "Mesenchymal: chondrogenic/tenogenic",
    "SCX FGF14 THBS4 FSTL5 Progenitors": "Mesenchymal: tenogenic",
    "MKX TNMD ABI3BP GAS2 Progenitors": "Mesenchymal: tenogenic",
    "ABI3BP GAS2 Fibroblasts": "Mesenchymal: tenogenic",
    "COL6A6 FNDC1 Fibroblasts": "Mesenchymal: tenogenic",
    "COL3A1 PI16 Fibroblasts": "Mesenchymal: tenogenic",
    "vasEndothelial Cells": "Endothelial",
    "NEGR1 SCN7A Fibroblasts": "Mesenchymal: myogenic/tenogenic",
    "Smooth Myocytes": "Mesenchymal: myogenic",
    "Chondrocytes": "Mesenchymal: chondrogenic",
    "Immune Cells": "Immune Cells: myeloid",
    "FGF14 THBS4 Fibroblasts": "Mesenchymal: tenogenic",
    "Satellite Cells": "Mesenchymal: myogenic",
    "Skeletal Myocytes": "Mesenchymal: myogenic",
    "Nervous System Cells": "Neural",
    "lymEndothelial Cells": "Endothelial",
    "Adult_Ach_Adipocytes": "Mesenchymal: adipogenic",
    "Adult_Ach_Mural cells": "Mesenchymal: mural",
    "Adult_Ach_ITGA10hi Fibroblasts": "Mesenchymal: tenogenic",
    "Adult_Ach_Vascular endothelial cells": "Endothelial",
    "Adult_Ach_NEGR1hi Fibroblasts": "Mesenchymal: tenogenic",
    "Adult_Ach_Macrophages": "Immune Cells: myeloid",
    "Adult_Ach_T cells": "Immune Cells: lymphoid",
    "Adult_Ach_Lymphatic endothelial cells": "Endothelial",
    "Adult_Ach_Slow-twitch skeletal muscle cells": "Mesenchymal: myogenic",
    "Adult_Ach_Nervous system cells": "Neural",
    "Adult_Ach_Fast-twitch skeletal muscle cells": "Mesenchymal: myogenic",
    "Adult_Ach_Granulocytes": "Immune Cells: myeloid",
    "Adult_Ach_Satellite cells": "Mesenchymal: myogenic",
    "Adult_Ach_Transitional skeletal muscle cells": "Mesenchymal: myogenic",
    "Adult_Ach_B cells": "Immune Cells: lymphoid",
    "Adult_Quad_Dividing macrophages": "Immune Cells: myeloid",
    "Adult_Quad_Macrophages": "Immune Cells: myeloid",
    "Adult_Quad_Vascular endothelial cells": "Endothelial",
    "Adult_Quad_Fibroblasts": "Mesenchymal: tenogenic",
    "Adult_Quad_Osteoclasts": "Mesenchymal: osteogenic",
    "Adult_Quad_Mural cells": "Mesenchymal: mural",
    "Adult_Quad_Dendritic cells": "Immune Cells: myeloid",
    "Adult_Quad_Osteoblasts": "Mesenchymal: osteogenic",
    "Adult_Quad_Lymphatic endothelial cells": "Endothelial",
    "Adult_Quad_Dividing fibroblasts / mural cells": "Mesenchymal: tenogenic/mural",
    "Adult_Quad_T cells": "Immune Cells: lymphoid",
    "Adult_Quad_Adipocytes": "Mesenchymal: adipogenic",
    "Adult_Quad_Granulocytes": "Immune Cells: myeloid",
    "Adult_Quad_Nervous system cells": "Neural"
}

# Replace each annotation with its lineage, then show how many cells each lineage has.
adata.obs['annotations_lineages'] = adata.obs['annotations_lineages'].map(lineages)
adata.obs['annotations_lineages'].value_counts()

In [None]:
# UMAP coloured by lineage, saved with the '_lineage_annotations.svg' suffix.
# NOTE(review): `color_map` is first assigned in a later cell, so this cell
# fails on a fresh top-to-bottom run.
sc.pl.umap(adata, color='annotations_lineages', frameon=False, palette=color_map, s=2,
          #legend_loc='on data', legend_fontsize=5
           save='_lineage_annotations.svg'
          )

In [None]:
# One fixed colour per lineage, assigned in sorted lineage order.
colors = ['#98df8a', '#ff7f00', '#b15928', '#33a02c', '#e31a1c', '#1f78b4', '#a6cee3',
          '#fdbf6f', '#b2df8a', '#fb9a99', '#00aebc',
          '#6a3d9a',
          '#fffb00',
          '#c5b0d5']

# Read the lineage names from adata.obs directly, so this cell does not need
# `percentages`, which is only computed in the following cell.
lineage_names = sorted(adata.obs['annotations_lineages'].dropna().astype(str).unique())
if len(lineage_names) > len(colors):
    # zip() would silently leave the extra lineages without a colour.
    raise ValueError(
        f"{len(lineage_names)} lineages but only {len(colors)} colours defined"
    )
color_map = dict(zip(lineage_names, colors))
color_map

In [None]:
# Copy the two columns so that the assignment below edits an independent frame
# rather than a slice of adata.obs.
df = adata.obs[['annotations_lineages', 'grouptype']].copy()
grouptype_order = [
    'Embryonic_Quad_Healthy', 'Foetal_Quad_Healthy', 'Foetal_Ach_Healthy',
    'Adult_Ach_Healthy', 'Adult_Quad_Healthy', 'Adult_Quad_Rupture'
]
df['grouptype'] = pd.Categorical(df['grouptype'], categories=grouptype_order, ordered=True)

# observed=False keeps a row for every entry of grouptype_order, so the
# .loc[grouptype_order] lookup below cannot hit a missing label.
counts = df.groupby(['grouptype', 'annotations_lineages'], observed=False).size().unstack(fill_value=0)
# Row-wise percentages: each group's lineages sum to 100.
percentages = counts.div(counts.sum(axis=1), axis=0) * 100
percentages = percentages.loc[grouptype_order]

ax = percentages.plot(kind='bar', stacked=True, figsize=(3, 4), color=[color_map[col] for col in percentages.columns])

ax.set_ylabel('Proportion of total (%)')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(False)
plt.tight_layout()
plt.savefig(os.path.join(FIGURES_FOLDERNAME, 'bar_lineages.svg'), bbox_inches='tight')
plt.show()

In [None]:
plt.figure(figsize=(30, 35))
sc.tl.dendrogram(adata, 'annotations_lineages', use_rep='X_scANVI', use_raw=False)
ax_list = sc.pl.correlation_matrix(adata, 'annotations_lineages', cmap='PuOr_r', show=False)
for ax in ax_list:
    ax.grid(False)
plt.savefig(os.path.join(FIGURES_FOLDERNAME,'annotation_lineages_correlation.svg'), bbox_inches='tight')
plt.show()

# CellHint Label Harmonisation

In [None]:
import cellhint

In [None]:
# Append the tendon status to each group label ('<grouptype>_<tendon_status>').
# Labels that already end with their status suffix are left alone, so running
# this cell twice does not produce '<grouptype>_<status>_<status>'.
grouptype_str = adata.obs['grouptype'].astype(str)
status_suffix = '_' + adata.obs['tendon_status'].astype(str)
already_suffixed = np.array(
    [label.endswith(suffix) for label, suffix in zip(grouptype_str, status_suffix)],
    dtype=bool,
)
adata.obs['grouptype'] = grouptype_str.where(already_suffixed, grouptype_str + status_suffix)
adata.obs['grouptype'] = adata.obs['grouptype'].astype("category")

In [None]:
sc.pl.umap(adata, color='grouptype')

In [None]:
adata.X.max()

In [None]:
plt.figure(figsize=(30, 35))
sc.tl.dendrogram(adata, 'grouptype', use_rep='X_scANVI', use_raw=False)
ax_list = sc.pl.correlation_matrix(adata, 'grouptype', cmap='PuOr_r', show=False)
for ax in ax_list:
    ax.grid(False)
plt.savefig(os.path.join(FIGURES_FOLDERNAME,'grouptype_correlation.svg'), bbox_inches='tight')
plt.show()

In [None]:
adata.obs['annotations_upd2'] = adata.obs['annotations_upd'].str.replace(r'^Adult_Quad_|^Adult_Ach_', '', regex=True)
adata.obs['annotations_upd2'].value_counts()

In [None]:
# Harmonise the 'annotations_upd2' labels across the 'grouptype' datasets,
# processing the datasets in the explicit order listed below.
# NOTE(review): this uses the 'X_scVI' representation, whereas the dendrogram
# cells in this notebook use 'X_scANVI' — confirm the difference is intended.
alignment = cellhint.harmonize(adata, 'grouptype', 'annotations_upd2', 
                               use_rep='X_scVI', metric='euclidean',
                               random_state=100,
                               dataset_order = ['Embryonic_Quad_Healthy', 'Foetal_Quad_Healthy', 'Foetal_Ach_Healthy', 
                                                'Adult_Ach_Healthy', 'Adult_Quad_Healthy', 'Adult_Quad_Rupture'])

In [None]:
alignment

In [None]:
alignment.relation.head(10)

In [None]:
alignment.groups

In [None]:
adata.obs[['low_hierarchy', 'high_hierarchy']] = alignment.reannotation.loc[adata.obs_names, ['reannotation', 'group']]
adata.obs[['low_hierarchy', 'high_hierarchy']]

In [None]:
sc.pl.umap(adata, color=['high_hierarchy', 'annotations_upd2', 'leiden_fibros'], frameon=False,
          legend_loc='on data', legend_fontsize=6)

In [None]:
alignment.relation[alignment.groups == 'Group1']

In [None]:
member_mat = alignment.base_distance.to_meta(turn_binary = True)
member_mat.iloc[:5, :5]

In [None]:
plot = sns.clustermap(member_mat, figsize=(20, 20))
plot.ax_heatmap.grid(False)
savesvg('cellhint_cellassignmentclustermap.svg', plot)

In [None]:
# Clustered heatmap restricted to fibroblast, progenitor and chondrocyte rows/columns.
flag = member_mat.index.str.contains(r'Fibroblasts|fibroblasts|Progenitors|Chondrocytes|chondrocytes')
fibro_submatrix = member_mat.loc[flag, flag]
colorbar_options = {'label': 'Similarity', 'shrink': 0.9, 'use_gridspec': False, 'location': "left"}

plot = sns.clustermap(
    fibro_submatrix,
    figsize=(12, 12),
    linewidths=0.5,
    linecolor='gray',
    cbar_kws=colorbar_options,
)
plot.ax_heatmap.grid(False)

# Place the colour bar: [left, bottom, width, height].
colorbar_ax = plot.cax
colorbar_ax.set_position([0.05, 0.88, .02, 0.12])
colorbar_ax.yaxis.set_ticks_position('right')
colorbar_ax.grid(False)

savesvg('cellhint_cellassignmentclustermap_fibros.svg', plot)

In [None]:
# Clustered heatmap restricted to Achilles ('Ach') fibroblast/progenitor rows and columns.
# The alternation uses a non-capturing group (?:...): str.contains only tests
# for a match, and a capturing group makes pandas warn about unused match groups.
flag = member_mat.index.str.contains(r'Ach.*(?:Fibroblasts|fibroblasts|Progenitors)')
plot = sns.clustermap(member_mat.loc[flag,flag], figsize=(8, 8),
                      cbar_kws={'label': 'Similarity', 'shrink': 0.9, 'use_gridspec': False, 'location': "left"})
plot.ax_heatmap.grid(False)
plot.cax.set_position([0.05, 0.88, .02, 0.12]) # [left, bottom, width, height]
plot.cax.yaxis.set_ticks_position('right')
plot.cax.grid(False)
savesvg('cellhint_cellassignmentclustermap_fibrosach.svg', plot)

In [None]:
# Clustered heatmap restricted to quadriceps ('Quad') fibroblast/progenitor rows and columns.
# The alternation uses a non-capturing group (?:...): str.contains only tests
# for a match, and a capturing group makes pandas warn about unused match groups.
flag = member_mat.index.str.contains(r'Quad.*(?:Fibroblasts|fibroblasts|Progenitors)')
plot = sns.clustermap(member_mat.loc[flag,flag], figsize=(12, 12),
                      cbar_kws={'label': 'Similarity', 'shrink': 0.9, 'use_gridspec': False, 'location': "left"})
plot.ax_heatmap.grid(False)
plot.cax.set_position([0.05, 0.88, .02, 0.12]) # [left, bottom, width, height]
plot.cax.yaxis.set_ticks_position('right')
plot.cax.grid(False)
savesvg('cellhint_cellassignmentclustermap_fibrosquad.svg', plot)

In [None]:
alignment.write(os.path.join(RESULTS_FOLDERNAME, 'cellhint_alignment_rs100_scVI.pkl'))

In [None]:
alignment.relation.to_csv(os.path.join(RESULTS_FOLDERNAME, 'cellhint_alignment_relations_scVI.csv'), sep = ',', index = False)

In [None]:
adata.obs.high_hierarchy.cat.categories

In [None]:
for group in adata.obs.high_hierarchy.cat.categories:
    ct = np.unique(adata.obs.low_hierarchy[adata.obs.high_hierarchy == group])
    sc.pl.umap(adata, color = 'low_hierarchy', groups = list(ct), size = 5, frameon=False,
              save=f'{group}_low_hierarchy_scVI.svg')

In [None]:
adata.uns['low_hierarchy_colors']

In [None]:
len(adata.obs['low_hierarchy'].cat.categories)

In [None]:
len(adata.uns['low_hierarchy_colors'])

In [None]:
sc.pl.umap(adata, color='low_hierarchy')

In [None]:
# Assuming you truncate or verify the necessary number of colors
# Use as many palette entries as there are 'low_hierarchy' categories.
necessary_colors = adata.uns['low_hierarchy_colors'][:len(adata.obs['low_hierarchy'].cat.categories)]

# Map the categorical codes to the appropriate colors, keeping the cell
# barcodes as the index so each colour stays attached to its own cell.
category_codes = adata.obs['low_hierarchy'].cat.codes
cell_type_colors = pd.Series(
    [necessary_colors[code] for code in category_codes],
    index=adata.obs_names,
)

# Align the colours to alignment.reannotation by barcode rather than by
# position: a plain list would be paired row by row and would mislabel cells
# if the two objects were ordered differently.
df_final = pd.DataFrame({
    'dataset': alignment.reannotation['dataset'],
    'cell_type': alignment.reannotation['cell_type'],
    'color': cell_type_colors.reindex(alignment.reannotation.index)
})
df_final

In [None]:
unique_df_final = df_final.drop_duplicates(subset=['dataset', 'cell_type'])
unique_df_final

In [None]:
len(unique_df_final['color'].unique())

In [None]:
cellhint.treeplot(alignment, node_color=unique_df_final, node_size=10,
                  #save=os.path.join(FIGURES_FOLDERNAME, 'cellhint_treeplot_scVI.svg')
                 )

In [None]:
for group in adata.obs.high_hierarchy.cat.categories:
    cellhint.treeplot(alignment.relation[alignment.groups == group],
                      node_size=10, save=os.path.join(FIGURES_FOLDERNAME, f'{group}_treeplot.svg'))

In [None]:
adata.write(os.path.join(RESULTS_FOLDERNAME, 'adultdev_cellhint.h5ad'))

# MAKING PLOTS

In [None]:
adata = sc.read_h5ad(os.path.join(RESULTS_FOLDERNAME, 'adultdev_cellhint.h5ad'))
adata

In [None]:
cell_type_names = adata.obs['annotations_upd2'].cat.categories
color_array = adata.uns['annotations_upd2_colors']
color_dict = dict(zip(cell_type_names, color_array))

In [None]:
color_dict['grey'] = 'lightgray'

In [None]:
def plot_umap_with_background(adata, group, color_dict):
    """
    Plot a UMAP where only the cells of `group` are coloured by cell type.

    Works on a copy of `adata`. Cells whose 'grouptype' equals `group` receive
    their 'annotations_upd2' label in a temporary 'highlight' column; all other
    cells receive 'grey'. `color_dict` is passed to scanpy as the palette, and
    the figure is saved as '<group>_celltypes_correct_withbackground_leg.svg'.
    """
    # Copy so the caller's object never gains the 'highlight' column.
    adata_temp = adata.copy()
    in_group = adata_temp.obs['grouptype'] == group

    # Everything starts as background; group members then take their own label.
    adata_temp.obs['highlight'] = 'grey'
    adata_temp.obs.loc[in_group, 'highlight'] = adata_temp.obs['annotations_upd2']

    # Only the cell types present in this group are passed as `groups`.
    group_cell_types = adata_temp.obs['annotations_upd2'][in_group].unique()

    sc.pl.umap(
        adata_temp,
        color='highlight',
        groups=group_cell_types,
        legend_loc='on data',
        legend_fontsize=6,
        size=5,
        frameon=False,
        title=f'{group}',
        save=f'{group}_celltypes_correct_withbackground_leg.svg',
        palette=color_dict,
    )

    del adata_temp

In [None]:
for group in adata.obs['grouptype'].cat.categories:
    plot_umap_with_background(adata, group, color_dict)

In [None]:
for group in adata.obs.grouptype.cat.categories:
    sc.pl.umap(adata[adata.obs.grouptype == group], color = 'annotations_upd2', 
               size = 5, frameon=False,
               title=f'{group}',
               save=f'{group}_celltypes_correct.svg')

In [None]:
# Copy the two columns so that the assignment below edits an independent frame
# rather than a slice of adata.obs.
df = adata.obs[['annotations_upd2', 'grouptype']].copy()
grouptype_order = [
    'Embryonic_Quad_Healthy', 'Foetal_Quad_Healthy', 'Foetal_Ach_Healthy',
    'Adult_Ach_Healthy', 'Adult_Quad_Healthy', 'Adult_Quad_Rupture'
]
df['grouptype'] = pd.Categorical(df['grouptype'], categories=grouptype_order, ordered=True)

# observed=False keeps a row for every entry of grouptype_order even when a
# group has no cells, independent of the pandas default.
counts = df.groupby(['grouptype', 'annotations_upd2'], observed=False).size().unstack(fill_value=0)
# Row-wise percentages: each group's cell types sum to 100.
percentages = counts.div(counts.sum(axis=1), axis=0) * 100
percentages

In [None]:
counts

In [None]:
# Stacked bar chart of annotation proportions, one bar per grouptype.
# Rows are put into the order given by grouptype_order before plotting.
percentages = percentages.loc[grouptype_order]

ax = percentages.plot.bar(
    stacked=True,
    figsize=(5, 6),
    color=[color_dict[col] for col in percentages.columns],
)

ax.set_ylabel('Proportion of total (%)')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')  # legend outside the axes
ax.grid(False)
ax.figure.tight_layout()
# Hide the top and right borders; bottom and left stay visible.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.figure.savefig(
    os.path.join(FIGURES_FOLDERNAME, 'bar_annotation_proportions.svg'),
    bbox_inches='tight',
)
plt.show()

In [None]:
# Preview the last two grouptype rows of the percentage table.
# The bare name `percentages2` is not assigned until a later cell, so
# referencing it here fails on a fresh "Restart & Run All".
percentages.tail(2)

In [None]:
# Stacked bar chart for the last two rows of the percentage table.
percentages2 = percentages.tail(2)

ax = percentages2.plot.bar(
    stacked=True,
    figsize=(3, 6),
    color=[color_dict[col] for col in percentages2.columns],
)

ax.set_ylabel('Proportion of total (%)')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')  # legend outside the axes
ax.grid(False)
ax.figure.tight_layout()
# Hide the top and right borders; bottom and left stay visible.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.figure.savefig(
    os.path.join(FIGURES_FOLDERNAME, 'bar_annotation_proportions_quads.svg'),
    bbox_inches='tight',
)
plt.show()

In [None]:
# Stacked bar chart for the first row of the percentage table.
percentages2 = percentages.head(1)

ax = percentages2.plot.bar(
    stacked=True,
    figsize=(2, 6),
    color=[color_dict[col] for col in percentages2.columns],
)

ax.set_ylabel('Proportion of total (%)')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')  # legend outside the axes
ax.grid(False)
ax.figure.tight_layout()
# Hide the top and right borders; bottom and left stay visible.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.figure.savefig(
    os.path.join(FIGURES_FOLDERNAME, 'bar_annotation_proportions_embryonic.svg'),
    bbox_inches='tight',
)
plt.show()

In [None]:
# Stacked bar chart for the second and third rows of the percentage table.
percentages2 = percentages.iloc[1:3]

ax = percentages2.plot.bar(
    stacked=True,
    figsize=(3, 6),
    color=[color_dict[col] for col in percentages2.columns],
)

ax.set_ylabel('Proportion of total (%)')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')  # legend outside the axes
ax.grid(False)
ax.figure.tight_layout()
# Hide the top and right borders; bottom and left stay visible.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.figure.savefig(
    os.path.join(FIGURES_FOLDERNAME, 'bar_annotation_proportions_foetal.svg'),
    bbox_inches='tight',
)
plt.show()

In [None]:
# Stacked bar chart for the fourth row of the percentage table.
percentages2 = percentages.iloc[3:4]

ax = percentages2.plot.bar(
    stacked=True,
    figsize=(2, 6),
    color=[color_dict[col] for col in percentages2.columns],
)

ax.set_ylabel('Proportion of total (%)')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')  # legend outside the axes
ax.grid(False)
ax.figure.tight_layout()
# Hide the top and right borders; bottom and left stay visible.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.figure.savefig(
    os.path.join(FIGURES_FOLDERNAME, 'bar_annotation_proportions_ach.svg'),
    bbox_inches='tight',
)
plt.show()

In [None]:
# Display the largest value stored in adata.X.
# NOTE(review): presumably a sanity check on which kind of values X holds
# before differential expression is run on a layer -- confirm.
adata.X.max()

In [None]:
# Reset the stored log1p base before ranking genes.
# NOTE(review): presumably the usual workaround for objects reloaded from
# .h5ad whose uns['log1p'] lacks a usable 'base' entry -- confirm.
# The guard avoids a KeyError when uns has no 'log1p' entry at all.
if 'log1p' in adata.uns:
    adata.uns['log1p']['base'] = None

# Wilcoxon marker-gene ranking per annotation, computed on the 'log1p_norm'
# layer (not .raw) and stored under uns['wilcoxon_upd2'].
sc.tl.rank_genes_groups(
    adata,
    groupby='annotations_upd2',
    method='wilcoxon',
    key_added='wilcoxon_upd2',
    use_raw=False,
    layer='log1p_norm',
)

# Plot the top 25 ranked genes per group with independent y-axes and save the figure.
sc.pl.rank_genes_groups(
    adata,
    n_genes=25,
    sharey=False,
    key='wilcoxon_upd2',
    save='wilcoxon_ranked_annotations_upd2.svg',
)