In [None]:
# Import dependencies
%matplotlib inline
import os
import numpy as np
import scanpy as sc
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import anndata as ad
import warnings

# Ignore all warnings
warnings.simplefilter("ignore")

matplotlib.rcParams['font.family'] = 'sans-serif'

# Initialize random seeds: Python's RNG and NumPy's legacy global RNG, so code
# drawing from either generator is repeatable across kernel restarts.
import random
random.seed(111)
np.random.seed(111)

# Print date and time:
import datetime
e = datetime.datetime.now()
print ("Current date and time = %s" % e)

# Every relative path below is resolved against this working directory.
# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
# wdir = "/ceph/project/tendonhca/akurjan/analysis/"
wdir = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/"
os.chdir( wdir )

# folder structures
INPUT_FOLDERNAME = "adult/integration/results/"
RESULTS_FOLDERNAME = "adult/annotation/results/"
FIGURES_FOLDERNAME = "adult/annotation/figures/"

# exist_ok=True makes this safe to re-run and avoids the check-then-create race.
for output_folder in (RESULTS_FOLDERNAME, FIGURES_FOLDERNAME):
    os.makedirs(output_folder, exist_ok=True)

# Set folder for saving figures into
sc.settings.figdir = FIGURES_FOLDERNAME

def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Save a figure as a vector-based SVG image.

    Parameters
    ----------
    fname : str
        File name (including extension) to create inside ``folder``.
    fig : object
        Anything exposing ``savefig(path, format=...)``, e.g. a Matplotlib
        figure or a seaborn grid object.
    folder : str
        Destination directory; created first if it does not exist yet.
    """
    # An empty folder means "current directory"; os.makedirs("") would raise.
    if folder:
        os.makedirs(folder, exist_ok=True)
    fig.savefig(os.path.join(folder, fname), format='svg')

def plot_umaps(anndata, parameters: list, filename: str) -> None:
    """
    Draw one UMAP panel per entry of ``parameters``, stacked vertically,
    save the figure as SVG via ``savesvg`` and display it.

    Parameters
    ----------
    anndata
        Object handed to ``sc.pl.umap``.
    parameters : list
        Values passed one at a time as ``color=``; each is also used as the
        panel title.
    filename : str
        File name forwarded to ``savesvg``.

    Raises
    ------
    ValueError
        If ``parameters`` is empty.
    """
    n_plots = len(parameters)
    if n_plots == 0:
        raise ValueError("parameters must contain at least one entry to plot")
    # squeeze=False always returns a 2-D array of axes; without it a single
    # panel comes back as a bare Axes object and indexing it fails.
    fig, axs = plt.subplots(n_plots, 1, figsize=(10, 4*n_plots), squeeze=False)
    for ax, param in zip(axs[:, 0], parameters):
        sc.pl.umap(anndata, color=param, ax=ax, show=False, frameon=False, s=2)
        ax.set_title(param)
    plt.tight_layout()
    savesvg(filename, fig)
    plt.show()
    
# Set other settings
# Logging level: 0 = errors, 1 = warnings, 2 = info, 3 = hints
sc.settings.verbosity = 3
# Print the versions of the packages in use
sc.logging.print_versions()
# Figure defaults: on-screen resolution, font size and resolution of saved files
sc.set_figure_params(
    dpi=150,
    dpi_save=600,
    fontsize=10,
)

In [None]:
# Read the AnnData object stored under INPUT_FOLDERNAME
input_h5ad = os.path.join(INPUT_FOLDERNAME, 'adultdev_combined_scANVI.h5ad')
adata = sc.read_h5ad(input_h5ad)
# Make duplicated variable names unique
adata.var_names_make_unique()
adata

In [None]:
# Peek at the first 10 x 10 entries of X as loaded
print(adata.X[:10, :10])

In [None]:
# Rebuild the working object from `.raw`. The rebuilt object carries no `.raw`
# of its own, so running this cell a second time would otherwise fail with an
# opaque AttributeError on None - fail early with an actionable message instead.
if adata.raw is None:
    raise RuntimeError(
        "adata.raw is empty - this cell has probably been run already; "
        "re-run the loading cell before executing it again."
    )
adata = adata.raw.to_adata()

# Keep only genes with at least 50 counts
sc.pp.filter_genes(adata, min_counts=50, inplace=True)

In [None]:
# Inspect X, then keep an untouched copy of it in the "counts" layer
# (presumably raw counts at this point - confirm against the printed values)
print(adata.X[:10, :10])
adata.layers["counts"] = adata.X.copy()

In [None]:
# Library-size normalisation of X, applied in place
sc.pp.normalize_total(
    adata,
    target_sum=None,
    inplace=True,
)

In [None]:
# Peek at the first 10 x 10 entries of X after normalisation
print(adata.X[:10, :10])

In [None]:
# Log-transform X in place, inspect it, and keep a copy as its own layer
sc.pp.log1p(adata)
print(adata.X[:10, :10])
adata.layers["log1p_norm"] = adata.X.copy()

In [None]:
# Working copy of the full annotations, plus a backup of the scANVI labels.
# NOTE(review): re-running this after C_scANVI has been edited overwrites the
# backup with the edited labels.
adata.obs['annotations_new'] = adata.obs['annotations_orig_full'].copy()
adata.obs['C_scANVI_orig'] = adata.obs['C_scANVI'].copy()

In [None]:
# Reset both working columns from their backups so the label merge that
# follows always starts from the same state
adata.obs['annotations_new'] = adata.obs['annotations_orig_full'].copy()
adata.obs['C_scANVI'] = adata.obs['C_scANVI_orig'].copy()

In [None]:
# Work on plain strings so new label values can be written freely
adata.obs['C_scANVI'] = adata.obs['C_scANVI'].astype(str)
adata.obs['annotations_new'] = adata.obs['annotations_new'].astype(str)

# Cells whose annotation does NOT carry the 'Adult_' prefix
mask = ~adata.obs['annotations_new'].str.startswith('Adult_')
filtered_annotations = adata.obs.loc[mask, 'annotations_new']

# Write through .loc on the frame itself. Calling Series.update() on a column
# pulled out of adata.obs is a chained in-place edit, which no longer reaches
# the frame when pandas Copy-on-Write is active. Both columns are strings
# here, so there are no missing values that update() would have skipped.
adata.obs.loc[mask, 'C_scANVI'] = filtered_annotations.to_numpy()
print(adata.obs['C_scANVI'].value_counts())

In [None]:
# UMAP coloured by the merged labels, with the legend drawn on the embedding
sc.pl.umap(
    adata,
    color='C_scANVI',
    frameon=False,
    legend_loc='on data',
    legend_fontsize=4,
)

In [None]:
# The dot plots further down read layer="scaled". Build that layer here from
# the log-normalised values, leaving adata.X itself untouched, so the notebook
# runs top to bottom on a fresh kernel.
# NOTE(review): scaling a sparse matrix with zero-centering produces a dense
# array - check memory headroom on large objects.
if 'scaled' not in adata.layers:
    adata.layers['scaled'] = adata.layers['log1p_norm'].copy()
    sc.pp.scale(adata, layer='scaled')
print(adata.layers['scaled'][0:5,0:5])

In [None]:
# Marker panels
canonical_TSPC = ['ENG', 'THY1', 'CD44', 'NES', 'MCAM']
sheath_TSPC = ['TPPP3', 'PDGFRA', 'ACTA2']
injury_TSPC = ['AXIN2']

foetalmarkers = {
'ABI3BP fibroblasts': ['ABI3BP', 'GAS2', 'SCX', 'MKX', 'SOX5', 'COL1A1', 'THBS2', 'TNMD', 'KERA', 'FMOD', 'ACAN', 'COMP', 'COL5A1', 'COL6A1', 'COL11A1', 'COL11A2', 'COL12A1', 'COL14A1'],
'COL6A6/COL3A1 fibroblasts': ['COL6A6', 'FNDC1', 'TSHZ2', 'PLAGL1', 'DCLK1', 'COL3A1', 'COL1A1', 'COL6A6', 'VCAN', 'FBN1', 'THBS2', 'COL5A1', 'COL6A1', 'COL11A1', 'COL11A2', 'COL12A1', 'COL14A1', 'TPPP3', 'PDGFRA', 'LUM'],
'FGF14 fibroblasts': ['FGF14', 'THBS4', 'COL1A1', 'COL3A1', 'COL4A1', 'COL5A1', 'COL6A1', 'COL11A1', 'COL11A2', 'COL12A1', 'EBF1', 'TSHZ2', 'DLK1', 'BMP5', 'SLIT3', 'FSTL5', 'CDH12', 'COL22A1'],
'NEGR1 fibroblasts': ['NEGR1', 'SCN7A', 'TNXB', 'LUM', 'VCAN', 'COL1A1', 'COL4A1', 'COL22A1', 'NOVA1', 'NAV3', 'SEMA5A'],
'canonical TSPCs': canonical_TSPC,
'sheath TSPCs': sheath_TSPC,
#'injury TSPCs': injury_TSPC - nada
}

all_genes_in_adata = set(adata.var_names)

# Keep only genes present in adata. dict.fromkeys() removes repeats while
# preserving order: a gene listed twice in a panel (COL6A6 above) would
# otherwise be drawn twice in the dot plots and counted twice in len().
foetalmarkers_filtered = {
    cell_type: [gene for gene in dict.fromkeys(genes) if gene in all_genes_in_adata]
    for cell_type, genes in foetalmarkers.items()
}

In [None]:
# Dendrogram over the annotation groups, computed on the X_scANVI representation
sc.tl.dendrogram(
    adata,
    use_rep='X_scANVI',
    groupby='annotations_orig_full',
)

In [None]:
# One dot plot per foetal fibroblast marker panel.
# Each entry: (key in foetalmarkers_filtered, plot title, output file suffix).
# NOTE: these plots read layer "scaled", which must already exist in adata.layers.
fibroblast_panels = [
    ('ABI3BP fibroblasts', 'ABI3BP Foetal Fibroblast Markers', '_annotation_abi3bp.svg'),
    ('COL6A6/COL3A1 fibroblasts', 'COL3/COL6 Foetal Fibroblast Markers', '_annotation_col3col6.svg'),
    ('FGF14 fibroblasts', 'FGF14 Foetal Fibroblast Markers', '_annotation_fgf14.svg'),
    ('NEGR1 fibroblasts', 'NEGR1 Foetal Fibroblast Markers', '_annotation_negr1.svg'),
]

for marker_set, panel_title, file_suffix in fibroblast_panels:
    sc.pl.dotplot(
        adata, foetalmarkers_filtered[marker_set], 'annotations_orig_full',
        dendrogram=True, use_raw=False, layer="scaled",
        vmin=-2, vmax=2, cmap='RdBu_r', figsize=(8,14),
        title=panel_title,
        save=file_suffix,
    )

In [None]:
# All marker panels side by side, drawn from layer "scaled" (must exist in adata.layers)
scaled_dotplot_options = dict(
    dendrogram=True,
    use_raw=False,
    layer="scaled",
    vmin=-2,
    vmax=2,
    cmap='RdBu_r',
    figsize=(25,14),
    save='_annotation_full.svg',
)
sc.pl.dotplot(adata, foetalmarkers_filtered, 'annotations_orig_full', **scaled_dotplot_options)

In [None]:
# Same panels drawn from the log-normalised layer
normalised_dotplot_options = dict(
    dendrogram=True,
    use_raw=False,
    layer="log1p_norm",
    vmin=0,
    vmax=5,
    cmap='Reds',
    figsize=(25,14),
    save='_annotation_full_normalised.svg',
)
sc.pl.dotplot(adata, foetalmarkers_filtered, 'annotations_orig_full', **normalised_dotplot_options)

In [None]:
# Score each fibroblast marker panel; the result lands in the named obs column.
# Mapping: key in foetalmarkers_filtered -> obs column receiving the score.
marker_score_columns = {
    'ABI3BP fibroblasts': 'abi3bp_marker_score',
    'COL6A6/COL3A1 fibroblasts': 'col6a6/col3a1_marker_score',
    'FGF14 fibroblasts': 'fgf14_marker_score',
    'NEGR1 fibroblasts': 'negr1_marker_score',
}

for marker_set, score_column in marker_score_columns.items():
    panel_genes = foetalmarkers_filtered[marker_set]
    sc.tl.score_genes(
        adata,
        gene_list=panel_genes,
        score_name=score_column,
        use_raw=False,
        ctrl_size=len(panel_genes),  # as many control genes as panel genes
        n_bins=50,
        random_state=1,
    )

In [None]:
# Marker scores on the UMAP; colour scale centred on 0 and capped at the 95th percentile
score_columns_p95 = [
    'abi3bp_marker_score',
    'col6a6/col3a1_marker_score',
    'fgf14_marker_score',
    'negr1_marker_score',
]
sc.pl.umap(
    adata,
    color=score_columns_p95,
    cmap="bwr", vcenter=0, vmax="p95",
    sort_order=False,
    frameon=False,
    save='_marker_scores_95perc.svg',
)

In [None]:
# Remove the .raw attribute from the working object
del adata.raw

In [None]:
# Marker scores on the UMAP; colour scale centred on 0 and capped at the 99th percentile
score_columns_p99 = [
    'abi3bp_marker_score',
    'col6a6/col3a1_marker_score',
    'fgf14_marker_score',
    'negr1_marker_score',
]
sc.pl.umap(
    adata,
    color=score_columns_p99,
    cmap="bwr", vcenter=0, vmax="p99",
    sort_order=False,
    frameon=False,
    save='_marker_scores_99perc.svg',
)

In [None]:
# Load the foetal object and keep its original scANVI labels in a backup column
foetal = sc.read_h5ad('foetal/results/scVI/dev_scANVI.h5ad')
foetal.obs['C_scANVI_orig'] = foetal.obs['C_scANVI']

# Collapse the two numbered ABI3BP GAS2 labels into a single label
abi3bp_numbered_labels = ['ABI3BP GAS2 Fibroblasts 1', 'ABI3BP GAS2 Fibroblasts 2']
foetal.obs['C_scANVI'] = np.where(
    foetal.obs['C_scANVI'].isin(abi3bp_numbered_labels),
    'ABI3BP GAS2 Fibroblasts',
    foetal.obs['C_scANVI'],
)
foetal

In [None]:
# Wilcoxon ranking of genes per foetal scANVI label, computed on layer
# 'log1p_norm' and stored under foetal.uns['wilcoxon_scanvi']
sc.tl.rank_genes_groups(
    foetal,
    groupby='C_scANVI',
    method='wilcoxon',
    use_raw=False,
    layer='log1p_norm',
    key_added='wilcoxon_scanvi',
)
sc.pl.rank_genes_groups(foetal, key='wilcoxon_scanvi', n_genes=25, sharey=False)

In [None]:
# Annotation categories carrying the 'Adult_' prefix
categories = adata.obs['annotations_orig_full'].cat.categories
adult_categories = [label for label in categories if label.startswith('Adult_')]
adult_categories

In [None]:
# Keep references to the two pieces of the foetal object that are used later:
# its labels and its Wilcoxon ranking results
foetal_ct = foetal.obs['C_scANVI']
foetal_uns_dict = foetal.uns['wilcoxon_scanvi']

In [None]:
# Drop the name `foetal`; foetal_ct and foetal_uns_dict keep what is still needed
del foetal

In [None]:
# Explicitly record "no custom log base" before ranking genes
adata.uns['log1p']['base'] = None

# Wilcoxon ranking restricted to the 'Adult_' annotation groups
adult_ranking_options = dict(
    groupby='annotations_orig_full',
    groups=adult_categories,
    method='wilcoxon',
    key_added='wilcoxon_adult',
    use_raw=False,
    layer='log1p_norm',
)
sc.tl.rank_genes_groups(adata, **adult_ranking_options)

In [None]:
# Top 25 ranked genes per adult group, each panel with its own y-axis
sc.pl.rank_genes_groups(
    adata,
    key='wilcoxon_adult',
    n_genes=25,
    sharey=False,
)

In [None]:
# Every annotation category that is not one of the adult categories
dev_categories = [label for label in categories if label not in adult_categories]
dev_categories

In [None]:
# Hand-written list of annotation groups used as the developmental reference
foetal_categories = [
    'ABI3BP GAS2 Fibroblasts',
    'COL3A1 PI16 Fibroblasts',
    'COL6A6 FNDC1 Fibroblasts',
    'Chondrocytes',
    'FGF14 THBS4 Fibroblasts',
    'Immune Cells',
    'NEGR1 SCN7A Fibroblasts',
    'Nervous System Cells',
    'Satellite Cells',
    'Skeletal Myocytes',
    'Smooth Myocytes',
    'lymEndothelial Cells',
    'vasEndothelial Cells',
]

In [None]:
# Wilcoxon ranking restricted to the groups listed in foetal_categories
dev_ranking_options = dict(
    groupby='annotations_orig_full',
    groups=foetal_categories,
    method='wilcoxon',
    key_added='wilcoxon_dev',
    use_raw=False,
    layer='log1p_norm',
)
sc.tl.rank_genes_groups(adata, **dev_ranking_options)
sc.pl.rank_genes_groups(adata, key='wilcoxon_dev', n_genes=25, sharey=False)

In [None]:
# Flatten the 'wilcoxon_dev' results into a table with one
# '<group>_<field>' column per group and field, then write it to CSV
result = adata.uns['wilcoxon_dev']
groups = result['names'].dtype.names
dev_fields = ['names','scores','logfoldchanges', 'pvals', 'pvals_adj']
dev_columns = {}
for group in groups:
    for key in dev_fields:
        dev_columns[f'{group}_{key}'] = result[key][group]
df = pd.DataFrame(dev_columns)
df.to_csv(os.path.join(RESULTS_FOLDERNAME, 'devonly_wilcoxon_DGE.csv'))

In [None]:
# Flatten the 'wilcoxon_adult' results into a table with one
# '<group>_<field>' column per group and field, then write it to CSV
result = adata.uns['wilcoxon_adult']
groups = result['names'].dtype.names
adult_fields = ['names','scores','logfoldchanges', 'pvals', 'pvals_adj']
adult_columns = {}
for group in groups:
    for key in adult_fields:
        adult_columns[f'{group}_{key}'] = result[key][group]
df = pd.DataFrame(adult_columns)
df.to_csv(os.path.join(RESULTS_FOLDERNAME, 'adultonly_wilcoxon_DGE.csv'))

In [None]:
# Display the most recently built `df`
df

In [None]:
# Top 50 ranked genes for every label found in the foetal object
foetal_markers_dict = {
    cell_type: foetal_uns_dict['names'][cell_type][0:50]
    for cell_type in foetal_ct.unique()
}

# Overlap between those gene sets and the adult ranking ('wilcoxon_adult'),
# normalised with normalize='reference'
cell_annotation_norm = sc.tl.marker_gene_overlap(adata, foetal_markers_dict, key='wilcoxon_adult', normalize='reference')

fig, ax = plt.subplots(figsize=(12,14))
sns.heatmap(cell_annotation_norm.T, linewidths=0.5, linecolor='white', cbar=True, annot=True, ax=ax)
ax.grid(False)
# Hand savesvg the figure object itself instead of the pyplot module, so the
# saved figure does not depend on which figure happens to be "current".
savesvg('FoetalscANVI_vs_Adult_similaritymatrix_50degs.svg', fig)
plt.show()

In [None]:
# Clustered version of the current overlap matrix (cell_annotation_norm)
heatmap_style = dict(linewidths=0.5, linecolor='white', cbar=True, annot=True)
g = sns.clustermap(cell_annotation_norm.T, figsize=(12, 14), **heatmap_style)
g.ax_heatmap.grid(False)
savesvg('scANVIfileFoetal_vs_Adult_similaritymatrix_clustered_50degs.svg', g)
plt.show()

In [None]:
# Top 500 ranked genes per group of foetal_categories, taken from the
# 'wilcoxon_dev' ranking computed on adata itself.
# NOTE(review): 500 genes are used here while the output file name says
# "50degs" - confirm which of the two is intended.
foetal_markers_dict_2 = {
    cell_type: adata.uns['wilcoxon_dev']['names'][cell_type][0:500]
    for cell_type in foetal_categories
}

# Overlap between those gene sets and the adult ranking ('wilcoxon_adult'),
# normalised with normalize='reference'
cell_annotation_norm = sc.tl.marker_gene_overlap(adata, foetal_markers_dict_2, key='wilcoxon_adult', normalize='reference')

fig, ax = plt.subplots(figsize=(12,14))
sns.heatmap(cell_annotation_norm.T, linewidths=0.5, linecolor='white', cbar=True, annot=True, ax=ax)
ax.grid(False)
# Hand savesvg the figure object itself instead of the pyplot module, so the
# saved figure does not depend on which figure happens to be "current".
savesvg('onefile_Foetal_vs_Adult_similaritymatrix_50degs.svg', fig)
plt.show()

In [None]:
# Clustered version of the current overlap matrix (cell_annotation_norm)
heatmap_style = dict(linewidths=0.5, linecolor='white', cbar=True, annot=True)
g = sns.clustermap(cell_annotation_norm.T, figsize=(12, 14), **heatmap_style)
g.ax_heatmap.grid(False)
savesvg('onefile_Foetal_vs_Adult_similaritymatrix_clustered_50degs.svg', g)
plt.show()

In [None]:
# Largest value currently stored in adata.X
adata.X.max()

In [None]:
# Score every gene set of foetal_markers_dict; each score is written to the
# obs column '<cell type>_ms'
ms_list = [f'{cell_type}_ms' for cell_type in foetal_markers_dict]
for cell_type, score_column in zip(foetal_markers_dict, ms_list):
    genes = foetal_markers_dict[cell_type]
    sc.tl.score_genes(
        adata,
        gene_list=genes,
        score_name=score_column,
        use_raw=False,
        ctrl_size=len(genes),
        n_bins=50,
        random_state=1,
    )
ms_list

In [None]:
# Score UMAPs for ms_list, once per colour-scale cap.
# Each entry: (vmax percentile, output file suffix).
foetalct_score_plots = [
    ("p95", '_foetalct_marker_scores_95perc_50degs.svg'),
    ("p99", '_foetalct_marker_scores_99perc_50degs.svg'),
]
for upper_cap, file_suffix in foetalct_score_plots:
    sc.pl.umap(
        adata,
        color=ms_list,
        frameon=False,
        sort_order=False,
        cmap="bwr", vmax=upper_cap, vcenter=0, ncols=5,
        save=file_suffix,
    )

In [None]:
# Score every gene set of foetal_markers_dict_2.
# NOTE: scores go to '<cell type>_ms', so any column of that name written by
# an earlier scoring run is overwritten.
ms_list2 = [f'{cell_type}_ms' for cell_type in foetal_markers_dict_2]
for cell_type, score_column in zip(foetal_markers_dict_2, ms_list2):
    genes = foetal_markers_dict_2[cell_type]
    sc.tl.score_genes(
        adata,
        gene_list=genes,
        score_name=score_column,
        use_raw=False,
        ctrl_size=len(genes),
        n_bins=50,
        random_state=1,
    )

In [None]:
# Score UMAPs for ms_list2, once per colour-scale cap.
# Each entry: (vmax percentile, output file suffix).
onefile_score_plots = [
    ("p95", '_onefile_foetalct_marker_scores_95perc_50degs.svg'),
    ("p99", '_onefile_foetalct_marker_scores_99perc_50degs.svg'),
]
for upper_cap, file_suffix in onefile_score_plots:
    sc.pl.umap(
        adata,
        color=ms_list2,
        frameon=False,
        sort_order=False,
        cmap="bwr", vmax=upper_cap, vcenter=0, ncols=5,
        save=file_suffix,
    )

In [None]:
# Build a palette that keeps the stored colour for every annotation occurring
# in the 'Foetal' group and paints all other annotations light gray.
palette_key = 'annotations_orig_full_colors'
unique_clusters = adata.obs['annotations_orig_full'].cat.categories

# Stored colours are matched to the categories by position
all_annotations = {}
if palette_key in adata.uns:
    all_annotations = dict(zip(unique_clusters, adata.uns[palette_key]))
else:
    # Without stored colours every cluster falls back to light gray below
    print("Color palette for 'annotations_orig_full' not found. Run a plot first.")

# Annotations present among cells of the 'Foetal' group
# (single .loc lookup instead of chained indexing)
foetal_mask = adata.obs['group'] == 'Foetal'
group_annotations = adata.obs.loc[foetal_mask, 'annotations_orig_full']
unique_annotations = list(pd.unique(group_annotations))

highlighted_clusters = {annotation: all_annotations[annotation] for annotation in unique_annotations
                        if annotation in all_annotations}
color_palette = [highlighted_clusters.get(cluster, 'lightgray') for cluster in unique_clusters]

In [None]:
# UMAP coloured with the highlight palette built above.
# NOTE(review): passing palette= presumably replaces the colours stored in
# adata.uns for this column - confirm before relying on them afterwards.
sc.pl.umap(
    adata,
    color=['annotations_orig_full'],
    palette=color_palette,
    s=1,
    legend_loc='on data',
    legend_fontsize=3,
    frameon=False,
    save='_scANVI_foetalct_annotated.svg',
)

In [None]:
# Show the score column names collected in ms_list2
ms_list2

In [None]:
# Per-cell table: three metadata columns followed by the five fibroblast scores
metadata_columns = ['annotations_orig_full', 'age', 'tendon_status']
fibroblast_score_columns = [
    'ABI3BP GAS2 Fibroblasts_ms',
    'COL3A1 PI16 Fibroblasts_ms',
    'COL6A6 FNDC1 Fibroblasts_ms',
    'FGF14 THBS4 Fibroblasts_ms',
    'NEGR1 SCN7A Fibroblasts_ms',
]
data = pd.DataFrame(adata.obs[metadata_columns + fibroblast_score_columns])
data

In [None]:
# Convert the grouping columns to plain strings with surrounding whitespace removed
for text_column in ('annotations_orig_full', 'tendon_status', 'age'):
    data[text_column] = data[text_column].astype(str).str.strip()
print(data.dtypes)

In [None]:
# Sort by the grouping columns, then average every remaining column per group
group_keys = ['annotations_orig_full', 'age', 'tendon_status']
data = data.sort_values(by=group_keys)

aggregated_data = data.groupby(group_keys).mean()
aggregated_data

In [None]:
# Score columns to summarise
ms_columns = [
    'ABI3BP GAS2 Fibroblasts_ms',
    'COL3A1 PI16 Fibroblasts_ms',
    'COL6A6 FNDC1 Fibroblasts_ms',
    'FGF14 THBS4 Fibroblasts_ms',
    'NEGR1 SCN7A Fibroblasts_ms',
]

# The same four statistics are computed for every score column
summary_stats = ['mean', 'max', 'min', 'count']
agg_funcs = {score_column: list(summary_stats) for score_column in ms_columns}

# One row per (annotation, age, tendon status) combination
summary_keys = ['annotations_orig_full', 'age', 'tendon_status']
aggregated_data = data.groupby(summary_keys).agg(agg_funcs)
aggregated_data

In [None]:
# Summary rows whose first index level (the annotation) matches this label
aggregated_data.loc['Adult_Quad_Osteoblasts']

In [None]:
# Summary rows whose first index level (the annotation) matches this label
aggregated_data.loc['Adult_Quad_Dividing fibroblasts / mural cells']

In [None]:
# Write the per-group score summary to the results folder
summary_csv_path = os.path.join(RESULTS_FOLDERNAME, 'ct_data_summaryofmsscores.csv')
aggregated_data.to_csv(summary_csv_path)

In [None]:
# UMAP coloured by the full annotation column
sc.pl.umap(adata, color='annotations_orig_full')

In [None]:
# UMAP coloured by the C_scANVI labels, with the legend drawn on the embedding
sc.pl.umap(
    adata,
    color=['C_scANVI'],
    frameon=False,
    legend_loc='on data',
    legend_fontsize=5,
)

In [None]:
# Remove the .raw attribute from the working object
del adata.raw

In [None]:
# UMAP coloured by the 'ageint' column
sc.pl.umap(
    adata,
    color=['ageint'],
    frameon=False,
    legend_loc='on data',
    legend_fontsize=5,
)