In [None]:
# Import dependencies
import os, glob, re, pickle
from functools import partial
from collections import OrderedDict
import operator as op
from cytoolz import compose

import numpy as np
import pandas as pd
import scanpy as sc
import scipy as sp
import loompy as lp
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import anndata

from pyscenic.export import export2loom, add_scenic_metadata
from pyscenic.utils import load_motifs
from pyscenic.transform import df2regulons
from pyscenic.aucell import aucell
from pyscenic.binarization import binarize
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_binarization, plot_rss

from IPython.display import HTML, display

# Print date and time:
# Record when this run started.
import datetime
e = datetime.datetime.now()
print ("Current date and time = %s" % e)

# Every relative path used below is resolved against this working directory.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
wdir = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/adult/"
os.chdir( wdir )

# Folder structure (all paths are relative to the current working directory).
INPUT_FOLDERNAME = "annotation/results/"
RESULTS_FOLDERNAME = "scenic/results/foetalcomparison/"
FIGURES_FOLDERNAME = "scenic/figures/foetalcomparison/"
AUXILLIARIES_FOLDERNAME = "../../files/auxilliaries/"

# exist_ok=True makes directory creation idempotent and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(RESULTS_FOLDERNAME, exist_ok=True)
os.makedirs(FIGURES_FOLDERNAME, exist_ok=True)

# Set folder for saving figures into
sc.settings.figdir = FIGURES_FOLDERNAME

def savesvg(fname: str, fig, folder: str=FIGURES_FOLDERNAME) -> None:
    """
    Apply a tight layout to `fig` and write it to `folder/fname` in SVG format.

    fname:  file name of the image (joined onto `folder`).
    fig:    figure object exposing `tight_layout()` and `savefig()`.
    folder: destination directory; defaults to FIGURES_FOLDERNAME.
    """
    fig.tight_layout()
    target_path = os.path.join(folder, fname)
    fig.savefig(target_path, format='svg')

# Set other settings
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
# Print the versions of the loaded packages for the record.
sc.logging.print_versions()
# Figure defaults: on-screen dpi, font size, and dpi used when saving.
sc.set_figure_params(dpi=150, fontsize=10, dpi_save=600)

# FULL SCENIC ANALYSIS

In [None]:
embryonic_scenic_loom = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/embryonic ScAndSp/Scenic/results/embryonic_005AUC.scenic.loom"
#foetal12_scenic_loom = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/foetal/results/Scenic/12w_foetal_005AUC.scenic.loom"
#foetal17_scenic_loom = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/foetal/results/Scenic/17w_foetal_005AUC.scenic.loom"
#foetal20_scenic_loom = "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/foetal/results/Scenic/20w_foetal_005AUC.scenic.loom"
#adult_scenic_loom = os.path.join('scenic/results/adult_quad_01AUC.scenic.loom')
    
loomfiles = {
    'embryonic': embryonic_scenic_loom,
    #'foetal12': foetal12_scenic_loom,
    #'foetal17': foetal17_scenic_loom,
    #'foetal20': foetal20_scenic_loom,
    #'adultquads': adult_scenic_loom
}

In [None]:
# Per-dataset regulon AUC matrices (cells x regulons) and regulon membership
# arrays, read from the SCENIC loom files listed in `loomfiles`.
auc_mtx_dict = {}
regulons_dict = {}

for name, path in loomfiles.items():
    # Nothing is written back, so open read-only; the context manager closes
    # the file handle even if reading an attribute fails.
    with lp.connect(path, mode='r', validate=False) as lf:
        auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)
        regulons = lf.ra.Regulons
    auc_mtx_dict[name] = auc_mtx
    regulons_dict[name] = regulons

In [None]:
#bin_mtx, thresholds = binarize(auc_mtx_dict['foetal12'])
#bin_mtx.to_csv(os.path.join(RESULTS_FOLDERNAME, f'12w_foetal.bin.csv'))
#thresholds.to_frame().rename(columns={0:'threshold'}).to_csv(os.path.join(RESULTS_FOLDERNAME, '12w_foetal.thresholds.csv'))

In [None]:
# Binarised regulon activity tables, one per developmental stage.
# The first CSV column is used as the row index.
bin_mtx_embryonic = pd.read_csv(
    "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/embryonic ScAndSp/Scenic/results/embryonic.bin.csv",
    index_col=0,
)
bin_mtx_foetal12 = pd.read_csv(
    os.path.join(RESULTS_FOLDERNAME, '12w_foetal.bin.csv'),
    index_col=0,
)
bin_mtx_foetal17 = pd.read_csv(
    os.path.join(RESULTS_FOLDERNAME, '17w_foetal.bin.csv'),
    index_col=0,
)
bin_mtx_foetal20 = pd.read_csv(
    "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/foetal/results/Scenic/20w_foetal.bin.csv",
    index_col=0,
)

bin_mtx_dict = dict(
    embryonic=bin_mtx_embryonic,
    foetal12=bin_mtx_foetal12,
    foetal17=bin_mtx_foetal17,
    foetal20=bin_mtx_foetal20,
)

In [None]:
# Load the embryonic AnnData object, force variable names to strings and
# de-duplicate them; the last line displays the object summary.
embryonic_adata = sc.read_h5ad("/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/embryonic ScAndSp/subset_scVI/results/predictedsubset_ccregressed.h5ad")
embryonic_adata.var_names = embryonic_adata.var_names.astype('str')
embryonic_adata.var_names_make_unique()
embryonic_adata

In [None]:
# Relabel 'Chondrocytes' as 'Embryonic Chondrocytes'; every other label is kept.
# np.where returns a plain array, which is what gets stored back in obs.
# NOTE(review): uns['cell_type_colors'] is not updated here — confirm the
# palette order still matches the labels after the rename.
embryonic_adata.obs['cell_type'] = np.where(
    embryonic_adata.obs['cell_type'] == 'Chondrocytes', 'Embryonic Chondrocytes', embryonic_adata.obs['cell_type']
)

In [None]:
embryonic_adata.obs[['cell_type', 'age']].value_counts()

In [None]:
adata_dict = {
    'embryonic': embryonic_adata,
}

In [None]:
foetal_adata = sc.read_h5ad("/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/foetal/results/scVI/dev_scANVI.h5ad")
foetal_adata.var_names_make_unique()
foetal_adata

In [None]:
# Collapse the two numbered 'ABI3BP GAS2 Fibroblasts' sub-clusters into one
# label; all other annotations are left unchanged.
foetal_adata.obs['C_scANVI'] = np.where(
    foetal_adata.obs['C_scANVI'].isin(['ABI3BP GAS2 Fibroblasts 1', 'ABI3BP GAS2 Fibroblasts 2']),
    'ABI3BP GAS2 Fibroblasts',
    foetal_adata.obs['C_scANVI'],
)

In [None]:
# Independent copies of the foetal data, one per age.
foetal12 = foetal_adata[foetal_adata.obs['age']=='12w'].copy()
foetal17 = foetal_adata[foetal_adata.obs['age']=='17w'].copy()
foetal20 = foetal_adata[foetal_adata.obs['age']=='20w'].copy()

# NOTE(review): this rebinds adata_dict, so any previously stored entry
# (e.g. 'embryonic') is no longer reachable through it — confirm intended.
adata_dict = {
    'foetal12': foetal12,
    'foetal17': foetal17,    
    'foetal20': foetal20
}

In [None]:
del foetal_adata

In [None]:
# Create heatmap with binarized regulon activity.
def palplot(pal, names, colors=None, size=1):
    """
    Draw a palette as a single row of colour swatches with a text label on each.

    pal:    sequence of colours, one swatch per entry.
    names:  labels written on the swatches, in the same order as `pal`.
    colors: text colour per label; black ('k') for every label when None.
    size:   height of the figure; its width is len(pal) * size.

    Returns the created matplotlib figure.
    """
    n_swatches = len(pal)
    text_colors = ['k'] * n_swatches if colors is None else colors

    fig, axis = plt.subplots(1, 1, figsize=(n_swatches * size, size))
    axis.imshow(
        np.arange(n_swatches).reshape(1, n_swatches),
        cmap=mpl.colors.ListedColormap(list(pal)),
        interpolation="nearest",
        aspect="auto",
    )

    # Ticks sit on the swatch borders; their labels are blanked out.
    axis.set_xticks(np.arange(n_swatches) - .5)
    axis.set_yticks([-.5, .5])
    axis.set_xticklabels([])
    axis.set_yticklabels([])

    for x_pos, (label, text_color) in enumerate(zip(names, text_colors)):
        axis.text(float(x_pos), 0.0, label, color=text_color,
                  horizontalalignment='center', verticalalignment='center')
    return fig

In [None]:
# Foetal:
unscolors = 'C_scANVI_colors'
groupcolors = 'C_scANVI'

# Embryonic:
unscolors = 'cell_type_colors'
groupcolors = 'cell_type'

In [None]:
for name, auc_mtx in auc_mtx_dict.items():
    auc_mtx.index = auc_mtx.index.map(str)

In [None]:
# For every dataset in adata_dict: save the ON/OFF and cell-type legends, then
# a clustered heatmap of binarised regulon activity with one colour per cell.
for name, adata in adata_dict.items():
    cats = sorted(set(adata.obs[groupcolors]))
    # NOTE(review): palette entries are paired with the *sorted* labels;
    # presumably adata.uns[unscolors] is stored in that same order — confirm.
    colors = sns.color_palette(adata.uns[unscolors], n_colors=len(cats))
    colorsd = dict( zip( cats, colors ))
    colormap = [ colorsd[x] for x in adata.obs[groupcolors] ]

    cell_id2cell_type_lut =adata.obs[groupcolors].to_dict()
    bw_palette = sns.xkcd_palette(["white", "black"])
    
    sns.set()
    sns.set_style("whitegrid")
    fig = palplot(bw_palette, ['OFF', 'ON'], ['k', 'w'])
    savesvg(f'{name}_legend_on_off.svg', fig)

    sns.set()
    sns.set(font_scale=1.0)
    fig = palplot(colors, cats, size=2.5)
    savesvg(f'{name}_legend_celltypes.svg', fig)
    
    cell_types_series = pd.Series(auc_mtx_dict[name].index.map(cell_id2cell_type_lut), index=auc_mtx_dict[name].index)
    # Cells without a known label (including cells absent from adata.obs) are drawn white.
    mapped_colors = cell_types_series.map(lambda x: colorsd.get(x, (1, 1, 1)))
    # Detect unmapped cells from the labels themselves: testing the colour
    # tuple against an object array is unreliable, and Series.iteritems()
    # no longer exists in pandas >= 2.0.
    missing_keys = cell_types_series.index[~cell_types_series.isin(cats)].tolist()
    if missing_keys:
        print("Missing keys for these index entries:", missing_keys)

    sns.set()
    sns.set(font_scale=1.0)
    sns.set_style("ticks", {"xtick.minor.size": 1, "ytick.minor.size": 0.1})
    g = sns.clustermap(bin_mtx_dict[name].T, 
                   col_colors=mapped_colors,
                   cmap=bw_palette, figsize=(20,20))
    g.ax_heatmap.set_xticklabels([])
    g.ax_heatmap.set_xticks([])
    g.ax_heatmap.set_xlabel('Cells')
    g.ax_heatmap.set_ylabel('Regulons')
    g.ax_col_colors.set_yticks([0.5])
    g.ax_col_colors.set_yticklabels(['Cell Type'])
    g.cax.set_visible(False)
    g.fig.savefig(os.path.join(FIGURES_FOLDERNAME, f'{name}_binarizedregulonheatmap_upd.jpeg'), format='jpeg')

In [None]:
# Per dataset: fraction of cells in each group with a regulon switched ON
# (normalized_regs) and the column-wise z-score of that fraction (z_normalized_regs).
normalized_regs = {}
z_normalized_regs = {}

for name, matrix in bin_mtx_dict.items():
    # regulons x cells, each cell-ID column relabelled with its group label
    bin_mtx_clustered = matrix.T.rename(columns=adata_dict[name].obs[groupcolors].to_dict())
    bin_mtx_clustered.to_csv(os.path.join(RESULTS_FOLDERNAME, f'{name}_binarized_regulon_activity_newannotations.csv'))
    # Number of ON cells per group. Grouping the transposed frame by its index
    # is equivalent to groupby(..., axis=1), which is deprecated since pandas 2.1.
    regulon_presence_summary = bin_mtx_clustered.T.groupby(level=0).sum().T
    regulon_presence_summary.to_csv(os.path.join(RESULTS_FOLDERNAME, f'{name}_binarized_regulon_summaryperct.csv'))
    cell_counts = adata_dict[name].obs[groupcolors].value_counts().to_dict()
    normalized_regulon_presence = regulon_presence_summary.copy()
    # A KeyError here means a column kept its cell ID, i.e. the cell is
    # missing from adata_dict[name].obs.
    for column in normalized_regulon_presence.columns:
        normalized_regulon_presence[column] /= cell_counts[column]
    normalized_regs[name] = normalized_regulon_presence
    # z-score each group column across regulons
    normalized_regulon_presence_z = (normalized_regulon_presence - normalized_regulon_presence.mean()) / normalized_regulon_presence.std()
    z_normalized_regs[name] = normalized_regulon_presence_z

In [None]:
def OP_regulon_clustermap(regulon_presence_summary, condition, normalization_type=None, size=(12, 25), xsize=10, colcluster=True):
    """
    Draw a row-clustered heatmap of regulon activity, save it as SVG and show it.

    Row labels alternate between the left and the right y-axis (every other
    row on each side) so that dense label lists do not overlap.

    Parameters
    ----------
    regulon_presence_summary : DataFrame
        Values to plot; rows are labelled from its index, columns are the x-axis.
    condition : str
        Suffix of the output file name
        ``activated_regulon_abundance_clustermap_{condition}.svg`` written to
        FIGURES_FOLDERNAME. Any '/' is replaced by '-' so that it cannot be
        interpreted as a sub-directory.
    normalization_type : str or None
        'z-score' selects the diverging 'RdBu_r' colormap centred on 0;
        anything else uses 'Oranges' without centring.
    size : tuple
        Figure size passed to seaborn.clustermap.
    xsize : int
        Font size of the row labels.
    colcluster : bool
        Whether columns are clustered as well.

    Returns
    -------
    None
    """
    # Determine the colormap based on normalization type
    if normalization_type == 'z-score':
        cmap, center_val = 'RdBu_r', 0
    else:
        cmap, center_val = 'Oranges', None

    # Create the clustermap
    g = sns.clustermap(regulon_presence_summary, method='average', metric='euclidean',
                       cmap=cmap, center=center_val, linewidths=.5, figsize=size,
                       row_cluster=True, col_cluster=colcluster,
                       cbar_kws={"shrink": .5, "pad": 0.05})

    # Adjust color bar position
    g.cax.set_position([1, .2, .02, .45])

    # Row labels in the order produced by the clustering
    row_order = g.dendrogram_row.reordered_ind
    regulons = regulon_presence_summary.index[row_order]

    g.ax_row_dendrogram.set_visible(False)

    # Tick positions pointing at the centre of each row
    centered_ticks = [x + 0.5 for x in range(len(regulons))]

    # Left axis: rows 0, 2, 4, ...
    g.ax_heatmap.set_yticks(centered_ticks[::2])
    g.ax_heatmap.set_yticklabels(regulons[::2], rotation=0, fontsize=xsize)

    # Right axis: rows 1, 3, 5, ... on a twin axis sharing the same limits
    ax2 = g.ax_heatmap.twinx()
    ax2.set_yticks(centered_ticks[1::2])
    ax2.set_yticklabels(regulons[1::2], rotation=0, fontsize=xsize)
    ax2.set_ylim(g.ax_heatmap.get_ylim())
    ax2.yaxis.set_label_position("right")
    ax2.yaxis.tick_right()

    # Rotate column labels for readability
    plt.setp(g.ax_heatmap.xaxis.get_majorticklabels(), rotation=90, fontsize=10)

    # Axis label stays on the left even though ticks exist on both sides
    g.ax_heatmap.set_ylabel('Regulons', fontsize=12, labelpad=10)
    g.ax_heatmap.yaxis.set_label_position('left')

    plt.grid(False)

    # '/' in the condition would otherwise be read as a directory separator
    safe_condition = str(condition).replace('/', '-')
    g.savefig(os.path.join(FIGURES_FOLDERNAME, f'activated_regulon_abundance_clustermap_{safe_condition}.svg'))

    # Show the plot
    plt.show()

In [None]:
for name, matrix in normalized_regs.items():
    # foetal: OP_regulon_clustermap(matrix, name, size=(8,28))
    OP_regulon_clustermap(matrix, name, size=(5,30), xsize=8) # embryonic

In [None]:
for name, matrix in z_normalized_regs.items():
    title = name + '_Z'
    # foetal: OP_regulon_clustermap(matrix, title, 'z-score', size=(8,28))
    OP_regulon_clustermap(matrix, title, 'z-score', size=(5,30), xsize=8) # embryonic

# Comparing regulons across embryonic ages

In [None]:
embryonic_adata.obs.age.value_counts()

In [None]:
# Split the embryonic binarised matrix by sample age, selecting rows by the
# cell IDs that carry each age in embryonic_adata.obs.
_emb_bin = bin_mtx_dict['embryonic']
_emb_age = embryonic_adata.obs['age']

bin_mtx_6 = _emb_bin.loc[_emb_age.index[(_emb_age == '6.5w').to_numpy()]]
bin_mtx_7 = _emb_bin.loc[_emb_age.index[(_emb_age == '7.2w').to_numpy()]]
bin_mtx_8 = _emb_bin.loc[_emb_age.index[(_emb_age == '8.4w').to_numpy()]]
bin_mtx_9 = _emb_bin.loc[_emb_age.index[(_emb_age == '9.0w').to_numpy()]]
bin_mtx_93 = _emb_bin.loc[_emb_age.index[(_emb_age == '9.3w').to_numpy()]]

bin_mtx_93

In [None]:
# Rebind bin_mtx_dict to the per-age embryonic subsets.
# NOTE(review): after this cell the 'embryonic' key is gone, so the cell that
# builds bin_mtx_6 ... bin_mtx_93 cannot be re-run without reloading.
bin_mtx_dict = {
    'embryonic 6.5w': bin_mtx_6,
    'embryonic 7.2w': bin_mtx_7,
    'embryonic 8.4w': bin_mtx_8,
    'embryonic 9.0w': bin_mtx_9,
    'embryonic 9.3w': bin_mtx_93,
}

In [None]:
# Independent copies of the embryonic data, one per age.
emb6 = embryonic_adata[embryonic_adata.obs['age']=='6.5w'].copy()
emb7 = embryonic_adata[embryonic_adata.obs['age']=='7.2w'].copy()
emb8 = embryonic_adata[embryonic_adata.obs['age']=='8.4w'].copy()
emb9 = embryonic_adata[embryonic_adata.obs['age']=='9.0w'].copy()
emb93 = embryonic_adata[embryonic_adata.obs['age']=='9.3w'].copy()

# Keys match those of the per-age bin_mtx_dict so both can be iterated together.
adata_dict = {
    'embryonic 6.5w': emb6,
    'embryonic 7.2w': emb7,    
    'embryonic 8.4w': emb8,
    'embryonic 9.0w': emb9,
    'embryonic 9.3w': emb93
}

In [None]:
# Per dataset: fraction of cells in each group with a regulon switched ON
# (normalized_regs) and the column-wise z-score of that fraction (z_normalized_regs).
# NOTE(review): both dictionaries are reset here, so entries from earlier
# sections are discarded.
normalized_regs = {}
z_normalized_regs = {}

for name, matrix in bin_mtx_dict.items():
    # regulons x cells, each cell-ID column relabelled with its group label
    bin_mtx_clustered = matrix.T.rename(columns=adata_dict[name].obs[groupcolors].to_dict())
    bin_mtx_clustered.to_csv(os.path.join(RESULTS_FOLDERNAME, f'{name}_binarized_regulon_activity_newannotations.csv'))
    # Number of ON cells per group. Grouping the transposed frame by its index
    # is equivalent to groupby(..., axis=1), which is deprecated since pandas 2.1.
    regulon_presence_summary = bin_mtx_clustered.T.groupby(level=0).sum().T
    regulon_presence_summary.to_csv(os.path.join(RESULTS_FOLDERNAME, f'{name}_binarized_regulon_summaryperct.csv'))
    cell_counts = adata_dict[name].obs[groupcolors].value_counts().to_dict()
    normalized_regulon_presence = regulon_presence_summary.copy()
    # A KeyError here means a column kept its cell ID, i.e. the cell is
    # missing from adata_dict[name].obs.
    for column in normalized_regulon_presence.columns:
        normalized_regulon_presence[column] /= cell_counts[column]
    normalized_regs[name] = normalized_regulon_presence
    # z-score each group column across regulons
    normalized_regulon_presence_z = (normalized_regulon_presence - normalized_regulon_presence.mean()) / normalized_regulon_presence.std()
    z_normalized_regs[name] = normalized_regulon_presence_z

In [None]:
for name, matrix in normalized_regs.items():
    # foetal: OP_regulon_clustermap(matrix, name, size=(8,28))
    OP_regulon_clustermap(matrix, name, size=(5,30), xsize=8) # embryonic

In [None]:
for name, matrix in z_normalized_regs.items():
    title = name + '_Z'
    # foetal: OP_regulon_clustermap(matrix, title, 'z-score', size=(8,28))
    OP_regulon_clustermap(matrix, title, 'z-score', size=(5,30), xsize=8) # embryonic

In [None]:
# Adult quadriceps z-score tables (healthy and ruptured), registered next to
# the developmental datasets in z_normalized_regs.
bin_h = pd.read_csv('scenic/results/adult_quad_zscore_byct_healthy.csv', index_col=0)
bin_r = pd.read_csv('scenic/results/adult_quad_zscore_byct_ruptured.csv', index_col=0)
z_normalized_regs.update(adult_quad_healthy=bin_h, adult_quad_ruptured=bin_r)

In [None]:
# Group labels (columns) shared by every z-scored table whose key contains 'embryonic'.
dfs = {key: df for key, df in z_normalized_regs.items() if 'embryonic' in key}
column_sets = [set(df.columns) for df in dfs.values()]
# set.intersection() called with no operands raises TypeError; when no table
# matches, fall back to an empty set so the loops below simply do nothing.
overlapping_columns = set.intersection(*column_sets) if column_sets else set()
overlapping_columns

# Comparing embryonic with adult

In [None]:
z_normalized_regs.keys()

In [None]:
adult_rupture_fibros = {
     'COL3A1hi fibroblasts',
     'Dividing fibroblasts / mural cells'
}

adult_healthy_fibros = {
     'ABCA10hi fibroblasts',
     'FBLNhi fibroblasts',
     'NR4A1hi fibroblasts',
}

In [None]:
# Embryonic cell types vs adult ruptured fibroblasts, restricted to regulons
# that have a value on both sides.
for ct in overlapping_columns:
    try:
        print(f"Processing {ct}...")
        # NOTE(review): needs an 'embryonic' entry in z_normalized_regs; if it
        # is absent the except clause below reports the KeyError per cell type.
        emb = z_normalized_regs['embryonic'][ct].copy()
        if ct == 'Embryonic Chondrocytes':
            emb.name = ct
        else:
            emb.name = 'Embryonic ' + ct
        for match in adult_rupture_fibros:
            # The title ends up in the output file name, so the label with '/' is shortened.
            if match == 'Dividing fibroblasts / mural cells':
                title = 'CommonOnly '+ct+ 'Dividing fibroblasts'
            else:
                title = 'CommonOnly '+ct+match
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_ruptured'][match].copy()
            matched_column.name = 'Adult Ruptured ' + match
            combined_data = pd.concat([emb, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            # axis must be a keyword: pandas >= 2.0 rejects dropna(0) with a
            # TypeError, which the except below would hide as a plain failure.
            combined_data = combined_data.dropna(axis=0)
            OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,20), xsize=10, colcluster=False)
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

In [None]:
# Embryonic cell types vs adult ruptured fibroblasts; regulons missing on one
# side are kept and filled with 0.
for ct in overlapping_columns:
    try:
        print(f"Processing {ct}...")
        emb = z_normalized_regs['embryonic'][ct].copy()
        emb.name = ct if ct == 'Embryonic Chondrocytes' else 'Embryonic ' + ct
        for match in adult_rupture_fibros:
            # Shortened label for the one name that contains '/'
            title_suffix = 'Dividing fibroblasts' if match == 'Dividing fibroblasts / mural cells' else match
            title = 'NAfilled ' + ct + title_suffix
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_ruptured'][match].copy()
            matched_column.name = 'Adult Ruptured ' + match
            combined_data = pd.concat([emb, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            combined_data = combined_data.fillna(0)
            OP_regulon_clustermap(
                combined_data, title, 'z-score',
                size=(3,30), xsize=8, colcluster=False,
            )
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

In [None]:
# Embryonic cell types vs adult healthy fibroblasts, restricted to regulons
# that have a value on both sides.
for ct in overlapping_columns:
    try:
        print(f"Processing {ct}...")
        # NOTE(review): needs an 'embryonic' entry in z_normalized_regs; if it
        # is absent the except clause below reports the KeyError per cell type.
        emb = z_normalized_regs['embryonic'][ct].copy()
        if ct == 'Embryonic Chondrocytes':
            emb.name = ct
        else:
            emb.name = 'Embryonic ' + ct
        for match in adult_healthy_fibros:
            # The title ends up in the output file name, so the label with '/' is shortened.
            if match == 'Dividing fibroblasts / mural cells':
                title = 'CommonOnly '+ct+ 'Dividing fibroblasts'
            else:
                title = 'CommonOnly '+ct+match
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_healthy'][match].copy()
            matched_column.name = 'Adult Healthy ' + match
            combined_data = pd.concat([emb, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            # axis must be a keyword: pandas >= 2.0 rejects dropna(0) with a
            # TypeError, which the except below would hide as a plain failure.
            combined_data = combined_data.dropna(axis=0)
            OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,20), xsize=10, colcluster=False)
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

In [None]:
# Embryonic cell types vs adult healthy fibroblasts; regulons missing on one
# side are kept and filled with 0.
for ct in overlapping_columns:
    try:
        print(f"Processing {ct}...")
        emb = z_normalized_regs['embryonic'][ct].copy()
        emb.name = ct if ct == 'Embryonic Chondrocytes' else 'Embryonic ' + ct
        for match in adult_healthy_fibros:
            # Shortened label for the one name that contains '/'
            title_suffix = 'Dividing fibroblasts' if match == 'Dividing fibroblasts / mural cells' else match
            title = 'NAfilled ' + ct + title_suffix
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_healthy'][match].copy()
            matched_column.name = 'Adult Healthy ' + match
            combined_data = pd.concat([emb, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            combined_data = combined_data.fillna(0)
            OP_regulon_clustermap(
                combined_data, title, 'z-score',
                size=(3,30), xsize=8, colcluster=False,
            )
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

# Comparing regulons across foetal ages

In [None]:
#bin_h = pd.read_csv(os.path.join('scenic/results/adult_quad_zscore_byct_healthy.csv'), index_col=0)
#bin_r = pd.read_csv(os.path.join('scenic/results/adult_quad_zscore_byct_ruptured.csv'), index_col=0)
#z_normalized_regs['adult_quad_healthy'] = bin_h
#z_normalized_regs['adult_quad_ruptured'] = bin_r

In [None]:
# Group labels (columns) shared by every z-scored table whose key contains 'foetal'.
foetal_dfs = {key: df for key, df in z_normalized_regs.items() if 'foetal' in key}
column_sets = [set(df.columns) for df in foetal_dfs.values()]
# set.intersection() called with no operands raises TypeError; when no foetal
# table is present, fall back to an empty set so the loops below do nothing.
overlapping_columns = set.intersection(*column_sets) if column_sets else set()
overlapping_columns

In [None]:
foetal_dfs.keys()

In [None]:
# One heatmap per shared cell type: normalised regulon activity at 12, 17 and
# 20 pcw, keeping only regulons present at all three ages.
for ct in overlapping_columns:
    try:
        print(f"Processing {ct}...")
        f12 = normalized_regs['foetal12'][ct].copy()
        f17 = normalized_regs['foetal17'][ct].copy()
        f20 = normalized_regs['foetal20'][ct].copy()
        f12.name, f17.name, f20.name = '12 pcw', '17 pcw', '20 pcw'
        combined_data = pd.concat([f12, f17, f20], axis=1)
        assert not combined_data.index.duplicated().any()
        combined_data = combined_data.dropna()
        OP_regulon_clustermap(
            combined_data, ct+'norm',
            size=(3,12), colcluster=False,
        )
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

In [None]:
# One heatmap per shared cell type: z-scored regulon activity at 12, 17 and
# 20 pcw, keeping only regulons present at all three ages.
for ct in overlapping_columns:
    try:
        print(f"Processing {ct}...")
        f12 = z_normalized_regs['foetal12'][ct].copy()
        f17 = z_normalized_regs['foetal17'][ct].copy()
        f20 = z_normalized_regs['foetal20'][ct].copy()
        f12.name, f17.name, f20.name = '12 pcw', '17 pcw', '20 pcw'
        combined_data = pd.concat([f12, f17, f20], axis=1)
        assert not combined_data.index.duplicated().any()
        combined_data = combined_data.dropna()
        OP_regulon_clustermap(
            combined_data, ct, 'z-score',
            size=(3,12), colcluster=False,
        )
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

In [None]:
# One heatmap per shared cell type: z-scored regulon activity at 12, 17 and
# 20 pcw; regulons missing at an age are kept and filled with 0.
for ct in overlapping_columns:
    try:
        print(f"Processing {ct}...")
        f12 = z_normalized_regs['foetal12'][ct].copy()
        f17 = z_normalized_regs['foetal17'][ct].copy()
        f20 = z_normalized_regs['foetal20'][ct].copy()
        f12.name, f17.name, f20.name = '12 pcw', '17 pcw', '20 pcw'
        combined_data = pd.concat([f12, f17, f20], axis=1)
        assert not combined_data.index.duplicated().any()
        combined_data = combined_data.fillna(0)
        title = ct+'NaNsAs0'
        OP_regulon_clustermap(
            combined_data, title, 'z-score',
            size=(3,30), xsize=6, colcluster=False,
        )
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

# Comparing foetal and adult cell-type regulons

In [None]:
foetal_fibros = {
     'ABI3BP GAS2 Fibroblasts',
     'COL3A1 PI16 Fibroblasts',
     'COL6A6 FNDC1 Fibroblasts',
     'Chondrocytes',
     'FGF14 THBS4 Fibroblasts',
     'NEGR1 SCN7A Fibroblasts',
}

adult_rupture_fibros = {
     'COL3A1hi fibroblasts',
     'Dividing fibroblasts / mural cells'
}

adult_healthy_fibros = {
     'ABCA10hi fibroblasts',
     'FBLNhi fibroblasts',
     'NR4A1hi fibroblasts',
}

In [None]:
z_normalized_regs['foetal20']

In [None]:
for name, mat in z_normalized_regs.items():
    print(f'{name}: {len(mat.index)}')

# comparing against ruptured

In [None]:
# Foetal 20 pcw fibroblasts vs adult ruptured fibroblasts, restricted to
# regulons that have a value on both sides.
for ct in foetal_fibros:
    try:
        print(f"Processing {ct}...")
        f20 = z_normalized_regs['foetal20'][ct].copy()
        f20.name = '20pcw ' + ct
        for match in adult_rupture_fibros:
            # The title ends up in the output file name, so the label with '/' is shortened.
            if match == 'Dividing fibroblasts / mural cells':
                title = 'CommonOnly '+ct+ 'Dividing fibroblasts'
            else:
                title = 'CommonOnly '+ct+match
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_ruptured'][match].copy()
            matched_column.name = 'Adult ' + match
            combined_data = pd.concat([f20, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            # axis must be a keyword: pandas >= 2.0 rejects dropna(0) with a
            # TypeError, which the except below would hide as a plain failure.
            combined_data = combined_data.dropna(axis=0)
            OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,20), xsize=10, colcluster=False)
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

In [None]:
# Foetal 20 pcw fibroblasts vs adult ruptured fibroblasts; regulons missing on
# one side are kept and filled with 0.
for ct in foetal_fibros:
    try:
        print(f"Processing {ct}...")
        f20 = z_normalized_regs['foetal20'][ct].copy()
        f20.name = '20pcw ' + ct
        for match in adult_rupture_fibros:
            # Shortened label for the one name that contains '/'
            title_suffix = 'Dividing fibroblasts' if match == 'Dividing fibroblasts / mural cells' else match
            title = 'NAfilled ' + ct + title_suffix
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_ruptured'][match].copy()
            matched_column.name = 'Adult ' + match
            combined_data = pd.concat([f20, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            combined_data = combined_data.fillna(0)
            OP_regulon_clustermap(
                combined_data, title, 'z-score',
                size=(3,25), xsize=6, colcluster=False,
            )
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

In [None]:
# Foetal 17 pcw fibroblasts vs adult ruptured fibroblasts, restricted to
# regulons that have a value on both sides.
for ct in foetal_fibros:
    try:
        print(f"Processing {ct}...")
        f17 = z_normalized_regs['foetal17'][ct].copy()
        f17.name = '17pcw ' + ct
        for match in adult_rupture_fibros:
            # The title ends up in the output file name, so the label with '/' is shortened.
            if match == 'Dividing fibroblasts / mural cells':
                title = 'CommonOnly '+ct+ 'Dividing fibroblasts_17pcw'
            else:
                title = 'CommonOnly '+ct+match+'_17pcw'
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_ruptured'][match].copy()
            matched_column.name = 'Adult ' + match
            combined_data = pd.concat([f17, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            # axis must be a keyword: pandas >= 2.0 rejects dropna(0) with a
            # TypeError, which the except below would hide as a plain failure.
            combined_data = combined_data.dropna(axis=0)
            OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,20), xsize=10, colcluster=False)
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

In [None]:
# Foetal 17 pcw fibroblasts vs adult ruptured fibroblasts; regulons missing on
# one side are kept and filled with 0.
for ct in foetal_fibros:
    try:
        print(f"Processing {ct}...")
        f17 = z_normalized_regs['foetal17'][ct].copy()
        f17.name = '17pcw ' + ct
        for match in adult_rupture_fibros:
            # Shortened label for the one name that contains '/'
            title_suffix = 'Dividing fibroblasts_17pcw' if match == 'Dividing fibroblasts / mural cells' else match+'_17pcw'
            title = 'NAfilled ' + ct + title_suffix
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_ruptured'][match].copy()
            matched_column.name = 'Adult ' + match
            combined_data = pd.concat([f17, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            combined_data = combined_data.fillna(0)
            OP_regulon_clustermap(
                combined_data, title, 'z-score',
                size=(3,25), xsize=6, colcluster=False,
            )
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

# comparing against healthy

In [None]:
# Foetal 20 pcw fibroblasts vs adult healthy fibroblasts, restricted to
# regulons that have a value on both sides.
for ct in foetal_fibros:
    try:
        print(f"Processing {ct}...")
        f20 = z_normalized_regs['foetal20'][ct].copy()
        f20.name = '20pcw ' + ct
        for match in adult_healthy_fibros:
            title = 'CommonOnly '+ct+match
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_healthy'][match].copy()
            matched_column.name = 'Adult ' + match
            combined_data = pd.concat([f20, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            # axis must be a keyword: pandas >= 2.0 rejects dropna(0) with a
            # TypeError, which the except below would hide as a plain failure.
            combined_data = combined_data.dropna(axis=0)
            OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,20), xsize=10, colcluster=False)
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

In [None]:
# Foetal 20 pcw fibroblasts vs adult healthy fibroblasts; regulons missing on
# one side are kept and filled with 0.
for ct in foetal_fibros:
    try:
        print(f"Processing {ct}...")
        f20 = z_normalized_regs['foetal20'][ct].copy()
        f20.name = '20pcw ' + ct
        for match in adult_healthy_fibros:
            title = 'NAfilled ' + ct + match
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['adult_quad_healthy'][match].copy()
            matched_column.name = 'Adult ' + match
            combined_data = pd.concat([f20, matched_column], axis=1)
            assert not combined_data.index.duplicated().any()
            combined_data = combined_data.fillna(0)
            OP_regulon_clustermap(
                combined_data, title, 'z-score',
                size=(3,25), xsize=6, colcluster=False,
            )
    except Exception as e:
        print(f"Failed to process {ct}: {e}")

# Embryonic vs foetal vs adult

In [None]:
# Reload the binarised regulon activity tables for every stage so that
# bin_mtx_dict again holds one entry per stage.
# The first CSV column is used as the row index.
bin_mtx_embryonic = pd.read_csv(
    "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/embryonic ScAndSp/Scenic/results/embryonic.bin.csv",
    index_col=0,
)
bin_mtx_foetal12 = pd.read_csv(
    os.path.join(RESULTS_FOLDERNAME, '12w_foetal.bin.csv'),
    index_col=0,
)
bin_mtx_foetal17 = pd.read_csv(
    os.path.join(RESULTS_FOLDERNAME, '17w_foetal.bin.csv'),
    index_col=0,
)
bin_mtx_foetal20 = pd.read_csv(
    "/mnt/da8aa2c4-0136-465b-87a2-d12a59afec55/akurjan/analysis/notebooks/foetal/results/Scenic/20w_foetal.bin.csv",
    index_col=0,
)

bin_mtx_dict = dict(
    embryonic=bin_mtx_embryonic,
    foetal12=bin_mtx_foetal12,
    foetal17=bin_mtx_foetal17,
    foetal20=bin_mtx_foetal20,
)

In [None]:
# `foetal_adata` was removed earlier with `del foetal_adata`, so slicing it
# again here raises NameError on a top-to-bottom run. The per-age copies
# foetal12 / foetal17 / foetal20 created before the deletion are reused.
adata_dict = {
    'embryonic': embryonic_adata,
    'foetal12': foetal12,
    'foetal17': foetal17,
    'foetal20': foetal20
}

In [None]:
# Per dataset: fraction of cells in each group with a regulon switched ON
# (normalized_regs) and the column-wise z-score of that fraction (z_normalized_regs).
normalized_regs = {}
z_normalized_regs = {}

for name, matrix in bin_mtx_dict.items():
    # The embryonic and foetal objects store their annotation under different keys.
    if name == 'embryonic':
        unscolors = 'cell_type_colors'
        groupcolors = 'cell_type'
    else:
        unscolors = 'C_scANVI_colors'
        groupcolors = 'C_scANVI'
    # regulons x cells, each cell-ID column relabelled with its group label
    bin_mtx_clustered = matrix.T.rename(columns=adata_dict[name].obs[groupcolors].to_dict())
    bin_mtx_clustered.to_csv(os.path.join(RESULTS_FOLDERNAME, f'{name}_binarized_regulon_activity_newannotations.csv'))
    # Number of ON cells per group. Grouping the transposed frame by its index
    # is equivalent to groupby(..., axis=1), which is deprecated since pandas 2.1.
    regulon_presence_summary = bin_mtx_clustered.T.groupby(level=0).sum().T
    regulon_presence_summary.to_csv(os.path.join(RESULTS_FOLDERNAME, f'{name}_binarized_regulon_summaryperct.csv'))
    cell_counts = adata_dict[name].obs[groupcolors].value_counts().to_dict()
    normalized_regulon_presence = regulon_presence_summary.copy()
    # A KeyError here means a column kept its cell ID, i.e. the cell is
    # missing from adata_dict[name].obs.
    for column in normalized_regulon_presence.columns:
        normalized_regulon_presence[column] /= cell_counts[column]
    normalized_regs[name] = normalized_regulon_presence
    # z-score each group column across regulons
    normalized_regulon_presence_z = (normalized_regulon_presence - normalized_regulon_presence.mean()) / normalized_regulon_presence.std()
    z_normalized_regs[name] = normalized_regulon_presence_z

In [None]:
# Adult quadriceps z-score tables (healthy and ruptured), registered next to
# the developmental datasets in z_normalized_regs.
bin_h = pd.read_csv('scenic/results/adult_quad_zscore_byct_healthy.csv', index_col=0)
bin_r = pd.read_csv('scenic/results/adult_quad_zscore_byct_ruptured.csv', index_col=0)
z_normalized_regs.update(adult_quad_healthy=bin_h, adult_quad_ruptured=bin_r)

In [None]:
z_normalized_regs.keys()

In [None]:
# Cell-type selection for the cross-stage regulon comparison loops.
# Each name holds the set of labels to plot for one dataset; toggle an entry by
# (un)commenting it. `set([...])` is used instead of a `{...}` literal because a
# literal with every entry commented out is an empty dict, not an empty set.
# Sets are unordered, so with several active entries the processing order is not fixed.
embryonic_fibros = set([
    #'COL6A6 FSTL1 DCLK1 Progenitors',
    #'Embryonic Chondrocytes',
    'MKX TNMD ABI3BP GAS2 Progenitors',
    #'MSC Precursors',
    #'RUNX2 THBS2 COL11A1 Progenitors',
    #'SCX FGF14 THBS4 FSTL5 Progenitors',
    #'SOX5 CREB5 Chondrocyte Progenitors'
])

foetal_fibros = set([
     'ABI3BP GAS2 Fibroblasts',
     #'COL3A1 PI16 Fibroblasts',
     #'COL6A6 FNDC1 Fibroblasts',
     #'Chondrocytes',
     #'FGF14 THBS4 Fibroblasts',
     #'NEGR1 SCN7A Fibroblasts',
])

# Nothing selected: stays an empty set.
adult_rupture_fibros = set([
     #'COL3A1hi fibroblasts',
     #'Dividing fibroblasts / mural cells'
])

adult_healthy_fibros = set([
     #'ABCA10hi fibroblasts',
     'FBLNhi fibroblasts',
     #'NR4A1hi fibroblasts',
])

In [None]:
# "NAfilled" comparison: embryonic vs foetal (20 pcw table) vs healthy adult.
# One clustermap per combination of the currently selected labels; rows missing
# from any of the three columns are set to 0 rather than dropped.
for ct in embryonic_fibros:
    try:
        print(f"Processing {ct}...")
        # 'Embryonic Chondrocytes' already names its stage, so it gets no extra prefix.
        emb = z_normalized_regs['embryonic'][ct].rename(
            ct if ct == 'Embryonic Chondrocytes' else 'Embryonic ' + ct
        )
        for match in foetal_fibros:
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['foetal20'][match].rename('Foetal 20pcw ' + match)
            for match2 in adult_healthy_fibros:
                # The label containing '/' gets a fixed short title (presumably because the
                # title ends up in a file name -- confirm in OP_regulon_clustermap).
                title = (
                    'NAfilled '+ct+ 'Dividing fibroblasts'
                    if match2 == 'Dividing fibroblasts / mural cells'
                    else 'NAfilled '+ct+match+match2
                )
                print(f"Matching with {match2}...")
                matched_column2 = z_normalized_regs['adult_quad_healthy'][match2].rename('Adult Healthy ' + match2)
                # Outer-align the three columns on their row labels.
                combined_data = pd.concat([emb, matched_column, matched_column2], axis=1)
                assert not combined_data.index.duplicated().any()
                combined_data = combined_data.fillna(0)
                OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,30), xsize=6, colcluster=False)
    except Exception as e:
        # Best effort: report the failure and carry on with the next embryonic label.
        print(f"Failed to process {ct}: {e}")

In [None]:
# "CommonOnly" comparison: embryonic vs foetal (20 pcw table) vs healthy adult.
# One clustermap per combination of the currently selected labels; rows lacking a
# value in any of the three columns are dropped instead of zero-filled.
for ct in embryonic_fibros:
    try:
        print(f"Processing {ct}...")
        emb = z_normalized_regs['embryonic'][ct].copy()
        # 'Embryonic Chondrocytes' already names its stage, so it gets no extra prefix.
        if ct == 'Embryonic Chondrocytes':
            emb.name = ct
        else:
            emb.name = 'Embryonic ' + ct
        for match in foetal_fibros:
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['foetal20'][match].copy()
            matched_column.name = 'Foetal 20pcw ' + match
            for match2 in adult_healthy_fibros:
                # The label containing '/' gets a fixed short title.
                if match2 == 'Dividing fibroblasts / mural cells':
                    title = 'CommonOnly '+ct+ 'Dividing fibroblasts'
                else:
                    title = 'CommonOnly '+ct+match+match2
                print(f"Matching with {match2}...")
                matched_column2 = z_normalized_regs['adult_quad_healthy'][match2].copy()
                matched_column2.name = 'Adult Healthy ' + match2
                # Outer-align the three columns on their row labels.
                combined_data = pd.concat([emb, matched_column, matched_column2], axis=1)
                assert not combined_data.index.duplicated().any()
                # Keep only rows present in all three columns. `axis` must be passed by
                # keyword: `dropna(0)` raises TypeError on pandas >= 2.0, and the broad
                # `except` below would then report a failure instead of plotting.
                combined_data = combined_data.dropna(axis=0)
                OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,15), xsize=10, colcluster=False)
    except Exception as e:
        # Best effort: report the failure and carry on with the next embryonic label.
        print(f"Failed to process {ct}: {e}")

In [None]:
# Alternative cell-type selection for the cross-stage regulon comparison loops.
# NOTE(review): the next cell reassigns all four names before any loop reads them, so on
# a top-to-bottom run this selection has no effect; it only matters when run manually.
# `set([...])` is used instead of a `{...}` literal because a literal with every entry
# commented out is an empty dict, not an empty set.
embryonic_fibros = set([
    'COL6A6 FSTL1 DCLK1 Progenitors',
    #'Embryonic Chondrocytes',
    #'MKX TNMD ABI3BP GAS2 Progenitors',
    #'MSC Precursors',
    #'RUNX2 THBS2 COL11A1 Progenitors',
    #'SCX FGF14 THBS4 FSTL5 Progenitors',
    #'SOX5 CREB5 Chondrocyte Progenitors'
])

foetal_fibros = set([
     #'ABI3BP GAS2 Fibroblasts',
     'COL3A1 PI16 Fibroblasts',
     #'COL6A6 FNDC1 Fibroblasts',
     #'Chondrocytes',
     #'FGF14 THBS4 Fibroblasts',
     #'NEGR1 SCN7A Fibroblasts',
])

# Nothing selected: stays an empty set.
adult_rupture_fibros = set([
     #'COL3A1hi fibroblasts',
     #'Dividing fibroblasts / mural cells'
])

adult_healthy_fibros = set([
     'ABCA10hi fibroblasts',
     #'FBLNhi fibroblasts',
     #'NR4A1hi fibroblasts',
])

In [None]:
# Cell-type selection used by the ruptured-adult comparison loops that follow.
# Each name holds the set of labels to plot for one dataset; toggle an entry by
# (un)commenting it. `set([...])` is used instead of a `{...}` literal because a
# literal with every entry commented out is an empty dict, not an empty set.
# Sets are unordered, so with several active entries the processing order is not fixed.
embryonic_fibros = set([
    #'COL6A6 FSTL1 DCLK1 Progenitors',
    #'Embryonic Chondrocytes',
    #'MKX TNMD ABI3BP GAS2 Progenitors',
    #'MSC Precursors',
    'RUNX2 THBS2 COL11A1 Progenitors',
    #'SCX FGF14 THBS4 FSTL5 Progenitors',
    #'SOX5 CREB5 Chondrocyte Progenitors'
])

foetal_fibros = set([
     #'ABI3BP GAS2 Fibroblasts',
     #'COL3A1 PI16 Fibroblasts',
     'COL6A6 FNDC1 Fibroblasts',
     'Chondrocytes',
     #'FGF14 THBS4 Fibroblasts',
     #'NEGR1 SCN7A Fibroblasts',
])

adult_rupture_fibros = set([
     'COL3A1hi fibroblasts',
     #'Dividing fibroblasts / mural cells'
])

# Nothing selected: stays an empty set.
adult_healthy_fibros = set([
     #'ABCA10hi fibroblasts',
     #'FBLNhi fibroblasts',
     #'NR4A1hi fibroblasts',
])

In [None]:
# "NAfilled" comparison: embryonic vs foetal (20 pcw table) vs ruptured adult.
# One clustermap per combination of the currently selected labels; rows missing
# from any of the three columns are set to 0 rather than dropped.
for ct in embryonic_fibros:
    try:
        print(f"Processing {ct}...")
        # 'Embryonic Chondrocytes' already names its stage, so it gets no extra prefix.
        emb = z_normalized_regs['embryonic'][ct].rename(
            ct if ct == 'Embryonic Chondrocytes' else 'Embryonic ' + ct
        )
        for match in foetal_fibros:
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['foetal20'][match].rename('Foetal 20pcw ' + match)
            for match2 in adult_rupture_fibros:
                # The label containing '/' gets a fixed short title (presumably because the
                # title ends up in a file name -- confirm in OP_regulon_clustermap).
                title = (
                    'NAfilled '+ct+ 'Dividing fibroblasts'
                    if match2 == 'Dividing fibroblasts / mural cells'
                    else 'NAfilled '+ct+match+'_20pcw_'+match2
                )
                print(f"Matching with {match2}...")
                matched_column2 = z_normalized_regs['adult_quad_ruptured'][match2].rename('Adult Ruptured ' + match2)
                # Outer-align the three columns on their row labels.
                combined_data = pd.concat([emb, matched_column, matched_column2], axis=1)
                assert not combined_data.index.duplicated().any()
                combined_data = combined_data.fillna(0)
                OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,30), xsize=6, colcluster=False)
    except Exception as e:
        # Best effort: report the failure and carry on with the next embryonic label.
        print(f"Failed to process {ct}: {e}")

In [None]:
# "NAfilled" comparison: embryonic vs foetal (17 pcw table) vs ruptured adult.
# One clustermap per combination of the currently selected labels; rows missing
# from any of the three columns are set to 0 rather than dropped.
for ct in embryonic_fibros:
    try:
        print(f"Processing {ct}...")
        # 'Embryonic Chondrocytes' already names its stage, so it gets no extra prefix.
        emb = z_normalized_regs['embryonic'][ct].rename(
            ct if ct == 'Embryonic Chondrocytes' else 'Embryonic ' + ct
        )
        for match in foetal_fibros:
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['foetal17'][match].rename('Foetal 17pcw ' + match)
            for match2 in adult_rupture_fibros:
                # The label containing '/' gets a fixed short title (presumably because the
                # title ends up in a file name -- confirm in OP_regulon_clustermap).
                title = (
                    'NAfilled '+ct+ 'Dividing fibroblasts'
                    if match2 == 'Dividing fibroblasts / mural cells'
                    else 'NAfilled '+ct+match+'_17pcw_'+match2
                )
                print(f"Matching with {match2}...")
                matched_column2 = z_normalized_regs['adult_quad_ruptured'][match2].rename('Adult Ruptured ' + match2)
                # Outer-align the three columns on their row labels.
                combined_data = pd.concat([emb, matched_column, matched_column2], axis=1)
                assert not combined_data.index.duplicated().any()
                combined_data = combined_data.fillna(0)
                OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,30), xsize=6, colcluster=False)
    except Exception as e:
        # Best effort: report the failure and carry on with the next embryonic label.
        print(f"Failed to process {ct}: {e}")

In [None]:
# "CommonOnly" comparison: embryonic vs foetal (20 pcw table) vs ruptured adult.
# One clustermap per combination of the currently selected labels; rows lacking a
# value in any of the three columns are dropped instead of zero-filled.
for ct in embryonic_fibros:
    try:
        print(f"Processing {ct}...")
        emb = z_normalized_regs['embryonic'][ct].copy()
        # 'Embryonic Chondrocytes' already names its stage, so it gets no extra prefix.
        if ct == 'Embryonic Chondrocytes':
            emb.name = ct
        else:
            emb.name = 'Embryonic ' + ct
        for match in foetal_fibros:
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['foetal20'][match].copy()
            matched_column.name = 'Foetal 20pcw ' + match
            for match2 in adult_rupture_fibros:
                # The label containing '/' gets a fixed short title.
                if match2 == 'Dividing fibroblasts / mural cells':
                    title = 'CommonOnly '+ct+ 'Dividing fibroblasts'
                else:
                    title = 'CommonOnly '+ct+match+'_20pcw_'+match2
                print(f"Matching with {match2}...")
                matched_column2 = z_normalized_regs['adult_quad_ruptured'][match2].copy()
                matched_column2.name = 'Adult Ruptured ' + match2
                # Outer-align the three columns on their row labels.
                combined_data = pd.concat([emb, matched_column, matched_column2], axis=1)
                assert not combined_data.index.duplicated().any()
                # Keep only rows present in all three columns. `axis` must be passed by
                # keyword: `dropna(0)` raises TypeError on pandas >= 2.0, and the broad
                # `except` below would then report a failure instead of plotting.
                combined_data = combined_data.dropna(axis=0)
                OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,15), xsize=10, colcluster=False)
    except Exception as e:
        # Best effort: report the failure and carry on with the next embryonic label.
        print(f"Failed to process {ct}: {e}")

In [None]:
# "CommonOnly" comparison: embryonic vs foetal (17 pcw table) vs ruptured adult.
# One clustermap per combination of the currently selected labels; rows lacking a
# value in any of the three columns are dropped instead of zero-filled.
for ct in embryonic_fibros:
    try:
        print(f"Processing {ct}...")
        emb = z_normalized_regs['embryonic'][ct].copy()
        # 'Embryonic Chondrocytes' already names its stage, so it gets no extra prefix.
        if ct == 'Embryonic Chondrocytes':
            emb.name = ct
        else:
            emb.name = 'Embryonic ' + ct
        for match in foetal_fibros:
            print(f"Matching with {match}...")
            matched_column = z_normalized_regs['foetal17'][match].copy()
            matched_column.name = 'Foetal 17pcw ' + match
            for match2 in adult_rupture_fibros:
                # The label containing '/' gets a fixed short title.
                if match2 == 'Dividing fibroblasts / mural cells':
                    title = 'CommonOnly '+ct+ 'Dividing fibroblasts'
                else:
                    title = 'CommonOnly '+ct+match+'_17pcw_'+match2
                print(f"Matching with {match2}...")
                matched_column2 = z_normalized_regs['adult_quad_ruptured'][match2].copy()
                matched_column2.name = 'Adult Ruptured ' + match2
                # Outer-align the three columns on their row labels.
                combined_data = pd.concat([emb, matched_column, matched_column2], axis=1)
                assert not combined_data.index.duplicated().any()
                # Keep only rows present in all three columns. `axis` must be passed by
                # keyword: `dropna(0)` raises TypeError on pandas >= 2.0, and the broad
                # `except` below would then report a failure instead of plotting.
                combined_data = combined_data.dropna(axis=0)
                OP_regulon_clustermap(combined_data, title, 'z-score', size=(3,15), xsize=10, colcluster=False)
    except Exception as e:
        # Best effort: report the failure and carry on with the next embryonic label.
        print(f"Failed to process {ct}: {e}")