## Load env

In [None]:
# Load packages
import scanpy as sc
import matplotlib as mpl
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.colors as mcolors
import pandas as pd
import numpy as np
import anndata as ad
import liana as li
import decoupler as dc
import omnipath as op 

In [None]:
# Global scanpy / seaborn plotting configuration
sc.settings.verbosity = 3  # 0 = errors, 1 = warnings, 2 = info, 3 = hints
sc.logging.print_header()

# Figure defaults, collected in one mapping and handed to scanpy in a single call
figure_params = dict(
    dpi=100,
    color_map="viridis",
    frameon=True,
    transparent=True,
    dpi_save=800,
    facecolor="None",
    format="pdf",
    figsize=[4, 4],
)
sc.set_figure_params(**figure_params)

# White seaborn theme with the axis grid switched off
sns.set_style("whitegrid", {'axes.grid': False})

# Directory scanpy writes saved figures to
sc.settings.figdir = 'lr_analysis/'

# Fix NumPy's global RNG seed for reproducibility
np.random.seed(0)

In [None]:
# Hex colour assigned to each sample label
colour_dict_sample = dict(
    DMSO_PDO='#8383FF',
    SN38_PDO='#531B93',
    DMSO_PDO_CAF='#E39097',
    SN38_PDO_CAF='#941751',
    DMSO_CAF='#C0C0C0',
    SN38_CAF='#5E5E5E',
)

In [None]:
# Import data
# h5ad files exported from the preprocessing pipeline.
# NOTE(review): the ADT path previously lacked the '/' between the directory and
# the file name; it is assumed to sit next to the RNA file - confirm on disk.
input_file_path_rna = '../02_preprocessing_multimodal/ex0015_pdo_rna_adata.h5ad'
input_file_path_adt = '../02_preprocessing_multimodal/ex0015_pdo_adt_adata.h5ad'

signal_adata_rna = sc.read_h5ad(input_file_path_rna)
signal_adata_adt = sc.read_h5ad(input_file_path_adt)

# Inspect data shape
print(signal_adata_rna, ": data shape:", signal_adata_rna.shape)
print(signal_adata_adt, ": data shape:", signal_adata_adt.shape)

In [None]:
def _stringify_and_uniquify_var_names(adata):
    """Cast the variable names of `adata` to str and de-duplicate them in place."""
    adata.var_names = adata.var_names.astype(str)
    adata.var_names_make_unique()


# RNA observation names become "<barcode_seq>_<sublib_index>"
rna_barcodes = signal_adata_rna.obs['barcode_seq'].astype('str')
rna_sublibs = signal_adata_rna.obs['sublib_index'].astype('str')
signal_adata_rna.obs_names = rna_barcodes + '_' + rna_sublibs

# Keep the ADT observation names available as an obs column
signal_adata_adt.obs['barcode_seq'] = signal_adata_adt.obs_names

# Make names unique on the main RNA object ...
_stringify_and_uniquify_var_names(signal_adata_rna)

# ... and on a materialised copy of its .raw, which is then stored back as .raw
raw_adata = signal_adata_rna.raw.to_adata()
_stringify_and_uniquify_var_names(raw_adata)
signal_adata_rna.raw = raw_adata



# Generate the LR matrix for Liana

In [None]:
# Check which methods LIANA makes available (inspection only; result is displayed, not stored)
li.mt.show_methods()

In [None]:
# Load the intercellular interaction network table from CSV
# (presumably exported from OmniPath, judging by the file name - TODO confirm)
lr_network_df = pd.read_csv('lr_analysis/omni_intercell_network.csv')

In [None]:
# Filter for secreted molecules: keep rows whose source is flagged as secreted
source_is_secreted = lr_network_df['secreted_intercell_source'] == True
lr_network_df_secreted = lr_network_df[source_is_secreted]

# Show how many rows remain per parent category of the source
lr_network_df_secreted['parent_intercell_source'].value_counts()

In [None]:
# Restrict the secreted senders to the chosen parent categories.
# Only 'ligand' is used here; further categories (e.g. cell_surface_ligand,
# secreted_enzyme, secreted_receptor) can be appended to the list.
ligand_functional_classes = ['ligand']
source_in_class = lr_network_df_secreted['parent_intercell_source'].isin(ligand_functional_classes)
lr_network_df_secreted_ligands = lr_network_df_secreted[source_in_class]

# Persist the filtered sender table
lr_network_df_secreted_ligands.to_csv('lr_analysis/omni_intercell_network_secreted_ligand_senders.csv')

In [None]:
# Sanity check: row counts per parent category after the ligand filter
lr_network_df_secreted_ligands['parent_intercell_source'].value_counts()

## Generate the ULM inferred Regulons

In [None]:
# All PDO cells.
# Take an explicit copy: the subset is modified later (dc.run_ulm is run on it),
# and modifying an AnnData view forces an implicit copy with a warning.
pdo_adata = signal_adata_rna[signal_adata_rna.obs['cell_type'].isin(['PDO'])].copy()

In [None]:
# Inspect how many PDO cells fall into each cell_type_condition
pdo_adata.obs['cell_type_condition'].value_counts()

In [None]:
# Load the TF regulon network (CollecTRI, human) via decoupler.
# split_complexes=False is passed through as-is; presumably this keeps complexes
# as single sources rather than splitting them into subunits - TODO confirm.
net = dc.get_collectri(organism='human', split_complexes=False)
# Display the network table
net

In [None]:
# Run ULM Enrichment Analysis on the PDO cells using the regulon network `net`.
# The return value is not captured; the following cell reads
# pdo_adata.obsm['ulm_estimate'], which is presumably written by this call.
dc.run_ulm(
    mat=pdo_adata,
    net=net,
    source='source',   # column of `net` naming the regulator
    target='target',   # column of `net` naming the regulated gene
    weight='weight',   # column of `net` holding the interaction weight
    verbose=True
)

In [None]:
# Pull the 'ulm_estimate' entry of pdo_adata.obsm out through dc.get_acts
acts = dc.get_acts(pdo_adata, obsm_key='ulm_estimate')
# Display the result
acts

In [None]:
# Rank the sources in `acts` across sample_id groups, using 'DMSO_PDO' as the reference group
df = dc.rank_sources_groups(acts, groupby='sample_id', reference='DMSO_PDO', method='t-test_overestim_var')
# Display the ranking table
df

In [None]:
# Save the ranking computed against the 'DMSO_PDO' reference
df.to_csv('lr_analysis/all_samples_deregulated_TFs_pdo.csv')

In [None]:
# Same ranking, but with 'DMSO_PDO_CAF' as the reference group; saved to its own CSV.
# NOTE(review): the variable is called `df_mono` although the reference is the
# PDO_CAF sample - confirm the naming is intentional.
df_mono = dc.rank_sources_groups(acts, groupby='sample_id', reference='DMSO_PDO_CAF', method='t-test_overestim_var')
df_mono.to_csv('lr_analysis/all_samples_deregulated_TFs_pdo_caf.csv')

In [None]:
# TF sources to display, grouped under the labels used in the plot
markers = dict(
    proCSC=['LEF1', 'ASCL2', 'SOX9'],
    revCSC=['YAP1', 'TEAD1', 'SMAD6', 'AP1', 'JUND', 'NFKB1', 'STAT6'],
)

# Matrix plot of the selected activities per sample_id, scaled per variable
sc.pl.matrixplot(
    acts,
    markers,
    groupby='sample_id',
    dendrogram=False,
    standard_scale='var',
    colorbar_title='Z-scores',
    cmap='RdBu_r',
    save='pdo_TF_regulon_activity.png',
)

## Run the LR Liana tool

In [None]:
# Run rank_aggregate across the individual cultures
# Not run here because it takes a long time
#li.mt.rank_aggregate(signal_adata_rna, groupby='cell_type_condition', expr_prop=0.1, verbose=True)

In [None]:
# Load the LIANA results from CSV instead of recomputing them
# (the rank_aggregate call above is commented out)
liana_res_all = pd.read_csv('lr_analysis/ex0015_liana_lr_analysis_all.csv')

In [None]:
# Keep only LIANA interactions whose ligand is among the secreted ligand senders
sender_ligands = lr_network_df_secreted_ligands['genesymbol_intercell_source']
ligand_is_sender = liana_res_all['ligand_complex'].isin(sender_ligands)
liana_res_all_ligands = liana_res_all[ligand_is_sender]

# Persist the filtered interaction table
liana_res_all_ligands.to_csv('lr_analysis/ex0015_liana_lr_analysis_all_ligands.csv')

In [None]:
# Attach the ligand-filtered LIANA table to the RNA AnnData under .uns['liana_res']
signal_adata_rna.uns['liana_res'] = liana_res_all_ligands

## Extract relevant interactions

In [None]:
# Populations allowed as the source (sender) of an interaction
cc_interactions_out = ['Fibroblast_DMSO_PDO_CAF', 'PDO_DMSO_PDO_CAF', 'PDO_DMSO_PDO']
# Populations allowed as the target (receiver) of an interaction
cc_interactions_in = ['PDO_DMSO_PDO_CAF', 'PDO_DMSO_PDO']

# Step 1: restrict to the selected senders
source_selected = liana_res_all_ligands['source'].isin(cc_interactions_out)
liana_res_all_ligands_filter_out = liana_res_all_ligands[source_selected]

# Step 2: of those, keep the interactions that reach the selected receivers
target_selected = liana_res_all_ligands_filter_out['target'].isin(cc_interactions_in)
liana_res_all_ligands_filter = liana_res_all_ligands_filter_out[target_selected]

In [None]:
# Distinct ligand_complex values that survive the sender/receiver filter
ligand_markers = liana_res_all_ligands_filter['ligand_complex'].unique()

In [None]:
# Load two differential-expression tables from CSV.
# NOTE(review): judging by the file names these are CAF and PDO DEG tables;
# later cells assume exactly five columns with 'names' first - confirm.
caf_cc_deg_df = pd.read_csv("lr_analysis/caf_dmso_cc_degs.csv")
pdo_mono_deg_df = pd.read_csv("lr_analysis/pdo_dmso_mono_degs.csv")

In [None]:
# Keep only the DEG rows whose gene name is one of the candidate ligands
pdo_is_ligand = pdo_mono_deg_df['names'].isin(ligand_markers)
caf_is_ligand = caf_cc_deg_df['names'].isin(ligand_markers)

pdo_mono_deg_df_filter = pdo_mono_deg_df.loc[pdo_is_ligand]
caf_cc_deg_df_filter = caf_cc_deg_df.loc[caf_is_ligand]

In [None]:
# Statistic columns that follow 'names' in both DEG tables
_DEG_STAT_FIELDS = ('scores', 'logfoldchanges', 'pvals', 'pvals_adj')


def _prefixed_deg_columns(prefix):
    """Return the column labels 'names' plus each statistic tagged with `prefix`."""
    return ['names'] + [f'{prefix}_{field}' for field in _DEG_STAT_FIELDS]


# Sort each table by gene name, then tag its statistic columns with the table's origin
pdo_mono_deg_df_filter_sorted = pdo_mono_deg_df_filter.sort_values(by='names')
pdo_mono_deg_df_filter_sorted.columns = _prefixed_deg_columns('pdo')

caf_cc_deg_df_filter_sorted = caf_cc_deg_df_filter.sort_values(by='names')
caf_cc_deg_df_filter_sorted.columns = _prefixed_deg_columns('cc')

In [None]:
# Join the PDO and CAF tables on gene name (pd.merge default: inner join),
# giving one row per ligand with pdo_* and cc_* statistics side by side
merged_de_ligand_df = pd.merge(pdo_mono_deg_df_filter_sorted, caf_cc_deg_df_filter_sorted, on='names')

In [None]:
# Define a category assignment based on adjusted p-values and log-fold-change quadrant
def color_point(row, alpha=0.05):
    """Assign a ligand to a category label from its PDO and CAF statistics.

    Parameters
    ----------
    row : mapping
        Must provide 'cc_pvals_adj', 'pdo_pvals_adj', 'pdo_logfoldchanges'
        and 'cc_logfoldchanges' (e.g. a DataFrame row passed by
        ``DataFrame.apply(..., axis=1)``).
    alpha : float, default 0.05
        Significance threshold for the adjusted p-values. A value is
        significant when ``< alpha`` and non-significant when ``>= alpha``.

    Returns
    -------
    str
        One of 'Q1_shared_pro_caf', 'Q5_caf_only', 'Q3_revCSC_only',
        'Q4_proCSC_only', 'Q6_shared_pro_rev' or 'Q7_non_significant'.

    Notes
    -----
    Rows whose CAF p-value is not significant, rows lying exactly on an axis
    (a log fold change of 0) and rows with NaN in any field used are labelled
    'Q7_non_significant'.
    """
    # The CAF change must be significant for any quadrant label to apply
    if not row['cc_pvals_adj'] < alpha:
        return 'Q7_non_significant'

    pdo_lfc = row['pdo_logfoldchanges']
    cc_lfc = row['cc_logfoldchanges']
    pdo_p = row['pdo_pvals_adj']

    # Explicit two-sided tests so that NaN satisfies neither side
    pdo_sig, pdo_nonsig = pdo_p < alpha, pdo_p >= alpha
    pdo_up, pdo_down = pdo_lfc > 0, pdo_lfc < 0
    cc_up, cc_down = cc_lfc > 0, cc_lfc < 0

    # NaN values or points exactly on an axis belong to no quadrant
    if not (pdo_sig or pdo_nonsig) or not (pdo_up or pdo_down) or not (cc_up or cc_down):
        return 'Q7_non_significant'

    if cc_up:
        # Up in CAF: shared only when PDO is also significantly up, otherwise CAF-only
        if pdo_up and pdo_sig:
            return 'Q1_shared_pro_caf'
        return 'Q5_caf_only'

    # Down in CAF from here on
    if not pdo_sig:
        return 'Q6_shared_pro_rev'
    return 'Q4_proCSC_only' if pdo_up else 'Q3_revCSC_only'

# Label every ligand row with its category; stored in the 'color' column used for plotting and filtering
merged_de_ligand_df['color'] = merged_de_ligand_df.apply(color_point, axis=1)

In [None]:
# Hex colour drawn for each category stored in the 'color' column
colors = {
    'Q1_shared_pro_caf': '#010203',
    'Q5_caf_only': '#008000',
    'Q3_revCSC_only': '#010203',
    'Q6_shared_pro_rev': '#010203',
    'Q4_proCSC_only': '#010203',
    'Q7_non_significant': '#DBDBDB',
}

fig, ax = plt.subplots()

# One scatter call per category, in order of first appearance in the table
for color in merged_de_ligand_df['color'].unique():
    subset = merged_de_ligand_df[merged_de_ligand_df['color'] == color]
    ax.scatter(
        subset['pdo_logfoldchanges'],
        subset['cc_logfoldchanges'],
        s=8,
        c=subset['color'].map(colors),
    )

# No grid lines; fixed axis ranges
ax.grid(False)
ax.set(xlim=(-2.5, 2.5), ylim=(-6, 8))

# Dashed guide lines: horizontal at y=0.5 and y=-0.7, vertical at x=0
for y_position in (0.5, -0.7):
    ax.axhline(y=y_position, color='black', linestyle='dashed', linewidth=0.5)
ax.axvline(x=0, color='black', linestyle='dashed', linewidth=0.5)

# Axis labels
ax.set_xlabel('PDO-specificity', fontsize=15)
ax.set_ylabel('CAF-specificity', fontsize=15)

# Write the figure to disk
fig.savefig('lr_analysis/deg_ligand_specificity.pdf', dpi=300)

In [None]:
# Select the ligands labelled as CAF-only ('Q5_caf_only') and order them by
# descending CAF log fold change.
caf_paracrine_id = ['Q5_caf_only']
# sort_values returns a new frame; sorting the boolean-indexed slice in place
# would raise pandas' SettingWithCopyWarning.
caf_paracrine_df = (
    merged_de_ligand_df[merged_de_ligand_df['color'].isin(caf_paracrine_id)]
    .sort_values(by='cc_logfoldchanges', ascending=False)
)

# Gene names of the selected ligands, strongest CAF up-regulation first
caf_paracrine_factors = caf_paracrine_df['names'].to_list()

In [None]:
# Prepare the heatmap input: one column of CAF log fold changes,
# indexed by gene name and sorted in ascending order
hm_df = merged_de_ligand_df.set_index("names")
hm_df_lfc = hm_df.loc[:, ['cc_logfoldchanges']].sort_values(by='cc_logfoldchanges')

# Display the table
hm_df_lfc

In [None]:
# Gene names in ascending order of CAF log fold change ...
pdo_caf_ligs = hm_df_lfc.index.to_list()
# ... and the same names in descending order
ligands = pdo_caf_ligs[::-1]

In [None]:
# Index of the heatmap table (not used by the plot call below)
pdo_caf_images = hm_df_lfc.index

# Dot plot of the ordered ligands per cell_type_condition, scaled per variable
sc.pl.dotplot(
    signal_adata_rna,
    ligands,
    groupby='cell_type_condition',
    dendrogram=False,
    cmap='Greens',
    standard_scale='var',
    save='_filtered_LR_interactions.pdf',
)

In [None]:
# Narrow single-column heatmap of the CAF log fold changes
fig, ax = plt.subplots(figsize=(1, 8))

# plt.get_cmap replaces mpl.cm.get_cmap, which was deprecated in Matplotlib 3.7
# and removed in 3.9
colormap = plt.get_cmap('PiYG')

# Diverging normalisation centred on 0 with asymmetric limits (-5 .. 7)
offset = mcolors.TwoSlopeNorm(vmin=-5, vcenter=0, vmax=7)

# Draw on the axes created above rather than relying on the implicit current axes
sns.heatmap(hm_df_lfc, cmap=colormap, norm=offset, linewidths=0.05, linecolor='grey', ax=ax)

fig.savefig('lr_analysis/cc_logFC.pdf', dpi=600, transparent=True, bbox_inches='tight')
plt.show()