In [None]:
'''
Goal: Check acz vessel_size_gradient
'''

In [None]:
import scanpy as sc
import scanpy.external as sce
import os 
import pandas as pd 
import numpy as np
import seaborn as sns
from functions import compare_obs_values_within_groups_to_excel
import matplotlib.pyplot as plt
import palantir
# from statannotations.Annotator import Annotator

# Label for this analysis. NOTE(review): not referenced again in the visible cells.
adata_name='venous_ec'
# Output directory for the figures saved below; also registered as scanpy's figdir.
figures = "data/figures/figures/acz_iscience_vessel_size"
# NOTE(review): unused in the visible cells, which read from absolute paths instead.
data = "data/single_cell_files/scanpy_files"

os.makedirs(figures, exist_ok=True)
# Default scanpy figure settings for the notebook (small panels, 300 dpi when saved).
sc.set_figure_params(dpi_save=300, fontsize=10, figsize=(1.5,1.5))
sc.settings.figdir = figures
# White seaborn style, with tick marks kept on the bottom and left axes.
sns.set_style('white', rc={
    'xtick.bottom': True,
    'ytick.left': True,
})
plt.rcParams["font.family"] = "Arial"
# Marker size passed as `size=` to the UMAP plots further down.
size=15

In [None]:
# Load the perinatal lung dataset, normalize each cell to 1e4 total counts and log1p-transform.
# NOTE(review): absolute, machine-specific path; presumably the file holds raw counts - confirm.
adata = sc.read('/home/carsten/alvira_bioinformatics/data/alvira_sc_data/geo_versions/perinatal_lung.h5ad')
sc.pp.normalize_total(adata,target_sum=1e4)
sc.pp.log1p(adata)
# Keep a copy of the log-normalized matrix; it is copied back into .X after clustering.
adata.layers["log1p"] = adata.X.copy()
# Display the available 'Cell Subtype' labels.
sorted(adata.obs['Cell Subtype'].unique().tolist())

In [None]:
## read in cell cycle genes and score
# Use a context manager so the file handle is closed as soon as the list is read.
with open('/home/carsten/alvira_bioinformatics/venous_ec_scrnaseq/data/outside_data/regev_lab_cell_cycle_genes.txt') as gene_file:
    cell_cycle_genes = [x.strip() for x in gene_file]
# Convert each symbol to first-letter-capitalized form (e.g. 'MCM5' -> 'Mcm5').
cell_cycle_genes = [x.lower().capitalize() for x in cell_cycle_genes]
# The split assumes the file lists 43 S-phase genes first, followed by the G2/M genes - TODO confirm.
s_genes = cell_cycle_genes[:43]
g2m_genes = cell_cycle_genes[43:]
# Genes from the list that are present in the dataset (s_genes / g2m_genes above stay unfiltered).
cell_cycle_genes = [x for x in cell_cycle_genes if x in adata.var_names]

In [None]:
# 'Cell Subtype' labels kept for the endothelial vessel-size analysis.
endo_cts = ['Arterial EC I',
 'Arterial EC II', 'Early Car4- capillaries','Late Car4- capillaries',
'Venous EC','Proliferative EC', 'Proliferative venous EC',
]
endo_adata = adata[adata.obs['Cell Subtype'].isin(endo_cts)].copy()
# Score S and G2/M phase, then regress both scores out of .X (modifies .X in place).
sc.tl.score_genes_cell_cycle(endo_adata, s_genes=s_genes, g2m_genes=g2m_genes)
sc.pp.regress_out(endo_adata, ['S_score', 'G2M_score'])
# Keep the cell-cycle-regressed matrix in its own layer.
endo_adata.layers['cc_regress'] = endo_adata.X.copy()

In [None]:

# Highly variable genes per mouse, PCA on those genes, Harmony integration across mice,
# then neighbors / Leiden clustering on the Harmony-corrected embedding.
sc.pp.highly_variable_genes(endo_adata,batch_key='Mousename')
sc.pp.pca(endo_adata, mask_var="highly_variable")
sce.pp.harmony_integrate(endo_adata,'Mousename',max_iter_harmony = 20)
sc.pp.neighbors(endo_adata, use_rep="X_pca_harmony")
sc.tl.leiden(endo_adata, key_added="leiden")
# Manual mapping of Leiden clusters '0'-'10' to three coarse labels.
# NOTE(review): tied to this exact clustering result; a cluster id missing from the map becomes NaN.
endo_adata.obs['Cell Subtype 2'] = endo_adata.obs['leiden'].map({'0':'Cap1','1':'Cap1','2':'Cap1','3':'Venous EC','4':'Cap1','5':'Arterial EC','6':'Cap1','7':'Cap1','8':'Cap1','9':'Cap1','10':'Arterial EC',})
# NOTE(review): presumably one color per label in sorted order (Arterial EC, Cap1, Venous EC) - confirm.
endo_adata.uns['Cell Subtype 2_colors']= ['#4A90E2','#9B59B6','#E35D6A']
sc.tl.umap(endo_adata,min_dist=0.5)
# Put the log-normalized values back into .X (it held the cell-cycle-regressed values until here).
endo_adata.X = endo_adata.layers['log1p'].copy()
sc.tl.rank_genes_groups(endo_adata,'leiden',method='wilcoxon')
sc.pl.rank_genes_groups_dotplot(endo_adata,dendrogram=False)

# One UMAP per annotation / marker gene, colored from .X rather than .raw.
for color in ['Cell Subtype','Cell Subtype 2','Timepoint','Treatment','leiden','Gja5','Slc6a2','Kit','Hey1','Nr2f2','Mecom','Eln','Mgp','Col4a1','Col4a2']:
    sc.pl.umap(endo_adata,color=color,use_raw=False)

In [None]:
# UMAP colored by the coarse 'Cell Subtype 2' labels.
sc.pl.umap(endo_adata,color='Cell Subtype 2')

In [None]:
import palantir
import cellrank as cr
import scvelo as scv

# Trajectory settings: the root is taken from the Cap1 cells and one terminal state is
# chosen per entry of terminal_cts; `celltype` names the obs column holding these labels.
root_ct = 'Cap1'
terminal_cts = ['Arterial EC','Venous EC']
celltype='Cell Subtype 2'

palantir.utils.run_diffusion_maps(endo_adata,
                                           n_components=5)
fig = palantir.plot.plot_diffusion_components(endo_adata)[0]
fig.tight_layout()
fig.savefig(f'{figures}/aczpalantir_diffusion_components.png')
plt.close()
palantir.utils.determine_multiscale_space(endo_adata)

palantir.utils.run_magic_imputation(endo_adata)
# Root cell = the root_ct cell with the smallest value in UMAP column 1.
# NOTE(review): despite the name `umap1_values`, this reads the second UMAP column (index 1).
subset = endo_adata[endo_adata.obs[celltype] == root_ct]
umap1_values = subset.obsm['X_umap'][:, 1]
min_idx = np.argmin(umap1_values)
root_cell = subset.obs_names[min_idx]
# Choose one terminal cell per lineage from the extremes of the UMAP embedding:
# 'Arterial EC' takes the cell with the smallest value in UMAP column 1, every other
# lineage takes the cell with the smallest value in UMAP column 0.
terminal_states = []
for ct in terminal_cts:
    subset = endo_adata[endo_adata.obs[celltype] == ct]
    umap_column = 1 if ct == 'Arterial EC' else 0
    extreme_idx = np.argmin(subset.obsm['X_umap'][:, umap_column])
    terminal_states.append(subset.obs_names[extreme_idx])

# Series of lineage labels indexed by the chosen terminal cell names.
terminal_states = pd.Series(index=terminal_states, data=terminal_cts, dtype='object')

# Highlight the root cell together with the terminal cells on the UMAP.
# `[root_cell] + terminal_states` is a list plus a pandas Series, which pandas does not treat
# as concatenation, so the root cell was never added as its own entry. Build an explicit
# {cell name: label} mapping instead.
highlighted_cells = {root_cell: 'root', **terminal_states.to_dict()}
fig = palantir.plot.highlight_cells_on_umap(endo_adata, highlighted_cells)[0]
fig.tight_layout()
fig.savefig(f'{figures}/aczpalantir_terminal_cells.png')
plt.close()

# Run Palantir from the chosen root cell towards the selected terminal states.
palantir.core.run_palantir(
    endo_adata, root_cell, num_waypoints=500, terminal_states=terminal_states
)

fig = palantir.plot.plot_palantir_results(endo_adata, s=3)
fig.tight_layout()
fig.savefig(f'{figures}/aczpalantir_results.png')
plt.close()
# Also compute diffusion pseudotime with scanpy, rooted at the same cell:
# sc.tl.dpt reads the integer position of the root cell from .uns['iroot'].
iroot = endo_adata.obs.index.get_loc(root_cell)
endo_adata.uns["iroot"] = iroot
sc.tl.dpt(endo_adata)

# Branch selection stays best-effort, but a failure is now reported instead of being
# silently discarded. NOTE(review): the later palantir.plot.plot_trajectory cells presumably
# rely on the branch masks produced here - confirm.
try:
    palantir.presults.select_branch_cells(endo_adata, q=.01, eps=.01,pseudo_time_key='dpt_pseudotime')

    fig = palantir.plot.plot_branch_selection(endo_adata)
    fig.tight_layout()
    fig.savefig(f'{figures}/aczpalantir_branch_selection.png')
    plt.close()

except Exception as err:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt / SystemExit
    # still propagate.
    print(f'Palantir branch selection failed, continuing without it: {type(err).__name__}: {err}')

# Diffusion map scatter colored by cell type and by the root cell position (`iroot`),
# showing the components given in `components`.
sc.tl.diffmap(endo_adata)
scv.pl.scatter(
    endo_adata,
    basis="diffmap",
    c=[celltype, iroot],
    legend_loc="right",
    components=["2, 3"],
    show=False,
    save=f'aczdiffmap_{celltype}_root_cell.png'
)


# UMAPs of the two pseudotime estimates (scanpy DPT and Palantir).
sc.pl.embedding(
    endo_adata,
    basis="umap",
    color=["dpt_pseudotime", "palantir_pseudotime"],
    color_map="viridis",
    show=False,
    save='_acz_pseudotimes.png'
)

# Gene trends from the MAGIC-imputed expression along the DPT pseudotime.
palantir.presults.compute_gene_trends(
    endo_adata,
    expression_key="MAGIC_imputed_data",
    pseudo_time_key='dpt_pseudotime'
)

# CellRank pseudotime kernel projected onto the UMAP.
# NOTE(review): this uses 'palantir_pseudotime' while the gene trends above use 'dpt_pseudotime' - confirm intended.
pk = cr.kernels.PseudotimeKernel(endo_adata, time_key="palantir_pseudotime")
pk.compute_transition_matrix()
pk.plot_projection(basis="umap", color=celltype, recompute=True,legend_loc='right margin',
                         save=f'{figures}/aczpalantir_pseudotime_stream.png')


In [None]:
import pandas as pd
from scipy.stats import spearmanr

def correlate_genes_with_pseudotime(adata, layer=None, method='spearman',pseudotime='dpt_pseudotime'):
    """
    Correlate every gene with a pseudotime column of an AnnData object.

    Parameters:
    - adata: AnnData object; pseudotime values are read from `adata.obs[pseudotime]`
    - layer: (Optional) key in `adata.layers` to use instead of `adata.X` (e.g., 'log1p', 'counts')
    - method: Correlation method, either 'spearman' (default) or 'pearson'
    - pseudotime: Name of the `adata.obs` column holding pseudotime (default 'dpt_pseudotime')

    Returns:
    - pandas DataFrame with genes as index and columns ['correlation', 'pval'],
      sorted by descending correlation. For 'pearson' the 'pval' column is None.

    Raises:
    - ValueError: if `pseudotime` is not a column of `adata.obs`, or `method` is not
      'spearman' or 'pearson'.
    """
    if pseudotime not in adata.obs:
        raise ValueError(f"Pseudotime column '{pseudotime}' not found in adata.obs.")
    # Validate once up front so an invalid method is rejected even when there are no genes.
    if method not in ('spearman', 'pearson'):
        raise ValueError("Method must be 'spearman' or 'pearson'.")

    # Get expression matrix as a dense cells x genes DataFrame
    X = adata.X if layer is None else adata.layers[layer]
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X.toarray() if hasattr(X, "toarray") else X,
                         index=adata.obs_names, columns=adata.var_names)

    # Extract pseudotime (separate name so the column-name argument is not shadowed)
    pseudotime_values = adata.obs[pseudotime]

    # Run correlation gene by gene
    results = []
    for gene in X.columns:
        if method == 'spearman':
            corr, pval = spearmanr(X[gene], pseudotime_values)
        else:
            corr, pval = X[gene].corr(pseudotime_values), None  # Pearson p-value not computed here
        results.append((gene, corr, pval))

    result_df = pd.DataFrame(results, columns=['gene', 'correlation', 'pval']).set_index('gene')
    return result_df.sort_values('correlation', ascending=False)

In [None]:
# Pearson correlation of every gene with Palantir pseudotime, computed separately within
# the arterial and the venous cells and keyed by lineage label.
# (Alternative cell selection left by the author: endo_adata.obsm['branch_masks'][ct].)
corr_dfs = {}
for ct in ('Arterial EC', 'Venous EC'):
    lineage_mask = endo_adata.obs['Cell Subtype 2'] == ct
    ct_adata = endo_adata[lineage_mask]
    df = correlate_genes_with_pseudotime(
        ct_adata,
        method='pearson',
        pseudotime='palantir_pseudotime',
    )
    # Drop genes whose row is entirely NaN.
    corr_dfs[ct] = df.dropna(how='all')

In [None]:
# Number of genes taken from each end of the correlation ranking.
top_n_genes=50
arterial_corr = corr_dfs['Arterial EC']
venous_corr = corr_dfs['Venous EC']
# Frames are sorted by descending correlation: the head holds the most positive genes.
arterial_large_genes = arterial_corr.head(top_n_genes).index.tolist()
venous_large_genes = venous_corr.head(top_n_genes).index.tolist()
# The tail holds the most negative genes; reverse it so the most negative comes first.
arterial_small_genes = list(reversed(arterial_corr.tail(top_n_genes).index.tolist()))
venous_small_genes = list(reversed(venous_corr.tail(top_n_genes).index.tolist()))

In [None]:
import matplotlib.pyplot as plt
from matplotlib_venn import venn2,venn3

# Venn diagram of the genes most positively correlated with pseudotime in each lineage.
venn = venn2([set(arterial_large_genes), set(venous_large_genes)],
             set_labels=('Arterial', 'Venous'),
             set_colors=('#4A90E2', '#E35D6A'),
             alpha=0.7)

# Enlarge the set and subset labels; a subset label can be None, so it is skipped.
for text in venn.set_labels:
    text.set_fontsize(12)
for text in venn.subset_labels:
    if text:
        text.set_fontsize(12)

# Take the gene count in the title from top_n_genes so it cannot drift from the list size.
plt.title(f"Top {top_n_genes} genes positively correlated with pseudotime")
plt.savefig(f'{figures}/aczvenn_diagram_large.png',dpi=300,bbox_inches='tight')
plt.close()

In [None]:
import matplotlib.pyplot as plt
from matplotlib_venn import venn2

# Venn diagram of the genes most negatively correlated with pseudotime in each lineage.
venn = venn2([set(arterial_small_genes), set(venous_small_genes)],
             set_labels=('Arterial', 'Venous'),
             set_colors=('#4A90E2', '#E35D6A'),
             alpha=0.7)

# Enlarge the set and subset labels; a subset label can be None, so it is skipped.
for text in venn.set_labels:
    text.set_fontsize(12)
for text in venn.subset_labels:
    if text:
        text.set_fontsize(12)

# Take the gene count in the title from top_n_genes so it cannot drift from the list size.
plt.title(f"Top {top_n_genes} genes negatively correlated with pseudotime")
plt.savefig(f'{figures}/aczvenn_diagram_small.png',dpi=300,bbox_inches='tight')
plt.close()

In [None]:
# Genes shared by both lineages at each end of the pseudotime correlation ranking.
large_genes = [x for x in arterial_large_genes if x in venous_large_genes]
small_genes = [x for x in arterial_small_genes if x in venous_small_genes]
# Score both gene sets per cell; the vessel size score is their difference.
sc.tl.score_genes(endo_adata,large_genes,score_name='large_score')
sc.tl.score_genes(endo_adata,small_genes,score_name='small_score')
endo_adata.obs['Vessel size score'] = endo_adata.obs['large_score'] - endo_adata.obs['small_score']


from sklearn.preprocessing import MinMaxScaler
# This module-level scaler is the one used for the rescaling below.
scaler = MinMaxScaler()
# NOTE(review): normalize_dataframe is defined here but not called in the visible cells.
def normalize_dataframe(df):
    """Return a copy of `df` with every column min-max scaled to the range [0, 1]."""
    # Initialize the MinMaxScaler
    scaler = MinMaxScaler(feature_range=(0, 1))
    # Fit the scaler on the data and transform each column
    df_normalized = pd.DataFrame(scaler.fit_transform(df), index=df.index, columns=df.columns)
    return df_normalized
# Rescale the score with the scaler, then cut it into four equal-width, labelled bins.
endo_adata.obs['Vessel size score'] = scaler.fit_transform(endo_adata.obs[['Vessel size score']])
endo_adata.obs['Vessel size category'] = pd.cut(endo_adata.obs['Vessel size score'], bins=4,labels=['capillary','small','medium','large'])
# UMAPs of the score, the size category, the coarse cell types and selected genes.
sc.pl.umap(endo_adata,color=['Vessel size score'],cmap='Oranges',size=size,frameon=False,save='_aczvessel_size_score.png')
sc.pl.umap(endo_adata,color=['Vessel size category'],cmap='viridis',size=size,frameon=False,save='_aczvessel_size_category.png')
sc.pl.umap(endo_adata,color=['Cell Subtype 2'],cmap='viridis',size=size,legend_loc='on data',legend_fontsize=10, legend_fontoutline=1,frameon=False,save='aczcellsubtype.png')
sc.pl.umap(endo_adata,color=['Mgp'],cmap='viridis',size=size,frameon=False,save='aczmgp.png')
sc.pl.umap(endo_adata,color=['Col4a1'],cmap='viridis',size=size,frameon=False,save='aczcol4a1.png')
sc.pl.umap(endo_adata,color=['Col4a2'],cmap='viridis',size=size,frameon=False,save='aczcol4a2.png')
sc.pl.umap(endo_adata,color=['Eln'],cmap='viridis',size=size,frameon=False,save='aczeln.png')


In [None]:
# One UMAP panel per gene of the shared large- and small-vessel gene sets.
sc.pl.umap(endo_adata,color = large_genes + small_genes,cmap='viridis',hspace=0.5,save='acz_allsize.png')

In [None]:
# Save the annotated endothelial object, gzip-compressed.
# NOTE(review): this writes into the figures directory, not a data directory - confirm intended.
endo_adata.write(f'{figures}/vessel_size.gz.h5ad',compression='gzip')

In [None]:
# Palantir trajectory for the 'Arterial EC' branch, drawn over cells colored by DPT pseudotime.
fig, ax = plt.subplots(1,1,figsize=(2,2))
palantir.plot.plot_trajectory(
    endo_adata, # your anndata
    "Arterial EC", # the branch to plot
    cell_color="dpt_pseudotime", # the ad.obs column to color the cells by
    n_arrows=5, # the number of arrow heads along the path
    color='#4A90E2', # the color of the path and arrow heads
    scanpy_kwargs=dict(cmap="viridis",size=size), # arguments passed to scanpy.pl.embedding
    arrowprops=dict(arrowstyle="->,head_length=.25,head_width=.25", lw=2), # appearance of the arrow heads
    lw=2, # thickness of the path
ax=ax
    # pseudotime_interval=(0, .9), # interval of the pseudotime to cover with the path
)
fig.tight_layout()

fig.savefig(f'{figures}/aczpalantir_art_trajectory.png')
plt.close()

In [None]:
# Palantir trajectory for the 'Venous EC' branch, drawn over cells colored by DPT pseudotime.
fig, ax = plt.subplots(1,1,figsize=(2,2))
palantir.plot.plot_trajectory(
    endo_adata, # your anndata
    "Venous EC", # the branch to plot
    cell_color="dpt_pseudotime", # the ad.obs column to color the cells by
    n_arrows=5, # the number of arrow heads along the path
    color='#E35D6A', # the color of the path and arrow heads
    scanpy_kwargs=dict(cmap="viridis",size=size), # arguments passed to scanpy.pl.embedding
    arrowprops=dict(arrowstyle="->,head_length=.25,head_width=.25", lw=2), # appearance of the arrow heads
    lw=2, # thickness of the path
ax=ax
    # pseudotime_interval=(0, .9), # interval of the pseudotime to cover with the path
)
fig.tight_layout()
fig.savefig(f'{figures}/aczpalantir_ven_trajectory.png')
plt.close()

In [None]:
# Dot plot of Cxcl12 / Cxcr4 / Ackr3 grouped by cell type, size category, timepoint and treatment, with group totals.
sc.pl.DotPlot(endo_adata,['Cxcl12','Cxcr4','Ackr3'],groupby=['Cell Subtype 2','Vessel size category','Timepoint','Treatment']).add_totals().show()

In [None]:

# Temporarily switch to smaller (1 x 1) panels for the single-gene UMAPs below.
os.makedirs(figures, exist_ok=True)
sc.set_figure_params(dpi_save=300, fontsize=10, figsize=(1,1))
sc.settings.figdir = figures
sns.set_style('white', rc={
    'xtick.bottom': True,
    'ytick.left': True,
})
plt.rcParams["font.family"] = "Arial"
size=15
for gene in ['Cxcl12','Cxcr4','Ackr3','Esr2','Dkk2','Moxd1']:
    sc.pl.umap(endo_adata,color=[gene],cmap='viridis',size=size,frameon=False,save=f'acz_{gene}.png')
# Restore the notebook-wide figure settings (1.5 x 1.5 panels) for the cells that follow.
os.makedirs(figures, exist_ok=True)
sc.set_figure_params(dpi_save=300, fontsize=10, figsize=(1.5,1.5))
sc.settings.figdir = figures
sns.set_style('white', rc={
    'xtick.bottom': True,
    'ytick.left': True,
})
plt.rcParams["font.family"] = "Arial"
size=15

In [None]:
# Combined cell-type / vessel-size label per cell:
#   - Cap1 cells, and any cell in the 'capillary' size bin, are labelled 'Cap1'
#   - otherwise 'PAEC' (Arterial EC) or 'PVEC' (everything else), followed by the
#     upper-cased first letter of the size category (S / M / L)
ls = []
for subtype, size_category in zip(endo_adata.obs['Cell Subtype 2'], endo_adata.obs['Vessel size category']):
    if subtype == 'Cap1' or size_category == 'capillary':
        ls.append('Cap1')
        continue
    vessel_prefix = 'PAEC' if subtype == 'Arterial EC' else 'PVEC'
    ls.append(f'{vessel_prefix} {size_category[0].upper()}')
endo_adata.obs['ct_s'] = ls


In [None]:
sc.set_figure_params(dpi_save=300, fontsize=10, figsize=(1.5,1.5))

# Genes plotted along the arterial -> capillary -> venous size axis.
gene_ls = ['Dkk2','Sox6',
           'Lama3','Esr2',
           'Stc1',
             'Glp1r','Kit','Wif1','Car2',
           'Rarb','Chrm3',
           'Gria3',
           'Ptger3','Moxd1',
             ]
# Dot plot with groups ordered from large arterial, through Cap1, to large venous;
# standard_scale='var' scales each gene across the groups.
sc.pl.dotplot(endo_adata,gene_ls,groupby='ct_s',cmap='viridis',categories_order=['PAEC L','PAEC M','PAEC S','Cap1','PVEC S','PVEC M', 'PVEC L'],
              standard_scale='var',save='size_axis.png')
# One UMAP panel per gene in the same list.
sc.pl.umap(endo_adata,color=gene_ls,hspace=0.5,wspace=0.3,cmap='viridis',save='size_axis.png')


In [None]:
# Size-marker genes shown along the arterial -> capillary -> venous axis.
gene_ls = ['Ccdc85a','Glp1r','Sema3c',
         'Sox4','Nrp1','Ifitm3',
         'Ptprr','Adgrg6','Foxo1',
         'Mgp','Eln','Nr4a2',
]
# Group order shared by both plots: large arterial, through Cap1, to large venous.
size_axis_order = ['PAEC L','PAEC M','PAEC S','Cap1','PVEC S','PVEC M', 'PVEC L']

# Matrix plot (axes swapped), each gene scaled across the groups.
sc.pl.MatrixPlot(endo_adata,gene_ls[::-1],groupby='ct_s',categories_order=size_axis_order,
              standard_scale='var').style(cmap='viridis').swap_axes().savefig(f'{figures}/matrixplot_size_markers.png',dpi=300,bbox_inches='tight')
# Dot plot of the same genes. It previously used the matrix plot's filename and overwrote
# that figure, so it now gets its own file.
sc.pl.DotPlot(endo_adata,gene_ls[::-1],groupby='ct_s',categories_order=size_axis_order,
              standard_scale='var').style(cmap='viridis').savefig(f'{figures}/dotplot_size_markers.png',dpi=300,bbox_inches='tight')