In [None]:
'''
Goal: Check LungMAP BPD vessel_size_gradient
'''

In [None]:
import scanpy as sc
import scanpy.external as sce
import os 
import pandas as pd 
import numpy as np
import seaborn as sns
from functions import compare_obs_values_within_groups_to_excel
import matplotlib.pyplot as plt
import palantir
# from statannotations.Annotator import Annotator

# --- Notebook-wide configuration ---------------------------------------------
# The names defined here (adata_name, figures, data, size) are module-level
# settings; `figures` and `size` are read by the plotting cells further down.
adata_name = 'venous_ec'
figures = "data/figures/figures/lungmap"       # every saved figure goes below this directory
data = "data/single_cell_files/scanpy_files"   # NOTE(review): `data` is reassigned by the data-loading cell
size = 15                                      # marker size handed to the UMAP / trajectory plots

# Create the output directory first, then point scanpy's `save=` machinery at it.
os.makedirs(figures, exist_ok=True)
sc.settings.figdir = figures
sc.set_figure_params(dpi_save=300, fontsize=10, figsize=(1.5, 1.5))

# White seaborn theme with tick marks on the bottom and left axes; Arial font.
sns.set_style('white', rc={'xtick.bottom': True, 'ytick.left': True})
plt.rcParams.update({"font.family": "Arial"})

In [None]:

# Directory holding the LungMAP BPD download. It can be overridden with the
# LUNGMAP_BPD_DIR environment variable so the notebook is not tied to a single
# workstation; the original absolute path stays as the default.
data = os.environ.get(
    'LUNGMAP_BPD_DIR',
    '/home/carsten/alvira_bioinformatics/data/external_datasets/lungmap_bpd_LMEX0000004400',
)
human_adata = sc.read(f'{data}/BPD-adata_combined.h5ad')

# Per-cell annotations from the authors' cluster table (first column is the index).
adata_obs_df = pd.read_csv(f'{data}/BPD_RNA_author-clusters.txt',sep='\t',header=0,index_col=0)

# The columns are joined onto `.obs` by index alignment. If the two indices share
# nothing, every annotation would silently become NaN, so fail loudly instead.
shared_cells = human_adata.obs_names.intersection(adata_obs_df.index)
if len(shared_cells) == 0:
    raise ValueError(
        'No cell identifiers are shared between BPD-adata_combined.h5ad and '
        'BPD_RNA_author-clusters.txt; the author annotations cannot be attached.'
    )
for col in adata_obs_df:
    human_adata.obs[col] = adata_obs_df[col]

# QC metrics first, then library-size normalisation to 1e4 and log1p, all in place on .X.
sc.pp.calculate_qc_metrics(human_adata,inplace=True)
sc.pp.normalize_total(human_adata,target_sum=1e4)
sc.pp.log1p(human_adata)

In [None]:
# Join the numeric age and its unit into a single label such as '4-month'.
age_number = human_adata.obs['age'].astype(str)
age_unit = human_adata.obs['age-unit'].astype(str)
human_adata.obs['age full'] = age_number + '-' + age_unit

# Explicit category order for the combined label. Any label that is not listed
# here is turned into NaN by pd.Categorical.
age_categories = [
    '4-month', '5-month', '6-month', '7-month', '8-month',
    '316-day',
    '11-month', '12-month', '13-month', '14-month', '15-month',
    '18-month', '19-month', '20-month', '21-month',
    '3-year',
]
human_adata.obs['age full'] = pd.Categorical(
    human_adata.obs['age full'],
    categories=age_categories,
)

In [None]:
# Dot plot of the selected marker genes for every 'cell type' group, with the
# per-group cell totals added, saved into the figures directory.
marker_genes = ['ACKR1','HDAC9','EMCN','VWF','CPE','PRCP','GJA5','EFNB2','BMX']
marker_dotplot = sc.pl.DotPlot(human_adata, marker_genes, groupby='cell type')
marker_dotplot = marker_dotplot.add_totals()
marker_dotplot.savefig(
    f'{figures}/dotplot_vec_markers_age_lungmap.png',
    dpi=300,
    bbox_inches='tight',
)

In [None]:
# Restrict to cells annotated 'VEC' and show four marker genes per age group.
vec_by_age = human_adata[human_adata.obs['cell type'] == 'VEC']
vec_age_dotplot = sc.pl.DotPlot(
    vec_by_age,
    ['ACKR1','HDAC9','EMCN','VWF'],
    title='Venous EC',
    groupby='age full',
)
vec_age_dotplot.add_totals().show()

In [None]:
# Same four marker genes in the 'VEC' cells, this time grouped by disease status.
vec_by_disease = human_adata[human_adata.obs['cell type'] == 'VEC']
vec_disease_dotplot = sc.pl.DotPlot(
    vec_by_disease,
    ['ACKR1','HDAC9','EMCN','VWF'],
    groupby='disease',
    title='Venous EC in LungMAP',
)
vec_disease_dotplot.add_totals().show()

In [None]:
# Keep the three endothelial populations (AEC, CAP1, VEC). The explicit .copy()
# turns the slice into an independent AnnData object: the following lines write
# to .uns / .obs / .obsm, and writing to a view forces an implicit copy and an
# ImplicitModificationWarning.
human_adata_endo = human_adata[human_adata.obs['cell type'].isin(['AEC','CAP1','VEC'])].copy()
# Colours scanpy will use for the 'Cell Subtype' categories created further down.
human_adata_endo.uns['Cell Subtype_colors']= ['#4A90E2','#9B59B6','#E35D6A']

# Re-embed the subset: HVGs per donor, PCA, Harmony correction across samples
# (written back into X_pca), neighbour graph and Leiden clustering.
sc.pp.highly_variable_genes(human_adata_endo, batch_key='donor')
sc.pp.pca(human_adata_endo)
sce.pp.harmony_integrate(human_adata_endo, 'sample', adjusted_basis='X_pca',max_iter_harmony=20)
sc.pp.neighbors(human_adata_endo)
sc.tl.leiden(human_adata_endo, resolution=0.5)
sc.tl.rank_genes_groups(human_adata_endo,'leiden',method='wilcoxon')
sc.pl.rank_genes_groups_dotplot(human_adata_endo,dendrogram=False)

# Drop Leiden cluster 5 (flagged by the author as high mitochondrial content),
# again as a real copy because a new .obs column is assigned right after.
human_adata_endo = human_adata_endo[~human_adata_endo.obs['leiden'].isin(['5'])].copy() # high mt
cluster_to_subtype = {'0':'Cap1','1':'Cap1','2':'Arterial EC','3':'Venous EC','4':'Cap1'}
human_adata_endo.obs['Cell Subtype'] = human_adata_endo.obs['leiden'].map(cluster_to_subtype)

# The mapping above is hard-coded for one particular clustering result. Any
# cluster it does not list ends up as NaN, so report that instead of hiding it.
unmapped_clusters = human_adata_endo.obs.loc[
    human_adata_endo.obs['Cell Subtype'].isna(), 'leiden'
].unique().tolist()
if unmapped_clusters:
    print(f'WARNING: leiden clusters without a Cell Subtype label: {unmapped_clusters}')

sc.tl.umap(human_adata_endo,min_dist=0.5)

# One UMAP per annotation / marker gene.
for color in [
    'Cell Subtype',
    'cell type','donor','disease','leiden','PTPRC','COL1A1','PROX1','APLN','APLNR','GJA5','ACKR1','PLVAP','COL15A1','TBX2','KIT','FBLN5','SCN7A','MECOM','MGP','VWF']:
    sc.pl.umap(human_adata_endo,color=color,use_raw=False)

In [None]:
# Four marker genes across the re-annotated endothelial subtypes.
subtype_dotplot = sc.pl.DotPlot(
    human_adata_endo,
    ['ACKR1','HDAC9','EMCN','VWF'],
    groupby='Cell Subtype',
    title='Vascular endos LungMap',
)
subtype_dotplot.add_totals().show()

In [None]:
# Wilcoxon ranking of 'Venous EC' with 'Arterial EC' as the reference group,
# followed by scanpy's default dot plot of the ranked genes.
sc.tl.rank_genes_groups(
    human_adata_endo,
    'Cell Subtype',
    method='wilcoxon',
    groups=['Venous EC'],
    reference='Arterial EC',
)
sc.pl.rank_genes_groups_dotplot(human_adata_endo)

In [None]:
# Ranking table for 'Venous EC', indexed by gene name (the 'names' column is kept).
df = sc.get.rank_genes_groups_df(human_adata_endo,'Venous EC')
df = df.set_index('names', drop=False)

# First 20 rows of the table, then the last 20 rows listed from the bottom upwards.
first_20_genes = df.head(20).index.tolist()
last_20_genes = list(reversed(df.tail(20).index.tolist()))
sc.pl.umap(
    human_adata_endo,
    color=first_20_genes + last_20_genes,
    hspace=0.5,
    wspace=0.5,
    ncols=5,
)

In [None]:
# Dot plot of the first 50 and the last 50 rows of the ranking table (`df`),
# the last 50 listed from the bottom upwards, grouped by endothelial subtype.
first_50_genes = df.head(50).index.tolist()
last_50_genes = list(reversed(df.tail(50).index.tolist()))
sc.pl.dotplot(
    human_adata_endo,
    first_50_genes + last_50_genes,
    groupby='Cell Subtype',
)

In [None]:
# UMAP panels for a hand-picked gene list plus the two annotation columns;
# scanpy writes the figure into its figdir through `save=`.
vec_marker_panels = [
    'ACKR1','HDAC9','BNC2','IL1R1','DKK2','GJA5','CPAMD8','PDE3A','BMX','PCSK5','EFNB2',
    'Cell Subtype','cell type',
]
sc.pl.umap(
    human_adata_endo,
    color=vec_marker_panels,
    use_raw=False,
    hspace=0.5,
    save='vec_markers.png',
)

In [None]:
import palantir
import cellrank as cr
import scvelo as scv

# Trajectory set-up: pseudotime is rooted in a Cap1 cell and ends in one
# arterial and one venous cell.
root_ct = 'Cap1'
terminal_cts = ['Arterial EC','Venous EC']
celltype='Cell Subtype'

# --- Palantir preprocessing: diffusion maps, multiscale space, MAGIC imputation ---
palantir.utils.run_diffusion_maps(human_adata_endo,
                                           n_components=5)
fig = palantir.plot.plot_diffusion_components(human_adata_endo)[0]
fig.tight_layout()
fig.savefig(f'{figures}/lungmappalantir_diffusion_components.png')
plt.close()
palantir.utils.determine_multiscale_space(human_adata_endo)

palantir.utils.run_magic_imputation(human_adata_endo)

# --- Root cell: the Cap1 cell with the smallest first UMAP coordinate ---
subset = human_adata_endo[human_adata_endo.obs[celltype] == root_ct]
root_cell = subset.obs_names[np.argmin(subset.obsm['X_umap'][:, 0])]

# --- Terminal cells: one extreme cell per terminal cell type ---
terminal_states = []
for ct in terminal_cts:
    subset = human_adata_endo[human_adata_endo.obs[celltype] == ct]
    # 'Arterial EC' uses the cell with the largest *second* UMAP coordinate
    # (column 1); every other terminal type uses the largest first coordinate.
    umap_axis = 1 if ct == 'Arterial EC' else 0
    max_idx = np.argmax(subset.obsm['X_umap'][:, umap_axis])
    terminal_states.append(subset.obs_names[max_idx])

# Series indexed by cell name with the cell-type label as value.
terminal_states = pd.Series(index=terminal_states, data=terminal_cts, dtype='object')

# Highlight the root and both terminal cells. The original expression
# `[root_cell] + terminal_states` adds a list to a pandas Series, which is
# element-wise arithmetic and not concatenation, so the root cell was never
# passed to the plot. Build one Series (cell name -> label) holding all three.
highlighted_cells = pd.Series(
    {root_cell: f'{root_ct} (root)', **terminal_states.to_dict()},
    dtype='object',
)
fig = palantir.plot.highlight_cells_on_umap(human_adata_endo, highlighted_cells)[0]
fig.tight_layout()
fig.savefig(f'{figures}/lungmappalantir_terminal_cells.png')
plt.close()

# --- Palantir pseudotime and fate probabilities ---
palantir.core.run_palantir(
    human_adata_endo, root_cell, num_waypoints=500, terminal_states=terminal_states
)

fig = palantir.plot.plot_palantir_results(human_adata_endo, s=3)
fig.tight_layout()
fig.savefig(f'{figures}/lungmappalantir_results.png')
plt.close()

# --- Diffusion pseudotime (scanpy) rooted in the same cell ---
iroot = human_adata_endo.obs.index.get_loc(root_cell)
human_adata_endo.uns["iroot"] = iroot
# Compute the scanpy diffusion map before DPT: sc.tl.dpt expects it and
# otherwise falls back to computing it itself with a warning.
sc.tl.diffmap(human_adata_endo)
sc.tl.dpt(human_adata_endo)

# Branch selection is best effort: a failure must not stop the notebook, but it
# is reported instead of being swallowed silently.
try:
    palantir.presults.select_branch_cells(human_adata_endo, q=.01, eps=.01,pseudo_time_key='dpt_pseudotime')

    fig = palantir.plot.plot_branch_selection(human_adata_endo)
    fig.tight_layout()
    fig.savefig(f'{figures}/lungmappalantir_branch_selection.png')
except Exception as err:
    print(f'Skipping palantir branch selection: {type(err).__name__}: {err}')
finally:
    plt.close()

# Diffusion-map scatter coloured by cell type and by the root cell index.
scv.pl.scatter(
    human_adata_endo,
    basis="diffmap",
    c=[celltype, iroot],
    legend_loc="right",
    components=["2, 3"],
    show=False,
    save=f'lungmapdiffmap_{celltype}_root_cell.png'
)


# Both pseudotimes side by side on the UMAP.
sc.pl.embedding(
    human_adata_endo,
    basis="umap",
    color=["dpt_pseudotime", "palantir_pseudotime"],
    color_map="viridis",
    show=False,
    save='_lungmap_pseudotimes.png'
)

# Gene trends on the MAGIC-imputed expression along the DPT pseudotime.
palantir.presults.compute_gene_trends(
    human_adata_endo,
    expression_key="MAGIC_imputed_data",
    pseudo_time_key='dpt_pseudotime'
)

# CellRank pseudotime kernel on the Palantir pseudotime, projected onto the UMAP.
pk = cr.kernels.PseudotimeKernel(human_adata_endo, time_key="palantir_pseudotime")
pk.compute_transition_matrix()
pk.plot_projection(basis="umap", color=celltype, recompute=True,legend_loc='right margin',
                         save=f'{figures}/lungmappalantir_pseudotime_stream.png')


In [None]:
import pandas as pd
from scipy.stats import spearmanr

def correlate_genes_with_pseudotime(adata, layer=None, method='spearman',pseudotime='dpt_pseudotime'):
    """
    Correlate every gene with a pseudotime column of an AnnData-like object.

    Parameters:
    - adata: object exposing `.X`, `.layers`, `.obs`, `.obs_names` and `.var_names`
    - layer: (Optional) key in `adata.layers` to use instead of `adata.X`
    - method: Correlation method, either 'spearman' (default) or 'pearson'
    - pseudotime: name of the `adata.obs` column holding the pseudotime values
      (default 'dpt_pseudotime')

    Returns:
    - pandas DataFrame indexed by gene with columns ['correlation', 'pval'],
      sorted by correlation in descending order. For 'pearson' no p-value is
      computed and the 'pval' column is None.

    Raises:
    - ValueError: if `pseudotime` is not a column of `adata.obs`, or if `method`
      is neither 'spearman' nor 'pearson'.
    """
    if pseudotime not in adata.obs:
        # Name the key that was actually requested, not a fixed placeholder.
        raise ValueError(f"Pseudotime must be stored in adata.obs['{pseudotime}'].")
    # Validate once up front so a bad method is reported even when there are no genes.
    if method not in ('spearman', 'pearson'):
        raise ValueError("Method must be 'spearman' or 'pearson'.")

    # Get expression matrix as a dense cells x genes DataFrame
    X = adata.X if layer is None else adata.layers[layer]
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X.toarray() if hasattr(X, "toarray") else X,
                         index=adata.obs_names, columns=adata.var_names)

    # Extract pseudotime (kept under its own name so the `pseudotime` argument
    # keeps meaning "column name" throughout the function)
    pseudotime_values = adata.obs[pseudotime]

    # Run correlation gene by gene
    results = []
    for gene in X.columns:
        if method == 'spearman':
            corr, pval = spearmanr(X[gene], pseudotime_values)
        else:
            corr, pval = X[gene].corr(pseudotime_values), None  # Pearson p-value not computed here
        results.append((gene, corr, pval))

    result_df = pd.DataFrame(results, columns=['gene', 'correlation', 'pval']).set_index('gene')
    return result_df.sort_values('correlation', ascending=False)

In [None]:
# Pearson correlation of every gene with the Palantir pseudotime, computed
# separately inside each terminal subtype; rows that are entirely NaN are dropped.
corr_dfs = {}
for ct in ('Arterial EC', 'Venous EC'):
    # Alternative selection kept for reference:
    # ct_adata = human_adata_endo[human_adata_endo.obsm['branch_masks'][ct]]
    in_subtype = human_adata_endo.obs['Cell Subtype'] == ct
    ct_adata = human_adata_endo[in_subtype]

    df = correlate_genes_with_pseudotime(
        ct_adata,
        method='pearson',
        pseudotime='palantir_pseudotime',
    )
    corr_dfs[ct] = df.dropna(how='all')

In [None]:
# Display the 'branch_masks' entry for one terminal state. `ct` is not defined in
# this cell: it holds whatever value the most recently executed `for ct in ...`
# loop left behind.
# NOTE(review): 'branch_masks' is presumably written by
# palantir.presults.select_branch_cells, which is called inside a try/except
# earlier in the notebook, so the key may be absent - confirm before relying on it.
human_adata_endo.obsm['branch_masks'][ct]

In [None]:
# Display the gene-vs-pseudotime correlation table stored for 'Arterial EC'.
corr_dfs['Arterial EC']

In [None]:
# Display the gene-vs-pseudotime correlation table stored for 'Venous EC'.
corr_dfs['Venous EC']

In [None]:
# Number of genes taken from each end of the correlation tables.
top_n_genes = 50

arterial_corr = corr_dfs['Arterial EC']
venous_corr = corr_dfs['Venous EC']

# "large": first rows of each table (the tables are sorted by descending correlation).
arterial_large_genes = arterial_corr.head(top_n_genes).index.tolist()
venous_large_genes = venous_corr.head(top_n_genes).index.tolist()

# "small": last rows of each table, listed from the bottom row upwards.
arterial_small_genes = list(reversed(arterial_corr.tail(top_n_genes).index.tolist()))
venous_small_genes = list(reversed(venous_corr.tail(top_n_genes).index.tolist()))

In [None]:
import matplotlib.pyplot as plt
from matplotlib_venn import venn2,venn3

# Overlap between the arterial and venous "large" gene lists
# (the genes taken from the top of each correlation table).
venn = venn2([set(arterial_large_genes), set(venous_large_genes)], 
             set_labels=('Arterial', 'Venous'), 
             set_colors=('#4A90E2', '#E35D6A'), 
             alpha=0.7)

# Uniform font size; subset labels can be None for empty regions, hence the check.
for text in venn.set_labels:
    text.set_fontsize(12)
for text in venn.subset_labels:
    if text:
        text.set_fontsize(12)

# The title follows `top_n_genes` so it cannot drift from the list size used above.
plt.title(f"Top {top_n_genes} genes positively correlated with pseudotime")
plt.savefig(f'{figures}/lungmapvenn_diagram_large.png',dpi=300,bbox_inches='tight')
plt.close()

In [None]:
import matplotlib.pyplot as plt
from matplotlib_venn import venn2

# Overlap between the arterial and venous "small" gene lists
# (the genes taken from the bottom of each correlation table).
venn = venn2([set(arterial_small_genes), set(venous_small_genes)], 
             set_labels=('Arterial', 'Venous'), 
             set_colors=('#4A90E2', '#E35D6A'), 
             alpha=0.7)

# Uniform font size; subset labels can be None for empty regions, hence the check.
for text in venn.set_labels:
    text.set_fontsize(12)
for text in venn.subset_labels:
    if text:
        text.set_fontsize(12)

# The title follows `top_n_genes` so it cannot drift from the list size used above.
plt.title(f"Top {top_n_genes} genes negatively correlated with pseudotime")
plt.savefig(f'{figures}/lungmapvenn_diagram_small.png',dpi=300,bbox_inches='tight')
plt.close()

In [None]:
# Genes present in both the arterial and the venous list (arterial order is kept).
large_genes = [gene for gene in arterial_large_genes if gene in venous_large_genes]
small_genes = [gene for gene in arterial_small_genes if gene in venous_small_genes]

# One scanpy gene-set score per list; the vessel size score is their difference.
for gene_set, score_key in ((large_genes, 'large_score'), (small_genes, 'small_score')):
    sc.tl.score_genes(human_adata_endo, gene_set, score_name=score_key)
human_adata_endo.obs['Vessel size score'] = (
    human_adata_endo.obs['large_score'] - human_adata_endo.obs['small_score']
)

# Fixed disease order and the matching colours.
human_adata_endo.obs['disease'] = pd.Categorical(
    human_adata_endo.obs['disease'],
    categories=['control','aeBPD','eBPD','hBPD'],
)
human_adata_endo.uns['disease_colors'] = ['#66C2A5', '#FC8D62', '#E78AC3', '#8DA0CB']

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
def normalize_dataframe(df):
    """Min-max scale each column of `df` to the range [-10, 10], keeping index and columns."""
    column_scaler = MinMaxScaler(feature_range=(-10, 10))
    return pd.DataFrame(column_scaler.fit_transform(df), index=df.index, columns=df.columns)

# Rescale the score with the default MinMaxScaler, then cut it into four equal-width bins.
human_adata_endo.obs['Vessel size score'] = scaler.fit_transform(human_adata_endo.obs[['Vessel size score']])
human_adata_endo.obs['Vessel size category'] = pd.cut(
    human_adata_endo.obs['Vessel size score'],
    bins=4,
    labels=['capillary','small','medium','large'],
)

# UMAPs, in the same order as before; keyword arguments shared by all of them.
umap_common = dict(size=size, frameon=False)
sc.pl.umap(human_adata_endo, color=['Vessel size score'], cmap='Oranges',
           save='_lungmapvessel_size_score.png', **umap_common)
sc.pl.umap(human_adata_endo, color=['Vessel size category'], cmap='viridis',
           save='_lungmapvessel_size_category.png', **umap_common)
sc.pl.umap(human_adata_endo, color=['Cell Subtype'], cmap='viridis',
           legend_loc='on data', legend_fontsize=10, legend_fontoutline=1,
           save='lungmapcellsubtype.png', **umap_common)
for gene_name, gene_file in (
    ('MGP', 'lungmapmgp.png'),
    ('COL4A1', 'lungmapcol4a1.png'),
    ('COL4A2', 'lungmapcol4a2.png'),
    ('ELN', 'lungmapeln.png'),
):
    sc.pl.umap(human_adata_endo, color=[gene_name], cmap='viridis', save=gene_file, **umap_common)
sc.pl.umap(human_adata_endo, color=['disease'], cmap='viridis',
           save='lungmapdisease.png', **umap_common)

# Same score / disease plots with only the arterial and venous cells unmasked,
# each drawn from a copy of the object.
large_vessel_mask = human_adata_endo.obs['Cell Subtype'].isin(['Arterial EC','Venous EC'])
sc.pl.umap(human_adata_endo.copy(), color=['Vessel size score'], mask_obs=large_vessel_mask,
           cmap='Oranges', save='_lungmapvessel_size_score_large.png', **umap_common)
sc.pl.umap(human_adata_endo.copy(), color=['disease'], na_in_legend=False, mask_obs=large_vessel_mask,
           cmap='Oranges', save='_lungmapvessel_disease_large.png', **umap_common)


In [None]:
# One UMAP panel per gene of the shared "large" and "small" gene sets.
size_gene_panels = large_genes + small_genes
sc.pl.umap(
    human_adata_endo,
    color=size_gene_panels,
    cmap='viridis',
    hspace=0.5,
    save='lungmap_allsize.png',
)

In [None]:
# Palantir trajectory for the 'Arterial EC' branch, cells coloured by DPT pseudotime.
arterial_trajectory_style = dict(
    cell_color="dpt_pseudotime",                          # obs column used to colour the cells
    n_arrows=5,                                           # arrow heads along the path
    color='#4A90E2',                                      # colour of path and arrow heads
    scanpy_kwargs=dict(cmap="viridis", size=size),        # forwarded to the scanpy embedding plot
    arrowprops=dict(arrowstyle="->,head_length=.25,head_width=.25", lw=2),
    lw=2,                                                 # thickness of the path
)
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
palantir.plot.plot_trajectory(human_adata_endo, "Arterial EC", ax=ax, **arterial_trajectory_style)
fig.tight_layout()

fig.savefig(f'{figures}/lungmappalantir_art_trajectory.png')
plt.close()

In [None]:
# Palantir trajectory for the 'Venous EC' branch, cells coloured by DPT pseudotime.
venous_trajectory_style = dict(
    cell_color="dpt_pseudotime",                          # obs column used to colour the cells
    n_arrows=5,                                           # arrow heads along the path
    color='#E35D6A',                                      # colour of path and arrow heads
    scanpy_kwargs=dict(cmap="viridis", size=size),        # forwarded to the scanpy embedding plot
    arrowprops=dict(arrowstyle="->,head_length=.25,head_width=.25", lw=2),
    lw=2,                                                 # thickness of the path
)
fig, ax = plt.subplots(1, 1, figsize=(2, 2))
palantir.plot.plot_trajectory(human_adata_endo, "Venous EC", ax=ax, **venous_trajectory_style)
fig.tight_layout()
fig.savefig(f'{figures}/lungmappalantir_ven_trajectory.png')
plt.close()

In [None]:
# Persist the annotated endothelial object (gzip-compressed h5ad) next to the figures.
vessel_size_h5ad = f'{figures}/vessel_size.gz.h5ad'
human_adata_endo.write(vessel_size_h5ad, compression='gzip')

In [None]:
# For each BPD subtype: the age labels used to select cells for the comparison
# with controls, and the colour used for that subtype.
dis_tp_dict = {'aeBPD':[['316-day','4-month', '8-month','12-month', '13-month','5-month', '6-month', '7-month', '11-month', '14-month'], '#FC8D62',],  
               'eBPD':[['15-month', '18-month', '19-month', '21-month','14-month', '15-month','20-month', '21-month'],'#E78AC3'],
               'hBPD':[['3-year'],'#8DA0CB'],}

# Rebuild the combined age label on the endothelial object.
# The category list previously read `'8-month'` `'316-day',` with no comma in
# between; Python joins adjacent string literals, which produced the bogus
# category '8-month316-day' and turned every 8-month cell into NaN (so those
# cells were dropped by the age filter below). '316-day' was also listed twice.
# Each label now appears exactly once.
human_adata_endo.obs['age full'] = human_adata_endo.obs['age'].astype(str)+ '-'+ human_adata_endo.obs['age-unit'].astype(str)
human_adata_endo.obs['age full'] =  pd.Categorical(human_adata_endo.obs['age full'],categories = ['4-month',
 '5-month',
 '6-month',
 '7-month',
 '8-month',
 '316-day',
 '11-month',
 '12-month',
 '13-month',
 '14-month',
 '15-month',
 '18-month',
 '19-month',
 '20-month',
 '21-month',
 '3-year',
 ])
for ct in ['Arterial EC','Venous EC']:
    df = sc.get.obs_df(human_adata_endo,['disease','Cell Subtype','Vessel size score','age full','donor'])
    df = df.loc[df['Cell Subtype'] == ct]
    for dis in ['aeBPD','eBPD','hBPD']:
        tps = dis_tp_dict[dis][0]
        color = dis_tp_dict[dis][1]
        # Controls plus this subtype, restricted to the subtype's age labels.
        df_dis = df.loc[df['disease'].isin(['control',dis])]
        df_dis = df_dis.loc[df_dis['age full'].isin(tps)]
        if df_dis.empty:
            # Nothing to draw; skip instead of failing on an empty plot / missing legend.
            print(f'No {ct} cells for control vs {dis} at the selected ages; skipping plot.')
            continue
        fig, ax = plt.subplots(1, figsize=(3,2),sharey=True)
        hue_order = ['control',dis]
        palette =['#66C2A5', color]
    
        # One density curve per group, each normalised on its own (common_norm=False).
        ax = sns.kdeplot(
            data=df_dis,
            x="Vessel size score",
            hue='disease',
            hue_order=hue_order,
            palette=palette,
            common_norm=False,
            ax=ax,
        )
    
        ax.set_title(f'{ct}\nVessel size distribution')
        ax.set_ylabel('Proportion')
        
        sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
        fig.tight_layout()
        fig.savefig(f'{figures}/histplot_vessel_size_disease_{ct}_{dis}.png', dpi=300, bbox_inches='tight')
        plt.close()

In [None]:
from scipy.stats import mannwhitneyu,ttest_ind
from scipy.stats import ks_2samp
import itertools

# For each group: the age labels used to select cells, and the plotting colour.
dis_tp_dict = {'aeBPD':[['316-day','4-month', '8-month','12-month', '13-month','5-month', '6-month', '7-month', '11-month', '14-month'], '#FC8D62',],  
               'eBPD':[['15-month', '18-month', '19-month', '21-month','14-month', '15-month','20-month', '21-month'],'#E78AC3'],
               'hBPD':[['3-year'],'#8DA0CB'],
              'control':[[],'#66C2A5']}

# Rebuild the combined age label. The category list previously lacked a comma
# after '8-month', which silently fused it with the following '316-day' into
# '8-month316-day' and made every 8-month cell NaN; '316-day' was also listed
# twice. Each label now appears exactly once.
human_adata_endo.obs['age full'] = human_adata_endo.obs['age'].astype(str)+ '-'+ human_adata_endo.obs['age-unit'].astype(str)
human_adata_endo.obs['age full'] =  pd.Categorical(human_adata_endo.obs['age full'],categories = ['4-month',
 '5-month',
 '6-month',
 '7-month',
 '8-month',
 '316-day',
 '11-month',
 '12-month',
 '13-month',
 '14-month',
 '15-month',
 '18-month',
 '19-month',
 '20-month',
 '21-month',
 '3-year',
 ])
for ct in ['Arterial EC','Venous EC']:
    df = sc.get.obs_df(human_adata_endo,['disease','Cell Subtype','Vessel size score','age full','donor'])
    df = df.loc[df['Cell Subtype'] == ct]
    for dis in ['aeBPD','eBPD','hBPD']:
        tps = dis_tp_dict[dis][0]
        # Controls plus this subtype, restricted to the subtype's age labels.
        # .copy() so the donor column can be reassigned without writing to a slice.
        df_dis = df.loc[df['disease'].isin(['control',dis])]
        df_dis = df_dis.loc[df_dis['age full'].isin(tps)].copy()
        df_dis['donor'] = df_dis['donor'].cat.remove_unused_categories()
        if df_dis.empty:
            print(f'No {ct} cells for control vs {dis} at the selected ages; skipping.')
            print('')
            continue

        # Per-donor score distributions. These are taken from the filtered table
        # (`df_dis`), i.e. the same cells that are plotted below; the unfiltered
        # `df` was used here before, which ignored the age selection.
        # observed=True keeps only (disease, donor) pairs that actually occur.
        grouped = df_dis.groupby(['disease', 'donor'], observed=True)
        rep_distributions = {(g, r): vals['Vessel size score'].values for (g, r), vals in grouped}
        
        # Donor IDs per group
        a_reps = [r for (g, r) in rep_distributions if g == 'control']
        b_reps = [r for (g, r) in rep_distributions if g == dis]
        
        # Two-sample KS test for every control-donor x disease-donor pair
        results = []
        for a_r, b_r in itertools.product(a_reps, b_reps):
            a_vals = rep_distributions[('control', a_r)]
            b_vals = rep_distributions[(dis, b_r)]
            stat, pval = ks_2samp(a_vals, b_vals)
            results.append({'control_replicate': a_r, f'{dis}_replicate': b_r, 'D': stat, 'pval': pval})
        
        # Fixed columns so the frame is well-formed even when there are no pairs.
        ks_results = pd.DataFrame(results, columns=['control_replicate', f'{dis}_replicate', 'D', 'pval'])

        print(ct)
        print(dis)
        if ks_results.empty:
            print('No control/disease donor pairs available for KS tests.')
        else:
            print("Mean D-statistic:", ks_results['D'].mean())
            print("Proportion of p < 0.05:", (ks_results['pval'] < 0.05).mean())
        print('')

        fig, ax = plt.subplots(1, figsize=(3,2),sharey=True)
        # Colour every donor by its disease group. A dict keyed by donor keeps the
        # colour attached to the right curve regardless of the hue level order.
        donor_dis_dict = df_dis.set_index('donor')['disease'].to_dict()
        palette = {donor: dis_tp_dict[donor_dis_dict[donor]][1] for donor in df_dis['donor'].unique()}
        ax = sns.kdeplot(
            data=df_dis,
            x="Vessel size score",
            hue='donor',
            palette=palette,
            common_norm=False,
            ax=ax,
        )
    
        ax.set_title(f'{ct}\nVessel size distribution')
        ax.set_ylabel('Proportion')
        
        sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
        fig.tight_layout()
        fig.savefig(f'{figures}/histplot_vessel_size_disease_{ct}_{dis}_by_donor.png', dpi=300, bbox_inches='tight')
        plt.close()

In [None]:

# Vessel size score density per disease group, one figure per terminal subtype.
for ct in ('Arterial EC', 'Venous EC'):
    df = sc.get.obs_df(human_adata_endo, ['disease','donor','Cell Subtype','Vessel size score'])
    df = df.loc[df['Cell Subtype'] == ct]

    fig, ax = plt.subplots(1, figsize=(3,2), sharey=True)
    hue_order = ['control','aeBPD','eBPD','hBPD']
    palette = ['#66C2A5', '#FC8D62', '#E78AC3', '#8DA0CB']

    # Each group's curve is normalised independently (common_norm=False).
    kde_options = dict(
        x="Vessel size score",
        hue='disease',
        hue_order=hue_order,
        palette=palette,
        common_norm=False,
    )
    ax = sns.kdeplot(data=df, **kde_options)

    ax.set_title(f'{ct}\nVessel size distribution')
    ax.set_ylabel('Proportion')

    # Legend outside the axes, anchored to the upper-right corner of the plot area.
    sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
    fig.tight_layout()
    fig.savefig(
        f'{figures}/histplot_vessel_size_disease_{ct}.png',
        dpi=300,
        bbox_inches='tight',
    )
    plt.close()


In [None]:
# One UMAP per disease category, highlighting only that category's cells.
disease_categories = human_adata_endo.obs['disease'].cat.categories
for disease in disease_categories:
    sc.pl.umap(
        human_adata_endo,
        color=['disease'],
        groups=[disease],
    )