In [None]:
'''
Goal: Find unique differences in proliferation between Venous EC (VEC) and Cap1
'''

In [None]:
import scanpy as sc
import scanpy.external as sce
import os 
import pandas as pd 
import numpy as np
import seaborn as sns
from functions import compare_obs_values_within_groups_to_excel
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
# Output directory for the figures and tables written by this notebook.
figures = "data/pilot/240106_venous_ec_proliferation"
os.makedirs(figures, exist_ok=True)

# Point scanpy's figure saving at the same directory; figures are 150-dpi PNGs.
sc.settings.figdir = figures
sc.set_figure_params(dpi=150, format="png")

# Shared min-max scaler with default settings; it is re-fit wherever it is used.
scaler = MinMaxScaler()

In [None]:
# Load the AnnData object that the cells below query and plot.
adata = sc.read(
    'data/single_cell_files/scanpy_files/venous_ec_celltyped_no_cc.gz.h5ad'
)

In [None]:
# Per-gene value of the Bst1_HyOx_P3 ambient-RNA contamination column for two genes.
adata.var.loc[['Cbr2', 'Sftpd'], 'ambient_rna_est_contamination_Bst1_HyOx_P3']

In [None]:
# Same two genes, read from the Bst1_Nox_P3 ambient-RNA contamination column.
adata.var.loc[['Cbr2', 'Sftpd'], 'ambient_rna_est_contamination_Bst1_Nox_P3']

In [None]:
# Number of cells per value of the 'Library' column.
adata.obs.loc[:, 'Library'].value_counts()

In [None]:
# Venous EC hyperoxia DEG table, indexed by the first column of the sheet.
degs = pd.read_excel(
    'data/figures/deg_no_cc/hyperoxia_degs.xlsx',
    sheet_name='Venous EC',
    index_col=0,
    header=0,
)

# Attach the per-gene ambient-RNA estimates for both libraries (aligned on the
# index) together with their HyOx-minus-Nox difference.
hyox_ambient_col = 'ambient_rna_est_contamination_Bst1_HyOx_P3'
nox_ambient_col = 'ambient_rna_est_contamination_Bst1_Nox_P3'
for ambient_col in (hyox_ambient_col, nox_ambient_col):
    degs[ambient_col] = adata.var[ambient_col]
degs['ambient_rna_difference'] = degs[hyox_ambient_col].sub(degs[nox_ambient_col])
degs = degs.sort_values('ambient_rna_difference', ascending=False)

# Keep genes with |logFC| > 1 and adjusted p <= 0.05.
# NOTE(review): despite the output file name, no filter on the ambient-RNA
# columns is applied here; they are only added and used for sorting.
has_large_effect = degs['logfoldchanges'].abs() > 1
is_significant = degs['pvals_adj'] <= 0.05
degs_sig = degs.loc[has_large_effect & is_significant]
degs_sig.to_csv(f'{figures}/ambient_rna_filtered_vec_sig_deg_no_cc.csv')
degs_sig

In [None]:
# `top_ambient_genes` was never assigned anywhere in this notebook, so this cell
# raised NameError on a fresh kernel (it only worked from leftover kernel state).
# NOTE(review): assumed to mean the significant DEGs with the largest
# HyOx-minus-Nox ambient-RNA difference. `degs_sig` is already sorted by that
# column in descending order; the cutoff of 20 is a guess. Confirm both.
top_ambient_genes = degs_sig.index[:20]
top_ambient_genes.tolist()

In [None]:
# Dot plot of four genes, grouped by cell subtype and treatment.
sc.pl.dotplot(
    adata,
    var_names=['Cdkn1a', 'Btg2', 'Mki67', 'Top2a'],
    groupby=['Cell Subtype', 'Treatment'],
)

In [None]:
# Subcluster comparison tables: Venous EC clusters '3 v 4' and Arterial EC
# clusters '0 v 4', each indexed by the first column of its sheet.
vec_size_comp = pd.read_excel(
    'data/figures/subcluster_no_cc/Venous EC/leiden_Venous EC_comparisons.xlsx',
    sheet_name='3 v 4',
    index_col=0,
)
art_size_comp = pd.read_excel(
    'data/figures/subcluster_no_cc/Arterial EC/leiden_Arterial EC_comparisons.xlsx',
    sheet_name='0 v 4',
    index_col=0,
)

# The two comparisons are in opposite directions, so the arterial score is
# subtracted. The subtraction aligns on the index: a gene missing from either
# sheet becomes NaN. Those are dropped before ranking because sort_values()
# places NaN last, which would otherwise leak NaN-scored genes into tail().
size_score = (vec_size_comp['scores'] - art_size_comp['scores']).dropna()
size_score = size_score.sort_values()
print(size_score.head(20))
print(size_score.tail(20))

# Ten highest and ten lowest combined scores.
large_genes = size_score.tail(10).index
small_genes = size_score.head(10).index

In [None]:
# Display the Arterial EC '0 v 4' comparison table loaded above for inspection.
art_size_comp

In [None]:
# Venous EC subcluster comparison '0 v 2', indexed by the first sheet column.
vec_prolif_comp = pd.read_excel(
    'data/figures/subcluster_no_cc/Venous EC/leiden_Venous EC_comparisons.xlsx',
    sheet_name='0 v 2',
    index_col=0,
)
# Last ten rows in sheet order (no sorting is done here).
# NOTE(review): presumably the sheet is already ordered by score; confirm.
prolif_genes = vec_prolif_comp.index[-10:]

In [None]:
# Restrict to the endothelial lineage, then draw the endothelial UMAP coloured
# by subtype and by each of the three gene sets, in that order.
endo_adata = adata[adata.obs['Lineage'] == 'Endothelial']
for color_by in ('Cell Subtype', large_genes, small_genes, prolif_genes):
    sc.pl.embedding(
        endo_adata,
        basis='X_umap_Endothelial',
        color=color_by,
    )

In [None]:
# Alternative iteration over every endothelial subtype (currently disabled):
# for ct in adata[adata.obs['Lineage']=='Endothelial'].obs['Cell Subtype'].cat.categories:
for ct in ['Arterial EC','Venous EC']:
    ct_adata = sc.read(f'data/figures/subcluster_no_cc/{ct}/{ct}_adata.gz.h5ad')
    leiden_key = f'leiden_{ct}'

    # UMAPs: subclusters first, then expression of each gene set.
    for umap_color in (leiden_key, prolif_genes, large_genes, small_genes):
        sc.pl.umap(ct_adata, color=umap_color)

    # One gene-set score per cell for each list; size_score is large minus small.
    for gene_set, score_name in (
        (large_genes, 'large_score'),
        (small_genes, 'small_score'),
        (prolif_genes, 'prolif_score'),
    ):
        sc.tl.score_genes(ct_adata, gene_set, score_name=score_name)
    ct_adata.obs['size_score'] = ct_adata.obs['large_score'] - ct_adata.obs['small_score']

    # Min-max rescale the two scores that are compared below (scaler is re-fit
    # per column and per cell type).
    for score_col in ('size_score', 'prolif_score'):
        ct_adata.obs[score_col] = scaler.fit_transform(ct_adata.obs[[score_col]])

    for score_col in ('prolif_score', 'size_score'):
        sc.pl.violin(ct_adata, score_col, groupby=leiden_key)

    # Disabled alternative definition of the proliferation score:
    # ct_adata.obs['prolif_score'] = ct_adata[:,vec_prolif_comp.tail(10).index].X.sum(axis=1)
    for score_col in ('small_score', 'large_score', 'size_score', 'prolif_score'):
        sc.pl.umap(ct_adata, color=score_col)

    # Bin cells into three equal-width size_score intervals.
    df = ct_adata.obs[['size_score','prolif_score','Treatment']].copy()
    df['size_score_bins'] = pd.cut(df['size_score'], bins=3)
    # Quantile-based alternative (disabled):
    # df['size_score_bins'] = pd.qcut(df['size_score'],3)

    # prolif_score per size bin: bar plots, then violin plots; each is drawn
    # once pooled and once split by treatment.
    for plot_fn, plot_kwargs in ((sns.barplot, {'ci': None}), (sns.violinplot, {'cut': 0})):
        for split_by_treatment in (False, True):
            if split_by_treatment:
                ax = plot_fn(
                    data=df,
                    x='size_score_bins',
                    y='prolif_score',
                    hue='Treatment',
                    hue_order=['Normoxia','Hyperoxia'],
                    **plot_kwargs,
                )
                # Move the legend outside the axes so it does not cover the plot.
                sns.move_legend(ax, "upper right", bbox_to_anchor=(1.5, 1))
            else:
                ax = plot_fn(data=df, x='size_score_bins', y='prolif_score', **plot_kwargs)
            plt.xticks(rotation=45)
            plt.show()
            plt.close()


In [None]:
# Dot plot of Cbr2 and Sftpc across cell subtypes.
sc.pl.dotplot(adata, var_names=['Cbr2', 'Sftpc'], groupby='Cell Subtype')

In [None]:
# Combine the Venous EC and Proliferating Venous EC subcluster objects.
vec = sc.read('data/figures/subcluster/Venous EC/Venous EC_adata.gz.h5ad')
pro_vec = sc.read('data/figures/subcluster/Proliferating Venous EC/Proliferating Venous EC_adata.gz.h5ad')
vec = vec.concatenate(pro_vec)

# Keep only genes whose 'mt', 'ribo' and 'hb' flags are all False.
keep_gene = vec.var['mt'].eq(False) & vec.var['ribo'].eq(False) & vec.var['hb'].eq(False)
vec = vec[:, keep_gene]

# Two-level label: proliferating subtype versus everything else.
pro_subtype = 'Proliferating Venous EC'
vec.obs['Proliferating'] = [
    'Pro' if subtype == pro_subtype else 'Non-pro'
    for subtype in vec.obs['Cell Subtype']
]

# Wilcoxon ranking of 'Pro' against the rest; pts=True adds the fraction of
# expressing cells, which feeds the pct_difference column.
sc.tl.rank_genes_groups(vec, 'Proliferating', method='wilcoxon', pts=True)
df_vec = sc.get.rank_genes_groups_df(vec, key="rank_genes_groups", group='Pro').set_index("names")
df_vec["pct_difference"] = df_vec["pct_nz_group"].sub(df_vec["pct_nz_reference"])
df_vec

In [None]:
cap1 = sc.read('data/figures/subcluster/Cap1/Cap1_adata.gz.h5ad')
pro_cap = sc.read('data/figures/subcluster/Proliferating Cap/Proliferating Cap_adata.gz.h5ad')

# Inspect the proliferating-cap subclusters alongside a panel of genes before
# choosing which clusters to keep.
pro_cap_umap_colors = ['leiden_Proliferating Cap','Gja4','Kit','Kitl','Car4','Sirpa','Tbx2','Car8','Peg3','Hpgd']
sc.pl.umap(pro_cap, color=pro_cap_umap_colors)

# Only leiden clusters 1-4 of the proliferating caps are merged with Cap1.
in_selected_cluster = pro_cap.obs['leiden_Proliferating Cap'].isin(['1','2','3','4'])
pro_cap1 = pro_cap[in_selected_cluster]
cap1 = cap1.concatenate(pro_cap1)

# Keep only genes whose 'mt', 'ribo' and 'hb' flags are all False.
keep_gene = cap1.var['mt'].eq(False) & cap1.var['ribo'].eq(False) & cap1.var['hb'].eq(False)
cap1 = cap1[:, keep_gene]

# Two-level label: proliferating subtype versus everything else.
pro_subtype = 'Proliferating Cap'
cap1.obs['Proliferating'] = [
    'Pro' if subtype == pro_subtype else 'Non-pro'
    for subtype in cap1.obs['Cell Subtype']
]

# Wilcoxon ranking of 'Pro' against the rest, with expressing-cell fractions.
sc.tl.rank_genes_groups(cap1, 'Proliferating', method='wilcoxon', pts=True)
df_cap1 = sc.get.rank_genes_groups_df(cap1, key="rank_genes_groups", group='Pro').set_index("names")
df_cap1["pct_difference"] = df_cap1["pct_nz_group"].sub(df_cap1["pct_nz_reference"])


In [None]:
# Side-by-side Wilcoxon scores for the 'Pro' group, one row per gene in the
# Cap1 result; VEC scores are aligned to that index (missing genes become NaN).
proliferating_score = pd.DataFrame(
    {'VEC': df_vec['scores'], 'Cap1': df_cap1['scores']},
    index=df_cap1.index,
)
from sklearn.preprocessing import MinMaxScaler

def normalize_dataframe(df, feature_range=(-25, 50)):
    """Min-max rescale every column of ``df`` to ``feature_range``.

    Each column is scaled independently, so every column's minimum maps to the
    lower bound and its maximum to the upper bound. As a consequence a raw
    value of 0 does not generally stay at 0 after scaling.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table to rescale.
    feature_range : tuple of (min, max), default (-25, 50)
        Target range passed to ``MinMaxScaler``. The default was picked to be
        close to the Cap1 Wilcoxon score range.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as ``df``.
    """
    range_scaler = MinMaxScaler(feature_range=feature_range)
    scaled_values = range_scaler.fit_transform(df)
    return pd.DataFrame(scaled_values, index=df.index, columns=df.columns)
# Rescale both score columns, then rank genes by VEC minus Cap1 (ascending).
# NOTE(review): rows with a NaN difference are sorted last by sort_values(), so
# they would appear in .tail(); confirm no gene is missing from either column.
proliferating_score = normalize_dataframe(proliferating_score)
proliferating_score = proliferating_score.assign(
    difference=proliferating_score['VEC'] - proliferating_score['Cap1']
).sort_values('difference')

In [None]:
# One workbook with the two rank tables and the normalised comparison.
# Sheet names are cut at 31 characters.
proliferation_sheets = {
    "Venous EC": df_vec,
    "Cap1": df_cap1,
    "Wilcoxon_score_normalized"[:31]: proliferating_score,
}
with pd.ExcelWriter(f"{figures}/proliferation_scores.xlsx", engine="xlsxwriter") as writer:
    for sheet_name, table in proliferation_sheets.items():
        table.to_excel(writer, sheet_name=sheet_name)

In [None]:
# Normalised VEC score against normalised Cap1 score, one borderless point per gene.
sns.scatterplot(x='VEC', y='Cap1', data=proliferating_score, linewidth=0)

In [None]:
# First 20 genes of the ranking, i.e. the smallest VEC-minus-Cap1 differences.
sc.pl.dotplot(adata, var_names=proliferating_score.index[:20], groupby='Cell Subtype')

In [None]:
# Last 20 genes of the ranking, i.e. the end of the ascending VEC-minus-Cap1 sort.
sc.pl.dotplot(adata, var_names=proliferating_score.index[-20:], groupby='Cell Subtype')

# Hyperoxia

In [None]:
# sheet_name=None loads every sheet; `degs` is now a dict keyed by sheet name
# (it rebinds the DataFrame of the same name created earlier in the notebook).
degs = pd.read_excel(
    'data/figures/deg/hyperoxia_degs.xlsx',
    sheet_name=None,
    index_col=0,
    header=0,
)

In [None]:
# Hyperoxia DEG scores for Venous EC and Cap1, restricted and aligned to the
# gene index of the Cap1 proliferation table.
hyperoxia_score = pd.DataFrame(
    {'VEC': degs['Venous EC']['scores'], 'Cap1': degs['Cap1']['scores']},
    index=df_cap1.index,
)
from sklearn.preprocessing import MinMaxScaler

def normalize_dataframe(df, feature_range=(-20, 20)):
    """Min-max rescale every column of ``df`` to ``feature_range``.

    NOTE: this rebinds ``normalize_dataframe`` from the earlier cell; the only
    difference is the default target range.

    Each column is scaled independently, so every column's minimum maps to the
    lower bound and its maximum to the upper bound. As a consequence a raw
    value of 0 does not generally stay at 0 after scaling.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table to rescale.
    feature_range : tuple of (min, max), default (-20, 20)
        Target range passed to ``MinMaxScaler``. The default was picked to
        match the Cap1 Wilcoxon score range.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as ``df``.
    """
    range_scaler = MinMaxScaler(feature_range=feature_range)
    scaled_values = range_scaler.fit_transform(df)
    return pd.DataFrame(scaled_values, index=df.index, columns=df.columns)
# Rescale both columns, then rank genes by VEC minus Cap1 (ascending).
hyperoxia_score = normalize_dataframe(hyperoxia_score)
hyperoxia_score['difference'] = hyperoxia_score['VEC'].sub(hyperoxia_score['Cap1'])
hyperoxia_score = hyperoxia_score.sort_values('difference')

# Drop genes whose name starts with 'mt', 'Rps' or 'Rpl'.
gene_name = hyperoxia_score.index.str
is_excluded = (
    gene_name.startswith('mt')
    | gene_name.startswith('Rps')
    | gene_name.startswith('Rpl')
)
hyperoxia_score = hyperoxia_score.loc[~is_excluded]
sns.scatterplot(x='VEC', y='Cap1', data=hyperoxia_score, linewidth=0)

In [None]:
# One workbook with the two DEG tables and the normalised comparison.
# Sheet names are cut at 31 characters.
hyperoxia_sheets = {
    "Venous EC": degs['Venous EC'],
    "Cap1": degs['Cap1'],
    "Wilcoxon_score_normalized"[:31]: hyperoxia_score,
}
with pd.ExcelWriter(f"{figures}/hyperoxia_scores.xlsx", engine="xlsxwriter") as writer:
    for sheet_name, table in hyperoxia_sheets.items():
        table.to_excel(writer, sheet_name=sheet_name)

In [None]:
# First 20 genes of the Venous EC hyperoxia DEG sheet, in sheet order.
sc.pl.dotplot(adata, var_names=degs['Venous EC'].index[:20], groupby='Cell Subtype')

In [None]:
# Dot plot of three selected genes across cell subtypes.
sc.pl.dotplot(adata, var_names=['Scn7a', 'Eln', 'Mgp'], groupby='Cell Subtype')

In [None]:
# Last 20 genes of the Venous EC hyperoxia DEG sheet, in sheet order.
sc.pl.dotplot(adata, var_names=degs['Venous EC'].index[-20:], groupby='Cell Subtype')

In [None]:
# Reload Venous EC and drop leiden subclusters 6 and 7. `.copy()` turns the
# subset into a real AnnData instead of a view, so the writes below do not
# rely on an implicit view-to-copy conversion.
vec = sc.read('data/figures/subcluster/Venous EC/Venous EC_adata.gz.h5ad')
vec = vec[~vec.obs['leiden_Venous EC'].isin(['6','7'])].copy()
sc.tl.diffmap(vec)

# Column 0 of X_diffmap is skipped, so the first two retained components are
# columns 1 and 2. They are stored in obs next to the UMAP coordinates.
diffmap_full = vec.obsm["X_diffmap"]
vec.obs['difm1'] = diffmap_full[:, 1]
vec.obs['difm2'] = diffmap_full[:, 2]
umap_coords = vec.obsm["X_umap"]
vec.obs['umap1'] = umap_coords[:, 0]
vec.obs['umap2'] = umap_coords[:, 1]

# Root cell = positional index of the cell with the largest UMAP-2 coordinate.
vec.uns['iroot'] = int(umap_coords[:, 1].argmax())

# Run DPT while X_diffmap and uns['diffmap_evals'] are still in step. The
# original trimmed only X_diffmap before this call, which shifts every
# eigenvector against its eigenvalue inside dpt.
sc.tl.dpt(vec)

# Now drop the first component, trimming the eigenvalues together with the
# eigenvectors so the two stay paired for any later use.
vec.obsm["X_diffmap"] = diffmap_full[:, 1:]
vec.uns['diffmap_evals'] = vec.uns['diffmap_evals'][1:]

sc.pl.umap(vec,color=['dpt_pseudotime'])