In [None]:
'''
Goal: Find unique differences in proliferation between venous EC (VEC) and Cap1.
'''

In [None]:
import os

import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

from functions import compare_obs_values_within_groups_to_excel

# Output directory for the figures and workbooks written by this notebook.
figures = "data/pilot/240106_venous_ec_proliferation"
os.makedirs(name=figures, exist_ok=True)

# Point scanpy at that directory and save figures as 300-dpi PNGs.
sc.settings.figdir = figures
sc.set_figure_params(format="png", dpi=300)

In [None]:
# Load the cell-typed dataset; ending the cell on the object shows its summary.
adata = sc.read(
    'data/single_cell_files/scanpy_files/venous_ec_celltyped.gz.h5ad'
)
adata

In [None]:
# Fraction of cells belonging to each lineage, computed within every library.
lineage_by_library = adata.obs.groupby('Library')['Lineage']
lineage_by_library.value_counts(normalize=True)

In [None]:
# Endothelial cells only, drawn on the endothelial-specific UMAP coordinates.
is_endothelial = adata.obs['Lineage'] == 'Endothelial'
endothelial_panels = [
    'Scn7a', 'Clic4', 'Nostrin', 'Arap2', 'Eln', 'Mgp', 'Dkk2', 'Lingo2', 'Fabp4',
    'Treatment', 'Cell Subtype',
]
sc.pl.embedding(
    adata[is_endothelial],
    basis='X_umap_Endothelial',
    color=endothelial_panels,
)

In [None]:
# Whole-dataset UMAP for five genes plus the cell-subtype annotation.
# NOTE(review): presumably a check for epithelial transcripts in other cell types — confirm.
sc.pl.umap(
    adata,
    color=['Scgb1a1', 'Cyp2f2', 'Cbr2', 'Hp', 'Sftpc', 'Cell Subtype'],
)

In [None]:
# Expression of Cbr2 and Sftpc summarised per cell subtype.
sc.pl.dotplot(adata, var_names=['Cbr2', 'Sftpc'], groupby='Cell Subtype')

In [None]:
# Show the 500 genes with the highest estimated ambient-RNA contamination.
# The row limit is raised only for this one display (option_context restores the
# previous setting on exit), so later cells keep pandas' normal truncation instead
# of inheriting a notebook-wide 500-row limit.
top_ambient = (
    adata.var['ambient_rna_est_contamination']
    .sort_values(ascending=False)
    .head(500)
)
with pd.option_context('display.max_rows', 500):
    # `display` is the notebook built-in; an explicit call is needed because an
    # expression inside a `with` block is not auto-displayed.
    display(top_ambient)

In [None]:
# Venous EC subcluster object, coloured by its Leiden clustering.
vec = sc.read(
    'data/figures/subcluster/Venous EC/Venous EC_adata.gz.h5ad'
)
sc.pl.umap(vec, color='leiden_Venous EC')

In [None]:

# Venous EC UMAP, first gene panel.
sc.pl.umap(
    vec,
    color=['Eln', 'Nrxn3', 'Mgp', 'Cyp1b1'],
)

In [None]:
# Venous EC UMAP, second gene panel.
sc.pl.umap(
    vec,
    color=['Arap2', 'Nostrin', 'Scn7a', 'Cyp4b1'],
)

In [None]:
# Venous EC UMAP, third panel: seven genes plus the log1p_n_genes_by_umis obs column.
sc.pl.umap(
    vec,
    color=[
        'Slc6a2', 'Car8', 'Mmp16', 'log1p_n_genes_by_umis',
        'Car4', 'Gja5', 'Kitl', 'Gpihbp1',
    ],
)

In [None]:
# Differential expression: proliferating vs. non-proliferating venous EC.
# Both subcluster objects are read fresh here, so `vec` does not depend on what
# earlier cells did to it and the cell can be re-run on its own.
vec = sc.read('data/figures/subcluster/Venous EC/Venous EC_adata.gz.h5ad')
pro_vec = sc.read('data/figures/subcluster/Proliferating Venous EC/Proliferating Venous EC_adata.gz.h5ad')
# NOTE(review): AnnData.concatenate is deprecated in favour of anndata.concat; a swap
# would need a `merge=` setting to keep the .var flag columns used below — confirm
# before changing.
vec = vec.concatenate(pro_vec)

# Drop genes flagged mt / ribo / hb. The explicit .copy() turns the sliced view into
# a real AnnData, so the .obs assignment below does not write into a view (which
# anndata would otherwise silently copy with an ImplicitModificationWarning).
keep_gene = (vec.var['mt'] == False) & (vec.var['ribo'] == False) & (vec.var['hb'] == False)
vec = vec[:, keep_gene].copy()

# Two-level label used as the grouping variable for the test.
is_proliferating = vec.obs['Cell Subtype'] == 'Proliferating Venous EC'
vec.obs['Proliferating'] = np.where(is_proliferating, 'Pro', 'Non-pro')

# NOTE(review): if vec.raw is set, rank_genes_groups uses it by default, and .raw is
# not reduced by the gene filter above — confirm which matrix is intended.
sc.tl.rank_genes_groups(vec, 'Proliferating', method='wilcoxon', pts=True)
df_vec = sc.get.rank_genes_groups_df(vec, key="rank_genes_groups", group='Pro').set_index("names")
# Difference in the fraction of expressing cells between 'Pro' and the reference.
df_vec["pct_difference"] = df_vec["pct_nz_group"] - df_vec["pct_nz_reference"]
df_vec

In [None]:
# Differential expression: proliferating vs. non-proliferating Cap1.
cap1 = sc.read('data/figures/subcluster/Cap1/Cap1_adata.gz.h5ad')
pro_cap = sc.read('data/figures/subcluster/Proliferating Cap/Proliferating Cap_adata.gz.h5ad')
sc.pl.umap(pro_cap, color=['leiden_Proliferating Cap', 'Gja4', 'Kit', 'Kitl', 'Car4', 'Sirpa', 'Tbx2', 'Peg3', 'Hpgd'])

# Keep only Leiden clusters 0, 1 and 4 of the proliferating capillaries.
# NOTE(review): presumably the Cap1-like clusters as judged from the UMAP above — confirm.
pro_cap1 = pro_cap[pro_cap.obs['leiden_Proliferating Cap'].isin(['0', '1', '4'])]
# NOTE(review): AnnData.concatenate is deprecated in favour of anndata.concat; a swap
# would need a `merge=` setting to keep the .var flag columns used below — confirm
# before changing.
cap1 = cap1.concatenate(pro_cap1)

# Drop genes flagged mt / ribo / hb. The explicit .copy() turns the sliced view into
# a real AnnData, so the .obs assignment below does not write into a view (which
# anndata would otherwise silently copy with an ImplicitModificationWarning).
keep_gene = (cap1.var['mt'] == False) & (cap1.var['ribo'] == False) & (cap1.var['hb'] == False)
cap1 = cap1[:, keep_gene].copy()

# Two-level label used as the grouping variable for the test.
is_proliferating = cap1.obs['Cell Subtype'] == 'Proliferating Cap'
cap1.obs['Proliferating'] = np.where(is_proliferating, 'Pro', 'Non-pro')

# NOTE(review): if cap1.raw is set, rank_genes_groups uses it by default, and .raw is
# not reduced by the gene filter above — confirm which matrix is intended.
sc.tl.rank_genes_groups(cap1, 'Proliferating', method='wilcoxon', pts=True)
df_cap1 = sc.get.rank_genes_groups_df(cap1, key="rank_genes_groups", group='Pro').set_index("names")
# Difference in the fraction of expressing cells between 'Pro' and the reference.
df_cap1["pct_difference"] = df_cap1["pct_nz_group"] - df_cap1["pct_nz_reference"]


In [None]:
def normalize_dataframe(df, feature_range=(-25, 50)):
    """Min-max scale every column of ``df`` independently onto ``feature_range``.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table; each column is rescaled on its own.
    feature_range : tuple of (min, max), default (-25, 50)
        Target interval for every column. The default was picked to be close to
        the Cap1 Wilcoxon score range.

    Returns
    -------
    pandas.DataFrame
        Scaled values with the same index and columns as ``df``.
    """
    scaler = MinMaxScaler(feature_range=feature_range)
    return pd.DataFrame(scaler.fit_transform(df), index=df.index, columns=df.columns)


# Wilcoxon scores of the 'Pro' group from both comparisons, one row per gene in
# df_cap1. Scores are aligned on gene name, so a gene absent from df_vec gets NaN.
proliferating_raw = pd.DataFrame(
    {'VEC': df_vec['scores'], 'Cap1': df_cap1['scores']},
    index=df_cap1.index,
)

# Each column is scaled onto the same interval separately, so 'difference' compares
# where a gene sits within each column's own score range, not the raw scores.
proliferating_score = normalize_dataframe(proliferating_raw)
proliferating_score['difference'] = proliferating_score['VEC'] - proliferating_score['Cap1']
proliferating_score = proliferating_score.sort_values('difference')

In [None]:
# One workbook: the two differential-expression tables plus the rescaled comparison.
workbook_path = f"{figures}/proliferation_scores.xlsx"
with pd.ExcelWriter(workbook_path, engine="xlsxwriter") as writer:
    for sheet_name, table in (
        ("Venous EC", df_vec),
        ("Cap1", df_cap1),
        ("Wilcoxon_score_normalized", proliferating_score),
    ):
        table.to_excel(writer, sheet_name=sheet_name)

In [None]:
# Rescaled VEC score against rescaled Cap1 score, one point per gene, no marker outline.
sns.scatterplot(
    data=proliferating_score,
    x='VEC',
    y='Cap1',
    linewidth=0,
)

In [None]:
# First 20 genes of the score table, i.e. the lowest VEC - Cap1 'difference'
# (the table is sorted ascending on that column).
lowest_difference_genes = proliferating_score.head(20).index
sc.pl.dotplot(adata, lowest_difference_genes, groupby='Cell Subtype')

In [None]:
# Last 20 genes of the score table, i.e. the highest VEC - Cap1 'difference'.
# NOTE(review): rows with a NaN difference, if any exist, sort last and would show up here.
highest_difference_genes = proliferating_score.tail(20).index
sc.pl.dotplot(adata, highest_difference_genes, groupby='Cell Subtype')

# Hyperoxia

In [None]:
# Hyperoxia differential-expression workbook, one table per sheet.
degs = pd.read_excel(
    'data/figures/deg/hyperoxia_degs.xlsx',
    sheet_name=None,  # load every sheet; the result is a dict keyed by sheet name
    index_col=0,
    header=0,
)

In [None]:
def normalize_dataframe(df, feature_range=(-20, 20)):
    """Min-max scale every column of ``df`` independently onto ``feature_range``.

    This redefines the helper of the same name from the proliferation section,
    with a different default interval.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table; each column is rescaled on its own.
    feature_range : tuple of (min, max), default (-20, 20)
        Target interval for every column. The default was picked to match the
        Cap1 Wilcoxon score range.

    Returns
    -------
    pandas.DataFrame
        Scaled values with the same index and columns as ``df``.
    """
    scaler = MinMaxScaler(feature_range=feature_range)
    return pd.DataFrame(scaler.fit_transform(df), index=df.index, columns=df.columns)


# Hyperoxia scores for both populations, laid out on the gene index of df_cap1 from
# the proliferation section. Scores are aligned on gene name; genes missing from a
# sheet get NaN.
hyperoxia_raw = pd.DataFrame(
    {'VEC': degs['Venous EC']['scores'], 'Cap1': degs['Cap1']['scores']},
    index=df_cap1.index,
)

hyperoxia_score = normalize_dataframe(hyperoxia_raw)
hyperoxia_score['difference'] = hyperoxia_score['VEC'] - hyperoxia_score['Cap1']
hyperoxia_score = hyperoxia_score.sort_values('difference')

# Remove genes whose name starts with 'mt', 'Rps' or 'Rpl'.
# NOTE(review): this runs after scaling, so these genes still contribute to each
# column's min/max; the proliferation section removed flagged genes before scoring —
# confirm the order is intended.
gene_names = hyperoxia_score.index.str
is_excluded = (
    gene_names.startswith('mt')
    | gene_names.startswith('Rps')
    | gene_names.startswith('Rpl')
)
hyperoxia_score = hyperoxia_score.loc[~is_excluded]
sns.scatterplot(data=hyperoxia_score, x='VEC', y='Cap1', linewidth=0)

In [None]:
# One workbook: the two hyperoxia tables as loaded plus the rescaled comparison.
workbook_path = f"{figures}/hyperoxia_scores.xlsx"
with pd.ExcelWriter(workbook_path, engine="xlsxwriter") as writer:
    for sheet_name, table in (
        ("Venous EC", degs['Venous EC']),
        ("Cap1", degs['Cap1']),
        ("Wilcoxon_score_normalized", hyperoxia_score),
    ):
        table.to_excel(writer, sheet_name=sheet_name)

In [None]:
# First 20 rows of the Venous EC hyperoxia table, in the order stored in the workbook.
first_hyperoxia_genes = degs['Venous EC'].head(20).index
sc.pl.dotplot(adata, first_hyperoxia_genes, groupby='Cell Subtype')

In [None]:
# Last 20 rows of the Venous EC hyperoxia table, in the order stored in the workbook.
last_hyperoxia_genes = degs['Venous EC'].tail(20).index
sc.pl.dotplot(adata, last_hyperoxia_genes, groupby='Cell Subtype')

In [None]:
# Diffusion pseudotime on venous EC, excluding Leiden clusters 6 and 7.
vec = sc.read('data/figures/subcluster/Venous EC/Venous EC_adata.gz.h5ad')
# Explicit .copy(): the tools below write into the object, and writing into a sliced
# view would otherwise trigger an implicit copy with a warning.
# NOTE(review): the neighbour graph is whatever the file stores, subset to the
# remaining cells; it is not recomputed here — confirm that is intended.
vec = vec[~vec.obs['leiden_Venous EC'].isin(['6', '7'])].copy()

sc.tl.diffmap(vec)
# Drop the first diffusion component. sc.tl.dpt reads obsm['X_diffmap'] together with
# uns['diffmap_evals'], so the eigenvalues are trimmed as well; trimming only the
# coordinates would pair every eigenvalue with the wrong component.
vec.obsm["X_diffmap"] = vec.obsm["X_diffmap"][:, 1:]
vec.uns["diffmap_evals"] = vec.uns["diffmap_evals"][1:]

# Expose the first two diffusion and UMAP coordinates as obs columns.
vec.obs['difm1'] = vec.obsm["X_diffmap"][:, 0]
vec.obs['difm2'] = vec.obsm["X_diffmap"][:, 1]
vec.obs['umap1'] = vec.obsm["X_umap"][:, 0]
vec.obs['umap2'] = vec.obsm["X_umap"][:, 1]

# Root cell for pseudotime: positional index of the cell with the largest UMAP2 value.
vec.uns['iroot'] = int(np.argmax(vec.obsm["X_umap"][:, 1]))
sc.tl.dpt(vec)

sc.pl.umap(vec, color=['dpt_pseudotime'])