In [None]:
'''
Goal: Daiana's questions on snRNA
Author:Carsten Knutsen
Date:231114
conda_env:uterus_sc
Notes: Create figures for Daiana
'''

In [None]:
# --- Imports: standard library first, then third-party ---
import os

import matplotlib.pyplot as plt
# Only needed for processing
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns

# --- Notebook configuration ---
# Display up to 500 rows when a DataFrame is rendered.
pd.set_option('display.max_rows', 500)

# Folder that scanpy saves figures into; created if it does not exist yet.
output = '/home/carsten/alvira_bioinformatics/uterus/data/pilot/231114_snRNA_daiana_plan'
os.makedirs(output, exist_ok=True)

# Figure defaults: 300 dpi for saved files, 10 pt font, Arial family.
sc.set_figure_params(dpi_save=300, fontsize=10)
sc.settings.figdir = output
plt.rcParams["font.family"] = "Arial"

In [None]:
# Read the AnnData object used by every plot below
# (file name indicates it is already processed and cell-typed).
adata_path = '/home/carsten/alvira_bioinformatics/uterus/data/single_cell_files/scanpy_files/uterus_processed_celltyped.gz.h5ad'
adata = sc.read(adata_path)

#### 1) Cell Type Identification:
#### Please, could you generate a t-SNE or UMAP plot based on the snRNA-seq data to visualize the clustering of cells and the sub-clusters

In [None]:
# UMAP coloured by cell subtype.
sc.pl.umap(adata, color='Cell Subtype', alpha=0.5, save='_Cell Subtype.png')

# Settings shared by every UMAP that is coloured by the 'GroupContract' column.
group_umap_kwargs = dict(color='GroupContract', title='Group', alpha=0.5)

# All groups together ...
sc.pl.umap(adata, save='_Group.png', **group_umap_kwargs)

# ... then one figure per category, passing that category through `groups=`.
for group in adata.obs['GroupContract'].cat.categories:
    sc.pl.umap(
        adata,
        groups=group,
        save=f'_group_{group}.png',
        **group_umap_kwargs,
    )

#### 2) Gene Expression Heatmaps:
#### Is it possible to create heatmaps displaying the expression levels of specific gene expressions across different cell clusters to identify potential subpopulations with distinct expression profiles?


In [None]:
'''TRPV4: Transient Receptor Potential Vanilloid 4
OXTR: Oxytocin Receptor
RYR2: Ryanodine Receptor 2
ARRb1: Beta-Arrestin 1
ARRb2: Beta-Arrestin 2
AIP4: Atrophin-Interacting Protein 4 (ITCH - E3 ubiquitin-protein ligase Itchy homolog)
IP3: Inositol 1,4,5-Trisphosphate
ITPR1: Inositol 1,4,5-Trisphosphate Receptor Type 1
ESR1: Estrogen Receptor 1

nNOS: Neuronal Nitric Oxide Synthase
iNOS: Inducible Nitric Oxide Synthase
eNOS: Endothelial Nitric Oxide Synthase

TLR1: Toll-like Receptor 1
TLR2: Toll-like Receptor 2
TLR3: Toll-like Receptor 3
NFκB: Nuclear Factor Kappa B
TNF-alpha: Tumor Necrosis Factor Alpha

VEGFA: Vascular Endothelial Growth Factor A
VEGFR: Vascular Endothelial Growth Factor Receptors
FGF2: Fibroblast Growth Factor 2 
ANGPT1: Angiopoietin 1
ANGPT2: Angiopoietin 2
NOTCH1: Notch Receptor 
PDGFB: Platelet-Derived Growth Factor B
NRP1: Neuropilin 1'''

In [None]:
# Gene panel for the question-2 expression plots. Trailing comments keep the
# alternative names that were commented out next to the symbol actually used.
genes = [
    'TRPV4', 'OXTR', 'RYR2', 'ARRB1', 'ARRB2',
    'ITCH',   #'AIP4'
    'ITPR3',  #'IP3'
    'ITPR1', 'ESR1',
    'NOS1', 'NOS2', 'NOS3',
    'TLR1', 'TLR2', 'TLR3',
    'NFKB1', 'NFKB2', 'RELA', 'RELB', 'REL',
    'TNF',
    'VEGFA', 'VEGFC', 'FLT1', 'FLT4', 'KDR',
    'FGF2', 'ANGPT1', 'ANGPT2', 'NOTCH1', 'PDGFB',
    'NRP1',   #'NPP1'
]

# Dot plot of the panel per cell subtype; each gene is scaled across groups
# (standard_scale='var') and the groups are ordered by a dendrogram.
sc.pl.dotplot(
    adata,
    genes,
    groupby='Cell Subtype',
    standard_scale='var',
    dendrogram=True,
)

In [None]:
# Re-declares the same 32 genes, in the same order, as the preceding dot-plot
# cell. Built from short runs so each substituted symbol sits beside the name
# it replaced.
genes = (
    ['TRPV4', 'OXTR', 'RYR2', 'ARRB1', 'ARRB2']
    + ['ITCH']                                    #'AIP4'
    + ['ITPR3']                                   #'IP3'
    + ['ITPR1', 'ESR1']
    + ['NOS1', 'NOS2', 'NOS3']
    + ['TLR1', 'TLR2', 'TLR3']
    + ['NFKB1', 'NFKB2', 'RELA', 'RELB', 'REL']
    + ['TNF']
    + ['VEGFA', 'VEGFC', 'FLT1', 'FLT4', 'KDR']
    + ['FGF2', 'ANGPT1', 'ANGPT2', 'NOTCH1', 'PDGFB']
    + ['NRP1']                                    #'NPP1'
)


In [None]:
# Per-cell table: one column per gene in `genes`, with 'Cell Subtype' as the
# last column.
df = sc.get.obs_df(adata, keys=[*genes, 'Cell Subtype'])

# Mean of every column except the trailing 'Cell Subtype', per cell subtype.
df2 = df.groupby('Cell Subtype')[list(df.columns[:-1])].mean()

# Clustered heatmap of those means; standard_scale=1 rescales each column.
cg = sns.clustermap(data=df2, standard_scale=1, square=True)


In [None]:
# Gene order produced by the clustermap's column clustering. Read from the
# dendrogram's reordered column indices rather than from the heatmap tick
# labels, because seaborn may drop tick labels that would overlap and the
# label-derived list could then silently miss genes.
genes_sorted = df2.columns[cg.dendrogram_col.reordered_ind].tolist()

# Genes in clustered order, cell subtypes ordered by dendrogram.
sc.pl.dotplot(adata, genes_sorted, groupby='Cell Subtype', standard_scale='var',
              dendrogram=True, save='question2_gene_expression_sorted.png')
sc.pl.matrixplot(adata, genes_sorted, groupby='Cell Subtype', standard_scale='var',
                 dendrogram=True, save='question2_gene_expression_sorted.png')

# Genes in the original `genes` order, no dendrogram. These need their own file
# suffix: scanpy names the file from the plot type plus `save`, so reusing the
# "_sorted" suffix overwrote the two figures written just above.
sc.pl.dotplot(adata, genes, groupby='Cell Subtype', standard_scale='var',
              dendrogram=False, save='question2_gene_expression_unsorted.png')
sc.pl.matrixplot(adata, genes, groupby='Cell Subtype', standard_scale='var',
                 dendrogram=False, save='question2_gene_expression_unsorted.png')

In [None]:
# One UMAP per gene in the panel, coloured by that gene and saved as
# 'umap_<gene>.png' in the configured figure directory.
for gene in genes:
    sc.pl.umap(
        adata,
        color=gene,
        alpha=0.3,
        save=f'_{gene}.png',
    )

#### 3) Differential Gene Expression Analysis:
#### Please, could you perform a differential gene expression analysis between relevant cell clusters, focusing on genes associated with
    • uterine contractility and calcium signaling
    • lipid metabolism
    • angiogenesis pathway
    • NO production
#### And generate volcano plots or bar graphs to highlight significantly upregulated or downregulated genes.


In [None]:
'''Here is a list of some genes per pathway:

    a) Uterine Contractility and Calcium signaling
Oxytocin Pathway:
OXTR: Oxytocin Receptor
GNAQ: Guanine Nucleotide-Binding Protein G(q) Subunit Alpha
PLCβ: Phospholipase C Beta
PKC: Protein Kinase C
CaMKII: Calcium/Calmodulin-Dependent Protein Kinase II
MAPK: Mitogen-Activated Protein Kinase
PI3K: Phosphoinositide 3-Kinase
AKT1: AKT Serine/Threonine Kinase 1
MYLK: Myosin Light Chain Kinase
CALM: Calmodulin
ADRA1A: Adrenoceptor Alpha 1A
ADRB2: Adrenoceptor Beta 2
SRC: SRC Proto-Oncogene, Non-Receptor Tyrosine Kinase
EGFR: Epidermal Growth Factor Receptor
nNOS: Neuronal Nitric Oxide Synthase
iNOS: Inducible Nitric Oxide Synthase
eNOS: Endothelial Nitric Oxide Synthase

TRPV4 Pathway
TRPV4: Transient Receptor Potential Vanilloid 4
GNAQ: Guanine Nucleotide-Binding Protein G(q) Subunit Alpha
PLCβ: Phospholipase C Beta
CaMKII: Calcium/Calmodulin-Dependent Protein Kinase II
PKC: Protein Kinase C
SRC: SRC Proto-Oncogene, Non-Receptor Tyrosine Kinase
AKT1: AKT Serine/Threonine Kinase 1
PI3K: Phosphoinositide 3-Kinase
MAPK: Mitogen-Activated Protein Kinase
EGFR: Epidermal Growth Factor Receptor
NFAT: Nuclear Factor of Activated T Cells
PTK2: Protein Tyrosine Kinase 2 (also known as FAK, Focal Adhesion Kinase)
CAM: Calmodulin
RhoA: Ras Homolog Family Member A



    b) Lipid metabolism
PPARG: Peroxisome Proliferator-Activated Receptor Gamma
PPARA: Peroxisome Proliferator-Activated Receptor Alpha
PPARD: Peroxisome Proliferator-Activated Receptor Delta
RXRA: Retinoid X Receptor Alpha
CPT1A: Carnitine Palmitoyltransferase 1
FABP4: Fatty Acid Binding Protein 4
LPL: Lipoprotein Lipase
ACACA: Acetyl-CoA Carboxylase Alpha
ACADM: Acyl-CoA Dehydrogenase, C-4 to C-12 Straight Chain
SREBF1: Sterol Regulatory Element-Binding Transcription Factor 1
DGAT1: Diacylglycerol O-Acyltransferase 1
LXR: Liver X Receptor


    c) Angiogenesis pathway
VEGFA: Vascular Endothelial Growth Factor A
VEGFR: Vascular Endothelial Growth Factor Receptors
FGF2: Fibroblast Growth Factor 2 
ANGPT1: Angiopoietin 1
ANGPT2: Angiopoietin 2
NOTCH1: Notch Receptor 
PDGFB: Platelet-Derived Growth Factor B
NRP1: Neuropilin 1
HIF1A: Hypoxia-Inducible Factor 1 Alpha
PECAM1: Platelet and Endothelial Cell Adhesion Molecule 1
DLL4: Delta-like ligand 4



    d) Nitric oxide production
nNOS: Neuronal Nitric Oxide Synthase
iNOS: Inducible Nitric Oxide Synthase
eNOS: Endothelial Nitric Oxide Synthase
GUCY1A3: Guanylate Cyclase 1, Soluble, Alpha 3
PRKG1: Protein Kinase, cGMP-Dependent, Type I
CaMKII: Calcium/Calmodulin-Dependent Protein Kinase II
CA2: Carbonic Anhydrase 2
ARG1: Arginase 1
HSP90: Heat Shock Protein 90
CAT: Catalase
SOD: Superoxide Dismutase (there are different isoforms such as SOD1 and SOD2)
eNOSIP: Endothelial Nitric Oxide Synthase Interacting Protein
DPP4: Dipeptidyl Peptidase 4
AKT1: AKT Serine/Threonine Kinase 1
RXRA: Retinoid X Receptor Alpha'''

In [None]:
# Gene lists per pathway for the differential-expression request (question 3).
# Only the oxytocin pathway is filled in here.
#
# The original cell did not parse: the list was never closed and the only '}'
# was inside a trailing comment. Both brackets are now closed explicitly.
pathways = {
    'Oxytocin Pathway': [
        'OXTR',
        'GNAQ',
        'PLCB1', 'PLCB2', 'PLCB3', 'PLCB4',  # Phospholipase C Beta
        # TODO(review): the five entries marked "family name" are copied from
        # the request as-is and appear to be protein-family names rather than
        # individual gene symbols (compare PLCB1-4 above, which were expanded).
        # Replace them with the intended symbols before passing this list to
        # scanpy.
        'PKC',     # Protein Kinase C (family name)
        'CaMKII',  # Calcium/Calmodulin-Dependent Protein Kinase II (family name)
        'MAPK',    # Mitogen-Activated Protein Kinase (family name)
        'PI3K',    # Phosphoinositide 3-Kinase (family name)
        'AKT1',    # AKT Serine/Threonine Kinase 1
        'MYLK',    # Myosin Light Chain Kinase
        'CALM',    # Calmodulin (family name)
        'ADRA1A',  # Adrenoceptor Alpha 1A
        'ADRB2',   # Adrenoceptor Beta 2
        'SRC',     # SRC Proto-Oncogene, Non-Receptor Tyrosine Kinase
        'EGFR',    # Epidermal Growth Factor Receptor
        'NOS1',    # Neuronal Nitric Oxide Synthase
        'NOS2',    # Inducible Nitric Oxide Synthase
        'NOS3',    # Endothelial Nitric Oxide Synthase
    ],
}

#### 4) Pathway Analysis:
#### Perform pathway analysis to identify enriched biological pathways within specific cell types.

#### Will run pathway analysis using metascape on cell type markers in each group

#### 5) Network Analysis:
#### Can we construct gene co-expression networks to reveal potential interactions between genes involved in uterine contractility and calcium signaling, TRPV4 or OXTR modulation, and NO signaling?

#### 6) Cell Trajectory Analysis:
#### Do you think that we can create pseudotime plots to illustrate the predicted developmental trajectory of cells within the myometrial tissue?

#### 7) Gene-Set Enrichment Analysis:
#### Please, could you perform GSEA to determine whether predefined gene sets related to uterine function, calcium regulation, or NO signaling are enriched in specific cell clusters?

* Uterine function during gestation:
* ESR1: Estrogen Receptor 1
* PGR: Progesterone Receptor
* OTX2: Orthodenticle Homeobox 2
* PTGES: Prostaglandin E Synthase
* ITGB3: Integrin Subunit Beta 3
* LHCGR: Luteinizing Hormone/Chorionic Gonadotropin Receptor
* IGF1: Insulin-Like Growth Factor 1
* VDR: Vitamin D Receptor
* PLAU: Plasminogen Activator, Urokinase
* HOXA10: Homeobox A10
* IL6: Interleukin 6
* COL1A1: Collagen Type I Alpha 1 Chain
* CYP19A1: Aromatase
* HAND2: Heart and Neural Crest Derivatives Expressed 2
* TFPI: Tissue Factor Pathway Inhibitor
* MMP9: Matrix Metallopeptidase 9
* NFKB1: Nuclear Factor Kappa B Subunit 1
* HAND1: Heart and Neural Crest Derivatives Expressed 1
* CASP3: Caspase 3
* PRL: Prolactin


* Calcium regulation
* CACNA1C: Calcium Voltage-Gated Channel Subunit Alpha1 C
* ATP2A2: ATPase Sarcoplasmic/Endoplasmic Reticulum Ca2+ Transporting 2
* RYR2: Ryanodine Receptor 2
* PLN: Phospholamban
* CALM1: Calmodulin 1
* S100A10: S100 Calcium Binding Protein A10
* CAMK2D: Calcium/Calmodulin-Dependent Protein Kinase II Delta
* ITPR1: Inositol 1,4,5-Trisphosphate Receptor Type 1
* CALR: Calreticulin
* ATP2B2: ATPase Plasma Membrane Ca2+ Transporting 2
* CALM2: Calmodulin 2
* CASQ1: Calsequestrin 1
* TMEM38B: Transmembrane Protein 38B
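* ITPKC: Inositol-Trisphosphate 3-Kinase C (the original description, "Inositol-Tetrakisphosphate 1-Kinase", is the name of ITPK1; confirm which gene was intended)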
* ATP2B4: ATPase Plasma Membrane Ca2+ Transporting 4
* CAMK2B: Calcium/Calmodulin-Dependent Protein Kinase II Beta
* SLC8A1: Solute Carrier Family 8 Member A1
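* PLCD4: Phospholipase C Delta 4 (the original description, "ATPase Plasma Membrane Ca2+ Transporting 1", is the name of ATP2B1; confirm which gene was intended)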
* STIM1: Stromal Interaction Molecule 1


* NO signaling
* NOSi: Nitric Oxide Synthase inducible
* NOSe: Nitric Oxide Synthase Endothelial
* NOSn: Nitric Oxide Synthase Neuronal
* GUCY1A3: Guanylate Cyclase 1, Soluble, Alpha 3
* GUCY1B3: Guanylate Cyclase 1, Soluble, Beta 3
* PRKG1: Protein Kinase, cGMP-Dependent, Type I
* PRKG2: Protein Kinase, cGMP-Dependent, Type II
* CALM1: Calmodulin 1
* HMOX1: Heme Oxygenase 1
* EDN1: Endothelin 1
* CAV1: Caveolin 1
* CAV2: Caveolin 2
* CAT: Catalase
* GPX1: Glutathione Peroxidase 1
* SOD1: Superoxide Dismutase 1
* SOD2: Superoxide Dismutase 2
* GSR: Glutathione-Disulfide Reductase
* NRF2: Nuclear Factor, Erythroid 2 Like 2
* AKT1: AKT Serine/Threonine Kinase 1
* MAPK1: Mitogen-Activated Protein Kinase 1
