## Notebook for the gene enrichment analysis of Cancer Joanito Epithelial Cells

### Developed by: Anna Maguza

### Institute of Computational Biology - Computational Health Centre - Helmholtz Munich

### 4th June 2023

#### Load required packages

In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
import anndata as ad
import seaborn as sns
import numpy as np

#### Setup Cells


In [None]:
# Use the inline matplotlib backend so figures are rendered in the notebook output.
%matplotlib inline

In [None]:
# Scanpy logging level: 0 = errors, 1 = warnings, 2 = info, 3 = hints.
sc.settings.verbosity = 3
# Print the scanpy header so the software environment is recorded with the outputs.
sc.logging.print_header()
# Default figure appearance for the plots in this notebook.
sc.settings.set_figure_params(facecolor='white', dpi=80)

#### Upload Cancer Data

In [None]:
# Location of the Joanito cancer epithelial-cell AnnData file (post-scVI label
# prediction, per the file name). Kept under a dedicated name instead of `input`,
# which would shadow Python's built-in input() for the rest of the kernel session.
# NOTE(review): absolute, user-specific path — adjust to your own data directory.
cancer_h5ad_path = '/Users/anna.maguza/Desktop/Data/Processed_datasets/Cancer_dataset_integration/Predicted_cancer_labels/Joanito/Joanito_epithelial_cells_with_predicted_annotations_after_scVI.h5ad'
adata = sc.read_h5ad(cancer_h5ad_path)

In [None]:
# Display the matrix held in adata.raw; adata_raw is built from this slot in the next cell.
# NOTE(review): whether .raw holds counts or normalized values is not visible here — confirm
# before interpreting the gene-set scores.
adata.raw.X

In [None]:
# Build a standalone AnnData object from the .raw slot of adata; the gene-set scoring
# and UMAP plots in the cancer section operate on this object.
adata_raw = adata.raw.to_adata()

In [None]:
# Display the summary representation of adata_raw.
adata_raw

### Prepare Gene Sets

In [None]:
# Krebs (TCA) cycle gene signature passed to sc.tl.score_genes in the scoring cells.
Krebs_cycle_genes = [
    'ACO2', 'CS', 'FH', 'MDH1', 'OGDH', 'PDHA1', 'PDHA2', 'SDHC',
    'SUCLG1', 'ACLY', 'ACO1', 'DLAT', 'DLD', 'DLST', 'IDH1', 'IDH2',
    'IDH3A', 'IDH3B', 'IDH3G', 'MDH2', 'MPC1', 'OGDHL', 'PC', 'PCK1',
    'PCK2', 'PDHB', 'SDHA', 'SDHB', 'SDHD', 'SUCLA2', 'SUCLG2',
]

In [None]:
# Mitochondria-related gene signature passed to sc.tl.score_genes in the scoring cells.
#
# Symbols follow HGNC nomenclature. The mtDNA-encoded cytochrome c oxidase subunits are
# MT-CO1/MT-CO2/MT-CO3 (not MT-COX*) and cytochrome b is MT-CYB (not MT-CYTB). scanpy's
# score_genes ignores symbols that are absent from var_names (it only warns), so
# misspelled symbols would silently drop those genes from the signature.
mitochondrial_genes = [
    # Mitochondrial ribosomal proteins, large subunit
    "MRPL1", "MRPL2", "MRPL3", "MRPL4", "MRPL9", "MRPL10", "MRPL11", "MRPL12", "MRPL13", "MRPL14", "MRPL15",
    "MRPL16", "MRPL17", "MRPL18", "MRPL19", "MRPL20", "MRPL21", "MRPL22", "MRPL23", "MRPL24", "MRPL27", "MRPL28",
    "MRPL30", "MRPL32", "MRPL33", "MRPL34", "MRPL35", "MRPL36", "MRPL37", "MRPL38", "MRPL39", "MRPL40", "MRPL41",
    "MRPL42", "MRPL43", "MRPL44", "MRPL45", "MRPL46", "MRPL47", "MRPL48", "MRPL49", "MRPL50", "MRPL51", "MRPL52",
    "MRPL53", "MRPL54", "MRPL55", "MRPL57",
    # Mitochondrial ribosomal proteins, small subunit
    "MRPS2", "MRPS5", "MRPS6", "MRPS7", "MRPS9", "MRPS10", "MRPS11", "MRPS12", "MRPS14", "MRPS15", "MRPS16", "MRPS17",
    "MRPS18A", "MRPS18B", "MRPS18C", "MRPS21", "MRPS22", "MRPS23", "MRPS24", "MRPS25", "MRPS26", "MRPS27", "MRPS28", "MRPS30",
    "MRPS31", "MRPS33", "MRPS34", "MRPS35", "MRPS36", "DAP3",
    # Protein-coding genes encoded on the mitochondrial genome
    "MT-ND1", "MT-ND2", "MT-CO1", "MT-CO2", "MT-ATP8", "MT-ATP6", "MT-CO3", "MT-ND3", "MT-ND4L", "MT-ND4", "MT-ND5", "MT-ND6", "MT-CYB",
    # Nuclear-encoded mitochondrial maintenance / dynamics / respiratory-chain genes.
    # PARK2 is the former symbol of PRKN; both are listed so that whichever one the
    # dataset's annotation uses is matched (the other is ignored by score_genes).
    "POLG", "POLG2", "SOD2", "MFN1", "MFN2", "OPA1", "PINK1", "PARK7", "PARK2", "PRKN",
    "SDHA", "SDHB", "SDHC", "SDHD",
    "NDUFS1", "NDUFS2", "NDUFS3", "NDUFS4", "NDUFS7", "NDUFS8", "NDUFA1", "NDUFA2", "NDUFA9",
]

In [None]:
# Glycolysis gene signature passed to sc.tl.score_genes in the scoring cells.
glycolysis_genes = [
    'ALDOA', 'BPGM', 'ENO1', 'ENO2', 'GAPDH', 'GPI',
    'HK1', 'HK2', 'HKDC1', 'PFKL', 'PFKM', 'PGAM1',
    'PGAM2', 'PGAM4', 'PGK1', 'PKLR', 'PKM', 'TPI1',
]

### Cancer Dataset

In [None]:
# Score each cell for the Krebs-cycle signature. A dedicated score_name is used because
# score_genes otherwise writes to the default obs column 'score', which every other
# gene-set scoring call would overwrite; the named column also labels the UMAP panel.
sc.tl.score_genes(adata_raw, Krebs_cycle_genes, score_name='Krebs_cycle_score')
sc.set_figure_params(figsize=(10, 10), dpi=200)
# UMAP coloured by the score, the study of origin and the unified cell-state labels.
sc.pl.umap(adata_raw, color=['Krebs_cycle_score', 'Study_name', 'Unified Cell States'], color_map="magma", size=7, frameon=False)

In [None]:
# Score each cell for the mitochondrial signature. A dedicated score_name is used because
# score_genes otherwise writes to the default obs column 'score', which every other
# gene-set scoring call would overwrite; the named column also labels the UMAP panel.
sc.tl.score_genes(adata_raw, mitochondrial_genes, score_name='mitochondrial_score')
sc.set_figure_params(figsize=(10, 10), dpi=200)
# UMAP coloured by the score, the study of origin and the unified cell-state labels.
sc.pl.umap(adata_raw, color=['mitochondrial_score', 'Study_name', 'Unified Cell States'], color_map="magma", size=7, frameon=False)

In [None]:
# Score each cell for the glycolysis signature. A dedicated score_name is used because
# score_genes otherwise writes to the default obs column 'score', which every other
# gene-set scoring call would overwrite; the named column also labels the UMAP panel.
sc.tl.score_genes(adata_raw, glycolysis_genes, score_name='glycolysis_score')
sc.set_figure_params(figsize=(10, 10), dpi=200)
# UMAP coloured by the score, the study of origin and the unified cell-state labels.
sc.pl.umap(adata_raw, color=['glycolysis_score', 'Study_name', 'Unified Cell States'], color_map="magma", size=7, frameon=False)

### Upload Healthy and Cancer Dataset

In [None]:
# Location of the integrated (Joanito cancer + healthy) AnnData file, per the file name.
input2 = (
    '/Users/anna.maguza/Desktop/Data/Processed_datasets/Cancer_dataset_integration/input_files/Datasets_integration/Integrated_cancer_Joanito_and_Healthy_datasets_5000_output.h5ad'
)
# Read the integrated dataset.
adata2 = sc.read_h5ad(input2)

In [None]:
# Display the matrix held in adata2.raw; adata_raw2 is built from this slot in the next cell.
adata2.raw.X

In [None]:
# Build a standalone AnnData object from the .raw slot of adata2; the gene-set scoring
# and UMAP plots for the integrated dataset operate on this object.
adata_raw2 = adata2.raw.to_adata()

In [None]:
# Display the summary representation of adata_raw2.
adata_raw2

In [None]:
# Score each cell of the integrated dataset for the Krebs-cycle signature. A dedicated
# score_name is used because score_genes otherwise writes to the default obs column
# 'score', which every other gene-set scoring call would overwrite; the named column
# also labels the UMAP panel.
sc.tl.score_genes(adata_raw2, Krebs_cycle_genes, score_name='Krebs_cycle_score')
sc.set_figure_params(figsize=(10, 10), dpi=200)
# UMAP coloured by the score, the study of origin and the unified cell-state labels.
sc.pl.umap(adata_raw2, color=['Krebs_cycle_score', 'Study_name', 'Unified Cell States'], color_map="magma", size=7, frameon=False)

In [None]:
# Score each cell of the integrated dataset for the mitochondrial signature. A dedicated
# score_name is used because score_genes otherwise writes to the default obs column
# 'score', which every other gene-set scoring call would overwrite; the named column
# also labels the UMAP panel.
sc.tl.score_genes(adata_raw2, mitochondrial_genes, score_name='mitochondrial_score')
sc.set_figure_params(figsize=(10, 10), dpi=200)
# UMAP coloured by the score, the study of origin and the unified cell-state labels.
sc.pl.umap(adata_raw2, color=['mitochondrial_score', 'Study_name', 'Unified Cell States'], color_map="magma", size=7, frameon=False)

In [None]:
# Score each cell of the integrated dataset for the glycolysis signature. A dedicated
# score_name is used because score_genes otherwise writes to the default obs column
# 'score', which every other gene-set scoring call would overwrite; the named column
# also labels the UMAP panel.
sc.tl.score_genes(adata_raw2, glycolysis_genes, score_name='glycolysis_score')
sc.set_figure_params(figsize=(10, 10), dpi=200)
# UMAP coloured by the score, the study of origin and the unified cell-state labels.
sc.pl.umap(adata_raw2, color=['glycolysis_score', 'Study_name', 'Unified Cell States'], color_map="magma", size=7, frameon=False)

#### Upload Healthy Data

In [None]:
# Location of the healthy epithelial-cell AnnData file (post-scVI label prediction, per
# the file name). Kept under a dedicated name instead of `input`, which would shadow
# Python's built-in input() for the rest of the kernel session.
# NOTE(review): absolute, user-specific path — adjust to your own data directory.
# NOTE(review): this rebinds `adata`, replacing the cancer dataset loaded earlier.
healthy_h5ad_path = '/Users/anna.maguza/Desktop/Data/Processed_datasets/Cancer_dataset_integration/Healthy_epithelial_scVI/Healthy_epithelial_cells_with_predicted_annotations_after_scVI.h5ad'
adata = sc.read_h5ad(healthy_h5ad_path)

In [None]:
# Display the matrix held in adata.raw for the healthy dataset; adata_raw is rebuilt
# from this slot in the next cell.
adata.raw.X

In [None]:
# Build a standalone AnnData object from the .raw slot of the healthy adata.
# NOTE: this rebinds adata_raw, so from here on it refers to the healthy dataset rather
# than the cancer dataset scored earlier in the notebook.
adata_raw = adata.raw.to_adata()

In [None]:
# Score each cell for the Krebs-cycle signature. A dedicated score_name is used because
# score_genes otherwise writes to the default obs column 'score', which every other
# gene-set scoring call would overwrite; the named column also labels the UMAP panel.
sc.tl.score_genes(adata_raw, Krebs_cycle_genes, score_name='Krebs_cycle_score')
sc.set_figure_params(figsize=(10, 10), dpi=200)
# UMAP coloured by the score, the study of origin and the unified cell-state labels.
sc.pl.umap(adata_raw, color=['Krebs_cycle_score', 'Study_name', 'Unified Cell States'], color_map="magma", size=7, frameon=False)

In [None]:
# Score each cell for the mitochondrial signature. A dedicated score_name is used because
# score_genes otherwise writes to the default obs column 'score', which every other
# gene-set scoring call would overwrite; the named column also labels the UMAP panel.
sc.tl.score_genes(adata_raw, mitochondrial_genes, score_name='mitochondrial_score')
sc.set_figure_params(figsize=(10, 10), dpi=200)
# UMAP coloured by the score, the study of origin and the unified cell-state labels.
sc.pl.umap(adata_raw, color=['mitochondrial_score', 'Study_name', 'Unified Cell States'], color_map="magma", size=7, frameon=False)

In [None]:
# Score each cell for the glycolysis signature. A dedicated score_name is used because
# score_genes otherwise writes to the default obs column 'score', which every other
# gene-set scoring call would overwrite; the named column also labels the UMAP panel.
sc.tl.score_genes(adata_raw, glycolysis_genes, score_name='glycolysis_score')
sc.set_figure_params(figsize=(10, 10), dpi=200)
# UMAP coloured by the score, the study of origin and the unified cell-state labels.
sc.pl.umap(adata_raw, color=['glycolysis_score', 'Study_name', 'Unified Cell States'], color_map="magma", size=7, frameon=False)