# Pathway Enrichment Analysis of Dysregulated Genes in Colon Cancer

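This analysis identifies significantly enriched biological pathways associated with differentially expressed genes (DEGs) in colon cancer. Using gene set enrichment techniques, we explore key molecular functions, cellular processes, and signaling pathways disrupted in colon tumor samples. Concretely, the notebook first computes Spearman correlations between each gene's expression and measured metal levels, then runs Enrichr on the top positively correlated genes for each metal against the MSigDB Hallmark and Reactome gene sets. Visualizations such as bar plots and dot plots highlight pathway significance, offering insights into potential therapeutic targets and disease mechanisms.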

In [None]:
import scanpy as sc
import gseapy as gs
import rpy2.robjects as robjects
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri, numpy2ri
import numpy as np
import pandas as pd
import dask
from dask.diagnostics import ProgressBar
from scipy.stats import spearmanr
import os
import tqdm
import matplotlib.pyplot as plt
from functools import reduce

## Activate rpy2 conversion for Pandas and NumPy

In [None]:
# Turn on rpy2's NumPy and pandas converters so arrays and DataFrames can be
# handed to R functions. NumPy is activated first, then pandas; keep this
# order unless the conversion behaviour has been re-checked.
# NOTE(review): newer rpy2 releases appear to deprecate `activate()` in favour
# of `localconverter` contexts -- confirm against the installed rpy2 version.
numpy2ri.activate()
pandas2ri.activate()
# Load the R package EnhancedVolcano through rpy2. The `ev` handle is not
# referenced by any cell visible in this part of the notebook.
ev = importr('EnhancedVolcano')

In [None]:
def correlate_metals(X, metal_vector, names):
    """
    Compute Spearman correlation between gene expression and metal levels.

    Parameters
    ----------
    X : 2-D array-like or scipy sparse matrix
        Expression matrix indexed as ``X[:, i]``; column ``i`` belongs to
        ``names[i]``. Sparse input is densified one column at a time.
    metal_vector : 1-D array-like
        Metal measurements, one value per row of ``X``.
    names : iterable of str
        Gene identifiers, in column order. If a name repeats, only the last
        result for that name is kept (the rows are keyed by name).

    Returns
    -------
    pandas.DataFrame
        One row per gene with float columns ``p`` (p-value), ``r`` (Spearman
        rho) and ``z`` (Fisher z-transform of ``r``, i.e. ``arctanh(r)``).
        The three columns are present even when ``names`` is empty, so
        downstream ``.query("z>0")`` calls keep working.
    """
    # Local import keeps this function usable without touching the import cell.
    from scipy.sparse import issparse

    metal_values = np.asarray(metal_vector)

    sparse_input = issparse(X)
    if sparse_input:
        # CSC makes repeated column extraction cheap.
        X = X.tocsc()

    records = {}
    for i, gene in enumerate(names):
        column = X[:, i]
        if sparse_input:
            # spearmanr cannot rank a sparse column; densify just this one.
            column = column.toarray().ravel()
        # Tuple unpacking works for both old and new scipy result objects.
        r, p = spearmanr(column, metal_values)
        # arctanh(r) == 0.5 * log((1 + r) / (1 - r)); it yields +/-inf at
        # |r| == 1 and NaN for undefined correlations, like the manual form.
        records[gene] = (p, r, np.arctanh(r))

    return pd.DataFrame.from_dict(
        records, orient="index", columns=["p", "r", "z"], dtype=float
    )

## Load Data

In [None]:
# Read the AnnData object from the working directory; later cells use its
# expression matrix (.X), gene names (.var_names) and the "metals" table in .obsm.
adata_st_metals = sc.read_h5ad("metal_ST.h5ad")

## Compute Metal-Gene Correlations

In [None]:
# Queue one lazy correlate_metals call per metal column; nothing is computed
# until dask.compute is invoked in the next cell.
metal_table = adata_st_metals.obsm["metals"]
lazy_correlate = dask.delayed(correlate_metals)

metal_results = {}
# The slice skips the first four columns and the last one of the metals table.
for metal in metal_table.columns[4:-1]:
    metal_results[metal] = lazy_correlate(
        adata_st_metals.X,
        metal_table[metal],
        adata_st_metals.var_names,
    )

In [None]:
# Run every queued correlation with dask's process scheduler (20 workers) and
# show a progress bar. dask.compute returns a 1-tuple here, so unpack it.
with ProgressBar():
    (metal_results_v2,) = dask.compute(
        metal_results,
        scheduler="processes",
        num_workers=20,
    )

## Save Correlation Results

In [None]:
# Write the correlation results only when no pickle exists yet, then always
# reload from disk, so the on-disk copy is what the rest of the notebook uses.
correlation_file = "spearman_corr_metal_dict.pkl"
cache_missing = not os.path.exists(correlation_file)
if cache_missing:
    pd.to_pickle(metal_results_v2, correlation_file)
metal_results_v2 = pd.read_pickle(correlation_file)

## Perform Pathway Enrichment Analysis

In [None]:
# Number of top-ranked genes per metal passed to Enrichr.
n_genes = 150
# Result containers: direction label -> {metal -> enrichment result}, and the
# per-metal results for positively correlated genes.
pathway_associations, pos_associations = {}, {}

In [None]:
# For every metal, run Enrichr on the n_genes positively correlated genes
# (z > 0) with the smallest p-values, using all measured genes as background.
enrichr_libraries = ["MSigDB_Hallmark_2020", "Reactome_2022"]
# Build the background once, as a plain list rather than a pandas Index.
background_genes = adata_st_metals.var_names.tolist()

for metal, results in metal_results_v2.items():
    try:
        enr = gs.enrichr(
            gene_list=results.query("z>0").sort_values("p").index[:n_genes].tolist(),
            gene_sets=enrichr_libraries,
            background=background_genes,
            outdir=None,
        )
        enr_results = enr.results[enr.results.Gene_set.isin(enrichr_libraries)]
        # Drop everything from " R-HSA" onward so Reactome terms lose their
        # stable-ID suffix and read as plain pathway names.
        enr_results = enr_results.assign(Term=lambda x: x.Term.str.split(" R-HSA").str[0])
    except Exception as exc:
        # Best effort: one failing metal must not abort the whole run. Unlike
        # a bare `except:`, this still lets KeyboardInterrupt/SystemExit
        # through, and the reason for the failure is reported.
        print(f"Enrichment failed for {metal}: {exc!r}")
        # Sentinel: store the metal name instead of a DataFrame; the export
        # step skips non-DataFrame entries.
        enr_results = metal
    pos_associations[metal] = enr_results
pathway_associations["pos"] = pos_associations

## Save Pathway Associations

In [None]:
# Pickle the full association dictionary; the file name records n_genes.
pathway_file = f"pathway_associations_n{n_genes}.pkl"
pd.to_pickle(obj=pathway_associations, filepath_or_buffer=pathway_file)

In [None]:
# Export Results to Excel
# One workbook per category, one sheet per metal, keeping only terms with an
# adjusted p-value of at most 0.05.
for category, per_metal in pathway_associations.items():
    with pd.ExcelWriter(f'{category}_pathways_n{n_genes}.xlsx', engine='openpyxl') as writer:
        for metal, df in per_metal.items():
            # Entries that are not DataFrames mark failed enrichments; skip them.
            if not isinstance(df, pd.DataFrame):
                continue
            significant = df[df["Adjusted P-value"] <= 0.05]
            significant.to_excel(writer, sheet_name=metal)

## Plot Results (Example for Fe56)

In [None]:
# Histogram of the Spearman coefficients of all genes against Fe56.
fig, ax = plt.subplots(figsize=(8, 6))
fe56_correlations = metal_results_v2["Fe56"]["r"]
ax.hist(fe56_correlations, bins=50, color='blue', alpha=0.7)
ax.set_xlabel("Spearman Correlation")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Gene-Metal Correlations for Fe56")
plt.show()

In [None]:
## Pathway Enrichment Visualization