In [None]:
import gzip
import math
import urllib.request
from io import BytesIO

import matplotlib.pyplot as plt
import pandas as pd
from goatools.associations import read_ncbi_gene2go
from goatools.base import download_go_basic_obo
from goatools.base import download_ncbi_associations
from goatools.go_enrichment import GOEnrichmentStudy
from goatools.obo_parser import GODag

# Step 1: Load Data
# Read the differential-expression table (a CSV in the working directory)
# into a DataFrame that the later steps filter and analyse.
data = pd.read_csv(filepath_or_buffer='differentiallyexpressedgenes.csv')

In [None]:
# Step 2: Filter Differentially Expressed Genes
# Keep only the rows whose p-value is below 0.05 in BOTH the colon and the
# lung columns.
filtered_genes = data.loc[
    data['Colon_pValue'].lt(0.05) & data['Lung_pValue'].lt(0.05)
]

In [None]:
# Step 3: Gene Ontology Analysis
ALPHA = 0.05  # significance cutoff, shared by the study and the plot filter

# Fetch NCBI's gene2go annotation table as a local file and read the mouse
# entries. read_ncbi_gene2go expects a file *path*, so the goatools download
# helper is used instead of handing it an in-memory stream.
gene2go_path = download_ncbi_associations()
gene2go = read_ncbi_gene2go(gene2go_path, taxids=[10090])  # 10090 = NCBI taxonomy ID for mouse

# GOEnrichmentStudy also needs the ontology graph itself (go-basic.obo).
go_dag = GODag(download_go_basic_obo())

# Positional arguments are (population, associations, ontology). The
# population used here is every gene that has a mouse GO annotation.
# NOTE(review): if the CSV lists every gene that was assayed, data['Gene_ID']
# would be a more appropriate background set — confirm with the analysis owner.
study = GOEnrichmentStudy(
    gene2go.keys(),
    gene2go,
    go_dag,
    alpha=ALPHA,
    methods=['bonferroni'],  # ensures each result record carries `p_bonferroni`
)

# NOTE(review): Gene_ID values must be the same kind of identifier as the
# gene2go keys (presumably integer NCBI Gene IDs) — verify against the CSV.
goea_records = study.run_study(list(filtered_genes['Gene_ID']))

# run_study returns a list of record objects; tabulate the fields used below
# so the results can be filtered and sorted with pandas.
result_columns = ['GO', 'name', 'enrichment', 'p_uncorrected', 'p_bonferroni']
go_enrichment_results = pd.DataFrame(
    [[getattr(rec, col) for col in result_columns] for rec in goea_records],
    columns=result_columns,
)

# Step 4: Visualize Results
# Keep terms that are over-represented ('e' = enriched, as opposed to
# 'p' = purified) and significant after correction, then show the 10 with the
# smallest p-values.
is_significant = (
    (go_enrichment_results['enrichment'] == 'e')
    & (go_enrichment_results['p_bonferroni'] < ALPHA)
)
# sort_values + head (rather than nsmallest) also works on an empty table.
top_enriched_terms = (
    go_enrichment_results[is_significant]
    .sort_values('p_uncorrected')
    .head(10)
)

if top_enriched_terms.empty:
    print(f'No significantly enriched GO terms at alpha = {ALPHA}.')
else:
    # Floor the p-values so -log10 stays finite if one underflows to 0.0.
    neg_log10_p = [
        -math.log10(max(p, 1e-300)) for p in top_enriched_terms['p_uncorrected']
    ]
    term_labels = (
        top_enriched_terms['GO'].astype(str)
        + ' '
        + top_enriched_terms['name'].fillna('').astype(str)
    )

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(term_labels, neg_log10_p, color='skyblue')
    ax.set_xlabel('-log10(uncorrected p-value)')
    ax.set_ylabel('GO Term')
    ax.set_title('Top Enriched GO Terms')
    ax.invert_yaxis()  # most significant term at the top
    plt.show()