In [None]:

library(GO.db)
library(org.Hs.eg.db)

# Record the versions of the annotation packages used for this export.
invisible(lapply(
  c("GO.db", "org.Hs.eg.db"),
  function(pkg_name) print(packageVersion(pkg_name))
))

In [None]:
# Build one row per GO term with the gene symbols and evidence codes that
# org.Hs.eg.db associates with it, then attach the term name and ontology
# from GO.db.
#
# dplyr also exports a `select()` function, so every AnnotationDbi call in
# this cell is namespace-qualified. That keeps the cell working regardless of
# which packages are attached and when the cell is re-run in the same session.
library(dplyr)

go_genes <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = AnnotationDbi::keys(org.Hs.eg.db, keytype = "GOALL"),
  columns = c("SYMBOL", "GOALL", "ONTOLOGYALL", "EVIDENCEALL"),
  keytype = "GOALL"
)

# Rename by column name rather than by position: the order in which select()
# returns its columns is not guaranteed to match the order requested, and a
# positional rename would silently swap e.g. gene symbols and evidence codes.
col_map <- c(
  GOALL = "GOID",
  SYMBOL = "Gene",
  ONTOLOGYALL = "Ontology",
  EVIDENCEALL = "Evidence"
)
stopifnot(all(names(col_map) %in% colnames(go_genes)))
go_genes <- go_genes[, names(col_map), drop = FALSE]
colnames(go_genes) <- unname(col_map)

# Collapse to one row per (GOID, Ontology); Genes and Evidence become
# list-columns holding the distinct values seen for that term.
go_terms <- go_genes %>%
  group_by(GOID, Ontology) %>%
  summarise(
    Genes = list(unique(Gene)),
    Evidence = list(unique(Evidence)),
    .groups = "drop"
  )

# Look up the term name and ontology in GO.db. Keys are de-duplicated and the
# keytype is explicit so the merge below cannot fan out on repeated GO IDs.
go_info <- AnnotationDbi::select(
  GO.db,
  keys = unique(go_terms$GOID),
  columns = c("TERM", "ONTOLOGY"),
  keytype = "GOID"
)
go_terms <- merge(go_terms, go_info, by = "GOID")

# Preview the merged table (displayed as the cell's output).
head(go_terms[, c("GOID", "TERM", "ONTOLOGY", "Genes")])

In [None]:
# Flatten the list-columns into comma-separated strings so the table can be
# written as a plain CSV. vapply() guarantees a character vector with one
# string per row; sapply() would return an empty list for a zero-row table.
collapse_unique <- function(values) {
  paste(unique(values), collapse = ", ")
}
go_terms$Genes <- vapply(go_terms$Genes, collapse_unique, character(1))
go_terms$Evidence <- vapply(go_terms$Evidence, collapse_unique, character(1))

# Keep a single ontology column: drop the existing `Ontology` column and
# rename the `ONTOLOGY` column to take its place.
go_terms$Ontology <- NULL
names(go_terms)[names(go_terms) == "ONTOLOGY"] <- "Ontology"

write.csv(go_terms, "../preprocessed_data/go_20250312.csv", row.names = FALSE)