# GO semantic similarity analysis

In [1]:
suppressPackageStartupMessages({
    library(dplyr)
    library(ggplot2)
    library(GOSemSim)
    library(org.Hs.eg.db)
})

## Functions

In [2]:
# Directory (relative to this notebook) holding the per-module GO analysis
# spreadsheets for each brain region.
config_mods <- list(
    "Caudate"       = "../../../caudate/goatools/_m/",
    "Dentate Gyrus" = "../../../dentateGyrus/goatools/_m/",
    "DLPFC"         = "../../../dlpfc/goatools/_m/",
    "Hippocampus"   = "../../../hippocampus/goatools/_m/"
)

# Module names (colors) selected for comparison in each brain region.
config_sig_mods <- list(
    "Caudate" = c("magenta", "skyblue", "violet"),
    "Dentate Gyrus" = c(
        "grey", "red", "black", "magenta",
        "lightcyan", "grey60", "darkgrey",
        "skyblue", "steelblue", "darkolivegreen",
        "sienna3", "skyblue3"
    ),
    "DLPFC" = c(
        "yellow", "black", "salmon", "cyan", "lightyellow",
        "darkred", "darkgreen", "darkturquoise", "saddlebrown"
    ),
    "Hippocampus" = c("grey", "brown", "royalblue", "saddlebrown")
)

In [3]:
# Build the GOSemSim semantic-similarity data object for one ontology.
#   ont: ontology code forwarded to godata() (the notebook uses "MF", "BP", "CC").
# Returns whatever godata() returns for the 'org.Hs.eg.db' annotation package.
get_semData <- function(ont){
    godata('org.Hs.eg.db', ont=ont)
}

# Pairwise GO semantic similarity between the selected modules of two tissues.
#
# For every module of `tissue1` and every module of `tissue2` (taken from
# `config_sig_mods`) whose spreadsheet "GO_analysis_module_<module>.xlsx"
# exists under the tissue's `config_mods` directory, the GO terms whose `NS`
# column equals `ont` are compared with mgoSim(measure="Wang", combine="BMA").
#
# Arguments:
#   ont     ontology code matched against the `NS` column of the spreadsheets
#   tissue1 name of the first tissue (key of config_mods / config_sig_mods)
#   tissue2 name of the second tissue (key of config_mods / config_sig_mods)
#   hgGO    semantic data object passed to mgoSim() as `semData`
#
# Returns a data.frame with columns Module_1, Module_2, Semantic_Similarity,
# Ont, Tissue_1, Tissue_2: one row per module pair for which both files
# exist, and zero rows (with all six columns) when no such pair exists.
GO_semantic_similarity <- function(ont, tissue1, tissue2, hgGO){
    # Read each existing module spreadsheet of a tissue exactly once and keep
    # only the GO identifiers that belong to the requested ontology.
    load_terms <- function(tissue){
        modules <- config_sig_mods[[tissue]]
        if(length(modules) == 0){
            return(list(modules=character(0), terms=list()))
        }
        files <- paste0(config_mods[[tissue]], "GO_analysis_module_",
                        modules, ".xlsx")
        present <- file.exists(files)
        terms <- lapply(files[present], function(fn){
            df <- readxl::read_excel(fn) %>% filter(NS == ont)
            df$GO
        })
        list(modules=modules[present], terms=terms)
    }

    set1 <- load_terms(tissue1)
    set2 <- load_terms(tissue2)
    n1 <- length(set1$modules)
    n2 <- length(set2$modules)
    n <- n1 * n2

    # Preallocate the result columns instead of growing them in the loop.
    mod1 <- character(n)
    mod2 <- character(n)
    ss <- numeric(n)
    k <- 0L
    for(i in seq_len(n1)){
        for(j in seq_len(n2)){
            k <- k + 1L
            # Use the semantic data supplied by the caller, not a global.
            ss[k] <- mgoSim(set1$terms[[i]], set2$terms[[j]], semData=hgGO,
                            measure="Wang", combine="BMA")
            mod1[k] <- set1$modules[i]
            mod2[k] <- set2$modules[j]
        }
    }

    # rep(..., n) keeps every column at length n, so an empty comparison
    # yields a well-formed zero-row data frame.
    data.frame("Module_1"=mod1, "Module_2"=mod2, "Semantic_Similarity"=ss,
               "Ont"=rep(ont, n), "Tissue_1"=rep(tissue1, n),
               "Tissue_2"=rep(tissue2, n))
}

# Write a plot to disk twice, as "<fn>.pdf" and "<fn>.png".
#   image: plot object handed to ggsave()
#   fn:    output path without extension
#   w, h:  width and height forwarded to ggsave()
save_img <- function(image, fn, w=7, h=7){
    for(suffix in c(".pdf", ".png")){
        target <- paste0(fn, suffix)
        ggsave(filename=target, plot=image, width=w, height=h)
    }
}

In [10]:
# Draw a tile plot of module-by-module semantic similarity for one tissue
# pair, with one facet per value of `Ont`.
#   dt:      data frame with columns Module_1, Module_2, Semantic_Similarity,
#            Ont, Tissue_1 and Tissue_2
#   tissue1: value of Tissue_1 to keep (also used as the x-axis label)
#   tissue2: value of Tissue_2 to keep (also used as the y-axis label)
# Returns the ggplot object.
generate_tile_plot <- function(dt, tissue1, tissue2){
    # Rows belonging to the requested tissue pair only
    pair_df <- filter(dt, Tissue_1 == tissue1, Tissue_2 == tissue2)

    # Each tile is labelled with its similarity rounded to one decimal
    tile_aes <- aes(x=Module_1, y=Module_2, fill=Semantic_Similarity,
                    label=format(round(Semantic_Similarity, 1)))

    fill_scale <- viridis::scale_fill_viridis(name="Semantic Similarity",
                                              limits=c(0,1), direction=-1,
                                              option="magma")
    text_theme <- theme(axis.text.x=element_text(angle = 45, hjust=1),
                        strip.text=element_text(face="bold"),
                        legend.key.width=unit(2, 'cm'))

    ggplot(pair_df, tile_aes) +
        geom_tile(color="grey") +
        ggfittext::geom_fit_text(contrast=TRUE) +
        viridis::scale_color_viridis(option="magma") +
        facet_wrap("~Ont") +
        fill_scale +
        labs(x=tissue1, y=tissue2) +
        ggpubr::theme_pubr(base_size=15, border=TRUE) +
        text_theme
}

## Semantic similarity analysis

In [9]:
# Compute semantic similarity for every ontology and every ordered tissue
# pair, then stack all results into `dt`.
#
# datalist1: results for the current tissue1, keyed by tissue2
# datalist2: results for the current ontology, keyed by tissue1
# datalist3: results keyed by ontology
datalist3 <- list()
for(ont in c("MF", "BP", "CC")){
    hsGO <- get_semData(ont)
    # Start from an empty list for each ontology so nothing from a previous
    # ontology can be carried over.
    datalist2 <- list()
    for(tissue1 in c("Caudate", "Dentate Gyrus", "DLPFC")){
        # Start from an empty list for each tissue1. Otherwise the entry keyed
        # by the skipped tissue2 (tissue1 itself) would still hold the result
        # of an earlier tissue1 and be bound a second time, duplicating rows.
        datalist1 <- list()
        for(tissue2 in c("Dentate Gyrus", "DLPFC", "Hippocampus")){
            if(tissue1 != tissue2){
                print(paste(tissue1, "VS", tissue2))
                datalist1[[tissue2]] <- GO_semantic_similarity(ont, tissue1, tissue2, hsGO)
            }
        }
        datalist2[[tissue1]] <- bind_rows(datalist1)
    }
    datalist3[[ont]] <- bind_rows(datalist2)
}

dt <- bind_rows(datalist3)
dt %>% head

preparing gene to GO mapping data...

preparing IC data...



[1] "Caudate VS Dentate Gyrus"
[1] "Caudate VS DLPFC"
[1] "Caudate VS Hippocampus"
[1] "Dentate Gyrus VS DLPFC"
[1] "Dentate Gyrus VS Hippocampus"
[1] "DLPFC VS Dentate Gyrus"
[1] "DLPFC VS Hippocampus"


preparing gene to GO mapping data...

preparing IC data...



[1] "Caudate VS Dentate Gyrus"
[1] "Caudate VS DLPFC"
[1] "Caudate VS Hippocampus"
[1] "Dentate Gyrus VS DLPFC"
[1] "Dentate Gyrus VS Hippocampus"
[1] "DLPFC VS Dentate Gyrus"
[1] "DLPFC VS Hippocampus"


preparing gene to GO mapping data...

preparing IC data...



[1] "Caudate VS Dentate Gyrus"
[1] "Caudate VS DLPFC"
[1] "Caudate VS Hippocampus"
[1] "Dentate Gyrus VS DLPFC"
[1] "Dentate Gyrus VS Hippocampus"
[1] "DLPFC VS Dentate Gyrus"
[1] "DLPFC VS Hippocampus"


Unnamed: 0_level_0,Module_1,Module_2,Semantic_Similarity,Ont,Tissue_1,Tissue_2
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>
1,magenta,grey,0.602,MF,Caudate,Dentate Gyrus
2,magenta,red,0.624,MF,Caudate,Dentate Gyrus
3,magenta,black,1.0,MF,Caudate,Dentate Gyrus
4,magenta,magenta,0.515,MF,Caudate,Dentate Gyrus
5,magenta,lightcyan,0.431,MF,Caudate,Dentate Gyrus
6,magenta,darkgrey,1.0,MF,Caudate,Dentate Gyrus


## Plot semantic similarity

In [17]:
# Save one tile plot (PDF and PNG) per ordered tissue pair; a tissue is never
# plotted against itself.
for(tissue1 in c("Caudate", "Dentate Gyrus", "DLPFC")){
    for(tissue2 in c("Dentate Gyrus", "DLPFC", "Hippocampus")){
        if(tissue1 == tissue2){
            next
        }
        # Spaces are stripped from tissue names to build the file stem
        outfile <- paste(c("GO_semantic_similarity",
                           gsub(" ", "", tissue1),
                           gsub(" ", "", tissue2)),
                         collapse='_')
        tile_plot <- generate_tile_plot(dt, tissue1, tissue2)
        save_img(tile_plot, outfile, w=10, h=5)
    }
}

## Reproducibility Information

In [None]:
# Record when the notebook was run and how much time the R process has used.
Sys.time()
proc.time()
# Widen console output before printing the session report.
options(width = 120)
# Report the R session details via the sessioninfo package.
sessioninfo::session_info()