# Visualization of Interaction Network — Circos Plots

***by Kengo Watanabe***  

This Jupyter Notebook (with R kernel) visualizes the results of the interaction analysis as interaction networks, using the circlize package in R.  

Original notebook (memo for my future tracing):  
* dalek:\[JupyterLab HOME\]/220427_Arivale-APOE-BA-visualization-for-Dylan/240820_Arivale-APOE-BA-visualization-for-Dylan_ver3-3.ipynb  

In [None]:
library("tidyverse")
options(repr.plot.width=5, repr.plot.height=5)#Default=7x7
#CRAN
for (package in c("readxl", "circlize", "RColorBrewer", "colorspace")) {
    #install.packages(package)
    eval(bquote(library(.(package))))
    print(str_c(package, ": ", as.character(packageVersion(package))))
}

# 1. Prepare all analytes

In [None]:
#Load the cleaned analyte metadata (every column is read as text)
fileDir <- "./ExportData/"
ipynbName <- "240725_Arivale-APOE-BA_CleanMetadata_"
fileName <- "analyte-metadata.xlsx"
temp_vec <- c("Metabolite", "Protein", "LabTest")
##Read one table per sheet, then stack them in the sheet order
temp_tbl <- temp_vec %>%
    purrr::map(function(sheet) {
        read_excel(str_c(fileDir,ipynbName,fileName), sheet=sheet, col_types="text")
    }) %>%
    dplyr::bind_rows()

#Append the updated microbiome metadata, which is stored in a separate workbook
fileDir <- "./ImportData/"
fileName <- "240819_Arivale-APOE-BA-Microbiome_Gaps_Filled.xlsx"
temp_vec <- c("Microbiome_Update")
temp_tbl <- temp_vec %>%
    purrr::map(function(sheet) {
        read_excel(str_c(fileDir,fileName), sheet=sheet, col_types="text")
    }) %>%
    dplyr::bind_rows(temp_tbl, .)

#Check the size and the number of analytes per data type
print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
dplyr::count(temp_tbl, AnalyteType, name="Count")

analyte_tbl <- temp_tbl

# 2. APOE2 vs. biologically old in male

## 2-1. Prepare the significant pairs

### 2-1-1. Retrieve the significant pairs

In [None]:
#Retrieve the significant interaction pairs of each cohort and stack them into one table
fdr_cutoff <- 0.1#Pairs with pFDR_interaction below this cutoff are kept
temp_vec <- c("M_APOE2", "M_Biologically_Old")#Sheet names in the source workbook
cohort_vec <- c("MaleAPOE2", "MaleBAold")#Cohort labels, paired with the sheet names by position
stopifnot(length(temp_vec)==length(cohort_vec))#Guard the position-based pairing

#Source workbook (shared by all sheets, hence defined once outside the loop)
fileDir <- "./ImportData/"
fileName <- "240707_ME2_MBioOld_FBioOld_for_circos_fig.xlsx"

temp_tbl1 <- tibble()
for (i in seq_along(cohort_vec)) {#seq_along() does not iterate on an empty vector, unlike 1:length()
    sheetName <- temp_vec[i]
    cohort <- cohort_vec[i]

    #Import the significant pairs
    temp_tbl <- read_excel(str_c(fileDir,fileName), sheet=sheetName)
    print(str_c(sheetName," sheet: ",cohort))
    print(str_c("nrow: ",nrow(temp_tbl)))

    #Take the significant pairs (rows with missing pFDR_interaction are dropped by filter())
    temp_tbl <- temp_tbl %>%
        dplyr::filter(pFDR_interaction < fdr_cutoff) %>%
        #Prepare pair ID for handling: <cohort>_row<5-digit row number within the kept rows>
        dplyr::mutate(PairID=str_c(cohort,"_row",str_pad(row_number(), width=5, side="left", pad="0")))
    print(str_c("-> FDR < ",fdr_cutoff,": ",nrow(temp_tbl)))

    temp_tbl1 <- dplyr::bind_rows(temp_tbl1, temp_tbl)
}

print("Combined")
print(str_c("nrow: ",nrow(temp_tbl1)))
head(temp_tbl1)

pair_tbl <- temp_tbl1

### 2-1-2. Update analyte ID and label

> Because analyte label is not unique in his original tables.  

In [None]:
#Update analyte ID and label
##For each side of the pair (Analyte1/type1/supergroup1, then Analyte2/type2/supergroup2),
##the analyte is matched against analyte_tbl, and the matched AnalyteID, AnalyteLabel, and
##AnalyteType are appended to pair_tbl with the suffix "_1" or "_2"
for (analyte_i in c(1, 2)) {
    #Take target analytes and clean for matching with analyte metadata
    ##The type "clinical" is relabelled as "Lab test"; any other type is converted to title case
    temp_tbl1 <- pair_tbl %>%
        dplyr::select(PairID, !!as.name(str_c("Analyte",analyte_i)),
                      !!as.name(str_c("type",analyte_i)), !!as.name(str_c("supergroup",analyte_i))) %>%
        dplyr::rename(AnalyteName=!!as.name(str_c("Analyte",analyte_i)),
                      Supergroup=!!as.name(str_c("supergroup",analyte_i))) %>%
        dplyr::mutate(AnalyteType=ifelse(!!as.name(str_c("type",analyte_i))=="clinical",
                                         "Lab test", str_to_title(!!as.name(str_c("type",analyte_i)))))
    
    #Merge analyte metadata based on analyte type
    ##The join key depends on the type: metabolites also match on SUPER_PATHWAY and
    ##proteins also match on OlinkPanel (both compared with Supergroup), while the other
    ##types match on AnalyteType and AnalyteName only
    temp_tbl2 <- tibble()
    for (analytetype in unique(temp_tbl1$AnalyteType)) {
        temp_tbl <- temp_tbl1 %>%
            dplyr::filter(AnalyteType==analytetype)
        if (analytetype=="Metabolite") {
            temp_tbl <- analyte_tbl %>%
                dplyr::select(AnalyteID, AnalyteType, AnalyteName, AnalyteLabel, SUPER_PATHWAY) %>%
                dplyr::rename(Supergroup=SUPER_PATHWAY) %>%
                dplyr::left_join(temp_tbl, ., by=c("AnalyteType", "AnalyteName", "Supergroup"))
        } else if (analytetype=="Protein") {
            temp_tbl <- analyte_tbl %>%
                dplyr::select(AnalyteID, AnalyteType, AnalyteName, AnalyteLabel, OlinkPanel) %>%
                dplyr::rename(Supergroup=OlinkPanel) %>%
                dplyr::left_join(temp_tbl, ., by=c("AnalyteType", "AnalyteName", "Supergroup"))
        } else {
            temp_tbl <- analyte_tbl %>%
                dplyr::select(AnalyteID, AnalyteType, AnalyteName, AnalyteLabel) %>%
                dplyr::left_join(temp_tbl, ., by=c("AnalyteType", "AnalyteName"))
        }
        temp_tbl2 <- dplyr::bind_rows(temp_tbl2, temp_tbl)
    }
    
    #Clean
    ##Keep the matched columns and add the side suffix to all of them but PairID
    temp_tbl <- temp_tbl2 %>%
        dplyr::select(PairID, AnalyteID, AnalyteLabel, AnalyteType)
    colnames(temp_tbl)[2:4] = str_c(colnames(temp_tbl)[2:4],"_",analyte_i)
    
    #Add to the original table
    ##NOTE(review): an analyte matching several metadata rows would duplicate its PairID here
    ##-- presumably the join keys are unique in analyte_tbl; confirm with the nrow printed below
    pair_tbl <- dplyr::left_join(pair_tbl, temp_tbl, by="PairID")
}
##Clean
##Keep the pair ID, the updated analyte columns of both sides, and the interaction statistics
pair_tbl <- pair_tbl %>%
    dplyr::select(PairID,
                  AnalyteID_1, AnalyteLabel_1, AnalyteType_1,
                  AnalyteID_2, AnalyteLabel_2, AnalyteType_2,
                  Beta_interaction, pnominal_interaction, pFDR_interaction)
print(str_c("nrow: ",nrow(pair_tbl)))
head(pair_tbl)

## 2-2. Extract analytes from the significant pairs

In [None]:
#Extract analytes from the significant pairs
##Stack both sides of each pair (2nd column renamed to AnalyteID): 1st side, then 2nd side
temp_tbl <- dplyr::bind_rows(
    dplyr::select(pair_tbl, PairID, AnalyteID=AnalyteID_1),
    dplyr::select(pair_tbl, PairID, AnalyteID=AnalyteID_2)
)

#Flatten the same analytes while calculating the number of significant pairs per cohort
##The cohort is recovered from the PairID prefix (i.e., PairID without the "_row<number>" suffix)
temp_tbl$Cohort <- str_replace(temp_tbl$PairID, "_row[0-9]+$", "")
temp_tbl <- temp_tbl %>%
    dplyr::group_by(Cohort, AnalyteID) %>%
    dplyr::summarize(nSigPairs=n()) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(desc(nSigPairs))

#Add label
temp_tbl <- dplyr::left_join(temp_tbl,
                             dplyr::select(analyte_tbl, AnalyteID, AnalyteLabel, AnalyteType),
                             by="AnalyteID")

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
#Check
##Number of significant analytes and total of their pair counts per cohort
temp_tbl %>%
    dplyr::group_by(Cohort) %>%
    dplyr::summarize(nSigAnalytes=n(), nSigPairs_sum=sum(nSigPairs))
##Percentage of significant analytes against all analytes of the same data type
temp_tbl1 <- dplyr::count(analyte_tbl, AnalyteType, name="nBackgrounds")
temp_tbl %>%
    dplyr::group_by(Cohort, AnalyteType) %>%
    dplyr::summarize(nSignifs=n()) %>%
    dplyr::ungroup() %>%
    dplyr::left_join(temp_tbl1, by="AnalyteType") %>%
    dplyr::mutate(Percentage=nSignifs/nBackgrounds*100)

sig_analyte_tbl <- temp_tbl

## 2-3. Summarize edge attribution

### 2-3-1. Prepare edge table

In [None]:
#Prepare edge ID and cohort
##EdgeID depends on the analyte order ("A vs. B" differs from "B vs. A");
##the cohort is the PairID prefix (i.e., PairID without the "_row<number>" suffix)
temp_tbl1 <- pair_tbl %>%
    dplyr::mutate(EdgeID=str_c(AnalyteID_1," vs. ",AnalyteID_2),
                  Cohort=str_replace(PairID, "_row[0-9]+$", ""))

#Initialize edge table
temp_tbl <- temp_tbl1 %>%
    dplyr::select(EdgeID, AnalyteID_1, AnalyteLabel_1, AnalyteType_1, AnalyteID_2, AnalyteLabel_2, AnalyteType_2) %>%
    dplyr::distinct()

#Add edge attributions per cohort
##An edge absent from a cohort gets NA in the <cohort>_bcoef and <cohort>_pFDR columns
for (i in seq_along(cohort_vec)) {#seq_along() does not iterate on an empty vector, unlike 1:length()
    cohort <- cohort_vec[i]
    temp_tbl <- temp_tbl1 %>%
        dplyr::filter(Cohort==cohort) %>%
        dplyr::select(EdgeID, Beta_interaction, pFDR_interaction) %>%
        dplyr::rename(!!as.name(str_c(cohort,"_bcoef")):=Beta_interaction,
                      !!as.name(str_c(cohort,"_pFDR")):=pFDR_interaction) %>%
        dplyr::left_join(temp_tbl, ., by="EdgeID")
}

#Warn about repeated edges, because each row of this table is counted as one edge later
if (anyDuplicated(temp_tbl$EdgeID)>0) {
    warning("Duplicated EdgeID in the edge table: ",
            sum(duplicated(temp_tbl$EdgeID))," extra row(s)", call.=FALSE)
}

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
#Check inverse match just in case
##Any row shown here is an edge that also exists with the two analytes swapped
print("Check inverse EdgeID match:")
temp_vec <- str_c(temp_tbl$AnalyteID_2," vs. ",temp_tbl$AnalyteID_1)
temp_tbl %>%
    dplyr::filter(EdgeID %in% temp_vec)

edge_tbl <- temp_tbl

> In this result, edges are to be highlighted based on significance and direction of interaction term.  

In [None]:
#Add edge category
##Note that this table contains only the significant pairs in either comparison; hence,
##a missing pFDR in one cohort means that the edge is significant only in the other cohort
##Categories:
## - "<cohort>(+)" / "<cohort>(-)": significant only in that cohort (a zero coefficient falls in "(-)")
## - "Both(+)" / "Both(-)": significant in both cohorts with the same sign
## - "<cohort1>(+)<cohort2>(-)" / "<cohort1>(-)<cohort2>(+)": significant in both with opposite signs
## - "Error due to beta = 0 !?": anything else (zero coefficient in the both case, or a missing coefficient)
##The whole column is computed at once, which also works for an empty edge table
temp_vec <- local({
    pfdr1 <- edge_tbl[[str_c(cohort_vec[1],"_pFDR")]]
    pfdr2 <- edge_tbl[[str_c(cohort_vec[2],"_pFDR")]]
    bcoef1 <- edge_tbl[[str_c(cohort_vec[1],"_bcoef")]]
    bcoef2 <- edge_tbl[[str_c(cohort_vec[2],"_bcoef")]]
    only2nd <- is.na(pfdr1)#Significant only in 2nd
    only1st <- (!only2nd)&is.na(pfdr2)#Significant only in 1st
    both <- (!only2nd)&(!only1st)#Significant in both
    dplyr::case_when(
        only2nd&(bcoef2>0) ~ str_c(cohort_vec[2],"(+)"),
        only2nd&(bcoef2<=0) ~ str_c(cohort_vec[2],"(-)"),
        only1st&(bcoef1>0) ~ str_c(cohort_vec[1],"(+)"),
        only1st&(bcoef1<=0) ~ str_c(cohort_vec[1],"(-)"),
        both&(bcoef1>0)&(bcoef2>0) ~ "Both(+)",
        both&(bcoef1<0)&(bcoef2<0) ~ "Both(-)",
        both&(bcoef1>0)&(bcoef2<0) ~ str_c(cohort_vec[1],"(+)",cohort_vec[2],"(-)"),
        both&(bcoef1<0)&(bcoef2>0) ~ str_c(cohort_vec[1],"(-)",cohort_vec[2],"(+)"),
        TRUE ~ "Error due to beta = 0 !?"
    )
})
edge_tbl$Category <- temp_vec
head(edge_tbl)

#Check
edge_tbl %>%
    dplyr::group_by(Category) %>%
    summarize(Count=n())

### 2-3-2. Filter edges to be presented

> All edges can be presented, and thus skip in this case.  

## 2-4. Summarize node attribution

### 2-4-1. Prepare node table

In [None]:
#Initialize node table
##One row per analyte in the metadata, regardless of significance
temp_tbl <- analyte_tbl %>%
    dplyr::select(AnalyteID, AnalyteLabel, AnalyteType)

#Add node attributions per cohort
##<cohort>_nSigPairs stays NA for an analyte without any significant pair in that cohort
for (i in seq_along(cohort_vec)) {#seq_along() does not iterate on an empty vector, unlike 1:length()
    cohort <- cohort_vec[i]
    temp_tbl <- sig_analyte_tbl %>%
        dplyr::filter(Cohort==cohort) %>%
        dplyr::select(AnalyteID, nSigPairs) %>%
        dplyr::rename(!!as.name(str_c(cohort,"_nSigPairs")):=nSigPairs) %>%
        dplyr::left_join(temp_tbl, ., by="AnalyteID")
}

#Add edge count
##Stack both ends of every edge, then count the rows per analyte
temp_tbl1 <- dplyr::bind_rows(
    dplyr::select(edge_tbl, EdgeID, AnalyteID=AnalyteID_1),
    dplyr::select(edge_tbl, EdgeID, AnalyteID=AnalyteID_2)
)
##nEdges stays NA for an analyte without any edge
temp_tbl <- temp_tbl1 %>%
    dplyr::group_by(AnalyteID) %>%
    dplyr::summarize(nEdges=n()) %>%
    dplyr::left_join(temp_tbl, ., by="AnalyteID")

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
##Share of nodes having at least one edge, per data type
temp_tbl %>% dplyr::group_by(AnalyteType) %>%
    dplyr::summarize(nNodes=n(),
                     nSigNodes=sum(!is.na(nEdges))) %>%
    dplyr::mutate(Percentage=nSigNodes/nNodes*100)

node_tbl <- temp_tbl

> –> This network is too large to label all nodes.  
> –> Highlight nodes based on two styles: 1) sectors and 2) text labels.  

### 2-4-2. Categorize nodes

> In this case, a data type-based category is not well suited for the sectors, especially because the metabolite sector becomes much larger than the lab test sector. Hence, nodes are further categorized based on annotations.  

#### 2-4-2-1. Pre-check metabolite annotations

In [None]:
#Pre-check the metabolite annotations at the super pathway level
analytetype <- "Metabolite"
annotcol <- "SUPER_PATHWAY"
annotlabel <- "Superpathway"

#Prepare annotations
##AnnotationID is "<label>:<annotation>"; analytes without the annotation are removed
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::transmute(AnalyteID, AnalyteLabel,
                     AnnotationID=str_c(annotlabel,":",.data[[annotcol]]),
                     AnnotationName=.data[[annotcol]]) %>%
    dplyr::filter(!is.na(AnnotationID))

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Number of analytes and of significant analytes per annotation, larger annotations first
temp_tbl %>%
    dplyr::group_by(AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(),
                     nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::arrange(desc(nAnalytes))

In [None]:
#Pre-check the metabolite annotations at the sub pathway level (with the super pathway kept aside)
analytetype <- "Metabolite"
annotcol <- "SUB_PATHWAY"
annotlabel <- "Subpathway"
supcol <- "SUPER_PATHWAY"

#Prepare annotations
##AnnotationID is "<label>:<annotation>"; analytes without the annotation are removed
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::transmute(AnalyteID, AnalyteLabel,
                     AnnotationID=str_c(annotlabel,":",.data[[annotcol]]),
                     AnnotationName=.data[[annotcol]],
                     SupAnnotation=.data[[supcol]]) %>%
    dplyr::filter(!is.na(AnnotationID))

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Only the annotations with 10 or more analytes are shown, larger ones first within each super pathway
temp_tbl %>%
    dplyr::group_by(SupAnnotation, AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(),
                     nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::filter(nAnalytes>=10) %>%
    dplyr::arrange(SupAnnotation, desc(nAnalytes))

#### 2-4-2-2. Pre-check protein annotations

In [None]:
#Prepare annotations
##Protein-to-GOBP mapping stored in the cleaned metadata workbook
fileDir <- "./ExportData/"
ipynbName <- "240725_Arivale-APOE-BA_CleanMetadata_"
fileName <- "analyte-metadata.xlsx"
sheetName <- "Protein2GOBP"
temp_tbl <- str_c(fileDir,ipynbName,fileName) %>%
    read_excel(sheet=sheetName)

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Annotation-level columns only (i.e., without AnalyteID), larger annotations first
temp_tbl %>%
    dplyr::select(-AnalyteID) %>%
    dplyr::distinct() %>%
    dplyr::arrange(dplyr::desc(nAnalytes), dplyr::desc(nProteins), dplyr::desc(ProteinCoverage))

annotation_tbl <- temp_tbl

In [None]:
#Check exclusive combinations
##Print every pair of annotations that share no analyte, as
##"<ID1> (<name1>):<number of rows> vs. <ID2> (<name2>):<number of rows>"
##List each annotation once, from the largest to the smallest
temp_tbl <- annotation_tbl %>%
    dplyr::select(-AnalyteID) %>%
    dplyr::distinct() %>%
    dplyr::arrange(desc(nAnalytes))
##Look up the analyte set and the name of each annotation only once (instead of per pair)
temp_list <- split(annotation_tbl$AnalyteID, annotation_tbl$AnnotationID)
temp_vec <- annotation_tbl$AnnotationName[match(temp_tbl$AnnotationID, annotation_tbl$AnnotationID)]
##Compare each unordered pair once; nothing is compared when fewer than two annotations exist
for (i in seq_len(max(nrow(temp_tbl)-1, 0))) {
    for (j in (i+1):nrow(temp_tbl)) {
        #Prepare each analyte set
        annotation1 <- temp_tbl$AnnotationID[i]
        annotation2 <- temp_tbl$AnnotationID[j]
        analytes1 <- temp_list[[annotation1]]
        analytes2 <- temp_list[[annotation2]]
        #Report the pair without any common analyte
        if (!any(analytes1 %in% analytes2)) {
            print(str_c(annotation1," (",temp_vec[i],"):",length(analytes1)," vs. ",
                        annotation2," (",temp_vec[j],"):",length(analytes2)))
        }
    }
}

In [None]:
#Helper: rows of the annotation table belonging to the given annotation IDs
pick_annotations <- function(annotation_ids) {
    dplyr::filter(annotation_tbl, AnnotationID %in% annotation_ids)
}
#Helper: number of analytes and of significant analytes per annotation, larger annotations first
summarize_annotations <- function(tbl) {
    tbl %>%
        dplyr::group_by(AnnotationID, AnnotationName) %>%
        dplyr::summarize(nAnalytes=n(),
                         nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
        dplyr::arrange(desc(nAnalytes))
}

#Further check the combination set
##The set is exclusive when no analyte appears in more than one of its annotations
temp_tbl <- pick_annotations(c("GO:0006954", "GO:0030574"))
summarize_annotations(temp_tbl)
print(str_c("-> Covered analytes: ",nrow(temp_tbl)))
print(str_c(" -> Exclusive?: ",anyDuplicated(temp_tbl$AnalyteID)==0))

#Further check the combination set
temp_tbl <- pick_annotations(c("GO:0006935", "GO:0032722", "GO:0051781", "GO:0030574"))
summarize_annotations(temp_tbl)
print(str_c("-> Covered analytes: ",nrow(temp_tbl)))
print(str_c(" -> Exclusive?: ",anyDuplicated(temp_tbl$AnalyteID)==0))

#Further check the combination set
temp_tbl <- pick_annotations(c("GO:0070098", "GO:0032722", "GO:0051781", "GO:0030574"))
summarize_annotations(temp_tbl)
print(str_c("-> Covered analytes: ",nrow(temp_tbl)))
print(str_c(" -> Exclusive?: ",anyDuplicated(temp_tbl$AnalyteID)==0))

#### 2-4-2-3. Pre-check lab test annotations

In [None]:
#Prepare annotations
##Lab test-to-manual category mapping stored in the cleaned metadata workbook
fileDir <- "./ExportData/"
ipynbName <- "240725_Arivale-APOE-BA_CleanMetadata_"
fileName <- "analyte-metadata.xlsx"
sheetName <- "LabTest2ManCat"
temp_tbl <- str_c(fileDir,ipynbName,fileName) %>%
    read_excel(sheet=sheetName)

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Number of analytes and of significant analytes per annotation, larger annotations first
temp_tbl %>%
    dplyr::group_by(AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(),
                     nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::arrange(dplyr::desc(nAnalytes))

#### 2-4-2-4. Pre-check microbiome annotations

In [None]:
#Pre-check the microbiome annotations at the phylum level
analytetype <- "Microbiome"
annotcol <- "Phylum"
annotlabel <- "Phylum"

#Prepare annotations
##AnnotationID is "<label>:<annotation>"; analytes without the annotation are removed
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::transmute(AnalyteID, AnalyteLabel,
                     AnnotationID=str_c(annotlabel,":",.data[[annotcol]]),
                     AnnotationName=.data[[annotcol]]) %>%
    dplyr::filter(!is.na(AnnotationID))

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Number of analytes and of significant analytes per annotation, larger annotations first
temp_tbl %>%
    dplyr::group_by(AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(),
                     nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::arrange(desc(nAnalytes))

In [None]:
#Pre-check the microbiome annotations at the class level (with the phylum kept aside)
analytetype <- "Microbiome"
annotcol <- "Class"
annotlabel <- "Class"
supcol <- "Phylum"

#Prepare annotations
##AnnotationID is "<label>:<annotation>"; analytes without the annotation are removed
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::transmute(AnalyteID, AnalyteLabel,
                     AnnotationID=str_c(annotlabel,":",.data[[annotcol]]),
                     AnnotationName=.data[[annotcol]],
                     SupAnnotation=.data[[supcol]]) %>%
    dplyr::filter(!is.na(AnnotationID))

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Only the annotations with 10 or more analytes are shown, larger ones first within each phylum
temp_tbl %>%
    dplyr::group_by(SupAnnotation, AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(),
                     nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::filter(nAnalytes>=10) %>%
    dplyr::arrange(SupAnnotation, desc(nAnalytes))

#### 2-4-2-5. Categorize nodes

> Because some annotations are combined here, the annotation number is prepared at this point for the plot settings. Also, the order is changed to 1. Clinical labs, 2. Metabolomics, 3. Proteomics, 4. Microbiome.  

In [None]:
#Initialize annotation table
##Each section below appends its rows (AnalyteID, AnnotationName, TempSortNum, TempCatNum)
annotation_tbl <- tibble()

#Categorize metabolite nodes
##Metabolites are categorized as "<super pathway>: <sub pathway>", while pathways with fewer
##analytes than the cutoff are combined at each level
analytetype <- "Metabolite"
maincol <- "SUB_PATHWAY"
supcol <- "SUPER_PATHWAY"
cutoff_min <- 20#Minimum number of analytes for keeping a pathway as its own category
minorlabel_main <- "Others"
minorlabel_sup <- "Others (carbohydrate, vitamins, energy, etc.)"
nalabel <- str_c(analytetype,": N.A.")
##Prepare annotations
##Missing annotations are replaced with the N.A. label; the others are converted to sentence case
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::mutate(MainAnnotation=!!as.name(maincol),
                  SupAnnotation=!!as.name(supcol)) %>%
    dplyr::select(AnalyteID, MainAnnotation, SupAnnotation) %>%
    dplyr::mutate(MainAnnotation=ifelse(is.na(MainAnnotation), nalabel, str_to_sentence(MainAnnotation)),
                  SupAnnotation=ifelse(is.na(SupAnnotation), nalabel, str_to_sentence(SupAnnotation)))
##Combine minor annotations at the main level
##MainName keeps the sub pathway if it has enough analytes (or is the N.A. label);
##otherwise, it becomes the minor label
temp_tbl <- temp_tbl %>%
    dplyr::group_by(SupAnnotation, MainAnnotation) %>%
    dplyr::summarize(nAnalytes=n()) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(SupAnnotation, desc(nAnalytes)) %>%
    dplyr::mutate(MainName=ifelse((nAnalytes>=cutoff_min)|(MainAnnotation==nalabel),
                                  MainAnnotation, minorlabel_main)) %>%
    dplyr::select(-nAnalytes) %>%
    dplyr::left_join(temp_tbl, ., by=c("SupAnnotation", "MainAnnotation"))
##Combine minor annotations at the super level
##TempSortNum is the rank of the super pathway by the number of analytes (larger first)
temp_tbl <- temp_tbl %>%
    dplyr::group_by(SupAnnotation) %>%
    dplyr::summarize(nAnalytes=n()) %>%
    dplyr::arrange(desc(nAnalytes)) %>%
    dplyr::mutate(SupName=ifelse((nAnalytes>=cutoff_min)|(SupAnnotation==nalabel),
                                 SupAnnotation, minorlabel_sup)) %>%
    dplyr::select(-nAnalytes) %>%
    dplyr::mutate(TempSortNum=row_number()) %>%
    dplyr::left_join(temp_tbl, ., by=c("SupAnnotation"))
##Prepare super levels having only the combined annotation
##(i.e., super levels with a single distinct MainName, which are labelled without the main level)
temp_vec <- temp_tbl %>%
    dplyr::select(SupName, MainName) %>%
    dplyr::distinct() %>%
    dplyr::group_by(SupName) %>%
    dplyr::summarize(Count=n()) %>%
    dplyr::filter(Count==1) %>%
    .$SupName
##Finalize annotations
##Sorting within metabolites: super pathways by size, with "<super>: Others" shifted by +0.1,
##followed by the combined super pathways (1000) and then the N.A. category (2000)
annotation_tbl <- temp_tbl %>%
    dplyr::mutate(AnnotationName=ifelse(SupName %in% c(temp_vec, minorlabel_sup, nalabel),
                                        SupName, str_c(SupName,": ",MainName))) %>%
    dplyr::select(AnalyteID, AnnotationName, TempSortNum) %>%
    dplyr::mutate(TempSortNum=ifelse(str_detect(AnnotationName, str_c(": ", minorlabel_main,"$")),
                                     TempSortNum+0.1, TempSortNum)) %>%#Dummy shift
    dplyr::mutate(TempSortNum=ifelse(AnnotationName==minorlabel_sup, 1000, TempSortNum)) %>%#Dummy large number
    dplyr::mutate(TempSortNum=ifelse(AnnotationName==nalabel, 2000, TempSortNum)) %>%#Dummy large number
    dplyr::mutate(TempCatNum=2) %>%
    dplyr::bind_rows(annotation_tbl, .)

#Categorize protein nodes
##Proteins fall into 1) the target GOBP terms, 2) the other GOBP terms combined, or 3) N.A.
analytetype <- "Protein"
temp_vec <- c("GO:0006954", "GO:0030574")
minorlabel <- "Other GOBPs"
nalabel <- str_c(analytetype,": N.A.")
##Prepare annotations
fileDir <- "./ExportData/"
ipynbName <- "240725_Arivale-APOE-BA_CleanMetadata_"
fileName <- "analyte-metadata.xlsx"
sheetName <- "Protein2GOBP"
temp_tbl <- str_c(fileDir,ipynbName,fileName) %>%
    read_excel(sheet=sheetName)
##Select the target exclusive annotations (labelled as "<Name in sentence case> (<ID>)")
temp_tbl1 <- temp_tbl %>%
    dplyr::filter(AnnotationID %in% temp_vec) %>%
    dplyr::transmute(AnalyteID,
                     AnnotationName=str_c(str_to_sentence(AnnotationName)," (",AnnotationID,")"),
                     TempSortNum=1)
##Combine the others (one row per analyte that is mapped only to non-target annotations)
temp_tbl2 <- temp_tbl %>%
    dplyr::filter(!(AnalyteID %in% temp_tbl1$AnalyteID)) %>%
    dplyr::distinct(AnalyteID) %>%
    dplyr::mutate(AnnotationName=minorlabel,
                  TempSortNum=2)
##Rescue the unmapped analytes (proteins absent from the annotation sheet)
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype,
                  !(AnalyteID %in% c(temp_tbl1$AnalyteID, temp_tbl2$AnalyteID))) %>%
    dplyr::transmute(AnalyteID,
                     AnnotationName=nalabel,
                     TempSortNum=3)
##Finalize annotations (proteins are the 3rd data type in the plot order)
annotation_tbl <- dplyr::bind_rows(
    annotation_tbl,
    dplyr::mutate(dplyr::bind_rows(temp_tbl1, temp_tbl2, temp_tbl), TempCatNum=3)
)

#Categorize lab test nodes
##Lab tests fall into 1) the target manual categories, 2) the other categories combined, or 3) N.A.
analytetype <- "Lab test"
temp_vec <- c("ManualCategory:Cell", "ManualCategory:Lipid", "ManualCategory:Protein")
minorlabel <- "Other biomarkers"
nalabel <- str_c(analytetype,": N.A.")
##Prepare annotations
fileDir <- "./ExportData/"
ipynbName <- "240725_Arivale-APOE-BA_CleanMetadata_"
fileName <- "analyte-metadata.xlsx"
sheetName <- "LabTest2ManCat"
temp_tbl <- str_c(fileDir,ipynbName,fileName) %>%
    read_excel(sheet=sheetName)
##Select the target exclusive annotations
temp_tbl1 <- temp_tbl %>%
    dplyr::filter(AnnotationID %in% temp_vec) %>%
    dplyr::transmute(AnalyteID, AnnotationName,
                     TempSortNum=1)
##Combine the others (one row per analyte that is mapped only to non-target annotations)
temp_tbl2 <- temp_tbl %>%
    dplyr::filter(!(AnalyteID %in% temp_tbl1$AnalyteID)) %>%
    dplyr::distinct(AnalyteID) %>%
    dplyr::mutate(AnnotationName=minorlabel,
                  TempSortNum=2)
##Rescue the unmapped analytes (lab tests absent from the annotation sheet)
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype,
                  !(AnalyteID %in% c(temp_tbl1$AnalyteID, temp_tbl2$AnalyteID))) %>%
    dplyr::transmute(AnalyteID,
                     AnnotationName=nalabel,
                     TempSortNum=3)
##Finalize annotations (lab tests are the 1st data type in the plot order)
annotation_tbl <- dplyr::bind_rows(
    annotation_tbl,
    dplyr::mutate(dplyr::bind_rows(temp_tbl1, temp_tbl2, temp_tbl), TempCatNum=1)
)

#Categorize microbiome nodes
##Microbiome analytes are categorized by phylum, while phyla with fewer analytes than
##the cutoff are combined into the minor label
analytetype <- "Microbiome"
maincol <- "Phylum"
cutoff_min <- 10#Minimum number of analytes for keeping a phylum as its own category
minorlabel_main <- "Other phyla"
nalabel <- str_c(analytetype,": N.A.")
##Prepare annotations
##Missing annotations are replaced with the N.A. label
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::mutate(MainAnnotation=!!as.name(maincol)) %>%
    dplyr::select(AnalyteID, MainAnnotation) %>%
    dplyr::mutate(MainAnnotation=ifelse(is.na(MainAnnotation), nalabel, MainAnnotation))
##Combine minor annotations at the main level
##TempSortNum is the rank of the phylum by the number of analytes (larger first)
temp_tbl <- temp_tbl %>%
    dplyr::group_by(MainAnnotation) %>%
    dplyr::summarize(nAnalytes=n()) %>%
    dplyr::arrange(desc(nAnalytes)) %>%
    dplyr::mutate(MainName=ifelse((nAnalytes>=cutoff_min)|(MainAnnotation==nalabel),
                                  MainAnnotation, minorlabel_main)) %>%
    dplyr::select(-nAnalytes) %>%
    dplyr::mutate(TempSortNum=row_number()) %>%
    dplyr::left_join(temp_tbl, ., by=c("MainAnnotation"))
##Finalize annotations
##The combined category (1000) and the N.A. category (2000) are sorted after the kept phyla;
##microbiome is the 4th data type in the plot order
annotation_tbl <- temp_tbl %>%
    dplyr::mutate(AnnotationName=MainName) %>%
    dplyr::select(AnalyteID, AnnotationName, TempSortNum) %>%
    dplyr::mutate(TempSortNum=ifelse(AnnotationName==minorlabel_main, 1000, TempSortNum)) %>%#Dummy large number
    dplyr::mutate(TempSortNum=ifelse(AnnotationName==nalabel, 2000, TempSortNum)) %>%#Dummy large number
    dplyr::mutate(TempCatNum=4) %>%
    dplyr::bind_rows(annotation_tbl, .)

#Clean the sorting info
##Number the annotations by data type order, then by within-type order, then by size (larger first)
temp_tbl <- annotation_tbl %>%
    dplyr::group_by(TempCatNum, TempSortNum, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n()) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(TempCatNum, TempSortNum, desc(nAnalytes))
temp_tbl$AnnotationNum <- seq_len(nrow(temp_tbl))
##Attach the number to each analyte and drop the temporary columns
annotation_tbl <- annotation_tbl %>%
    dplyr::left_join(temp_tbl, by=c("TempCatNum", "TempSortNum", "AnnotationName")) %>%
    dplyr::select(-TempCatNum, -TempSortNum, -nAnalytes)

print(str_c("nrow: ",nrow(annotation_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(annotation_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(annotation_tbl$AnnotationName)))
head(annotation_tbl)

#Check annotation-based summary
##Number of analytes and of significant analytes per annotation, in the annotation number order
annotation_tbl %>%
    dplyr::group_by(AnnotationNum, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(),
                     nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::arrange(AnnotationNum)

In [None]:
#Add to the node table
##Each node receives the annotation name and number of its analyte
temp_tbl <- node_tbl %>%
    dplyr::left_join(annotation_tbl, by="AnalyteID")

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
##Share of nodes having at least one edge, per annotation
temp_tbl %>%
    dplyr::group_by(AnalyteType, AnnotationNum, AnnotationName) %>%
    dplyr::summarize(nNodes=n(),
                     nSigNodes=sum(!is.na(nEdges))) %>%
    dplyr::mutate(Percentage=nSigNodes/nNodes*100) %>%
    dplyr::arrange(AnnotationNum)

#Update
node_tbl <- temp_tbl

### 2-4-3. Highlight nodes with text labels

> In this case, the number of the common significant pairs is small. So, the number of edges is used for judging nodes to be highlighted, while removing the nodes with edges from only one cohort.  

In [None]:
#Check nodes of the common significant pairs
##Edges significant in both cohorts with the same direction, and the nodes at their ends
temp_tbl <- dplyr::filter(edge_tbl, Category %in% c("Both(+)", "Both(-)"))
temp_tbl
temp_vec <- union(temp_tbl$AnalyteID_1, temp_tbl$AnalyteID_2)
dplyr::filter(node_tbl, AnalyteID %in% temp_vec)

#Check node summary
summary(node_tbl)
##Number of nodes having at least i edges (nodes without any edge are never counted)
for (i in 2:10) {
    print(str_c("nNodes with ",i," edges: ",sum(node_tbl$nEdges>=i, na.rm=TRUE)))
}
##While removing the nodes with edges from only one cohort
temp_lgl <- (!is.na(node_tbl[[str_c(cohort_vec[1],"_nSigPairs")]]))&
    (!is.na(node_tbl[[str_c(cohort_vec[2],"_nSigPairs")]]))
for (i in 1:10) {
    print(str_c("nNodes with ",i," edges which were at least derived from each: ",
                sum(temp_lgl&(node_tbl$nEdges>=i), na.rm=TRUE)))
}

In [None]:
#Nodes with higher number of edges than cutoff (while removing the nodes with edges from only one cohort)
cutoff <- 1
##Target nodes: significant pairs in both cohorts and at least <cutoff> edges
temp_vec <- node_tbl %>%
    dplyr::filter(!is.na(.data[[str_c(cohort_vec[1],"_nSigPairs")]]),
                  !is.na(.data[[str_c(cohort_vec[2],"_nSigPairs")]]),
                  nEdges>=cutoff) %>%
    dplyr::pull(AnalyteID)
##Highlight flag: 1 for the target nodes, 0 for the others
temp_tbl <- node_tbl %>%
    dplyr::mutate(Highlight=as.numeric(AnalyteID %in% temp_vec))

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
##Share of nodes with any edge and of nodes with a text label, per annotation
temp_tbl %>%
    dplyr::group_by(AnalyteType, AnnotationNum, AnnotationName) %>%
    dplyr::summarize(nNodes=n(),
                     nSigNodes=sum(!is.na(nEdges)),
                     nTextNodes=sum(Highlight==1)) %>%
    dplyr::mutate(SigPercent=nSigNodes/nNodes*100,
                  TextPercent=nTextNodes/nNodes*100) %>%
    dplyr::arrange(AnnotationNum)
##Nodes to be labelled, from the most connected one
temp_tbl %>%
    dplyr::filter(Highlight==1) %>%
    dplyr::arrange(dplyr::desc(nEdges))

#Update
node_tbl <- temp_tbl

### 2-4-4. Add plot settings

In [None]:
#Numbering sector/category and x-coordinate in each category
##The sector is the annotation number; within each sector, nodes are placed from the one
##with the most edges (nodes without any edge come last)
node_tbl <- node_tbl %>%
    dplyr::mutate(CategoryNum=AnnotationNum) %>%
    dplyr::arrange(CategoryNum, dplyr::desc(nEdges)) %>%
    dplyr::group_by(CategoryNum) %>%
    dplyr::mutate(Xcoord=dplyr::row_number()) %>%
    dplyr::ungroup()
head(node_tbl)

In [None]:
#Check
print("Check Xcoord around margin")
##Number of nodes per sector, in the sector order
temp_vec <- node_tbl %>%
    dplyr::count(CategoryNum, name="nNodes") %>%
    dplyr::pull(nNodes)
##Helper: rows within 3 positions of the last node of the i-th sector
show_margin_rows <- function(i) {
    border <- sum(temp_vec[1:i])
    node_tbl[(border-3):(border+3),]
}
show_margin_rows(1)
show_margin_rows(2)
show_margin_rows(3)

> Of note, when gaps between labels were tight, a label offset calculation was needed to avoid overlapping.  
–> This offset was handled manually in my previous code, but it can be done automatically by circos.labels() function!!  
–> Skip this step.  

## 2-5. Circos plot

In [None]:
#Enlarge the plot area for this figure and reset any previous circos state
options(repr.plot.width=8, repr.plot.height=8)#Default=7x7
circos.clear()

#Generate xlim table
##Range of the x-coordinate in each sector (one sector per CategoryNum)
xlim_tbl <- node_tbl %>%
    dplyr::group_by(CategoryNum) %>%
    dplyr::summarize(Xmin=min(Xcoord), Xmax=max(Xcoord))

#Initialize
##Sectors start from 90 degrees and go clockwise, with a gap of 1.5 degrees between them
circos.par(start.degree=90, gap.degree=1.5, track.margin=c(0, 0),
           cell.padding=c(0, 0, 0, 0), unit.circle.segments=nrow(node_tbl),
           canvas.xlim=c(-1.0, 1.5), canvas.ylim=c(-1.25, 1.25),#Change based on the label length
           clock.wise=TRUE, xaxis.clock.wise=TRUE)
##The sector width is given by Xmax, i.e., by the number of nodes in the sector
circos.initialize(sectors=xlim_tbl$CategoryNum, xlim=xlim_tbl[, c("Xmin", "Xmax")],
                  sector.width=xlim_tbl$Xmax)

#Prepare color palette with standard/dummy sector order
##Palette 1: one slightly darkened Set1 color per data type
temp_vec <- brewer.pal(4, "Set1")
temp_vec <- darken(temp_vec, amount=0.05)
color_palette1 <- c("Metabolite"=temp_vec[2],#blue
                    "Protein"=temp_vec[1],#red
                    "Lab test"=temp_vec[3],#green
                    "Microbiome"=temp_vec[4])#purple
##Palette 2: one color per pair of edge categories, because each color is repeated twice and
##the names below are given in the same order: Both(+)/Both(-), 1st cohort only (+)/(-),
##2nd cohort only (+)/(-), and the two categories with opposite directions between cohorts
color_palette2 <- rep(c(rgb(255/255, 69/255, 0/255, alpha=0.6),#orange red
                        rgb(34/255, 139/255, 34/255, alpha=0.3),#forest green
                        rgb(65/255, 105/255, 225/255, alpha=0.3),#royal blue
                        rgb(0, 0, 0, alpha=1.0)),#Just in case
                      each=2)
names(color_palette2) <- c("Both(+)", "Both(-)",
                           str_c(cohort_vec[1],"(+)"),str_c(cohort_vec[1],"(-)"),
                           str_c(cohort_vec[2],"(+)"),str_c(cohort_vec[2],"(-)"),
                           str_c(cohort_vec[1],"(+)",cohort_vec[2],"(-)"),str_c(cohort_vec[1],"(-)",cohort_vec[2],"(+)"))

#Add highlighting node labels (two outer tracks)
##Prepare highlighting nodes
temp_tbl <- node_tbl %>%
    dplyr::filter(Highlight==1) %>%
    dplyr::mutate(TextColor=sapply(AnalyteType, function(x){color_palette1[x]}),
                  LineColor="black")
##Prepare dummy node for the sector without highlighting nodes
temp_tbl <- node_tbl %>%
    dplyr::filter(!(CategoryNum %in% temp_tbl$CategoryNum)) %>%
    dplyr::group_by(CategoryNum) %>%
    dplyr::summarize(Xcoord=(min(Xcoord)+max(Xcoord))/2) %>%
    dplyr::mutate(AnalyteLabel="Dummy",
                  TextColor=rgb(0, 0, 0, alpha=0.0),
                  LineColor=rgb(0, 0, 0, alpha=0.0)) %>%
    dplyr::bind_rows(temp_tbl, .)
##Add the target node labels
circos.labels(sectors=temp_tbl$CategoryNum, x=temp_tbl$Xcoord, labels=temp_tbl$AnalyteLabel,
              facing="clockwise", niceFacing=TRUE, col=temp_tbl$TextColor, cex=0.6,
              padding=0.1, line_col=temp_tbl$LineColor, line_lwd=1, side="outside")

#Add category sectors (track 3)
circos.track(ylim=c(0, 1), track.height=0.075, bg.col=NA, bg.border=NA)
for (row_i in 1:nrow(xlim_tbl)) {
    sector_n <- xlim_tbl$CategoryNum[row_i]
    xstart <- xlim_tbl$Xmin[row_i]
    xend <- xlim_tbl$Xmax[row_i]
    theta_start <- circlize(x=xstart, y=0, sector.index=sector_n, track.index=3)[1, 1]
    theta_end <- circlize(x=xend, y=0, sector.index=sector_n, track.index=3)[1, 1]
    #Prepare labels and colors
    label_n <- as.character(sector_n)
    analytetype <- node_tbl[node_tbl$CategoryNum==sector_n, ]$AnalyteType[1]
    fill_color <- color_palette1[analytetype]
    #Add category sectors
    draw.sector(start.degree=theta_start, end.degree=theta_end, clock.wise=TRUE,
                rou1=get.cell.meta.data("cell.top.radius", track.index=3),
                rou2=get.cell.meta.data("cell.bottom.radius", track.index=3),
                col=fill_color, border="black", lwd=2)
    #Add category label
    xcenter <- (xstart + xend) / 2
    circos.text(x=xcenter, y=0.5, labels=label_n, sector.index=sector_n, track.index=3,
                facing="bending.inside", niceFacing=TRUE, adj=c(0.5, 0.5),
                cex=0.6, col="white", font=2)#2 = bold font
    #Add axis ticks
    circos.axis(h="top", major.at=xstart:xend, labels=FALSE, major.tick=TRUE,
                sector.index=sector_n, track.index=3, direction="outside",
                minor.ticks=0, major.tick.length=0.1, lwd=0.25, col="black")
}

#Add links for the significant pairs
temp_tbl <- edge_tbl %>%
    dplyr::mutate(MeanAbsBcoef=mean(!!as.name(str_c(cohort_vec,"_bcoef")))) %>%
    dplyr::arrange(desc(Category), desc(MeanAbsBcoef))#For rendering with consistent order
for (row_i in 1:nrow(temp_tbl)) {
    var1 <- temp_tbl$AnalyteID_1[row_i]
    var2 <- temp_tbl$AnalyteID_2[row_i]
    var1_sector <- node_tbl$CategoryNum[node_tbl$AnalyteID==var1]
    var2_sector <- node_tbl$CategoryNum[node_tbl$AnalyteID==var2]
    var1_xcoord <- node_tbl$Xcoord[node_tbl$AnalyteID==var1]
    var2_xcoord <- node_tbl$Xcoord[node_tbl$AnalyteID==var2]
    #Prepare line color
    label_n <- temp_tbl$Category[row_i]
    line_color <- color_palette2[label_n]
    line_width <- temp_tbl$MeanAbsBcoef*3
    #Add edge
    circos.link(sector.index1=var1_sector, point1=var1_xcoord, rou1=get_most_inside_radius(),
                sector.index2=var2_sector, point2=var2_xcoord, rou2=get_most_inside_radius(),
                h.ratio=0.6, w=4, col=line_color, lwd=line_width)
}

circos.clear()
options(repr.plot.width=5, repr.plot.height=5)#Default=7x7

> –> There is no need to worry about the above "out of plotting" notes because the final font is a bit different after export (see below).  

> Because the circlize package uses base graphics, use a graphics device to save the figure.  

In [None]:
#Draw the same interaction network as above, but into a PDF file.
#Inputs (prepared in the previous cells): node_tbl, edge_tbl, cohort_vec.
#Layout: node labels outside (two tracks made by circos.labels), category sectors (track 3),
#        and one link per significant pair inside.
options(repr.plot.width=8, repr.plot.height=8)#Default=7x7
circos.clear()

#To save (circlize uses base graphics)
fileDir <- "./ExportFigures/"
ipynbName <- "240820_Arivale-APOE-BA-visualization-for-Dylan_ver3-3_"
fileName <- str_c(str_flatten(cohort_vec, collapse="-vs-"),".pdf")
pdf(str_c(fileDir,ipynbName,fileName), 8, 8)

#Generate xlim table (first and last x-coordinate of each sector)
xlim_tbl <- node_tbl %>%
    dplyr::group_by(CategoryNum) %>%
    dplyr::summarize(Xmin=min(Xcoord), Xmax=max(Xcoord))

#Initialize
circos.par(start.degree=90, gap.degree=1.5, track.margin=c(0, 0),
           cell.padding=c(0, 0, 0, 0), unit.circle.segments=nrow(node_tbl),
           canvas.xlim=c(-1.0, 1.5), canvas.ylim=c(-1.25, 1.25),#Change based on the label length
           clock.wise=TRUE, xaxis.clock.wise=TRUE)
circos.initialize(sectors=xlim_tbl$CategoryNum, xlim=xlim_tbl[, c("Xmin", "Xmax")],
                  sector.width=xlim_tbl$Xmax)

#Prepare color palette with standard/dummy sector order
temp_vec <- brewer.pal(4, "Set1")
temp_vec <- darken(temp_vec, amount=0.05)
##Sector fill and label text color per analyte type
color_palette1 <- c("Metabolite"=temp_vec[2],#blue
                    "Protein"=temp_vec[1],#red
                    "Lab test"=temp_vec[3],#green
                    "Microbiome"=temp_vec[4])#purple
##Link color per edge category; (+) and (-) of the same group share one color
color_palette2 <- rep(c(rgb(255/255, 69/255, 0/255, alpha=0.6),#orange red
                        rgb(34/255, 139/255, 34/255, alpha=0.3),#forest green
                        rgb(65/255, 105/255, 225/255, alpha=0.3),#royal blue
                        rgb(0, 0, 0, alpha=1.0)),#Just in case
                      each=2)
names(color_palette2) <- c("Both(+)", "Both(-)",
                           str_c(cohort_vec[1],"(+)"),str_c(cohort_vec[1],"(-)"),
                           str_c(cohort_vec[2],"(+)"),str_c(cohort_vec[2],"(-)"),
                           str_c(cohort_vec[1],"(+)",cohort_vec[2],"(-)"),str_c(cohort_vec[1],"(-)",cohort_vec[2],"(+)"))

#Add highlighting node labels (two outer tracks)
##Prepare highlighting nodes
##(direct palette lookup keeps TextColor a character vector even when no node is highlighted)
temp_tbl <- node_tbl %>%
    dplyr::filter(Highlight==1) %>%
    dplyr::mutate(TextColor=unname(color_palette1[AnalyteType]),
                  LineColor="black")
##Prepare dummy node for the sector without highlighting nodes
##(a fully transparent label placed at the middle of the sector)
temp_tbl <- node_tbl %>%
    dplyr::filter(!(CategoryNum %in% temp_tbl$CategoryNum)) %>%
    dplyr::group_by(CategoryNum) %>%
    dplyr::summarize(Xcoord=(min(Xcoord)+max(Xcoord))/2) %>%
    dplyr::mutate(AnalyteLabel="Dummy",
                  TextColor=rgb(0, 0, 0, alpha=0.0),
                  LineColor=rgb(0, 0, 0, alpha=0.0)) %>%
    dplyr::bind_rows(temp_tbl, .)
##Add the target node labels
circos.labels(sectors=temp_tbl$CategoryNum, x=temp_tbl$Xcoord, labels=temp_tbl$AnalyteLabel,
              facing="clockwise", niceFacing=TRUE, col=temp_tbl$TextColor, cex=0.6,
              padding=0.1, line_col=temp_tbl$LineColor, line_lwd=1, side="outside")

#Add category sectors (track 3)
circos.track(ylim=c(0, 1), track.height=0.075, bg.col=NA, bg.border=NA)
for (row_i in seq_len(nrow(xlim_tbl))) {
    sector_n <- xlim_tbl$CategoryNum[row_i]
    xstart <- xlim_tbl$Xmin[row_i]
    xend <- xlim_tbl$Xmax[row_i]
    ##Convert the sector limits from data coordinates to polar angles
    theta_start <- circlize(x=xstart, y=0, sector.index=sector_n, track.index=3)[1, 1]
    theta_end <- circlize(x=xend, y=0, sector.index=sector_n, track.index=3)[1, 1]
    #Prepare labels and colors
    label_n <- as.character(sector_n)
    analytetype <- node_tbl[node_tbl$CategoryNum==sector_n, ]$AnalyteType[1]
    fill_color <- color_palette1[analytetype]
    #Add category sectors
    draw.sector(start.degree=theta_start, end.degree=theta_end, clock.wise=TRUE,
                rou1=get.cell.meta.data("cell.top.radius", track.index=3),
                rou2=get.cell.meta.data("cell.bottom.radius", track.index=3),
                col=fill_color, border="black", lwd=2)
    #Add category label
    xcenter <- (xstart + xend) / 2
    circos.text(x=xcenter, y=0.5, labels=label_n, sector.index=sector_n, track.index=3,
                facing="bending.inside", niceFacing=TRUE, adj=c(0.5, 0.5),
                cex=0.6, col="white", font=2)#2 = bold font
    #Add axis ticks
    circos.axis(h="top", major.at=xstart:xend, labels=FALSE, major.tick=TRUE,
                sector.index=sector_n, track.index=3, direction="outside",
                minor.ticks=0, major.tick.length=0.1, lwd=0.25, col="black")
}

#Add links for the significant pairs
##Per-edge mean of the absolute beta coefficients across the cohorts
##(a cohort in which the edge has no coefficient is ignored via na.rm)
bcoef_cols <- str_c(cohort_vec,"_bcoef")
temp_tbl <- edge_tbl
temp_tbl$MeanAbsBcoef <- rowMeans(abs(as.matrix(temp_tbl[, bcoef_cols])), na.rm=TRUE)
temp_tbl <- temp_tbl %>%
    dplyr::arrange(dplyr::desc(Category), dplyr::desc(MeanAbsBcoef))#For rendering with consistent order
for (row_i in seq_len(nrow(temp_tbl))) {
    var1 <- temp_tbl$AnalyteID_1[row_i]
    var2 <- temp_tbl$AnalyteID_2[row_i]
    var1_sector <- node_tbl$CategoryNum[node_tbl$AnalyteID==var1]
    var2_sector <- node_tbl$CategoryNum[node_tbl$AnalyteID==var2]
    var1_xcoord <- node_tbl$Xcoord[node_tbl$AnalyteID==var1]
    var2_xcoord <- node_tbl$Xcoord[node_tbl$AnalyteID==var2]
    #Prepare line color and width (width scales with this edge's own effect size)
    label_n <- temp_tbl$Category[row_i]
    line_color <- color_palette2[label_n]
    line_width <- temp_tbl$MeanAbsBcoef[row_i]*3
    #Add edge
    circos.link(sector.index1=var1_sector, point1=var1_xcoord, rou1=get_most_inside_radius(),
                sector.index2=var2_sector, point2=var2_xcoord, rou2=get_most_inside_radius(),
                h.ratio=0.6, w=4, col=line_color, lwd=line_width)
}

#Close the PDF device (this writes the file)
dev.off()

#Reset the circos state and the notebook plot size
circos.clear()
options(repr.plot.width=5, repr.plot.height=5)#Default=7x7

> –> The exported figure is indeed a .pdf file.  
> –> Furthermore, the Arial font was used as expected in the exported file!!  

# 3. Female vs. male in biologically old

## 3-1. Prepare the significant pairs

### 3-1-1. Retrieve the significant pairs

In [None]:
fdr_cutoff <- 0.1
temp_vec <- c("F_Biologically_Old", "M_Biologically_Old")#Sheet names
cohort_vec <- c("FemaleBAold", "MaleBAold")#Cohort labels, in the same order as the sheets

#Workbook holding one sheet of tested pairs per cohort
fileDir <- "./ImportData/"
fileName <- "240707_ME2_MBioOld_FBioOld_for_circos_fig.xlsx"

#Collect the significant pairs of each cohort, then stack them
temp_list <- vector("list", length(cohort_vec))
for (i in seq_along(cohort_vec)) {
    sheetName <- temp_vec[i]
    cohort <- cohort_vec[i]

    #Import all tested pairs of this cohort
    temp_tbl <- read_excel(str_c(fileDir,fileName), sheet=sheetName)
    print(str_c(sheetName," sheet: ",cohort))
    print(str_c("nrow: ",nrow(temp_tbl)))

    #Keep the pairs passing the FDR cutoff
    temp_tbl <- dplyr::filter(temp_tbl, pFDR_interaction < fdr_cutoff)
    #Pair ID for handling = cohort label + zero-padded row number among the kept pairs
    temp_tbl <- dplyr::mutate(temp_tbl,
                              PairID=str_c(cohort,"_row",str_pad(row_number(), width=5, side="left", pad="0")))
    print(str_c("-> FDR < ",fdr_cutoff,": ",nrow(temp_tbl)))

    temp_list[[i]] <- temp_tbl
}
temp_tbl1 <- dplyr::bind_rows(temp_list)

print("Combined")
print(str_c("nrow: ",nrow(temp_tbl1)))
head(temp_tbl1)

pair_tbl <- temp_tbl1

### 3-1-2. Update analyte ID and label

> Because analyte label is not unique in his original tables.  

In [None]:
#Update analyte ID and label
##For each side of the pair (analyte 1 and 2), look up the analyte in the cleaned metadata
##and attach AnalyteID_<i>, AnalyteLabel_<i>, AnalyteType_<i> to pair_tbl.
for (analyte_i in c(1, 2)) {
    name_col <- str_c("Analyte",analyte_i)
    type_col <- str_c("type",analyte_i)
    group_col <- str_c("supergroup",analyte_i)

    #Take target analytes and clean for matching with analyte metadata
    ##"clinical" is renamed to "Lab test"; the other types are title-cased
    target_tbl <- pair_tbl %>%
        dplyr::select(PairID,
                      AnalyteName=dplyr::all_of(name_col),
                      dplyr::all_of(type_col),
                      Supergroup=dplyr::all_of(group_col)) %>%
        dplyr::mutate(AnalyteType=ifelse(.data[[type_col]]=="clinical",
                                         "Lab test", str_to_title(.data[[type_col]])))

    #Merge analyte metadata based on analyte type
    merged_list <- list()
    for (analytetype in unique(target_tbl$AnalyteType)) {
        ##Metadata column that plays the role of the supergroup for this analyte type
        ##(NULL = the supergroup is not used as a join key)
        if (analytetype=="Metabolite") {
            group_source <- "SUPER_PATHWAY"
        } else if (analytetype=="Protein") {
            group_source <- "OlinkPanel"
        } else {
            group_source <- NULL
        }
        if (is.null(group_source)) {
            meta_tbl <- dplyr::select(analyte_tbl, AnalyteID, AnalyteType, AnalyteName, AnalyteLabel)
            join_keys <- c("AnalyteType", "AnalyteName")
        } else {
            meta_tbl <- dplyr::select(analyte_tbl, AnalyteID, AnalyteType, AnalyteName, AnalyteLabel,
                                      Supergroup=dplyr::all_of(group_source))
            join_keys <- c("AnalyteType", "AnalyteName", "Supergroup")
        }
        merged_list[[length(merged_list)+1]] <- target_tbl %>%
            dplyr::filter(AnalyteType==analytetype) %>%
            dplyr::left_join(meta_tbl, by=join_keys)
    }

    #Clean: keep the looked-up columns and suffix them with the analyte position
    temp_tbl <- dplyr::bind_rows(merged_list) %>%
        dplyr::select(PairID, AnalyteID, AnalyteLabel, AnalyteType) %>%
        dplyr::rename_with(~str_c(.x,"_",analyte_i), .cols=c(AnalyteID, AnalyteLabel, AnalyteType))

    #Add to the original table
    pair_tbl <- dplyr::left_join(pair_tbl, temp_tbl, by="PairID")
}
##Clean
pair_tbl <- pair_tbl %>%
    dplyr::select(PairID,
                  AnalyteID_1, AnalyteLabel_1, AnalyteType_1,
                  AnalyteID_2, AnalyteLabel_2, AnalyteType_2,
                  Beta_interaction, pnominal_interaction, pFDR_interaction)
print(str_c("nrow: ",nrow(pair_tbl)))
head(pair_tbl)

## 3-2. Extract analytes from the significant pairs

In [None]:
#Extract analytes from the significant pairs
##Stack both sides of every pair into one long table (one row per pair x analyte position)
temp_tbl <- lapply(c(1, 2), function(analyte_i) {
    dplyr::select(pair_tbl, PairID, AnalyteID=dplyr::all_of(str_c("AnalyteID_",analyte_i)))
}) %>%
    dplyr::bind_rows()

#Flatten the same analytes while calculating the number of significant pairs per cohort
##The cohort label is the PairID without its "_row<number>" suffix
temp_tbl <- temp_tbl %>%
    dplyr::mutate(Cohort=str_remove(PairID, "_row[0-9]+$")) %>%
    dplyr::count(Cohort, AnalyteID, name="nSigPairs") %>%
    dplyr::arrange(dplyr::desc(nSigPairs))

#Add label
temp_tbl <- dplyr::left_join(temp_tbl,
                             dplyr::select(analyte_tbl, AnalyteID, AnalyteLabel, AnalyteType),
                             by="AnalyteID")

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
#Check
##Per cohort: number of significant analytes and total of their pair counts
temp_tbl %>% dplyr::group_by(Cohort) %>%
    dplyr::summarize(nSigAnalytes=n(), nSigPairs_sum=sum(nSigPairs))
##Per cohort and analyte type: significant analytes as a percentage of all analytes of that type
temp_tbl1 <- dplyr::count(analyte_tbl, AnalyteType, name="nBackgrounds")
temp_tbl %>%
    dplyr::count(Cohort, AnalyteType, name="nSignifs") %>%
    dplyr::left_join(temp_tbl1, by="AnalyteType") %>%
    dplyr::mutate(Percentage=nSignifs/nBackgrounds*100)

sig_analyte_tbl <- temp_tbl

## 3-3. Summarize edge attribution

### 3-3-1. Prepare edge table

In [None]:
#Prepare edge ID and cohort
##EdgeID = "<analyte 1> vs. <analyte 2>"; the cohort label is the PairID without "_row<number>"
temp_tbl1 <- dplyr::mutate(pair_tbl,
                           EdgeID=str_c(AnalyteID_1," vs. ",AnalyteID_2),
                           Cohort=str_replace(PairID, "_row[0-9]+$", ""))

#Initialize edge table (one row per distinct edge across the cohorts)
temp_tbl <- dplyr::distinct(dplyr::select(temp_tbl1,
                                          EdgeID,
                                          AnalyteID_1, AnalyteLabel_1, AnalyteType_1,
                                          AnalyteID_2, AnalyteLabel_2, AnalyteType_2))

#Add edge attributions per cohort
##Columns <cohort>_bcoef and <cohort>_pFDR stay NA for edges absent from that cohort
for (cohort in cohort_vec) {
    cohort_tbl <- temp_tbl1 %>%
        dplyr::filter(Cohort==cohort) %>%
        dplyr::select(EdgeID, Beta_interaction, pFDR_interaction)
    colnames(cohort_tbl) <- c("EdgeID", str_c(cohort,"_bcoef"), str_c(cohort,"_pFDR"))
    temp_tbl <- dplyr::left_join(temp_tbl, cohort_tbl, by="EdgeID")
}

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
#Check inverse match just in case
##Lists any edge that also appears with its two analytes swapped
print("Check inverse EdgeID match:")
temp_vec <- str_c(temp_tbl$AnalyteID_2," vs. ",temp_tbl$AnalyteID_1)
dplyr::filter(temp_tbl, EdgeID %in% temp_vec)

edge_tbl <- temp_tbl

> In this result, edges are to be highlighted based on significance and direction of interaction term.  

In [None]:
#Add edge category (computed for all edges at once)
##Note that this table contains only the significant pairs in either comparison,
##so a missing pFDR in one cohort marks an edge that is significant only in the other cohort.
##Categories: "<cohort>(+)" / "<cohort>(-)" for single-cohort edges (sign of that cohort's beta),
##            "Both(+)" / "Both(-)" for concordant edges, and
##            "<cohort1>(+)<cohort2>(-)" / "<cohort1>(-)<cohort2>(+)" for discordant edges.
temp_vec <- c(str_c(cohort_vec,"_pFDR"), str_c(cohort_vec,"_bcoef"))
stopifnot(all(temp_vec %in% colnames(edge_tbl)))
pfdr_1 <- edge_tbl[[str_c(cohort_vec[1],"_pFDR")]]
pfdr_2 <- edge_tbl[[str_c(cohort_vec[2],"_pFDR")]]
bcoef_1 <- edge_tbl[[str_c(cohort_vec[1],"_bcoef")]]
bcoef_2 <- edge_tbl[[str_c(cohort_vec[2],"_bcoef")]]
##Fail fast when a beta that is needed to decide the sign is missing
stopifnot(!anyNA(bcoef_2[is.na(pfdr_1)]),
          !anyNA(bcoef_1[!is.na(pfdr_1)]),
          !anyNA(bcoef_2[!is.na(pfdr_1) & !is.na(pfdr_2)]))
##The conditions are checked in order; the first matching one decides the category
edge_tbl$Category <- dplyr::case_when(
    is.na(pfdr_1) & bcoef_2 > 0 ~ str_c(cohort_vec[2],"(+)"),#Significant only in 2nd
    is.na(pfdr_1) ~ str_c(cohort_vec[2],"(-)"),
    is.na(pfdr_2) & bcoef_1 > 0 ~ str_c(cohort_vec[1],"(+)"),#Significant only in 1st
    is.na(pfdr_2) ~ str_c(cohort_vec[1],"(-)"),
    bcoef_1 > 0 & bcoef_2 > 0 ~ "Both(+)",#Significant in both
    bcoef_1 < 0 & bcoef_2 < 0 ~ "Both(-)",
    bcoef_1 > 0 & bcoef_2 < 0 ~ str_c(cohort_vec[1],"(+)",cohort_vec[2],"(-)"),
    bcoef_1 < 0 & bcoef_2 > 0 ~ str_c(cohort_vec[1],"(-)",cohort_vec[2],"(+)"),
    TRUE ~ "Error due to beta = 0 !?"
)
head(edge_tbl)

#Check
edge_tbl %>%
    dplyr::group_by(Category) %>%
    dplyr::summarize(Count=n())

### 3-3-2. Filter edges to be presented

> All edges can be presented, and thus skip in this case.  

## 3-4. Summarize node attribution

### 3-4-1. Prepare node table

In [None]:
#Initialize node table (every analyte in the metadata is a node)
temp_tbl <- dplyr::select(analyte_tbl, AnalyteID, AnalyteLabel, AnalyteType)

#Add node attributions per cohort
##<cohort>_nSigPairs stays NA for analytes without a significant pair in that cohort
for (cohort in cohort_vec) {
    cohort_tbl <- sig_analyte_tbl %>%
        dplyr::filter(Cohort==cohort) %>%
        dplyr::select(AnalyteID, nSigPairs)
    colnames(cohort_tbl)[2] <- str_c(cohort,"_nSigPairs")
    temp_tbl <- dplyr::left_join(temp_tbl, cohort_tbl, by="AnalyteID")
}

#Add edge count
##Every edge contributes one count to each of its two end nodes
temp_tbl1 <- tibble(AnalyteID=c(edge_tbl$AnalyteID_1, edge_tbl$AnalyteID_2)) %>%
    dplyr::count(AnalyteID, name="nEdges")
temp_tbl <- dplyr::left_join(temp_tbl, temp_tbl1, by="AnalyteID")

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
##Per analyte type: share of nodes having at least one edge
temp_tbl %>% dplyr::group_by(AnalyteType) %>%
    dplyr::summarize(nNodes=n(),
                     nSigNodes=sum(!is.na(nEdges))) %>%
    dplyr::mutate(Percentage=nSigNodes/nNodes*100)

node_tbl <- temp_tbl

> –> This network is large for labelling all nodes.  
> –> Highlight nodes based on two styles: 1) sectors and 2) text labels.  

### 3-4-2. Categorize nodes

> In this case, a data type-based category is not well suited for the sectors, especially because the metabolite sector becomes much larger than the lab test sector. Hence, nodes are further categorized based on annotations.  

#### 3-4-2-1. Pre-check metabolite annotations

In [None]:
analytetype <- "Metabolite"
annotcol <- "SUPER_PATHWAY"
annotlabel <- "Superpathway"

#Prepare annotations
##One row per analyte of the target type; analytes without the annotation are dropped
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::transmute(AnalyteID, AnalyteLabel,
                     AnnotationID=str_c(annotlabel,":",.data[[annotcol]]),
                     AnnotationName=.data[[annotcol]]) %>%
    dplyr::filter(!is.na(AnnotationID))

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Per annotation: number of analytes and how many of them are in a significant pair
temp_tbl %>%
    dplyr::group_by(AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(), nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::arrange(dplyr::desc(nAnalytes))

In [None]:
analytetype <- "Metabolite"
annotcol <- "SUB_PATHWAY"
annotlabel <- "Subpathway"
supcol <- "SUPER_PATHWAY"

#Prepare annotations
##One row per analyte of the target type, with its annotation and the parent-level annotation;
##analytes without the annotation are dropped
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::transmute(AnalyteID, AnalyteLabel,
                     AnnotationID=str_c(annotlabel,":",.data[[annotcol]]),
                     AnnotationName=.data[[annotcol]],
                     SupAnnotation=.data[[supcol]]) %>%
    dplyr::filter(!is.na(AnnotationID))

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Only annotations covering at least 10 analytes are shown, sorted within each parent annotation
temp_tbl %>%
    dplyr::group_by(SupAnnotation, AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(), nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::filter(nAnalytes>=10) %>%
    dplyr::arrange(SupAnnotation, dplyr::desc(nAnalytes))

#### 3-4-2-2. Pre-check protein annotations

In [None]:
#Prepare annotations
##Protein-to-annotation mapping sheet of the cleaned metadata workbook
fileDir <- "./ExportData/"
ipynbName <- "240725_Arivale-APOE-BA_CleanMetadata_"
fileName <- "analyte-metadata.xlsx"
sheetName <- "Protein2GOBP"
temp_tbl <- read_excel(paste0(fileDir, ipynbName, fileName), sheet=sheetName)

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Drop the analyte column so that each distinct annotation record is listed once
dplyr::arrange(dplyr::distinct(dplyr::select(temp_tbl, -AnalyteID)),
               dplyr::desc(nAnalytes), dplyr::desc(nProteins), dplyr::desc(ProteinCoverage))

annotation_tbl <- temp_tbl

In [None]:
#Check exclusive combinations
##Print every pair of annotations that share no analyte, together with the size of each annotation
temp_tbl <- annotation_tbl %>%
    dplyr::select(-AnalyteID) %>%
    dplyr::distinct() %>%
    dplyr::arrange(desc(nAnalytes))
##Collect the analyte rows of each annotation once (instead of re-filtering inside the pair loop)
annotation_vec <- temp_tbl$AnnotationID
member_list <- lapply(annotation_vec, function(target_annotation) {
    dplyr::filter(annotation_tbl, AnnotationID==target_annotation)
})
##Compare every unordered pair (i < j); seq_along() also copes with an empty annotation table
for (i in seq_along(annotation_vec)) {
    for (j in seq_along(annotation_vec)) {
        if (j <= i) {
            next
        }
        temp_tbl1 <- member_list[[i]]
        temp_tbl2 <- member_list[[j]]
        #Report the pair when no analyte is shared
        if (!any(temp_tbl1$AnalyteID %in% temp_tbl2$AnalyteID)) {
            print(str_c(annotation_vec[i]," (",temp_tbl1$AnnotationName[1],"):",nrow(temp_tbl1)," vs. ",
                        annotation_vec[j]," (",temp_tbl2$AnnotationName[1],"):",nrow(temp_tbl2)))
        }
    }
}

In [None]:
#Helper: per-annotation number of analytes and of analytes found in a significant pair,
#        largest annotation first
summarize_annotation_set <- function(tbl) {
    tbl %>%
        dplyr::group_by(AnnotationID, AnnotationName) %>%
        dplyr::summarize(nAnalytes=n(), nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
        dplyr::arrange(desc(nAnalytes))
}
#Helper: report the number of covered rows and whether each analyte appears only once
#        (i.e., whether the selected annotations are mutually exclusive)
report_exclusivity <- function(tbl) {
    print(str_c("-> Covered analytes: ",nrow(tbl)))
    print(str_c(" -> Exclusive?: ",anyDuplicated(tbl$AnalyteID)==0))
}

#Further check the combination set
temp_tbl <- dplyr::filter(annotation_tbl, AnnotationID %in% c("GO:0006954", "GO:0030574"))
summarize_annotation_set(temp_tbl)
report_exclusivity(temp_tbl)

#Further check the combination set
temp_tbl <- dplyr::filter(annotation_tbl,
                          AnnotationID %in% c("GO:0006935", "GO:0032722", "GO:0051781", "GO:0030574"))
summarize_annotation_set(temp_tbl)
report_exclusivity(temp_tbl)

#Further check the combination set
temp_tbl <- dplyr::filter(annotation_tbl,
                          AnnotationID %in% c("GO:0070098", "GO:0032722", "GO:0051781", "GO:0030574"))
summarize_annotation_set(temp_tbl)
report_exclusivity(temp_tbl)

#### 3-4-2-3. Pre-check lab test annotations

In [None]:
#Prepare annotations
##Lab test-to-category mapping sheet of the cleaned metadata workbook
fileDir <- "./ExportData/"
ipynbName <- "240725_Arivale-APOE-BA_CleanMetadata_"
fileName <- "analyte-metadata.xlsx"
sheetName <- "LabTest2ManCat"
temp_tbl <- read_excel(paste0(fileDir, ipynbName, fileName), sheet=sheetName)

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Per annotation: number of analytes and how many of them are in a significant pair
temp_tbl %>%
    dplyr::group_by(AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(), nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::arrange(dplyr::desc(nAnalytes))

#### 3-4-2-4. Pre-check microbiome annotations

In [None]:
analytetype <- "Microbiome"
annotcol <- "Phylum"
annotlabel <- "Phylum"

#Prepare annotations
##One row per analyte of the target type; analytes without the annotation are dropped
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::transmute(AnalyteID, AnalyteLabel,
                     AnnotationID=str_c(annotlabel,":",.data[[annotcol]]),
                     AnnotationName=.data[[annotcol]]) %>%
    dplyr::filter(!is.na(AnnotationID))

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Per annotation: number of analytes and how many of them are in a significant pair
temp_tbl %>%
    dplyr::group_by(AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(), nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::arrange(dplyr::desc(nAnalytes))

In [None]:
analytetype <- "Microbiome"
annotcol <- "Class"
annotlabel <- "Class"
supcol <- "Phylum"

#Prepare annotations
##One row per analyte of the target type, with its annotation and the parent-level annotation;
##analytes without the annotation are dropped
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::transmute(AnalyteID, AnalyteLabel,
                     AnnotationID=str_c(annotlabel,":",.data[[annotcol]]),
                     AnnotationName=.data[[annotcol]],
                     SupAnnotation=.data[[supcol]]) %>%
    dplyr::filter(!is.na(AnnotationID))

print(str_c("nrow: ",nrow(temp_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(temp_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(temp_tbl$AnnotationID)))

#Check annotation-based summary
##Only annotations covering at least 10 analytes are shown, sorted within each parent annotation
temp_tbl %>%
    dplyr::group_by(SupAnnotation, AnnotationID, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(), nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::filter(nAnalytes>=10) %>%
    dplyr::arrange(SupAnnotation, dplyr::desc(nAnalytes))

#### 3-4-2-5. Categorize nodes

> Because some annotations are combined here, the annotation number is prepared at this point for the plot settings. Also, the order is changed to 1. Clinical labs, 2. Metabolomics, 3. Proteomics, 4. Microbiome.  

In [None]:
#Initialize annotation table
##Rows are appended per analyte type below; columns are
##AnalyteID, AnnotationName, TempSortNum, and TempCatNum
annotation_tbl <- tibble()

#Categorize metabolite nodes
##Each metabolite gets one AnnotationName built from its super pathway and sub pathway,
##where annotations with fewer than cutoff_min analytes are pooled into "Others"-type labels.
analytetype <- "Metabolite"
maincol <- "SUB_PATHWAY"
supcol <- "SUPER_PATHWAY"
cutoff_min <- 20
minorlabel_main <- "Others"
minorlabel_sup <- "Others (carbohydrate, vitamins, energy, etc.)"
nalabel <- str_c(analytetype,": N.A.")
##Prepare annotations
##(missing annotations become the N.A. label; the others are converted to sentence case)
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::mutate(MainAnnotation=!!as.name(maincol),
                  SupAnnotation=!!as.name(supcol)) %>%
    dplyr::select(AnalyteID, MainAnnotation, SupAnnotation) %>%
    dplyr::mutate(MainAnnotation=ifelse(is.na(MainAnnotation), nalabel, str_to_sentence(MainAnnotation)),
                  SupAnnotation=ifelse(is.na(SupAnnotation), nalabel, str_to_sentence(SupAnnotation)))
##Combine minor annotations at the main level
##(MainName keeps the main annotation when it has >= cutoff_min analytes or is the N.A. label;
## otherwise it becomes minorlabel_main)
temp_tbl <- temp_tbl %>%
    dplyr::group_by(SupAnnotation, MainAnnotation) %>%
    dplyr::summarize(nAnalytes=n()) %>%
    dplyr::ungroup() %>%
    dplyr::arrange(SupAnnotation, desc(nAnalytes)) %>%
    dplyr::mutate(MainName=ifelse((nAnalytes>=cutoff_min)|(MainAnnotation==nalabel),
                                  MainAnnotation, minorlabel_main)) %>%
    dplyr::select(-nAnalytes) %>%
    dplyr::left_join(temp_tbl, ., by=c("SupAnnotation", "MainAnnotation"))
##Combine minor annotations at the super level
##(same rule with minorlabel_sup; TempSortNum ranks the super annotations by decreasing size)
temp_tbl <- temp_tbl %>%
    dplyr::group_by(SupAnnotation) %>%
    dplyr::summarize(nAnalytes=n()) %>%
    dplyr::arrange(desc(nAnalytes)) %>%
    dplyr::mutate(SupName=ifelse((nAnalytes>=cutoff_min)|(SupAnnotation==nalabel),
                                 SupAnnotation, minorlabel_sup)) %>%
    dplyr::select(-nAnalytes) %>%
    dplyr::mutate(TempSortNum=row_number()) %>%
    dplyr::left_join(temp_tbl, ., by=c("SupAnnotation"))
##Prepare super levels having only the combined annotation
##(i.e., super names that are paired with exactly one distinct main name)
temp_vec <- temp_tbl %>%
    dplyr::select(SupName, MainName) %>%
    dplyr::distinct() %>%
    dplyr::group_by(SupName) %>%
    dplyr::summarize(Count=n()) %>%
    dplyr::filter(Count==1) %>%
    .$SupName
##Finalize annotations
##AnnotationName is the super name alone for the super levels collected above, the pooled
##super label, and the N.A. label; otherwise it is "<super name>: <main name>".
##TempSortNum is then adjusted: +0.1 for names ending with ": <minorlabel_main>",
##1000 for the pooled super label, and 2000 for the N.A. label.
##TempCatNum=2 is the number given to the metabolite rows.
annotation_tbl <- temp_tbl %>%
    dplyr::mutate(AnnotationName=ifelse(SupName %in% c(temp_vec, minorlabel_sup, nalabel),
                                        SupName, str_c(SupName,": ",MainName))) %>%
    dplyr::select(AnalyteID, AnnotationName, TempSortNum) %>%
    dplyr::mutate(TempSortNum=ifelse(str_detect(AnnotationName, str_c(": ", minorlabel_main,"$")),
                                     TempSortNum+0.1, TempSortNum)) %>%#Dummy shift
    dplyr::mutate(TempSortNum=ifelse(AnnotationName==minorlabel_sup, 1000, TempSortNum)) %>%#Dummy large number
    dplyr::mutate(TempSortNum=ifelse(AnnotationName==nalabel, 2000, TempSortNum)) %>%#Dummy large number
    dplyr::mutate(TempCatNum=2) %>%
    dplyr::bind_rows(annotation_tbl, .)

#Categorize protein nodes
##Proteins are split into the selected GO terms, a pooled "other" group for the remaining
##annotated proteins, and an N.A. group for proteins absent from the annotation sheet.
analytetype <- "Protein"
temp_vec <- c("GO:0006954", "GO:0030574")
minorlabel <- "Other GOBPs"
nalabel <- str_c(analytetype,": N.A.")
##Prepare annotations
fileDir <- "./ExportData/"
ipynbName <- "240725_Arivale-APOE-BA_CleanMetadata_"
fileName <- "analyte-metadata.xlsx"
sheetName <- "Protein2GOBP"
temp_tbl <- read_excel(str_c(fileDir,ipynbName,fileName), sheet=sheetName)
##Select the target exclusive annotations
##(label = sentence-cased annotation name followed by the annotation ID in parentheses)
temp_tbl1 <- temp_tbl %>%
    dplyr::filter(AnnotationID %in% temp_vec) %>%
    dplyr::transmute(AnalyteID,
                     AnnotationName=str_c(str_to_sentence(AnnotationName)," (",AnnotationID,")"),
                     TempSortNum=1)
##Combine the others
##(one row per remaining analyte, all labeled with the pooled name)
temp_tbl2 <- temp_tbl %>%
    dplyr::filter(!(AnalyteID %in% temp_tbl1$AnalyteID)) %>%
    dplyr::distinct(AnalyteID) %>%
    dplyr::mutate(AnnotationName=minorlabel,
                  TempSortNum=2)
##Rescue the unmapped analytes
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype,
                  !(AnalyteID %in% c(temp_tbl1$AnalyteID, temp_tbl2$AnalyteID))) %>%
    dplyr::transmute(AnalyteID,
                     AnnotationName=nalabel,
                     TempSortNum=3)
##Finalize annotations
##(TempCatNum=3 is the number given to the protein rows)
annotation_tbl <- dplyr::bind_rows(annotation_tbl,
                                   dplyr::mutate(dplyr::bind_rows(temp_tbl1, temp_tbl2, temp_tbl),
                                                 TempCatNum=3))

#Categorize lab test nodes
##Settings: analyte type, manual categories kept as their own categories, and the fallback labels
analytetype <- "Lab test"
temp_vec <- c("ManualCategory:Cell", "ManualCategory:Lipid", "ManualCategory:Protein")
minorlabel <- "Other biomarkers"
nalabel <- str_c(analytetype,": N.A.")
##Load the lab test-to-manual category mapping sheet of the cleaned metadata workbook
fileDir <- "./ExportData/"
ipynbName <- "240725_Arivale-APOE-BA_CleanMetadata_"
fileName <- "analyte-metadata.xlsx"
sheetName <- "LabTest2ManCat"
temp_tbl <- str_c(fileDir,ipynbName,fileName) %>%
    read_excel(sheet=sheetName)
##Sort rank 1: lab tests mapped to a target category (annotation name used as is)
temp_tbl1 <- temp_tbl %>%
    dplyr::filter(AnnotationID %in% temp_vec) %>%
    dplyr::transmute(AnalyteID, AnnotationName, TempSortNum=1)
##Sort rank 2: the remaining mapped lab tests, pooled under one label (one row per analyte)
temp_tbl2 <- temp_tbl %>%
    dplyr::filter(!(AnalyteID %in% temp_tbl1$AnalyteID)) %>%
    dplyr::distinct(AnalyteID) %>%
    dplyr::mutate(AnnotationName=minorlabel, TempSortNum=2)
##Sort rank 3: lab tests of the analyte table that are absent from the mapping sheet
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype,
                  !(AnalyteID %in% c(temp_tbl1$AnalyteID, temp_tbl2$AnalyteID))) %>%
    dplyr::transmute(AnalyteID, AnnotationName=nalabel, TempSortNum=3)
##Stack the three ranks, tag them as category 1, and append to the running annotation table
annotation_tbl <- dplyr::bind_rows(
    annotation_tbl,
    dplyr::bind_rows(temp_tbl1, temp_tbl2, temp_tbl) %>%
        dplyr::mutate(TempCatNum=1)
)

#Categorize microbiome nodes
##Settings: analyte type, taxonomy column used as the category, minimum category size, and the fallback labels
analytetype <- "Microbiome"
maincol <- "Phylum"
cutoff_min <- 10
minorlabel_main <- "Other phyla"
nalabel <- str_c(analytetype,": N.A.")
##One row per microbiome analyte with its main annotation (missing value -> N.A. label)
temp_tbl <- analyte_tbl %>%
    dplyr::filter(AnalyteType==analytetype) %>%
    dplyr::transmute(AnalyteID,
                     MainAnnotation=ifelse(is.na(.data[[maincol]]), nalabel, .data[[maincol]]))
##Pool the annotations with fewer than cutoff_min analytes (the N.A. label is never pooled)
##and number the annotations from the largest to the smallest
temp_tbl <- temp_tbl %>%
    dplyr::count(MainAnnotation, name="nAnalytes") %>%
    dplyr::arrange(desc(nAnalytes)) %>%
    dplyr::mutate(MainName=ifelse((nAnalytes>=cutoff_min)|(MainAnnotation==nalabel),
                                  MainAnnotation, minorlabel_main),
                  TempSortNum=row_number()) %>%
    dplyr::select(-nAnalytes) %>%
    dplyr::left_join(temp_tbl, ., by=c("MainAnnotation"))
##Push the pooled and N.A. labels to the end with dummy large numbers, tag as category 4, and append
annotation_tbl <- temp_tbl %>%
    dplyr::transmute(AnalyteID, AnnotationName=MainName, TempSortNum) %>%
    dplyr::mutate(TempSortNum=ifelse(AnnotationName==minorlabel_main, 1000, TempSortNum),
                  TempSortNum=ifelse(AnnotationName==nalabel, 2000, TempSortNum),
                  TempCatNum=4) %>%
    dplyr::bind_rows(annotation_tbl, .)

#Clean the sorting info
##Give each annotation a sequential number (ordered by category, sort rank, then size),
##attach it to every analyte row, and drop the temporary sorting columns
annotation_tbl <- dplyr::left_join(
    annotation_tbl,
    annotation_tbl %>%
        dplyr::group_by(TempCatNum, TempSortNum, AnnotationName) %>%
        dplyr::summarize(nAnalytes=n()) %>%
        dplyr::ungroup() %>%
        dplyr::arrange(TempCatNum, TempSortNum, desc(nAnalytes)) %>%
        dplyr::mutate(AnnotationNum=row_number()),
    by=c("TempCatNum", "TempSortNum", "AnnotationName")
) %>%
    dplyr::select(-c(TempCatNum, TempSortNum, nAnalytes))

##Report the table size
print(str_c("nrow: ",nrow(annotation_tbl)))
print(str_c("- Unique analyte: ",dplyr::n_distinct(annotation_tbl$AnalyteID)))
print(str_c("- Unique annotation: ",dplyr::n_distinct(annotation_tbl$AnnotationName)))
head(annotation_tbl)

#Check annotation-based summary
##Per annotation: number of analytes and how many of them are in the significant analyte table
annotation_tbl %>%
    dplyr::group_by(AnnotationNum, AnnotationName) %>%
    dplyr::summarize(nAnalytes=n(),
                     nSigAnalytes=sum(AnalyteID %in% sig_analyte_tbl$AnalyteID)) %>%
    dplyr::arrange(AnnotationNum)

In [None]:
#Add to the node table
##Attach the annotation name/number to each node by its analyte ID
temp_tbl <- node_tbl %>%
    dplyr::left_join(annotation_tbl, by="AnalyteID")

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
##Per annotation: number of nodes, number of nodes having edges, and their percentage
temp_tbl %>%
    dplyr::group_by(AnalyteType, AnnotationNum, AnnotationName) %>%
    dplyr::summarize(nNodes=n(),
                     nSigNodes=sum(!is.na(nEdges)),
                     Percentage=nSigNodes/nNodes*100) %>%
    dplyr::arrange(AnnotationNum)

#Update
node_tbl <- temp_tbl

### 3-4-3. Highlight nodes with text labels

> In this case, the number of the common significant pairs is small. So, the number of edges is used for judging nodes to be highlighted, while removing the nodes with edges from only one cohort.  

In [None]:
#Check nodes of the common significant pairs
##Edges shared by both cohorts, and the nodes at either end of them
temp_tbl <- dplyr::filter(edge_tbl, Category %in% c("Both(+)", "Both(-)"))
temp_tbl
temp_vec <- base::union(temp_tbl$AnalyteID_1, temp_tbl$AnalyteID_2)
dplyr::filter(node_tbl, AnalyteID %in% temp_vec)

#Check node summary
summary(node_tbl)
##Number of nodes remaining at each candidate cutoff of the edge count
for (i in 2:10) {
    temp_tbl <- dplyr::filter(node_tbl, nEdges>=i)
    print(str_c("nNodes with ",i," edges: ",nrow(temp_tbl)))
}
##While removing the nodes with edges from only one cohort
##(i.e., keeping the nodes whose nSigPairs value is available in both cohorts)
for (i in 1:10) {
    temp_tbl <- dplyr::filter(node_tbl,
                              !is.na(.data[[str_c(cohort_vec[1],"_nSigPairs")]]),
                              !is.na(.data[[str_c(cohort_vec[2],"_nSigPairs")]]),
                              nEdges>=i)
    print(str_c("nNodes with ",i," edges which were at least derived from each: ",nrow(temp_tbl)))
}

In [None]:
#Nodes with higher number of edges than cutoff (while removing the nodes with edges from only one cohort)
cutoff <- 1
##IDs of the nodes having the nSigPairs value in both cohorts and at least cutoff edges
temp_vec <- node_tbl %>%
    dplyr::filter(!is.na(.data[[str_c(cohort_vec[1],"_nSigPairs")]]),
                  !is.na(.data[[str_c(cohort_vec[2],"_nSigPairs")]]),
                  nEdges>=cutoff) %>%
    dplyr::pull(AnalyteID)
##Flag them: 1 = node to be shown with a text label, 0 = otherwise
temp_tbl <- node_tbl %>%
    dplyr::mutate(Highlight=as.numeric(AnalyteID %in% temp_vec))

print(str_c("nrow: ",nrow(temp_tbl)))
head(temp_tbl)
##Per annotation: counts and percentages of the nodes with edges and of the labeled nodes
temp_tbl %>%
    dplyr::group_by(AnalyteType, AnnotationNum, AnnotationName) %>%
    dplyr::summarize(nNodes=n(),
                     nSigNodes=sum(!is.na(nEdges)),
                     nTextNodes=sum(Highlight==1),
                     SigPercent=nSigNodes/nNodes*100,
                     TextPercent=nTextNodes/nNodes*100) %>%
    dplyr::arrange(AnnotationNum)
##List the labeled nodes from the most connected one
temp_tbl %>%
    dplyr::filter(Highlight==1) %>%
    dplyr::arrange(desc(nEdges))

#Update
node_tbl <- temp_tbl

### 3-4-4. Add plot settings

In [None]:
#Numbering sector/category and x-coordinate in each category
##The annotation number is reused as the sector number; within each sector,
##nodes are positioned 1, 2, ... from the largest number of edges
node_tbl <- node_tbl %>%
    dplyr::arrange(AnnotationNum, desc(nEdges)) %>%
    dplyr::mutate(CategoryNum=AnnotationNum) %>%
    dplyr::group_by(CategoryNum) %>%
    dplyr::mutate(Xcoord=dplyr::row_number()) %>%
    dplyr::ungroup()
head(node_tbl)

In [None]:
#Check
##Show the rows around the boundary between adjacent sectors (3 rows before/after the last row of sector i)
print("Check Xcoord around margin")
temp_vec <- node_tbl %>%
    dplyr::count(CategoryNum, name="nNodes") %>%
    dplyr::pull(nNodes)
##Boundary after the 1st sector
i <- 1
border <- cumsum(temp_vec)[i]
node_tbl[seq(border-3, border+3),]
##Boundary after the 2nd sector
i <- 2
border <- cumsum(temp_vec)[i]
node_tbl[seq(border-3, border+3),]
##Boundary after the 3rd sector
i <- 3
border <- cumsum(temp_vec)[i]
node_tbl[seq(border-3, border+3),]

> Of note, when the gaps between labels were tight, a label offset calculation was needed to avoid overlapping.  
–> This offset was handled manually in my previous code, but it can be done automatically by the circos.labels() function!!  
–> Skip this step.  

## 3-5. Circos plot

In [None]:
options(repr.plot.width=8, repr.plot.height=8)#Default=7x7
circos.clear()

#Generate xlim table
##First and last x-coordinate of the nodes in each sector
xlim_tbl <- dplyr::summarize(dplyr::group_by(node_tbl, CategoryNum),
                             Xmin=min(Xcoord), Xmax=max(Xcoord))

#Initialize
##Layout starts at 12 o'clock and runs clockwise; sector width is set to the sector's largest x-coordinate
circos.par(start.degree=90, gap.degree=1.5, track.margin=c(0, 0),
           cell.padding=c(0, 0, 0, 0), unit.circle.segments=nrow(node_tbl),
           canvas.xlim=c(-1.0, 1.5), canvas.ylim=c(-1.25, 1.25),#Change based on the label length
           clock.wise=TRUE, xaxis.clock.wise=TRUE)
circos.initialize(sectors=xlim_tbl$CategoryNum, xlim=xlim_tbl[, c("Xmin", "Xmax")],
                  sector.width=xlim_tbl$Xmax)

#Prepare color palette with standard/dummy sector order
##Analyte type -> slightly darkened Set1 color (2=blue, 1=red, 3=green, 4=purple)
temp_vec <- darken(brewer.pal(4, "Set1"), amount=0.05)
color_palette1 <- setNames(temp_vec[c(2, 1, 3, 4)],
                           c("Metabolite", "Protein", "Lab test", "Microbiome"))
##Edge category -> color; the (+) and (-) categories of the same origin share one color
color_palette2 <- setNames(
    rep(c(rgb(255/255, 69/255, 0/255, alpha=0.6),#orange red
          rgb(34/255, 139/255, 34/255, alpha=0.3),#forest green
          rgb(65/255, 105/255, 225/255, alpha=0.3),#royal blue
          rgb(0, 0, 0, alpha=1.0)),#Just in case
        each=2),
    c("Both(+)", "Both(-)",
      str_c(rep(cohort_vec[1:2], each=2), c("(+)", "(-)")),
      str_c(cohort_vec[1], c("(+)", "(-)"), cohort_vec[2], c("(-)", "(+)")))
)

#Add highlighting node labels (two outer tracks)
##Prepare highlighting nodes
##Text color follows the analyte type. The palette is indexed directly (instead of sapply) so that
##TextColor is always a character column, even when no node is highlighted; otherwise the
##bind_rows() with the dummy rows below could fail on a list column
temp_tbl <- node_tbl %>%
    dplyr::filter(Highlight==1) %>%
    dplyr::mutate(TextColor=unname(color_palette1[AnalyteType]),
                  LineColor="black")
##Prepare dummy node for the sector without highlighting nodes
##One fully transparent label is placed at the center of each such sector,
##so that every sector is included in the circos.labels() call
temp_tbl <- node_tbl %>%
    dplyr::filter(!(CategoryNum %in% temp_tbl$CategoryNum)) %>%
    dplyr::group_by(CategoryNum) %>%
    dplyr::summarize(Xcoord=(min(Xcoord)+max(Xcoord))/2) %>%
    dplyr::mutate(AnalyteLabel="Dummy",
                  TextColor=rgb(0, 0, 0, alpha=0.0),
                  LineColor=rgb(0, 0, 0, alpha=0.0)) %>%
    dplyr::bind_rows(temp_tbl, .)
##Add the target node labels
circos.labels(sectors=temp_tbl$CategoryNum, x=temp_tbl$Xcoord, labels=temp_tbl$AnalyteLabel,
              facing="clockwise", niceFacing=TRUE, col=temp_tbl$TextColor, cex=0.6,
              padding=0.1, line_col=temp_tbl$LineColor, line_lwd=1, side="outside")

#Add category sectors (track 3)
##The label step above is noted as occupying the two outer tracks, so this new track is addressed as track.index=3
##The track itself is created without background/border; each sector is drawn manually in the loop below
circos.track(ylim=c(0, 1), track.height=0.075, bg.col=NA, bg.border=NA)
for (row_i in 1:nrow(xlim_tbl)) {
    #Sector ID and the x-range of its nodes
    sector_n <- xlim_tbl$CategoryNum[row_i]
    xstart <- xlim_tbl$Xmin[row_i]
    xend <- xlim_tbl$Xmax[row_i]
    #Convert both ends of the x-range with circlize(); the [1, 1] element is used as the start/end degree below
    theta_start <- circlize(x=xstart, y=0, sector.index=sector_n, track.index=3)[1, 1]
    theta_end <- circlize(x=xend, y=0, sector.index=sector_n, track.index=3)[1, 1]
    #Prepare labels and colors
    ##Label = sector number; fill color = color of the analyte type of the first node in the sector
    label_n <- as.character(sector_n)
    analytetype <- node_tbl[node_tbl$CategoryNum==sector_n, ]$AnalyteType[1]
    fill_color <- color_palette1[analytetype]
    #Add category sectors
    ##Filled band between the top and bottom radius of track 3
    draw.sector(start.degree=theta_start, end.degree=theta_end, clock.wise=TRUE,
                rou1=get.cell.meta.data("cell.top.radius", track.index=3),
                rou2=get.cell.meta.data("cell.bottom.radius", track.index=3),
                col=fill_color, border="black", lwd=2)
    #Add category label
    ##White bold sector number at the center of the band
    xcenter <- (xstart + xend) / 2
    circos.text(x=xcenter, y=0.5, labels=label_n, sector.index=sector_n, track.index=3,
                facing="bending.inside", niceFacing=TRUE, adj=c(0.5, 0.5),
                cex=0.6, col="white", font=2)#2 = bold font
    #Add axis ticks
    ##One unlabeled major tick per integer x-coordinate (i.e., per node position) on the top of the band
    circos.axis(h="top", major.at=xstart:xend, labels=FALSE, major.tick=TRUE,
                sector.index=sector_n, track.index=3, direction="outside",
                minor.ticks=0, major.tick.length=0.1, lwd=0.25, col="black")
}

#Add links for the significant pairs
##Edge weight = row-wise mean of the absolute beta-coefficients over the "<cohort>_bcoef" columns
##(as.name() on the length-2 column-name vector refers only to the first cohort, and mean() over
##a column collapses all edges into a single value, so the weight is computed per row here)
##NOTE(review): na.rm=TRUE assumes that an edge may have NA in a cohort's column; an edge with NA
##in every cohort column gets NaN - confirm against how edge_tbl was built
temp_tbl <- edge_tbl %>%
    dplyr::mutate(MeanAbsBcoef=rowMeans(abs(dplyr::pick(dplyr::all_of(str_c(cohort_vec,"_bcoef")))),
                                        na.rm=TRUE)) %>%
    dplyr::arrange(desc(Category), desc(MeanAbsBcoef))#For rendering with consistent order
for (row_i in seq_len(nrow(temp_tbl))) {
    #Locate both end nodes (sector and x-coordinate) of the edge
    var1 <- temp_tbl$AnalyteID_1[row_i]
    var2 <- temp_tbl$AnalyteID_2[row_i]
    var1_sector <- node_tbl$CategoryNum[node_tbl$AnalyteID==var1]
    var2_sector <- node_tbl$CategoryNum[node_tbl$AnalyteID==var2]
    var1_xcoord <- node_tbl$Xcoord[node_tbl$AnalyteID==var1]
    var2_xcoord <- node_tbl$Xcoord[node_tbl$AnalyteID==var2]
    #Prepare line color and width
    ##Color by the edge category; width proportional to the weight of this edge only
    label_n <- temp_tbl$Category[row_i]
    line_color <- color_palette2[label_n]
    line_width <- temp_tbl$MeanAbsBcoef[row_i]*3
    #Add edge
    ##Both ends are anchored at the innermost radius of the existing tracks
    circos.link(sector.index1=var1_sector, point1=var1_xcoord, rou1=get_most_inside_radius(),
                sector.index2=var2_sector, point2=var2_xcoord, rou2=get_most_inside_radius(),
                h.ratio=0.6, w=4, col=line_color, lwd=line_width)
}

#Reset the circos state and restore the default plot size of the notebook
circos.clear()
options(repr.plot.width=5, repr.plot.height=5)#Default=7x7

> –> There is no need to worry about the above "out of plotting" notes because the final font is a bit different after export (see below).  

> Because the circlize package uses base graphics, use a graphics device to export the figure.  

In [None]:
options(repr.plot.width=8, repr.plot.height=8)#Default=7x7
circos.clear()

#To save (circlize uses base graphics)
##Open an 8 x 8 PDF device named after the compared cohorts; it is closed after the plot is drawn
fileDir <- "./ExportFigures/"
ipynbName <- "240820_Arivale-APOE-BA-visualization-for-Dylan_ver3-3_"
fileName <- str_c(str_flatten(cohort_vec, collapse="-vs-"),".pdf")
pdf(file=str_c(fileDir,ipynbName,fileName), width=8, height=8)

#Generate xlim table
##First and last x-coordinate of the nodes in each sector
xlim_tbl <- dplyr::summarize(dplyr::group_by(node_tbl, CategoryNum),
                             Xmin=min(Xcoord), Xmax=max(Xcoord))

#Initialize
##Layout starts at 12 o'clock and runs clockwise; sector width is set to the sector's largest x-coordinate
circos.par(start.degree=90, gap.degree=1.5, track.margin=c(0, 0),
           cell.padding=c(0, 0, 0, 0), unit.circle.segments=nrow(node_tbl),
           canvas.xlim=c(-1.0, 1.5), canvas.ylim=c(-1.25, 1.25),#Change based on the label length
           clock.wise=TRUE, xaxis.clock.wise=TRUE)
circos.initialize(sectors=xlim_tbl$CategoryNum, xlim=xlim_tbl[, c("Xmin", "Xmax")],
                  sector.width=xlim_tbl$Xmax)

#Prepare color palette with standard/dummy sector order
##Analyte type -> slightly darkened Set1 color (2=blue, 1=red, 3=green, 4=purple)
temp_vec <- darken(brewer.pal(4, "Set1"), amount=0.05)
color_palette1 <- setNames(temp_vec[c(2, 1, 3, 4)],
                           c("Metabolite", "Protein", "Lab test", "Microbiome"))
##Edge category -> color; the (+) and (-) categories of the same origin share one color
color_palette2 <- setNames(
    rep(c(rgb(255/255, 69/255, 0/255, alpha=0.6),#orange red
          rgb(34/255, 139/255, 34/255, alpha=0.3),#forest green
          rgb(65/255, 105/255, 225/255, alpha=0.3),#royal blue
          rgb(0, 0, 0, alpha=1.0)),#Just in case
        each=2),
    c("Both(+)", "Both(-)",
      str_c(rep(cohort_vec[1:2], each=2), c("(+)", "(-)")),
      str_c(cohort_vec[1], c("(+)", "(-)"), cohort_vec[2], c("(-)", "(+)")))
)

#Add highlighting node labels (two outer tracks)
##Prepare highlighting nodes
##Text color follows the analyte type. The palette is indexed directly (instead of sapply) so that
##TextColor is always a character column, even when no node is highlighted; otherwise the
##bind_rows() with the dummy rows below could fail on a list column
temp_tbl <- node_tbl %>%
    dplyr::filter(Highlight==1) %>%
    dplyr::mutate(TextColor=unname(color_palette1[AnalyteType]),
                  LineColor="black")
##Prepare dummy node for the sector without highlighting nodes
##One fully transparent label is placed at the center of each such sector,
##so that every sector is included in the circos.labels() call
temp_tbl <- node_tbl %>%
    dplyr::filter(!(CategoryNum %in% temp_tbl$CategoryNum)) %>%
    dplyr::group_by(CategoryNum) %>%
    dplyr::summarize(Xcoord=(min(Xcoord)+max(Xcoord))/2) %>%
    dplyr::mutate(AnalyteLabel="Dummy",
                  TextColor=rgb(0, 0, 0, alpha=0.0),
                  LineColor=rgb(0, 0, 0, alpha=0.0)) %>%
    dplyr::bind_rows(temp_tbl, .)
##Add the target node labels
circos.labels(sectors=temp_tbl$CategoryNum, x=temp_tbl$Xcoord, labels=temp_tbl$AnalyteLabel,
              facing="clockwise", niceFacing=TRUE, col=temp_tbl$TextColor, cex=0.6,
              padding=0.1, line_col=temp_tbl$LineColor, line_lwd=1, side="outside")

#Add category sectors (track 3)
##The label step above is noted as occupying the two outer tracks, so this new track is addressed as track.index=3
##The track itself is created without background/border; each sector is drawn manually in the loop below
circos.track(ylim=c(0, 1), track.height=0.075, bg.col=NA, bg.border=NA)
for (row_i in 1:nrow(xlim_tbl)) {
    #Sector ID and the x-range of its nodes
    sector_n <- xlim_tbl$CategoryNum[row_i]
    xstart <- xlim_tbl$Xmin[row_i]
    xend <- xlim_tbl$Xmax[row_i]
    #Convert both ends of the x-range with circlize(); the [1, 1] element is used as the start/end degree below
    theta_start <- circlize(x=xstart, y=0, sector.index=sector_n, track.index=3)[1, 1]
    theta_end <- circlize(x=xend, y=0, sector.index=sector_n, track.index=3)[1, 1]
    #Prepare labels and colors
    ##Label = sector number; fill color = color of the analyte type of the first node in the sector
    label_n <- as.character(sector_n)
    analytetype <- node_tbl[node_tbl$CategoryNum==sector_n, ]$AnalyteType[1]
    fill_color <- color_palette1[analytetype]
    #Add category sectors
    ##Filled band between the top and bottom radius of track 3
    draw.sector(start.degree=theta_start, end.degree=theta_end, clock.wise=TRUE,
                rou1=get.cell.meta.data("cell.top.radius", track.index=3),
                rou2=get.cell.meta.data("cell.bottom.radius", track.index=3),
                col=fill_color, border="black", lwd=2)
    #Add category label
    ##White bold sector number at the center of the band
    xcenter <- (xstart + xend) / 2
    circos.text(x=xcenter, y=0.5, labels=label_n, sector.index=sector_n, track.index=3,
                facing="bending.inside", niceFacing=TRUE, adj=c(0.5, 0.5),
                cex=0.6, col="white", font=2)#2 = bold font
    #Add axis ticks
    ##One unlabeled major tick per integer x-coordinate (i.e., per node position) on the top of the band
    circos.axis(h="top", major.at=xstart:xend, labels=FALSE, major.tick=TRUE,
                sector.index=sector_n, track.index=3, direction="outside",
                minor.ticks=0, major.tick.length=0.1, lwd=0.25, col="black")
}

#Add links for the significant pairs
##Edge weight = row-wise mean of the absolute beta-coefficients over the "<cohort>_bcoef" columns
##(as.name() on the length-2 column-name vector refers only to the first cohort, and mean() over
##a column collapses all edges into a single value, so the weight is computed per row here)
##NOTE(review): na.rm=TRUE assumes that an edge may have NA in a cohort's column; an edge with NA
##in every cohort column gets NaN - confirm against how edge_tbl was built
temp_tbl <- edge_tbl %>%
    dplyr::mutate(MeanAbsBcoef=rowMeans(abs(dplyr::pick(dplyr::all_of(str_c(cohort_vec,"_bcoef")))),
                                        na.rm=TRUE)) %>%
    dplyr::arrange(desc(Category), desc(MeanAbsBcoef))#For rendering with consistent order
for (row_i in seq_len(nrow(temp_tbl))) {
    #Locate both end nodes (sector and x-coordinate) of the edge
    var1 <- temp_tbl$AnalyteID_1[row_i]
    var2 <- temp_tbl$AnalyteID_2[row_i]
    var1_sector <- node_tbl$CategoryNum[node_tbl$AnalyteID==var1]
    var2_sector <- node_tbl$CategoryNum[node_tbl$AnalyteID==var2]
    var1_xcoord <- node_tbl$Xcoord[node_tbl$AnalyteID==var1]
    var2_xcoord <- node_tbl$Xcoord[node_tbl$AnalyteID==var2]
    #Prepare line color and width
    ##Color by the edge category; width proportional to the weight of this edge only
    label_n <- temp_tbl$Category[row_i]
    line_color <- color_palette2[label_n]
    line_width <- temp_tbl$MeanAbsBcoef[row_i]*3
    #Add edge
    ##Both ends are anchored at the innermost radius of the existing tracks
    circos.link(sector.index1=var1_sector, point1=var1_xcoord, rou1=get_most_inside_radius(),
                sector.index2=var2_sector, point2=var2_xcoord, rou2=get_most_inside_radius(),
                h.ratio=0.6, w=4, col=line_color, lwd=line_width)
}

#Close
##Close the current graphics device so that the figure file is finalized
dev.off()

#Reset the circos state and restore the default plot size of the notebook
circos.clear()
options(repr.plot.width=5, repr.plot.height=5)#Default=7x7

> –> The exported figure is indeed a .pdf file.  
> –> Furthermore, the Arial font was correctly used in the exported file!!  

# — Session information —

In [47]:
#Record the R version, platform, and package versions used in this session (for reproducibility)
sessionInfo()

R version 4.1.1 (2021-08-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.3 LTS

Matrix products: default
BLAS/LAPACK: /opt/conda/envs/arivale-r/lib/libopenblasp-r0.3.18.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] colorspace_2.1-0   RColorBrewer_1.1-3 circlize_0.4.15    readxl_1.4.3      
 [5] forcats_0.5.1      stringr_1.5.1      dplyr_1.1.4        purrr_1.0.2       
 [9] readr_2.1.2        tidyr_1.3.1        tibble_3.2.1       ggplot2_3.5.1     
[13] tidyverse_1.3.1   

loaded via a namespace (and not attached):
 [1