# make conda environment & install packages

In [None]:
# Remove any existing "WGCNA" environment so that it is rebuilt from scratch.
conda deactivate; conda remove -n WGCNA --all -y
# Create the environment with the listed packages (including r-base and julia),
# using only the conda-forge and bioconda channels with strict channel priority.
mamba create -n WGCNA \
    -c conda-forge -c bioconda -c nodefaults --strict-channel-priority \
    r-base libxml2 seaborn-base scnic r-car  r-mice r-igraph r-leidenbase r-xml2 r-xml r-rcurl r-restfulr bioconductor-rtracklayer bioconductor-rgraphviz bioconductor-gostats r-hdf5r xz julia -y

mamba activate WGCNA
# Start an R session; the remaining lines of this cell are R code to run inside it.
R
# Install the CRAN helper packages used by the install calls below.
install.packages(c("remotes","BiocManager","hdf5r"), repos="https://ftp.yz.yamagata-u.ac.jp/pub/cran")
# Install Bioconductor packages without interactive prompts.
BiocManager::install(c("WGCNA","GenomicRanges","GeneOverlap","UCell","glmGamPoi","clusterProfiler","org.Hs.eg.db","AnnotationDbi","zellkonverter"), ask=F)
# SGCP is installed explicitly into the first library path.
BiocManager::install("SGCP", ask=FALSE, lib = .libPaths()[1])
# GitHub installs: the "seurat5" ref of Seurat and the "dev" ref of hdWGCNA.
remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE)
remotes::install_github("smorabit/hdWGCNA", ref="dev", quiet = TRUE)


# Download (DL) the GTEx v10 per-tissue TPM files

In [None]:
# List every object under the GTEx v10 per-tissue TPM prefix of the
# "adult-gtex" bucket, collect download URLs for the .gz objects, then
# download and decompress them in the current directory.
API="https://storage.googleapis.com/storage/v1/b/adult-gtex/o"
TMP_URLS="urllist.txt"
: > "$TMP_URLS"   # create or truncate the URL list

# URL-encode the prefix for use as a query parameter (jq does the escaping safely).
PREFIX="$(jq -rn --arg p "bulk-gex/v10/rna-seq/tpms-by-tissue/" '$p|@uri')"

token=""
listing_ok=1

# === Fetch the object listing (with pagination) ===
while :; do
    url="${API}?prefix=${PREFIX}&fields=items(name),nextPageToken"
    # The page token is an opaque string; encode it the same way as the prefix
    # so that characters such as '+', '/' or '=' survive as a query value.
    if [[ -n "${token}" ]]; then
        url="${url}&pageToken=$(jq -rn --arg t "$token" '$t|@uri')"
    fi

    # A failed request must not be mistaken for "no more pages"; otherwise a
    # truncated URL list would be downloaded without any warning.
    if ! json="$(curl -fsS "$url")"; then
        echo "ERROR: failed to list objects: ${url}" >&2
        listing_ok=0
        break
    fi

    # Keep only .gz objects. Each path component is URL-encoded separately so
    # that '/' stays literal (spaces become %20, slashes do not become %2F).
    echo "$json" \
    | jq -r '
        .items[]?.name
        | select(endswith(".gz"))
        | (split("/") | map(@uri) | join("/"))
        | "https://storage.googleapis.com/adult-gtex/\(.)"
        ' >> "$TMP_URLS"

    token="$(echo "$json" | jq -r '.nextPageToken // empty')"
    [[ -z "$token" ]] && break
done

# Download and decompress only when the listing completed.
if [[ "$listing_ok" -eq 1 ]]; then
    wget -c -nc -i "$TMP_URLS"
    gunzip -n *.gz
fi

# データ前処理

In [None]:
options(warn=-1)
options(future.globals.maxSize = 100 * 1024^3)
Pkgs <- c("SGCP", "org.Hs.eg.db", "AnnotationDbi")
for(p in Pkgs) suppressMessages(library(p, character.only=T))

# Tissues whose per-tissue TPM tables are preprocessed.
gct_files <- c(
        "adipose_subcutaneous", "adipose_visceral_omentum", "artery_tibial", "breast_mammary_tissue", "cells_cultured_fibroblasts",
        "esophagus_mucosa", "esophagus_muscularis", "lung","muscle_skeletal","nerve_tibial",
        "skin_not_sun_exposed_suprapubic", "skin_sun_exposed_lower_leg", "thyroid", "whole_blood"
)

# Preprocess one GTEx TPM table and cache it for the clustering tools.
#
# Args:
#   fn:   dataset label; names the outputs tmp/<fn>.tsv and tmp/<fn>.rds.
#   path: GCT file to read. Defaults to the per-tissue file
#         data/gene_tpm_v10_<fn>.gct, so existing calls LoadData(fn) are unchanged.
#
# Steps: drop symbols that occur more than once, keep genes with a unique
# SYMBOL <-> ENTREZID mapping, drop zero-variance genes, drop genes without an
# org.Hs.egGO entry, then write the matrix as TSV (rows named by symbol) and as
# RDS (rows named by Entrez ID, together with the gene table).
# Returns NULL invisibly; the function is called for its side effects.
LoadData <- function(fn, path = paste0("data/gene_tpm_v10_", fn, ".gct")) {
        # The first two lines of the file are skipped; line 3 is used as the header.
        data <- read.csv(path, sep="\t", header=TRUE, skip=2)
        # Remove every row whose symbol (column 2) is duplicated, including the first occurrence.
        data <- data[!duplicated(data[, 2]) & !duplicated(data[, 2], fromLast = TRUE), ]

        genes <- AnnotationDbi::select(org.Hs.eg.db, keys=data$Description, columns=c("ENTREZID"), keytype="SYMBOL")
        genes <- genes[(!is.na(genes$SYMBOL)) & (!is.na(genes$ENTREZID)), ]
        genes <- genes[(!duplicated(genes$SYMBOL)) & (!duplicated(genes$ENTREZID)),]

        names(data)[2] <- "SYMBOL"
        data <- merge(genes, data, by="SYMBOL")
        # After the merge, columns 1-2 are SYMBOL/ENTREZID and column 3 is the
        # remaining identifier column of the input; the rest are expression values.
        genes <- data[, 1:2]
        data <- data[, -c(1:3)]

        # Drop genes with zero variance across samples.
        zeroInd <- which(apply(data, 1, var) == 0)
        if(length(zeroInd) != 0) {
                data <- data[-zeroInd, ]
                genes <- genes[-zeroInd, ]
        }

        # Drop genes whose org.Hs.egGO entry is a single NA.
        xx <- as.list(org.Hs.egGO[genes$ENTREZID])
        haveGO <- vapply(xx, function(x) !(length(x) == 1 && is.na(x)), logical(1))
        if(any(!haveGO)){
                data <- data[haveGO, ]
                genes <- genes[haveGO, ]
        }

        dir.create("tmp", showWarnings=FALSE, recursive=TRUE)
        rownames(data) <- genes$SYMBOL
        write.table(data, paste0("tmp/",fn,".tsv"), sep='\t', col.names=NA, quote=FALSE)
        rownames(data) <- genes$ENTREZID
        saveRDS(list(expData=data, geneID=genes), paste0("tmp/",fn,".rds"))
        invisible(NULL)
}

for (gct_file in gct_files) {
        LoadData(gct_file)
        print(gct_file)
}

# The combined table is cached as "all" (tmp/all.tsv, tmp/all.rds), which the
# later cells read. It must be processed after LoadData has been defined.
LoadData("all", path="data/GTEx_Analysis_v10_RNASeQCv2.4.2_gene_tpm.gct")
print("all")


# SGCP

In [None]:
options(warn=-1)
options(future.globals.maxSize = 100 * 1024^3)
Pkgs <- c("SGCP", "org.Hs.eg.db", "AnnotationDbi")
for(p in Pkgs) suppressMessages(library(p, character.only=T))

# Stability
## Across observation units: run SGCP on each cached dataset.
gct_files <- c("adipose_subcutaneous","muscle_skeletal","skin_sun_exposed_lower_leg","whole_blood","all")
dir.create("Result/SGCP", showWarnings=FALSE, recursive=TRUE)
for (gct_file in gct_files) {
        data <- readRDS(paste0("tmp/",gct_file,".rds"))
        sgcp <- ezSGCP(expData=as.matrix(data$expData), geneID=data$geneID$ENTREZID, annotation_db="org.Hs.eg.db", eff.egs=FALSE , saveOrig=FALSE, sil=TRUE, hm=NULL)

        # Rename the gene table (SYMBOL, ENTREZID) so that it can be joined to
        # the cluster labels on "geneID". `data` is re-read on every iteration,
        # so the rename does not leak into the next ezSGCP call.
        names(data$geneID) <- c("Symbol","geneID")
        res <- merge(data$geneID, sgcp$clusterLabels, by="geneID")

        # Column 2 is the symbol; column 4 is presumably the final cluster
        # label of sgcp$clusterLabels -- confirm against the SGCP output.
        write.table(res[,c(2,4)], paste0("Result/SGCP/",gct_file,".tsv"), sep='\t', row.names=FALSE)
}

## Across variable counts: run SGCP on the first v genes of the "all" dataset.
Vars <- c(1000, 5000, 10000)
data <- readRDS(paste0("tmp/all.rds"))
# Take the Entrez IDs and build the renamed join table once, before the loop.
# Renaming data$geneID inside the loop removes the ENTREZID column, so every
# iteration after the first would pass geneID=NULL to ezSGCP.
gene_ids <- data$geneID$ENTREZID
gene_tbl <- data$geneID
names(gene_tbl) <- c("Symbol","geneID")
for (v in Vars) {
        sgcp <- ezSGCP(expData=as.matrix(data$expData[1:v,]), geneID=gene_ids[1:v], annotation_db="org.Hs.eg.db", eff.egs=FALSE , saveOrig=FALSE, sil=TRUE, hm=NULL)

        res <- merge(gene_tbl, sgcp$clusterLabels, by="geneID")

        # Column 2 is the symbol; column 4 is presumably the final cluster
        # label of sgcp$clusterLabels -- confirm against the SGCP output.
        write.table(res[,c(2,4)], paste0("Result/SGCP/",v,".tsv"), sep='\t', row.names=FALSE)
}


# 実行時間測定
options(warn=-1)
options(future.globals.maxSize = 100 * 1024^3)
Pkgs <- c("SGCP", "org.Hs.eg.db", "AnnotationDbi")
for(p in Pkgs) suppressMessages(library(p, character.only=T))
data <- readRDS("tmp/adipose_subcutaneous.rds")

# Parameter values timed in THIS run. Timings are appended to the .rds caches
# below, so a full sweep (observations: 100, 200, 400; variables: 500, 1000,
# 5000, 10000) can be built up over several runs by editing these vectors.
params_obs <- c(400)
params_var <- c(5000)
n_trials <- 10

# Run SGCP once on `buf` (a list with expData and geneID); called only for timing.
work <- function(buf) {
        set.seed(42)
        ezSGCP(expData=buf$expData, geneID=buf$geneID, annotation_db="org.Hs.eg.db", eff.egs=FALSE , saveOrig=FALSE, sil=TRUE, hm=NULL)
        return(NULL)
}

# Append `results` to the timings stored in `cache_file` (if it exists), save
# the combined table back, and return it.
append_to_cache <- function(results, cache_file) {
        if (file.exists(cache_file)) {
                results <- rbind(readRDS(cache_file), results)
        }
        saveRDS(results, file=cache_file)
        results
}

## Number of observation units (genes fixed at the first 1000)
results_obs <- do.call(
        rbind,
        lapply(params_obs, function(p) {
                trials <- replicate(n_trials, {
                        buf <- list(expData=as.matrix(data$expData[1:1000,1:p]), geneID=data$geneID$ENTREZID[1:1000])
                        elapsed <- as.numeric(system.time(work(buf))["elapsed"])
                        data.frame(param=p, elapsed=elapsed)
                }, simplify = FALSE)
                do.call(rbind, trials)
        })
)
results_obs <- append_to_cache(results_obs, "tmp_SGCP_obs.rds")

## Number of variables (observations fixed at the first 100)
# A run that raises an error is not counted and is retried. The number of
# attempts is capped because work() fixes the seed, so a failure is likely to
# repeat and an uncapped loop might never finish.
results_var <- do.call(
        rbind,
        lapply(params_var, function(p) {
                trials <- list()
                attempts <- 0
                max_attempts <- 3 * n_trials
                while (length(trials) < n_trials) {
                        if (attempts >= max_attempts) {
                                message("SGCP timing: giving up on param=", p, " after ", attempts, " attempts")
                                break
                        }
                        attempts <- attempts + 1
                        buf <- list(expData=as.matrix(data$expData[1:p,1:100]), geneID=data$geneID$ENTREZID[1:p])
                        elapsed <- tryCatch({
                                as.numeric(system.time(work(buf))["elapsed"])
                        }, error = function(e) {
                                message("SGCP timing: run failed for param=", p, ": ", conditionMessage(e))
                                NULL
                        })
                        if (!is.null(elapsed)) {
                                trials[[length(trials) + 1]] <- data.frame(param=p, elapsed=elapsed)
                        }
                }
                do.call(rbind, trials)
        })
)
results_var <- append_to_cache(results_var, "tmp_SGCP_var.rds")

# Each table goes to its own file.
write.table(results_obs, "Result/SGCP/time_obs.tsv", sep='\t', row.names=FALSE, quote=FALSE)
write.table(results_var, "Result/SGCP/time_var.tsv", sep='\t', row.names=FALSE, quote=FALSE)


# WGCNA

In [None]:
options(warn=-1)
Pkgs <- c("WGCNA", "org.Hs.eg.db", "AnnotationDbi")
for(p in Pkgs) suppressMessages(library(p, character.only=T))

options(stringsAsFactors = FALSE)
# enableWGCNAThreads()

dir.create("Result/Fig", showWarnings=FALSE, recursive=TRUE)
dir.create("tmp_WGCNA", showWarnings=FALSE, recursive=TRUE)

# Stability
## Across observation units: draw the scale-independence plot for each dataset
## so that a soft-threshold power can be chosen for it.
gct_files <- c("adipose_subcutaneous","muscle_skeletal","skin_sun_exposed_lower_leg","whole_blood","all")
ExpMats <- list()
Beta <- c(1:50)
for (gct_file in gct_files){
    data <- readRDS(paste0("tmp/",gct_file,".rds"))
    # Transpose the cached matrix before passing it to WGCNA.
    ExpMats[[gct_file]] <- t(as.matrix(data$expData))

    Sft <- pickSoftThreshold(ExpMats[[gct_file]], powerVector=Beta)
    fit_power <- Sft$fitIndices[,1]
    fit_r2 <- -sign(Sft$fitIndices[,3])*Sft$fitIndices[,2]
    pdf(paste0("Result/Fig/WGCNA_SI_",gct_file,".pdf"), width=32.4/2.54, height=20/2.54)
        # Base graphics calls draw as a side effect, so they are issued one
        # after another rather than combined with `+` and printed.
        plot(
                fit_power, fit_r2,
                xlab="Soft Threshold (power)", ylab="Scale Free Topology Model Fit,signed R^2", type="n",
                main=paste("Scale independence")
            )
        text(fit_power, fit_r2, labels=Beta, cex=0.9, col="red")
        abline(h=0.90,col="red")
    dev.off()

    print(gct_file)
}
saveRDS(ExpMats, file="tmp_WGCNA/ExpMats.rds")
# ExpMats <- readRDS("tmp_WGCNA/ExpMats.rds")
# Soft-threshold power and eigengene merge height for each dataset.
Betas <- list(adipose_subcutaneous=8,muscle_skeletal=11,skin_sun_exposed_lower_leg=11,whole_blood=32,all=4)
Thres <- list(adipose_subcutaneous=0.34,muscle_skeletal=0.34,skin_sun_exposed_lower_leg=0.32,whole_blood=0.3,all=0.3)
Adj <- list()
dir.create("Result/Fig", showWarnings=FALSE, recursive=TRUE)
dir.create("Result/WGCNA", showWarnings=FALSE, recursive=TRUE)
# Module detection and module merging run in the SAME iteration, so that
# DynamicColors, GeneTree and MEDissThres always belong to the dataset being
# merged, and every dataset gets its own figure and result table.
for (gct_file in gct_files){
    Adj[[gct_file]] <- adjacency(ExpMats[[gct_file]], power=Betas[[gct_file]])
    k <- as.vector(apply(Adj[[gct_file]], 2, sum, na.rm=T))
    pdf(paste0("Result/Fig/WGCNA_ScaleFreePlot_",gct_file,".pdf"), width=10/2.54, height=10/2.54)
        scaleFreePlot(k, main="Check scale free topology\n")
    dev.off()

    TOM <- TOMsimilarity(Adj[[gct_file]])
    DissTOM <- 1 - TOM
    GeneTree <- hclust(as.dist(DissTOM), method="average")
    DynamicMods <- cutreeDynamic(
                                    dendro=GeneTree,
                                    distM=DissTOM,
                                    deepSplit=4,
                                    pamStage=TRUE,
                                    pamRespectsDendro=TRUE,
                                    minClusterSize=30
                                )
    DynamicColors <- labels2colors(DynamicMods)
    pdf(paste0("Result/Fig/WGCNA_Dendrogram_1_",gct_file,".pdf"), width=32.4/2.54, height=20/2.54)
        plotDendroAndColors(
                                dendro=GeneTree,
                                colors=DynamicColors,
                                groupLabels="Dynamic Tree Cut",
                                dendroLabels=FALSE, hang=0.03,
                                addGuide=TRUE, guideHang=0.05,
                                main="Gene dendrogram and module colors"
                            )
    dev.off()

    # Cluster the module eigengenes and mark the merge height.
    MEList <- moduleEigengenes(ExpMats[[gct_file]], colors=DynamicColors)
    MEs <- MEList$eigengenes
    MEDiss <- 1 - cor(MEs)
    METree <- hclust(as.dist(MEDiss), method="average")
    MEDissThres <- Thres[[gct_file]]
    pdf(paste0("Result/Fig/WGCNA_ClusteringModule_",gct_file,"_",MEDissThres,".pdf"), width=32.4/2.54, height=20/2.54)
        plot(
                METree,
                main="Clustering of module eigengenes",
                xlab = "", sub = ""
            )
        abline(h=MEDissThres, col="red")
    dev.off()

    # Merge modules at this dataset's own cut height.
    Merge <- mergeCloseModules(ExpMats[[gct_file]], DynamicColors, cutHeight=MEDissThres, verbose=3)
    MergedColours <- Merge$colors
    MergedMEs <- Merge$newMEs
    png(paste0("Result/Fig/WGCNA_Dendrogram_2_",gct_file,".png"), width=960, height=480)
        plotDendroAndColors(
                                dendro=GeneTree,
                                colors=cbind(DynamicColors, MergedColours),
                                groupLabels=c("Dynamic Tree Cut", "Merged dynamic"),
                                dendroLabels=FALSE, hang=0.03,
                                addGuide=TRUE, guideHang=0.05
                            )
    dev.off()

    # One table per dataset, in the Result/WGCNA/<name>.tsv layout that the
    # quality-comparison cell reads.
    # NOTE(review): the "Symbol" column holds the column names of ExpMats,
    # i.e. whatever row names the cached expData carries -- confirm these are
    # the identifiers you want to compare.
    ColourList <- data.frame(colnames(ExpMats[[gct_file]]), MergedColours)
    colnames(ColourList) <- c("Symbol", "WGCNA")
    write.table(ColourList, paste0("Result/WGCNA/",gct_file,".tsv"), sep='\t', row.names=F)
}
## Across variable counts: scale-independence plots for the first v genes of
## the "all" dataset.
Vars <- c(1000, 5000, 10000)
Data_All <- readRDS(paste0("tmp/all.rds"))
# Indexing rows 1:v beyond the table would silently add all-NA genes.
stopifnot(max(Vars) <= nrow(Data_All$expData))
ExpMats <- list()
Beta <- c(1:50)
for (v in Vars){
    key <- as.character(v)
    ExpMats[[key]] <- t(as.matrix(Data_All$expData[1:v,]))

    Sft <- pickSoftThreshold(ExpMats[[key]], powerVector=Beta)
    fit_power <- Sft$fitIndices[,1]
    fit_r2 <- -sign(Sft$fitIndices[,3])*Sft$fitIndices[,2]
    pdf(paste0("Result/Fig/WGCNA_SI_",v,".pdf"), width=32.4/2.54, height=20/2.54)
        # Base graphics calls draw as a side effect, so they are issued one
        # after another rather than combined with `+` and printed.
        plot(
                fit_power, fit_r2,
                xlab="Soft Threshold (power)", ylab="Scale Free Topology Model Fit,signed R^2", type="n",
                main=paste("Scale independence")
            )
        text(fit_power, fit_r2, labels=Beta, cex=0.9, col="red")
        abline(h=0.90,col="red")
    dev.off()

    print(v)
}
saveRDS(ExpMats, file="tmp_WGCNA/ExpMatsV.rds")
# Soft-threshold power and eigengene merge height for each gene count.
Betas <- list("1000"=47,"5000"=21,"10000"=15)
Thres <- list("1000"=0.3,"5000"=0.3,"10000"=0.35)
dir.create("Result/Fig", showWarnings=FALSE, recursive=TRUE)
dir.create("Result/WGCNA", showWarnings=FALSE, recursive=TRUE)
for (v in Vars){
    key <- as.character(v)
    Adj <- adjacency(ExpMats[[key]], power=Betas[[key]])
    k <- as.vector(apply(Adj, 2, sum, na.rm=T))
    pdf(paste0("Result/Fig/WGCNA_ScaleFreePlot_",v,".pdf"), width=10/2.54, height=10/2.54)
        # The value returned by scaleFreePlot is printed to the console as well.
        print(scaleFreePlot(k, main="Check scale free topology\n"))
    dev.off()

    TOM <- TOMsimilarity(Adj)
    DissTOM <- 1 - TOM
    GeneTree <- hclust(as.dist(DissTOM), method="average")
    DynamicMods <- cutreeDynamic(
                                    dendro=GeneTree,
                                    distM=DissTOM,
                                    deepSplit=4,
                                    pamStage=TRUE,
                                    pamRespectsDendro=TRUE,
                                    minClusterSize=30
                                )
    DynamicColors <- labels2colors(DynamicMods)
    pdf(paste0("Result/Fig/WGCNA_Dendrogram_1_",v,".pdf"), width=32.4/2.54, height=20/2.54)
        # Plotting functions draw as a side effect; they are called directly
        # instead of being wrapped in print().
        plotDendroAndColors(
                                dendro=GeneTree,
                                colors=DynamicColors,
                                groupLabels="Dynamic Tree Cut",
                                dendroLabels=FALSE, hang=0.03,
                                addGuide=TRUE, guideHang=0.05,
                                main="Gene dendrogram and module colors"
                            )
    dev.off()

    # Cluster the module eigengenes and mark the merge height.
    MEList <- moduleEigengenes(ExpMats[[key]], colors=DynamicColors)
    MEs <- MEList$eigengenes
    MEDiss <- 1 - cor(MEs)
    METree <- hclust(as.dist(MEDiss), method="average")
    MEDissThres <- Thres[[key]]
    pdf(paste0("Result/Fig/WGCNA_ClusteringModule_",v,".pdf"), width=32.4/2.54, height=20/2.54)
        plot(
                METree,
                main="Clustering of module eigengenes",
                xlab = "", sub = ""
            )
        abline(h=MEDissThres, col="red")
    dev.off()

    Merge <- mergeCloseModules(ExpMats[[key]], DynamicColors, cutHeight=MEDissThres, verbose=3)
    MergedColours <- Merge$colors
    MergedMEs <- Merge$newMEs
    pdf(paste0("Result/Fig/WGCNA_Dendrogram_2_",v,".pdf"), width=32.4/2.54, height=20/2.54)
        plotDendroAndColors(
                                dendro=GeneTree,
                                colors=cbind(DynamicColors, MergedColours),
                                groupLabels=c("Dynamic Tree Cut", "Merged dynamic"),
                                dendroLabels=FALSE, hang=0.03,
                                addGuide=TRUE, guideHang=0.05
                            )
    dev.off()

    # One table per gene count: identifier and merged module colour.
    ColourList <- data.frame(colnames(ExpMats[[key]]), MergedColours)
    colnames(ColourList) <- c("Symbol", "Colour")
    write.table(ColourList, paste0("Result/WGCNA/",v,".tsv"), sep='\t', row.names=F)
}

# 実行時間測定
options(warn=-1)
Pkgs <- c("WGCNA", "org.Hs.eg.db", "AnnotationDbi")
for(p in Pkgs) suppressMessages(library(p, character.only=T))
data <- readRDS("tmp/adipose_subcutaneous.rds")
params_var <- c(500, 1000, 5000, 10000)
params_obs <- c(100, 200, 400)

# Run the WGCNA steps once on the matrix `buf`: soft-threshold scan, adjacency
# with power 10, TOM, dynamic tree cut and module merging at height 0.3.
# Called only so that its wall-clock time can be measured.
work <- function(buf) {
    set.seed(42)
    pickSoftThreshold(buf, powerVector=1:50)
    adjacency_mat <- adjacency(buf, power=10)
    diss_tom <- 1 - TOMsimilarity(adjacency_mat)
    gene_tree <- hclust(as.dist(diss_tom), method="average")
    dynamic_mods <- cutreeDynamic(
        dendro=gene_tree,
        distM=diss_tom,
        deepSplit=4,
        pamStage=TRUE,
        pamRespectsDendro=TRUE,
        minClusterSize=30
    )
    invisible(mergeCloseModules(buf, labels2colors(dynamic_mods), cutHeight=0.3))
}

### Number of observation units (genes fixed at the first 1000), 10 runs each
results <- do.call(
    rbind,
    lapply(params_obs, function(p) {
        secs <- vapply(seq_len(10), function(i) {
            buf <- t(as.matrix(data$expData[1:1000, 1:p]))
            as.numeric(system.time(work(buf))["elapsed"])
        }, numeric(1))
        data.frame(param=p, elapsed=secs)
    })
)
write.table(results, "Result/WGCNA/time_obs.tsv", sep='\t', row.names=FALSE, quote=FALSE)

### Number of variables (observations fixed at the first 100), 10 runs each
results <- do.call(
    rbind,
    lapply(params_var, function(p) {
        secs <- vapply(seq_len(10), function(i) {
            buf <- t(as.matrix(data$expData[1:p, 1:100]))
            as.numeric(system.time(work(buf))["elapsed"])
        }, numeric(1))
        data.frame(param=p, elapsed=secs)
    })
)
write.table(results, "Result/WGCNA/time_var.tsv", sep='\t', row.names=FALSE, quote=FALSE)


# SGCRNA

In [None]:
##### SGCRNA_ALL #####
using CSV, DataFrames
using StatsBase
using JLD2
using SGCRNAs
using CairoMakie

"""
    dot_plot(res, n, fn)

Draw the entries of `res` as dots on a grid with `n` columns and save the figure to `fn`.

Each element `v` of `res` is read as `v[1]` (mapped to marker size) and `v[2]`
(mapped to colour with the `:heat` colormap). Entries fill the grid row by row,
starting in the top-left cell, and each dot is labelled with its index in `res`.
Returns whatever `save` returns.
"""
function dot_plot(res, n, fn)
    idx = 1:length(res)
    cols = mod1.(idx, n)
    rows = cld.(idx, n)
    # Flip the row numbers so that the first entries are drawn at the top.
    rows = (maximum(rows) + 1) .- rows
    sizes_raw = [v[1] for v in res]
    colours_raw = [v[2] for v in res]

    # Rescale sizes to 6-30 px; eps() avoids 0/0 when all sizes are equal.
    smin, smax = extrema(sizes_raw)
    sizes_px = 6 .+ 24 .* (sizes_raw .- smin) ./ (smax - smin + eps())

    f = Figure(size=(800, 650))
    ax = Axis(
        f[1, 1];
        xticks=1:n, yticks=1:n,
        aspect=1, title="",
        xgridvisible=false, ygridvisible=false
    )
    hidespines!(ax); hidedecorations!(ax, grid=false)
    xlims!(ax, 0.5, n+0.5); ylims!(ax, 0.5, n+0.5)

    sc = scatter!(
        ax, cols, rows;
        markersize=sizes_px, color=colours_raw,
        colormap=:heat, colorrange=extrema(colours_raw)
    )
    text!(
        ax, cols .- 0.3, rows .- 0.3;
        text=string.(idx), align=(:right, :baseline), fontsize=10, color=:black
    )

    # Size legend for three reference values, scaled like the markers.
    vals = [0, 5, 10]
    sizes_for_legend = 6 .+ 24 .* (vals .- smin) ./ (smax - smin + eps())
    handles = [
        MarkerElement(marker=:circle, color=:black, strokecolor=:transparent, markersize=s)
        for s in sizes_for_legend
    ]
    Legend(f[1, 2], handles, string.(vals), "size value", framevisible=false)
    # Build the colorbar from the scatter plot itself so that it shows the
    # colour range actually used for the dots, not a fixed (0, 1) scale.
    Colorbar(f[1, 3], sc; label="Simpson index")

    return save(fn, f)
end

# 安定性・再現性・特異性
fl = ["adipose_subcutaneous","muscle_skeletal","skin_sun_exposed_lower_leg","whole_blood","all"]
## Across observation units: cluster each cached dataset and store the gene
## lists of the clusters in tmp_SGCRNA/<dataset>.jld2.
mkpath("tmp_SGCRNA")
for f in fl
    Data = CSV.read("tmp/"*f*".tsv", header=1, comment="#", delim='\t', DataFrame);
    CorData, GradData = CGM(Data.Column1, Matrix(Data[:,2:end]));
    clust, pos, edge_data = SpectralClustering(CorData, GradData);
    # Only the first element of the returned clustering is used as labels.
    labels = clust[1]
    gene_list = names(edge_data)
    clust_list = [gene_list[labels .== j] for j in 1:maximum(labels)]
    # The file is named after the loop variable `f`; `fn` is not defined here.
    save_object("tmp_SGCRNA/"*f*".jld2", clust_list);
end
## Across variable counts: cluster the first i genes of the "all" dataset and
## store the gene lists of the clusters in tmp_SGCRNA/<i>.jld2.
Data = CSV.read("tmp/all.tsv", header=1, comment="#", delim='\t', DataFrame);
mkpath("tmp_SGCRNA")
for i in [1000, 5000, 10000]
    CorData, GradData = CGM(Data.Column1[1:i], Matrix(Data[1:i,2:end]));
    clust, pos, edge_data = SpectralClustering(CorData, GradData);
    # Only the first element of the returned clustering is used as labels.
    labels = clust[1]
    gene_list = names(edge_data)
    clust_list = [gene_list[labels .== j] for j in 1:maximum(labels)]
    # The file is named after the gene count `i`; `fn` is not defined here.
    save_object("tmp_SGCRNA/"*string(i)*".jld2", clust_list);
end

# 実行時間測定
using CSV, DataFrames
using StatsBase
using JLD2
using SGCRNAs
Data = CSV.read("tmp/adipose_subcutaneous.tsv", header=1, comment="#", delim='\t', DataFrame);
params_obs = [100, 200, 400];
params_var = [500, 1000, 5000, 10000];

# Run the SGCRNA steps (CGM, then SpectralClustering) once and discard the
# result; called only so that its run time can be measured.
function f(buf_g, buf)
    CorData, GradData = CGM(buf_g, buf);
    clust, pos, edge_data = SpectralClustering(CorData, GradData);

    return nothing
end
# Call once before timing (presumably so that first-call compilation is not
# included in the measurements).
f(Data.Column1[1:100], Matrix(Data[1:100, 2:101]));

## Number of observation units (genes fixed at the first 1000), 10 runs each
Result_Obs = DataFrame(param=Int[], elapsed=Float64[])
for p in params_obs
    buf_g = Data.Column1[1:1000]
    buf = Matrix(Data[1:1000, 2:(p+1)])

    times = [@elapsed f(buf_g, buf) for _ in 1:10]
    append!(Result_Obs, DataFrame(param=fill(p, 10), elapsed=times))
    println(p)
end

## Number of variables (observations fixed at the first 100), 10 runs each
# Kept in its own table so that the observation timings are not discarded.
Result_Var = DataFrame(param=Int[], elapsed=Float64[]);
for p in params_var
    buf_g = Data.Column1[1:p]
    buf = Matrix(Data[1:p, 2:101])

    times = [@elapsed f(buf_g, buf) for _ in 1:10]
    append!(Result_Var, DataFrame(param=fill(p, 10), elapsed=times))
end

# Each table goes to its own file.
Result_Obs |> CSV.write("Result/SGCRNA/time_obs.tsv", delim='\t')
Result_Var |> CSV.write("Result/SGCRNA/time_var.tsv", delim='\t')

# 実行時間

In [None]:
using CSV, DataFrames
using Colors, CairoMakie

# Compare the run times of the three tools in one two-panel figure.
Tools = ["SGCRNA", "WGCNA", "SGCP"]
# Load the observation-count timings: start from the SGCRNA table and append
# one "elapsed" column per remaining tool, renamed to the tool's name.
# hcat pairs rows by position, so every table must list the same parameters
# in the same order.
Obs = CSV.read("Result/SGCRNA/time_obs.tsv", header=1, comment="#", delim='\t', DataFrame);
rename!(Obs, "elapsed" => "SGCRNA")
for i in 2:length(Tools)
    buf = CSV.read("Result/"*Tools[i]*"/time_obs.tsv", header=1, comment="#", delim='\t', DataFrame);
    rename!(buf, "elapsed" => Tools[i])
    Obs = hcat(Obs, buf[:,[Tools[i]]])
end
# Reshape to long format: one row per (param, tool, value).
Obs = stack(Obs, 2:ncol(Obs))

# Same for the variable-count timings.
Var = CSV.read("Result/SGCRNA/time_var.tsv", header=1, comment="#", delim='\t', DataFrame);
rename!(Var, "elapsed" => "SGCRNA")
for i in 2:length(Tools)
    if i == 2
        buf = CSV.read("Result/"*Tools[i]*"/time_var.tsv", header=1, comment="#", delim='\t', DataFrame);
    else
        # The third tool (SGCP) is read from time_var_buf.tsv, then padded with
        # ten rows each for 5000 and 10000 variables.
        # NOTE(review): 669600 s (= 7.75 days) looks like a placeholder for runs
        # that did not finish -- confirm. These values lie above the upper y
        # limit (10000) set on the axes below.
        buf = CSV.read("Result/"*Tools[i]*"/time_var_buf.tsv", header=1, comment="#", delim='\t', DataFrame);
        buf = vcat(buf, DataFrame(param=vcat(repeat([5000],10),repeat([10000],10)), elapsed=vcat(repeat([669600],10),repeat([669600],10))))
    end
    rename!(buf, "elapsed" => Tools[i])
    Var = hcat(Var, buf[:,[Tools[i]]])
end
Var = stack(Var, 2:ncol(Var))

# One colour per tool; the palette has one more entry than there are tools.
ToolTypes = unique(Obs.variable)
Palette = [colorant"rgb(255,75,0)", colorant"rgb(3,175,122)", colorant"rgb(0,90,255)", colorant"rgb(77,196,255)"]
ColourMap = Dict(ToolTypes[i] => Palette[i] for i in eachindex(ToolTypes))
fig = Figure(size=(1200, 600), fontsize=14, markersize=16)
ax = []
# Left panel: time vs. number of observations; right panel: time vs. number
# of variables. Both use a log10 y axis and are linked on y.
push!(ax, Axis(fig[1, 1]; xlabel="# of observation", ylabel="elapsed time (s)", title="", xgridvisible=false, ygridvisible=false, yscale=log10, limits=(0,nothing,1,10000)))
push!(ax, Axis(fig[1, 2]; xlabel="# of variable", ylabel="", title="", xgridvisible=false, ygridvisible=false, yscale=log10, limits=(0,nothing,1,10000)))
linkyaxes!(ax[1], ax[2])
for t in ToolTypes
    scatter!(ax[1], Obs.param[Obs.variable .== t], Obs.value[Obs.variable .== t]; color=ColourMap[t], label=string(t), strokewidth=0)
    scatter!(ax[2], Var.param[Var.variable .== t], Var.value[Var.variable .== t]; color=ColourMap[t], label=string(t), strokewidth=0)
end
# The legend is built from the labels of the left panel.
fig[1, 3] = Legend(fig, ax[1]; framevisible=false)
save("Result/Fig/elapsed.pdf", fig)


# 安定性・再現性・特異性

In [None]:
# SGCRNA
using CSV, DataFrames
using JLD2
using Colors, CairoMakie

"""
    load_data(t::String, f::String)

Read the cluster table `Result/<t>/<f>.tsv` and group its `Symbol` column by
the values of the second column.

Returns a tuple `(groups, labels)`: `labels` is the sorted vector of distinct
values of column 2, and `groups[k]` holds the symbols whose column-2 value
equals `labels[k]`.
"""
function load_data(t::String, f::String)
    path = "Result/" * t * "/" * f * ".tsv"
    table = CSV.read(path, header=1, comment="#", delim='\t', DataFrame)
    assignment = table[:, 2]
    labels = sort(unique(assignment))
    groups = map(labels) do lab
        table.Symbol[assignment .== lab]
    end
    return (groups, labels)
end
# Datasets compared against the "all" clustering.
fl = [
    "adipose_subcutaneous","muscle_skeletal","skin_sun_exposed_lower_leg","whole_blood",
    "1000", "5000", "10000"
]
Size = Dict("SGCRNA" => (2000,750), "WGCNA" => (2000,500), "SGCP" => (2000,500))
for t in ["SGCRNA","WGCNA","SGCP"]
    ## Simpson (overlap) coefficient between every cluster of the "all" run
    ## and every cluster of each dataset in `fl`.
    # SGCRNA clusters come from the .jld2 files; the other tools from the TSV
    # tables read by load_data.
    if t != "SGCRNA"
        All, Src_Clust = load_data(t, "all")
        All = Set.(All)
    else
        All = Set.(load_object("tmp_SGCRNA/all.jld2"))
        Src_Clust = collect(1:length(All))
    end
    N_All = length(All)
    Result = Matrix{Float64}[]
    ClustName = []
    for f in fl
        if t != "SGCRNA"
            Buf, Clust = load_data(t, f)
            Buf = Set.(Buf)
            push!(ClustName, Clust)
        else
            Buf = Set.(load_object("tmp_SGCRNA/"*f*".jld2"))
            push!(ClustName, collect(1:length(Buf)))
        end
        N_Buf = length(Buf)
        # Rows: clusters of dataset f; columns: clusters of "all".
        Result_Buf = zeros(N_Buf, N_All)
        for i in 1:N_All
            for j in 1:N_Buf
                Result_Buf[j, i] = length(All[i] ∩ Buf[j]) / min(length(All[i]), length(Buf[j]))
            end
        end

        push!(Result, Result_Buf)
    end

    fig = Figure(size=Size[t], fontsize=20)
    ax = Axis[]
    for i in 1:length(Result)
        if t == "SGCP"
            push!(ax, Axis(fig[1, i]; xlabel=fl[i], ylabel="", title="", xgridvisible=false, ygridvisible=false, xticks=(1:size(Result[i],1), string.(ClustName[i])), xticklabelrotation=π/4, yticklabelsvisible=false, yticksvisible=false))
        elseif t == "WGCNA"
            push!(ax, Axis(fig[1, i]; xlabel=fl[i], ylabel="", title="", xgridvisible=false, ygridvisible=false, xticklabelsvisible=false, xticksvisible=false, yticklabelsvisible=false, yticksvisible=false))
        else
            push!(ax, Axis(fig[1, i]; xlabel=fl[i], ylabel="", title="", xgridvisible=false, ygridvisible=false, xticklabelrotation=π/4, yticklabelsvisible=false, yticksvisible=false))
        end
        # Fix the colour range to (0, 1) so that all panels share one scale
        # and agree with the single colorbar drawn below.
        heatmap!(ax[i], Result[i], colormap=:heat, colorrange=(0, 1))
    end
    # Only the first panel carries the y axis, which refers to the "all" clusters.
    ax[1].ylabel = "All"
    ax[1].yticklabelsvisible = true
    ax[1].yticksvisible = true
    if t == "SGCP"
        ax[1].yticks = (1:size(Result[1],2), string.(Src_Clust))
    elseif t == "WGCNA"
        ax[1].yticklabelsvisible = false
        ax[1].yticksvisible = false
    end

    Colorbar(fig[:, end+1], limits=(0, 1), colormap=:heat)
    save("Result/Fig/"*t*"_quality.pdf", fig)
end

