# SGCP

In [None]:
options(warn=-1)
Pkgs <- c("WGCNA", "SGCP", "clusterProfiler", "enrichplot", "org.Hs.eg.db", "ggplot2", "igraph")
for(p in Pkgs) suppressMessages(library(p, character.only=T))

# GO over-representation analysis for one gene set.
#
# x: character vector of gene symbols (keyType is "SYMBOL").
# Returns (invisibly) whatever enrichGO() produces for the human annotation
# database, testing all ontologies at once with BH-adjusted p-values and
# 0.05 cut-offs on both p-value and q-value.
Analysis_GO <- function(x) {
    enrichment <- enrichGO(
        gene = x,
        OrgDb = org.Hs.eg.db,
        keyType = "SYMBOL",
        ont = "ALL",    # one of "BP", "CC", "MF", "ALL"
        pvalueCutoff = 0.05,
        pAdjustMethod = "BH",
        qvalueCutoff = 0.05
    )
    invisible(enrichment)
}
# Save a dot plot of a GO enrichment result for one SGCP cluster.
#
# res: enrichment result accepted by clusterProfiler::dotplot().
# m:   cluster label, embedded in the output file name.
# The file is written under Result/Fig/; the value of ggsave() is returned.
PlotFig <- function(res, m) {
    # Optional variant kept for reference (plots the simplified term set):
    # res_simple <- simplify(res)
    # Fig <- clusterProfiler::dotplot(res_simple)
    # ggsave(paste0("Result/Fig/WGCNA_GO_cluser-",m,"_dot_simple.pdf"), plot=Fig)
    out_file <- paste0("Result/Fig/SGCP_GO_cluser-", m, "_dot_all.pdf")
    ggsave(out_file, plot = clusterProfiler::dotplot(res))
}

# ---- Prepare the expression matrix and run SGCP ----
# NOTE(review): assumes the first column of the TSV is named "Symbol" and that
# three annotation columns sit between it and the per-sample values -- confirm
# against the step that writes this file.
ExpMat <- read.csv("Result/Norm/normalizedCounts_coding.tsv", sep = "\t", row.names = NULL)

# Symbol -> Entrez mapping; keep only complete pairs, then only the first
# occurrence of every symbol and of every Entrez ID.
genes <- AnnotationDbi::select(org.Hs.eg.db, keys = ExpMat$Symbol, columns = c("ENTREZID"), keytype = "SYMBOL")
genes <- genes[!(is.na(genes$SYMBOL) | is.na(genes$ENTREZID)), ]
genes <- genes[!(duplicated(genes$SYMBOL) | duplicated(genes$ENTREZID)), ]

# Attach the Entrez IDs to the expression rows. After the merge the first two
# columns are SYMBOL and ENTREZID; columns 1-5 are dropped to leave the values.
colnames(ExpMat)[1] <- "SYMBOL"
ExpMat <- merge(genes, ExpMat, by = "SYMBOL")
genes <- ExpMat[, c(1, 2)]
ExpMat <- ExpMat[, -seq_len(5)]

# Remove genes whose expression has zero variance across samples.
row_var <- apply(ExpMat, 1, var)
is_const <- !is.na(row_var) & row_var == 0
if (any(is_const)) {
    ExpMat <- ExpMat[!is_const, ]
    genes <- genes[!is_const, ]
}

# Remove genes without any GO annotation (their entry is a single NA).
go_map <- as.list(org.Hs.egGO[genes$ENTREZID])
has_go <- vapply(go_map, function(x) !(length(x) == 1 && is.na(x)), logical(1))
if (!all(has_go)) {
    ExpMat <- ExpMat[has_go, ]
    genes <- genes[has_go, ]
}
rownames(ExpMat) <- genes$ENTREZID

# Cluster the genes with SGCP.
sgcp <- ezSGCP(
    expData = as.matrix(ExpMat),
    geneID = genes$ENTREZID,
    annotation_db = "org.Hs.eg.db",
    eff.egs = FALSE,
    saveOrig = FALSE,
    sil = TRUE,
    hm = NULL
)

# Join cluster labels back to symbols; columns 2 and 4 of the merged table are
# kept and written out.
colnames(genes) <- c("Symbol", "geneID")
res <- merge(genes, sgcp$clusterLabels, by = "geneID")[, c(2, 4)]
write.table(res, file = "Result/SGCP.tsv", sep = "\t", row.names = FALSE)

# GO
# Run the GO enrichment for every SGCP cluster and save one dot plot per
# cluster. Failures are reported by try() and do not stop the loop.
for (m in unique(res$finalClusters)) {
    Gene <- res$Symbol[res$finalClusters == m]

    result <- try(Analysis_GO(Gene), silent = FALSE)
    # inherits() is the reliable class test; a NULL result (no enrichment
    # object produced) is skipped instead of being sent to the plotting step.
    if (inherits(result, "try-error") || is.null(result)) {
        next
    }
    try(PlotFig(result, m), silent = FALSE)
}

# module-phenomenon correlation
# Label the expression rows by gene symbol and bring the cluster table into the
# same row order. `genes` and `res` both carry the symbol in a column named
# "Symbol" at this point, so that is the column that must be used here.
rownames(ExpMat) <- genes$Symbol
res <- res[match(rownames(ExpMat), res$Symbol), ]
# Module labels are "M<cluster>"; genes without a row in `res` end up as "MNA".
colours <- setNames(paste0("M", as.character(res$finalClusters)), res$Symbol)
MEs <- moduleEigengenes(t(ExpMat), colors=colours, excludeGrey=TRUE)

# NOTE(review): rows of Phen are assumed to be in the same sample order as the
# columns of ExpMat -- nothing here aligns them; confirm.
Phen <- read.csv("SraRunTable.csv", row.names=1, sep=',')
Phen <- Phen[,c(1,2,4)]
ModCor <- bicor(MEs$eigengenes, Phen, use="pairwise.complete.obs")
ModP <- corPvalueStudent(ModCor, nSamples = nrow(MEs$eigengenes))
# BH adjustment over all module x trait p-values at once.
AdjP <- matrix( p.adjust(as.vector(ModP), method="BH"),  nrow=nrow(ModP), ncol=ncol(ModP),  dimnames=dimnames(ModP))
LabelMat <- matrix(paste0("r: ",signif(ModCor, 2),"\n(FDR: ",signif(AdjP, 2), ")"), nrow=nrow(ModCor), ncol=ncol(ModCor), dimnames=dimnames(ModCor))
pdf(paste0("Result/Fig/SGCP_CorPhenMod.pdf"), width=20/2.54, height=32.4/2.54)
    par(mar=c(5, 10, 2, 3))
    # Eigengene columns are named "ME" + module label; strip "MEM" so the axis
    # shows the bare cluster identifier.
    labeledHeatmap(Matrix=ModCor,
            xLabels=names(Phen), yLabels=sub("^MEM", "", names(MEs$eigengenes)),
            ySymbols=sub("^MEM", "", names(MEs$eigengenes)),
            colorLabels=FALSE,
            colors=blueWhiteRed(50),
            #    textMatrix=signif(ModCor, 2),
            textMatrix=LabelMat,
            setStdMargins=FALSE,
        #    cex.text = 0.5,
        #    zlim = c(-1,1),
            main=paste("Module-phenomenon relationships")
    )
dev.off()


# WGCNA

In [None]:
options(warn=-1)
Pkgs <- c("WGCNA", "clusterProfiler", "enrichplot", "org.Hs.eg.db", "ggplot2")
for(p in Pkgs) suppressMessages(library(p, character.only=T))

options(stringsAsFactors = FALSE)
# enableWGCNAThreads()

# GO over-representation analysis for the genes of one module.
#
# x: character vector of gene symbols.
# The enrichGO() result (human annotation, all ontologies, BH correction,
# p- and q-value cut-offs of 0.05) is returned invisibly.
Analysis_GO <- function(x) {
    go_result <- enrichGO(
        gene = x,
        keyType = "SYMBOL",
        OrgDb = org.Hs.eg.db,
        ont = "ALL",    # one of "BP", "CC", "MF", "ALL"
        pAdjustMethod = "BH",
        pvalueCutoff = 0.05,
        qvalueCutoff = 0.05
    )
    invisible(go_result)
}
# Save a dot plot of a GO enrichment result for one WGCNA module.
#
# res: enrichment result accepted by clusterProfiler::dotplot().
# c:   module colour, embedded in the output file name.
# The file is written under Result/Fig/; the value of ggsave() is returned.
PlotFig <- function(res, c) {
    # Optional variant kept for reference (plots the simplified term set):
    # res_simple <- simplify(res)
    # Fig <- clusterProfiler::dotplot(res_simple)
    # ggsave(paste0("Result/Fig/WGCNA_GO_cluser-",c,"_dot_simple.pdf"), plot=Fig)
    out_file <- paste0("Result/Fig/WGCNA_GO_cluser-", c, "_dot_all.pdf")
    ggsave(out_file, plot = clusterProfiler::dotplot(res))
}

# Load the expression table (first column becomes the row names), drop the
# first three remaining columns and transpose so that rows are samples and
# columns are genes.
ExpMat <- read.csv("Result/Norm/normalizedCounts_coding.tsv", sep="\t", row.names=1)
ExpMat <- t(ExpMat[,4:ncol(ExpMat)])

# Scan candidate soft-threshold powers and plot the signed scale-free fit.
Beta <- c(1:50)
Sft <- pickSoftThreshold(ExpMat, powerVector=Beta)
pdf(paste0("Result/Fig/WGCNA_ScaleIndependence.pdf"), width=32.4/2.54, height=20/2.54)
    # Base graphics draw as a side effect and return NULL, so the calls are
    # issued one after another rather than combined with `+` and print().
    SftPower <- Sft$fitIndices[,1]
    SftFit <- -sign(Sft$fitIndices[,3])*Sft$fitIndices[,2]
    plot(
            SftPower, SftFit,
            xlab="Soft Threshold (power)", ylab="Scale Free Topology Model Fit,signed R^2", type="n",
            main=paste("Scale independence")
        )
    text(SftPower, SftFit, labels=Beta, cex=0.9, col="red")
    abline(h=0.90, col="red")
dev.off()

# ---- Network construction and initial module detection ----
# Soft-threshold power used for the adjacency matrix.
Beta <- 34
Adj <- adjacency(ExpMat, power = Beta)

# Per-gene connectivity (column sums of the adjacency) for the scale-free check.
k <- as.vector(apply(Adj, MARGIN = 2, FUN = sum, na.rm = TRUE))
pdf("Result/Fig/WGCNA_ScaleFreePlot.pdf", width = 20 / 2.54, height = 20 / 2.54)
    scaleFreePlot(k, main = "Check scale free topology\n")
dev.off()

# Topological-overlap dissimilarity and average-linkage gene tree.
TOM <- TOMsimilarity(Adj)
DissTOM <- 1 - TOM
GeneTree <- hclust(as.dist(DissTOM), method = "average")

# Dynamic tree cut, then numeric module labels converted to colours.
DynamicMods <- cutreeDynamic(
    dendro = GeneTree,
    distM = DissTOM,
    deepSplit = 4,
    pamStage = TRUE,
    pamRespectsDendro = TRUE,
    minClusterSize = 30
)
DynamicColors <- labels2colors(DynamicMods)

pdf("Result/Fig/WGCNA_Dendrogram_1.pdf", width = 32.4 / 2.54, height = 20 / 2.54)
    plotDendroAndColors(
        dendro = GeneTree,
        colors = DynamicColors,
        groupLabels = "Dynamic Tree Cut",
        dendroLabels = FALSE,
        hang = 0.03,
        addGuide = TRUE,
        guideHang = 0.05,
        main = "Gene dendrogram and module colors"
    )
dev.off()

# ---- Merge modules whose eigengenes are close ----
# Eigengenes of the initial modules, clustered on 1 - correlation.
MEList <- moduleEigengenes(ExpMat, colors = DynamicColors)
MEs <- MEList$eigengenes
MEDiss <- 1 - cor(MEs)
METree <- hclust(as.dist(MEDiss), method = "average")

# Cut height used both for the guide line and for the merge below.
MEDissThres <- 0.3
pdf("Result/Fig/WGCNA_ClusteringModule.pdf", width = 32.4 / 2.54, height = 20 / 2.54)
    plot(METree, main = "Clustering of module eigengenes", xlab = "", sub = "")
    abline(h = MEDissThres, col = "red")
dev.off()

Merge <- mergeCloseModules(ExpMat, DynamicColors, cutHeight = MEDissThres, verbose = 3)
MergedColours <- Merge$colors
MergedMEs <- Merge$newMEs

# Dendrogram with the colours before and after merging.
pdf("Result/Fig/WGCNA_Dendrogram_2.pdf", width = 32.4 / 2.54, height = 20 / 2.54)
    plotDendroAndColors(
        dendro = GeneTree,
        colors = cbind(DynamicColors, MergedColours),
        groupLabels = c("Dynamic Tree Cut", "Merged dynamic"),
        dendroLabels = FALSE,
        hang = 0.03,
        addGuide = TRUE,
        guideHang = 0.05
    )
dev.off()

# Gene -> merged module colour table.
ColourList <- data.frame(Symbol = colnames(ExpMat), Colour = MergedColours)
write.table(ColourList, "Result/WGCNA.tsv", sep = "\t", row.names = FALSE)

# GO
# Run the GO enrichment for every merged module and save one dot plot per
# module. Failures are reported by try() and do not stop the loop.
# The loop variable is not called `c`, so base::c() is not masked afterwards.
for (module_colour in unique(MergedColours)) {
    Gene <- ColourList$Symbol[ColourList$Colour == module_colour]

    result <- try(Analysis_GO(Gene), silent = FALSE)
    # inherits() is the reliable class test; a NULL result (no enrichment
    # object produced) is skipped instead of being sent to the plotting step.
    if (inherits(result, "try-error") || is.null(result)) {
        next
    }
    try(PlotFig(result, module_colour), silent = FALSE)
}

# module-phenomenon correlation
# Correlate the merged module eigengenes with three columns of the run table
# and draw the result as an annotated heat map.
# NOTE(review): rows of Phen are assumed to be in the same sample order as the
# rows of MergedMEs -- nothing here aligns them; confirm.
Phen <- read.csv("SraRunTable.csv", row.names = 1, sep = ",")[, c(1, 2, 4)]

ModCor <- bicor(MergedMEs, Phen, use = "pairwise.complete.obs")
ModP <- corPvalueStudent(ModCor, nSamples = nrow(MergedMEs))

# BH adjustment over all module x trait p-values at once, reshaped back to the
# layout of ModP.
AdjP <- array(
    p.adjust(as.vector(ModP), method = "BH"),
    dim = c(nrow(ModP), ncol(ModP)),
    dimnames = dimnames(ModP)
)

# Cell text: correlation on the first line, FDR in brackets on the second.
LabelMat <- array(
    paste0("r: ", signif(ModCor, 2), "\n(FDR: ", signif(AdjP, 2), ")"),
    dim = c(nrow(ModCor), ncol(ModCor)),
    dimnames = dimnames(ModCor)
)

ModuleNames <- names(MergedMEs)
pdf("Result/Fig/WGCNA_CorPhenMod.pdf", width = 20 / 2.54, height = 32.4 / 2.54)
    par(mar = c(5, 10, 2, 3))
    labeledHeatmap(
        Matrix = ModCor,
        xLabels = names(Phen),
        yLabels = ModuleNames,
        ySymbols = ModuleNames,
        colorLabels = FALSE,
        colors = blueWhiteRed(50),
        # textMatrix = signif(ModCor, 2),
        textMatrix = LabelMat,
        setStdMargins = FALSE,
        # cex.text = 0.5,
        # zlim = c(-1, 1),
        main = paste("Module-phenomenon relationships")
    )
dev.off()


# SGCRNA

In [None]:
using CSV, DataFrames
using JLD2
using RCall
using SGCRNAs


# Load the normalised expression table; column `Symbol` holds the gene names
# and the values used for clustering start at column 5.
Data = CSV.read("Result/Norm/normalizedCounts_coding.tsv", DataFrame; header=1, comment="#", delim='\t');

# Correlation / gradient pre-processing followed by spectral clustering.
CorData, GradData = CGM(Data.Symbol, Matrix(Data[:, 5:end]));
# save_object("Result/coding_predata.jld2", (CorData, GradData));
# CorData, GradData = load_object("Result/coding_predata.jld2");
clust, pos, edge_data = SpectralClustering(CorData, GradData);
# save_object("Result/coding_scdata.jld2", (clust, pos, edge_data));
# clust, pos, edge_data = load_object("Result/coding_scdata.jld2");


# set parameter
# Print the largest label of every clustering, then the size of each module of
# the first clustering as "<module>-<size>".
foreach(labels -> println(maximum(labels)), clust)
for module_id in 1:maximum(clust[1])
    println(module_id, "-", count(==(module_id), clust[1]))
end

# Clustering index used from here on, and its number of modules.
d = 1
k = maximum(clust[d]);
GeneClust = DataFrame(Symbol=names(edge_data), Module=clust[d]);
# edge_data |> CSV.write("Result/Score_coding.tsv", delim='\t', writeheader=true)
CSV.write("Result/SGCRNA.tsv", GeneClust; delim='\t', writeheader=true)

# draw network
# Build the network over all k modules and cache it on disk.
nw, new_pos, cnctdf, new_clust, score = SGCRNAs.SetNetwork(edge_data, clust[d], pos, il=collect(1:k));
save_object("Result/coding_nwdata.jld2", (nw, new_pos, cnctdf, new_clust, score, edge_data))
# (nw, new_pos, cnctdf, new_clust, score, edge_data) = load_object("Result/coding_nwdata.jld2"); k = maximum(new_clust);

# NOTE(review): this call passes edge_mode=:ALL while every other call uses
# :A / :N / :P -- confirm :ALL is an accepted value.
SGCRNAs.DrawNetwork("Result/Fig/bulk_AllNetWork_All-0.8.png", nw, new_pos, cnctdf, new_clust, k, node_scores=score, edge_mode=:ALL, edge_threshold=0.8)

# Only the genes listed below get a text label; every other node label is empty.
MarkerGenes = ["BARX1","BARX2","COL14A1","GDF5","MMP1","PRG4","COL10A1","IHH","MMP13","PTH1R","RUNX2","SP7"]
NodeLabel = map(x -> x in MarkerGenes ? x : "", names(edge_data));

# Unlabelled overview, then one labelled figure per edge mode at threshold 0.9.
DrawNetwork("Result/Fig/bulk_AllNetWork_coding.png", nw, new_pos, cnctdf, new_clust, k, node_scores=score, edge_mode=:A)
for (tag, mode) in (("All", :A), ("Negative", :N), ("Positive", :P))
    DrawNetwork("Result/Fig/bulk_$(tag)NetWork_coding-0.9.png", nw, new_pos, cnctdf, new_clust, k, node_scores=score, node_labels=NodeLabel, edge_mode=mode, edge_threshold=0.9)
end

# Sorted, de-duplicated names of all genes that appear as an edge endpoint.
GeneList = sort(unique(vcat(cnctdf.e1, cnctdf.e2)))
# GO analysis
# Gene symbols and their module labels are handed to R through dedicated
# variables, so the Julia-side `new_clust` returned by SetNetwork is not
# overwritten with an RObject. The labels come from the selected clustering
# `clust[d]`, the same vector `k` was computed from.
ro = RObject(names(edge_data));
go_clust = RObject(clust[d]);
R"""
    Pkgs <- c("clusterProfiler", "enrichplot", "org.Hs.eg.db", "ggplot2")
    for(p in Pkgs) suppressMessages(library(p, character.only=TRUE))

    options(ggrepel.max.overlaps = Inf)

    # GO over-representation test for one vector of gene symbols.
    Analysis_GO <- function(x) {
        enrichGO(
                    gene=x, keyType="SYMBOL", OrgDb=org.Hs.eg.db, ont="ALL", #"BP","CC","MF","ALL"
                    pAdjustMethod="BH", pvalueCutoff=0.05, qvalueCutoff=0.05
                )
    }
    # Save the dot plot of one module's enrichment result.
    PlotFig <- function(res, i) {
            # res_simple <- simplify(res)
            # Fig <- clusterProfiler::dotplot(res_simple)
            # ggsave(paste0("Result/Fig/SGCRNA_GO_cluser-",i,"_dot_simple.pdf"), plot=Fig)
            Fig <- clusterProfiler::dotplot(res)
            ggsave(paste0("Result/Fig/SGCRNA_GO_cluser-",i,"_dot_all.pdf"), plot=Fig)
    }

    # Transfer the Julia vectors once instead of on every iteration.
    ed <- $ro
    nc <- $go_clust
    for(i in seq_len($k))
    {
        commun <- ed[which(nc == i)]
        result <- try(Analysis_GO(commun), silent=FALSE)
        # Skip modules whose enrichment failed or produced no result object.
        if (inherits(result, "try-error") || is.null(result)) next
        try(PlotFig(result, i), silent=FALSE)
    }
"""

# module-phenomenon correlation
Phen = CSV.read("SraRunTable.csv", header=1, comment="#", delim=',', DataFrame);
# NOTE(review): sorting presumably brings the runs into the same order as the
# sample columns of Data -- confirm.
sort!(Phen);
# Keep only the clustered genes, in the order of names(edge_data).
Data = innerjoin(Data, DataFrame(Symbol=names(edge_data)), on=:Symbol, order=:right);
# `clust[d]` is the label vector paired with names(edge_data), i.e. with the
# rows of Data after the join above. `new_clust` must not be used here: it has
# been rebound (to an RObject for the GO step) earlier in this cell.
CorPhenMod(Data[:,5:end], Phen[:,[2,3,5]], clust[d], "Result/Fig/SGCRNA_CorPhenMod")


# draw network
# Module 8: label only the marker genes listed below.
nw, new_pos, cnctdf, new_clust, score = SetNetwork(edge_data, clust[d], pos, il=[8]);
NodeLabel = unique(sort(vcat(cnctdf.e1,cnctdf.e2)))
NodeLabel = [x in ["BARX1","BARX2","COL14A1","GDF5","MMP1","PRG4","COL10A1","IHH","MMP13","PTH1R","RUNX2","SP7"] ? x : "" for x in NodeLabel]
DrawNetwork("Result/Fig/bulk_AllNetWork_coding_module8.png", nw, new_pos, cnctdf, new_clust, k, node_scores=score, edge_mode=:A)
# One labelled figure per edge mode at threshold 0.9.
for (tag, mode) in (("All", :A), ("Negative", :N), ("Positive", :P))
    DrawNetwork("Result/Fig/bulk_$(tag)NetWork_coding-0.9_module8.png", nw, new_pos, cnctdf, new_clust, k, node_scores=score, node_labels=NodeLabel, edge_mode=mode, edge_threshold=0.9)
end

# Module 10: label the highest-scoring genes.
nw, new_pos, cnctdf, new_clust, score = SetNetwork(edge_data, clust[d], pos, il=[10]);
NodeLabel = unique(sort(vcat(cnctdf.e1,cnctdf.e2)))
# NOTE(review): assumes `score` is ordered like the sorted node names -- confirm.
# first(..., 10) takes at most ten entries, so a module with fewer than ten
# nodes no longer raises a BoundsError.
GeneList = NodeLabel[first(sortperm(score, rev=true), 10)]
NodeLabel = [x in GeneList ? x : "" for x in NodeLabel]
DrawNetwork("Result/Fig/bulk_AllNetWork_coding_module10.png", nw, new_pos, cnctdf, new_clust, k, node_scores=score, edge_mode=:A)
for (tag, mode) in (("All", :A), ("Negative", :N), ("Positive", :P))
    DrawNetwork("Result/Fig/bulk_$(tag)NetWork_coding-0.9_module10.png", nw, new_pos, cnctdf, new_clust, k, node_scores=score, node_labels=NodeLabel, edge_mode=mode, edge_threshold=0.9)
end



In [None]:
# sankey
using CSV, DataFrames
using SankeyMakie
using CairoMakie

# Read the gene -> cluster table of each method. The SGCRNA table is read from
# its original location when that file exists, otherwise from the file written
# by the SGCRNA cell.
sgcrna_path = isfile("Result/GeneCluster_coding.tsv") ? "Result/GeneCluster_coding.tsv" : "Result/SGCRNA.tsv"
SGCRNA = CSV.read(sgcrna_path, header=1, comment="#", delim='\t', DataFrame);
WGCNA = CSV.read("Result/WGCNA.tsv", header=1, comment="#", delim='\t', DataFrame);
SGCP = CSV.read("Result/SGCP.tsv", header=1, comment="#", delim='\t', DataFrame);

# The code below addresses the cluster column of each table by the method's
# name. When a table does not already have such a column, its second column
# (the cluster label in the files written by the pipeline cells) is renamed.
for (tbl, col) in ((SGCP, :SGCP), (SGCRNA, :SGCRNA), (WGCNA, :WGCNA))
    String(col) in names(tbl) || rename!(tbl, 2 => col)
end

# One row per gene; genes missing from a method get the label "NA".
Data = outerjoin(SGCP, SGCRNA, WGCNA, on=:Symbol)
Data = coalesce.(Data, "NA")
Data[!, [:SGCP,:SGCRNA]] = string.(Data[:, [:SGCP,:SGCRNA]])
# Prefix the two numeric labellings so node names stay distinct across methods.
Data.SGCP = "SGCP_" .* Data.SGCP
Data.SGCRNA = "SGCRNA_" .* Data.SGCRNA
Label = vcat(sort(unique(Data.SGCP)), sort(unique(Data.SGCRNA)), sort(unique(Data.WGCNA)))

# Gene counts for every SGCP -> SGCRNA and SGCRNA -> WGCNA label pair.
G2S = combine(groupby(Data[:,[:SGCP, :SGCRNA]], [:SGCP, :SGCRNA]), nrow)
sort!(G2S)
S2W = combine(groupby(Data[:,[:SGCRNA, :WGCNA]], [:SGCRNA, :WGCNA]), nrow)
sort!(S2W)

# Links are (source node index, target node index, weight) with indices into Label.
src = [findfirst(==(i), Label) for i in G2S.SGCP]
dst = [findfirst(==(i), Label) for i in G2S.SGCRNA]
cnct = [(src[i], dst[i], G2S.nrow[i]) for i in eachindex(src)]
src = [findfirst(==(i), Label) for i in S2W.SGCRNA]
dst = [findfirst(==(i), Label) for i in S2W.WGCNA]
cnct = vcat(cnct, [(src[i], dst[i], S2W.nrow[i]) for i in eachindex(src)])

f, ax, s = sankey(cnct, nodelabels=Label, figure=(; size = (2000, 6000)))
hidedecorations!(ax)
hidespines!(ax)
save("Result/Fig/Sankey.pdf", f)


# Element comparison heatmap

In [None]:
using CSV, DataFrames
using Colors, CairoMakie

# Compare every SGCRNA module with every module of the other tools using the
# overlap coefficient |A ∩ B| / min(|A|, |B|) of their gene sets.
Tools = ["WGCNA", "SGCP"]
SGCRNA = CSV.read("Result/SGCRNA.tsv", DataFrame; header=1, comment="#", delim='\t');
N_Sgcrna = maximum(SGCRNA[:, 2])

# Gene set of each SGCRNA module, indexed by module number 1..N_Sgcrna.
SgcrnaSets = [Set(SGCRNA.Symbol[SGCRNA[:, 2] .== c]) for c in 1:N_Sgcrna]

Result = []       # one (tool modules x SGCRNA modules) matrix per tool
ClustName = []    # sorted module labels per tool, matching the matrix rows
for tool in Tools
    tbl = CSV.read("Result/$(tool).tsv", DataFrame; header=1, comment="#", delim='\t')
    labels = sort(unique(tbl[:, 2]))
    tool_sets = [Set(tbl.Symbol[tbl[:, 2] .== lab]) for lab in labels]

    overlap = zeros(length(labels), N_Sgcrna)
    for (col, a) in enumerate(SgcrnaSets), (row, b) in enumerate(tool_sets)
        overlap[row, col] = length(a ∩ b) / min(length(a), length(b))
    end

    push!(ClustName, labels)
    push!(Result, overlap)
end

# One heat map per tool, side by side: tool modules on x, SGCRNA modules on y.
fig_width = sum(size(r, 1) for r in Result) * 30
fig_height = size(Result[1], 2) * 6
fig = Figure(size=(fig_width, fig_height), fontsize=16)
ax = []
for (i, overlap) in enumerate(Result)
    n_mod = size(overlap, 1)
    panel = Axis(fig[1, i];
                 xlabel="", ylabel="", title=Tools[i],
                 xgridvisible=false, ygridvisible=false,
                 xticks=(1:n_mod, string.(ClustName[i])), xticklabelrotation=π/4,
                 yticklabelsvisible=false, yticksvisible=false)
    push!(ax, panel)
    heatmap!(panel, overlap, colormap=:heat)
end

# Only the left-most panel shows the shared y axis.
ax[1].ylabel = "SGCRNA"
ax[1].yticklabelsvisible = true
ax[1].yticksvisible = true
Colorbar(fig[:, end+1], limits=(0, 1), colormap=:heat)

# Width of the first panel relative to the module counts of the two tools.
n_first = size(Result[1], 1)
n_second = size(Result[2], 1)
colsize!(fig.layout, 1, Relative(n_first / (n_first + n_second)))
save("Result/Fig/ModuleSimilarity.pdf", fig)
