In [1]:
using Pkg
using Eirene
using MAT
using CSV
using GraphPlot
using LightGraphs
using DataFrames
using JLD
using Plots
using Measures
using StatsPlots
using RCall
using PyCall
using Statistics
using StatsBase
using HypothesisTests
using LinearAlgebra
using MultivariateStats

# Call R functions
R"library(TDA)"
R"source('~/Dropbox/Top Sim and Homog/Scripts/bottleneck_computations_functions2.R')"
R"library(igraph)"
R"library(ggplot2)"
R"source('~/Dropbox/Top Sim and Homog/Scripts/local_network_functions.R')"




│ Attaching package: ‘igraph’
│ 
│ The following objects are masked from ‘package:stats’:
│ 
│     decompose, spectrum
│ 
│ The following object is masked from ‘package:base’:
│ 
│     union
│ 
└ @ RCall /Users/annsizemore/.julia/packages/RCall/ffM0W/src/io.jl:113


RObject{VecSxp}
$value
function (Xdata, Ydata, filename) 
{
    df <- data.frame(xvar = Xdata, yvar = Ydata)
    plt1 <- ggplot(df, aes(x = xvar, y = yvar)) + geom_smooth(method = lm) + 
        theme_bw()
    ggplot2::ggsave(paste(filename, "1.pdf", sep = ""), device = "pdf", 
        width = 4, height = 4)
    print("saved 1")
    plt2 <- ggplot(df, aes(x = xvar, y = yvar)) + stat_density2d(geom = "tile", 
        aes(fill = ..density..^0.25, alpha = 1), contour = FALSE) + 
        scale_fill_gradientn(colours = colorRampPalette(c("white", 
            blues9))(256)) + coord_fixed() + geom_smooth(method = lm) + 
        theme_bw()
    pdf(paste(filename, "2.pdf", sep = ""))
    print(plt2)
    dev.off()
    print("saved 2")
}

$visible
[1] FALSE



In [2]:
## Functions

"""
    toWeightedAdj_byweight(network_und, node_weights)

Build a symmetric weighted adjacency matrix from a network and per-node weights.

`network_und` is broadcast-multiplied by `node_weights`; every mirrored pair of
entries `(i, j)` / `(j, i)` of that product is then replaced by the smaller of
the two. A HIGHER node weight is taken to mean "born earlier", which is why the
minimum is the value that is kept.
"""
function toWeightedAdj_byweight(network_und,node_weights)
    nodeCount = length(node_weights)
    scaled = network_und .* node_weights
    symmetric = deepcopy(scaled)
    # Walk the upper triangle (diagonal included) and mirror the smaller entry.
    for row in 1:nodeCount, col in row:nodeCount
        symmetric[row, col] = min(scaled[row, col], scaled[col, row])
        symmetric[col, row] = symmetric[row, col]
    end
    return symmetric
end

# To calculate the bettiCurves from the barcodes
"""
    bettiCurveFromBarcode(barcode_array, nNodes, nmats, maxDim)

Turn persistence barcodes into per-step curves.

`barcode_array[m, d]` is read as a table of bars for matrix `m` and dimension
index `d`, births in column 1 and deaths in column 2. Births (and deaths that
are `<= nNodes`) are used directly as array indices, so they must be
integer-valued and lie inside the curve arrays.

Returns the tuple `(bettiCurve, birthCurve, deathCurve, bettiBar)`:
- `bettiCurve` (`nmats x (nNodes+1) x maxDim`): number of bars covering each
  step. A bar with death `> nNodes` covers `birth:nNodes+1`, any other bar
  covers `birth:death` (both ends included).
- `birthCurve`, `deathCurve` (`nmats x nNodes x maxDim`): number of bars born /
  dying at each step; bars with death `> nNodes` record no death.
- `bettiBar` (`nmats x maxDim`): summed bar lengths, where a death `> nNodes`
  counts as `nNodes + 1`.
"""
function bettiCurveFromBarcode(barcode_array,nNodes,nmats,maxDim)
    nNodes = Int(nNodes)
    nmats = Int(nmats)
    maxDim = Int(maxDim)

    bettiBar = zeros(nmats, maxDim)
    bettiCurve = zeros(nmats, nNodes + 1, maxDim)
    birthCurve = zeros(nmats, nNodes, maxDim)
    deathCurve = zeros(nmats, nNodes, maxDim)

    for dimn in 1:maxDim, matn in 1:nmats
        bars = barcode_array[matn, :][dimn]
        totalLength = 0

        for barn in 1:size(bars, 1)
            birth = bars[barn, 1]
            death = bars[barn, 2]
            birthStep = Int(birth)

            birthCurve[matn, birthStep, dimn] += 1

            if death > nNodes
                # Bar never dies within the filtration: it stays alive to the last step.
                bettiCurve[matn, birthStep:(nNodes + 1), dimn] .+= 1
                totalLength = totalLength + (nNodes + 1 - birth)
            else
                deathStep = Int(death)
                bettiCurve[matn, birthStep:deathStep, dimn] .+= 1
                deathCurve[matn, deathStep, dimn] += 1
                totalLength = totalLength + (death - birth)
            end
        end

        bettiBar[matn, dimn] = totalLength
    end

    return bettiCurve, birthCurve, deathCurve, bettiBar
end


## Calculating edge density
"""
    calculateEdgeQuantity(jadj_array)

Count the edges present at each filtration step.

`jadj_array` is a stack of square matrices (`nNodes x nNodes x nmats`) whose
entries are the filtration step at which an edge appears. For matrix `m` and
step `k`, the result `[m, k]` is half the number of entries `<= k` (each
undirected edge appears twice), minus `nNodes / 2` to discount the diagonal.

The diagonal is assumed to hold zeros (self-loops), so all `nNodes` diagonal
entries pass the `<= k` test at every step. The correction used to be the
literal `35`, which is only right for 70 nodes.

Returns an `nmats x nNodes` `Float64` matrix.
"""
function calculateEdgeQuantity(jadj_array)
    nNodes = size(jadj_array, 1)
    nmats = size(jadj_array, 3)
    edgeQuantity = zeros(nmats, nNodes)

    # Each of the nNodes zero-valued diagonal entries contributes 1/2 to the count.
    selfLoopOffset = nNodes / 2

    for matn in 1:nmats
        adj = view(jadj_array, :, :, matn)
        for noden in 1:nNodes
            entriesPresent = count(x -> x <= noden, adj)
            edgeQuantity[matn, noden] = entriesPresent / 2 - selfLoopOffset
        end
    end

    return edgeQuantity
end

"""
    calculateDegreesFiltration(jadj_array, nNodes)

Compute every node's degree at every filtration step.

`jadj_array` is a stack of matrices (`n x n x nmats`) whose entries are the
step at which an edge appears. For matrix `m`, step `k` and node `v`, the
result `[v, k, m]` is the number of entries `w` in column `v` with
`0 < w <= k`.

Entries equal to `2n` (the value this function used to zero out first) are
larger than every step `k <= n`, so they are never counted. The input is no
longer modified: earlier versions overwrote those entries in the caller's array.
Entries are assumed to be non-negative.

The `nNodes` argument is accepted for compatibility but ignored; the node count
is taken from `size(jadj_array, 1)`, as before.

Returns an `n x n x nmats` `Float64` array.
"""
function calculateDegreesFiltration(jadj_array,nNodes)
    nodeCount = size(jadj_array, 1)
    nmats = size(jadj_array, 3)
    degree_array = zeros(nodeCount, nodeCount, nmats)

    for matn in 1:nmats, noden in 1:nodeCount, node in 1:nodeCount
        column = view(jadj_array, :, node, matn)
        # An edge is present at step `noden` when its birth step is in (0, noden].
        degree_array[node, noden, matn] = count(w -> 0 < w <= noden, column)
    end

    return degree_array
end


## Making weights from order
"""
    orderToWeights(s_0_array, nNodes)

Convert node orderings into per-node positions.

Each row of `s_0_array` is an ordering of the labels `1:nNodes`. In the result
(a `Float64` array of the same size), entry `[r, v]` is the position at which
label `v` first occurs in row `r`. A label missing from a row raises an error,
because `nothing` cannot be stored in the `Float64` output.
"""
function orderToWeights(s_0_array,nNodes)
    weights = zeros(size(s_0_array))
    for rowIdx in 1:size(s_0_array, 1)
        ordering = s_0_array[rowIdx, :]
        for rawLabel in 1:nNodes
            label = Int(rawLabel)
            weights[rowIdx, label] = findfirst(isequal(label), ordering)
        end
    end
    return weights
end

"""
    calculateTheoreticaMaxDistance(s_wei_array, n)

Return, for every row of `s_wei_array`, its infinity-norm distance to the
FIRST row.

The result has length `n`. Entries beyond the number of rows of `s_wei_array`
are `0.0`; they used to be uninitialised memory. If `n` is smaller than the
number of rows, a `BoundsError` is raised, as before.
"""
function calculateTheoreticaMaxDistance(s_wei_array,n)
    # Zero-filled rather than `undef`, so unused trailing slots are deterministic.
    theo_dist = zeros(Float64, n)
    for i1 in 1:size(s_wei_array, 1)
        theo_dist[i1] = norm(s_wei_array[1, :] .- s_wei_array[i1, :], Inf)
    end
    return theo_dist
end

## Making Diagrams from barcode for TDA in R
"""
    makeDiagramFromBarcode(barcode_array, graphn, maxDim; nNodes = 70)

Stack the barcodes of graph `graphn` into one three-column diagram matrix
`[dimension birth death]`, the form handed to the R bottleneck routines.

For each dimension index `d in 1:maxDim`, `barcode_array[graphn, d]` is read as
a table with births in column 1 and deaths in column 2:
- deaths greater than `nNodes` (e.g. `Inf`) are replaced by `nNodes + 1`;
- a dimension with no bars contributes the single placeholder row `[d 0 0]`.

The stored barcodes are left untouched: clamping is done on a copy. Earlier
versions overwrote the deaths inside `barcode_array`.

`nNodes` defaults to 70, the value that used to be hard-coded. With
`maxDim < 1` an empty `Any[]` is returned.
"""
function makeDiagramFromBarcode(barcode_array,graphn,maxDim; nNodes = 70)
    pieces = []
    for d in 1:maxDim
        bars = barcode_array[graphn, d]
        nbars = size(bars, 1)

        if nbars > 0
            clamped = copy(bars)
            clamped[clamped[:, 2] .> nNodes, 2] .= nNodes + 1
            push!(pieces, hcat(d * ones(nbars), clamped))
        else
            push!(pieces, [d 0 0])
        end
    end
    # Only `maxDim` (a handful of) pieces, so splatting is cheap; `vcat()` gives `Any[]`.
    return vcat(pieces...)
end




"""
    computeBNDistances_glob(barcode_array)

Bottleneck distances between each original graph and its reordered copies.

Rows of `barcode_array` are taken in blocks of 101: the first row of each block
(rows 1, 102, ..., 10000) is the reference, and the 100 rows after it are
compared against it. Distances come from the R function `computeBNDistance`
(expected to be loaded in the R session), once per dimension index 1..3.

Returns a `10100 x 3` matrix; rows of the reference graphs stay zero.
"""
function computeBNDistances_glob(barcode_array)
    nReps = 101
    maxDim = 3
    distanceBN_array = zeros(10100, 3)

    for nG in 1:101:10100
        diagO = makeDiagramFromBarcode(barcode_array, nG, maxDim)

        for nR in 1:(nReps - 1)
            diagR = makeDiagramFromBarcode(barcode_array, nG + nR, maxDim)
            for nD in 1:3
                distanceBN_array[nG + nR, nD] = R"computeBNDistance($diagO,$diagR,$nD)"
            end
        end
    end

    return distanceBN_array
end


            
"""
    computeBNDistances_globSampled(barcode_array)

Bottleneck distances between randomly paired members of the same block.

`barcode_array` is treated as 100 consecutive blocks of 101 rows. For every
block, 100 times over, two distinct rows are drawn at random from the block and
compared with the R function `computeBNDistance` (expected to be loaded in the
R session) for dimension indices 1..3.

Returns a `10000 x 3` matrix, 100 consecutive rows per block.
"""
function computeBNDistances_globSampled(barcode_array)
    nReps = 101
    nGraphs = 100
    maxDim = 3
    distanceBN_array = zeros(10000, 3)

    for nG in 1:nGraphs
        a = (nG - 1) * nReps + 1                 # first row of this block
        blockRows = collect(a:(a + 100))
        outOffset = (nG - 1) * (nReps - 1)       # rows already filled by earlier blocks

        for runi in 1:100
            p = sample(blockRows, 2, replace = false)
            diaga = makeDiagramFromBarcode(barcode_array, p[1], maxDim)
            diagb = makeDiagramFromBarcode(barcode_array, p[2], maxDim)

            for nD in 1:3
                distanceBN_array[outOffset + runi, nD] = R"computeBNDistance($diaga,$diagb,$nD)"
            end
        end
    end

    return distanceBN_array
end

            
            
            
"""
    computeBNDistances_Sampled(barcode_array, nGraphs)

Bottleneck distances between 10000 random pairs of graphs.

Each of the 10000 draws picks two distinct rows from `1:nGraphs` and compares
their diagrams with the R function `computeBNDistance` (expected to be loaded
in the R session) for dimension indices 1..3.

Returns a `10000 x 3` matrix, one row per draw.
"""
function computeBNDistances_Sampled(barcode_array,nGraphs)
    maxDim = 3
    distanceBN_array = zeros(10000, 3)
    population = collect(1:nGraphs)

    for runi in 1:size(distanceBN_array)[1]
        p = sample(population, 2, replace = false)
        diaga = makeDiagramFromBarcode(barcode_array, p[1], maxDim)
        diagb = makeDiagramFromBarcode(barcode_array, p[2], maxDim)

        for nD in 1:3
            distanceBN_array[runi, nD] = R"computeBNDistance($diaga,$diagb,$nD)"
        end
    end

    return distanceBN_array
end
            

"""
    computeBNDistances_local(barcode_array)

Bottleneck distances between each original graph and its locally reordered
copies.

Rows of `barcode_array` are taken in blocks of 2416: the first row of each
block (rows 1, 2417, ..., 45905) is the reference, and the 2415 rows after it
are compared against it using the R function `computeBNDistance` (expected to
be loaded in the R session) for dimension indices 1..3. The reference row index
is printed as each block starts.

Returns a `48320 x 3` matrix; rows of the reference graphs stay zero.
"""
function computeBNDistances_local(barcode_array)
    nReps = 2416
    maxDim = 3
    distanceBN_array = zeros(48320, 3)

    for nG in 1:2416:48320
        diagO = makeDiagramFromBarcode(barcode_array, nG, maxDim)
        println(nG)

        for nR in 1:(nReps - 1)
            diagR = makeDiagramFromBarcode(barcode_array, nG + nR, maxDim)
            for nD in 1:3
                distanceBN_array[nG + nR, nD] = R"computeBNDistance($diagO,$diagR,$nD)"
            end
        end
    end

    return distanceBN_array
end
    
    
"""
    plotBarcode(allPIs, nNodes, graphN, maxDim, fontSize)

Draw the barcode of graph `graphN` as stacked horizontal bars and return the
plot.

`allPIs[graphN, d]` is read as a table of bars for dimension index `d`, births
in column 1 and deaths in column 2. Within a dimension the bars are sorted by
birth; dimensions are stacked bottom-up and coloured blue, green, red (so
`maxDim` may be at most 3). The x-axis is limited to `(0, nNodes)` and
`fontSize` sets the tick fonts.

The plot is displayed once after each dimension has been added.

Earlier versions ignored `allPIs` and read the global `barcode_array` instead;
the bars are now taken from the argument.
"""
function plotBarcode(allPIs,nNodes,graphN,maxDim,fontSize)
    nNodes = Int(nNodes)
    graphn = Int(graphN)
    maxDim = Int(maxDim)

    colors = [:blue :green :red]
    pbar = plot(1:6, zeros(6), c = :black)
    offset = 0      # number of bars already drawn for lower dimensions

    for dim in 1:maxDim
        bars = allPIs[graphn, dim]
        bars = bars[sortperm(bars[:, 1]), :]
        nbars = size(bars, 1)

        for k in 1:nbars
            height = k + offset
            # Draw on `pbar` explicitly instead of relying on the current plot.
            plot!(pbar, [bars[k, 1], bars[k, 2]], [height, height], c = colors[dim],
                legend = false, xlim = (0, nNodes), ytickfont = font(fontSize),
                xtickfont = font(fontSize))
        end

        display(pbar)
        offset += nbars
    end

    return pbar
end

            
"""
    strictLTvector(x)

Return the entries strictly below the diagonal of matrix `x` as a vector,
column by column (for each column `j`, rows `j+1` to the last).
"""
function strictLTvector(x)
    nRows = size(x)[1]
    nCols = size(x)[2]
    xVec = eltype(x)[]
    for j in 1:(nCols - 1), i in (j + 1):nRows
        push!(xVec, x[i, j])
    end
    return xVec
end
            
"""
    strictLTvector3D(x)

Return the strictly-below-diagonal entries of every slice `x[:, :, d]` as one
vector: slices in order of `d`, and within a slice column by column (for each
column `j`, rows `j+1` to the last).
"""
function strictLTvector3D(x)
    nRows = size(x)[1]
    nCols = size(x)[2]
    nSlices = size(x)[3]
    xVec = eltype(x)[]
    for d in 1:nSlices, j in 1:(nCols - 1), i in (j + 1):nRows
        push!(xVec, x[i, j, d])
    end
    return xVec
end

strictLTvector3D (generic function with 1 method)

In [3]:
## Processing output -- Betti Curves
## For local reordering
## Load everything
# This cell sets the run parameters, loads the saved barcodes of the
# local-reordering experiment and computes their bottleneck distances.
# NOTE(review): despite the heading, no Betti curves are computed in this cell.

####### ---- stuff to define ------ #####
# maxDim: number of homology dimension indices analysed (used by later cells).
maxDim = 3
yLim = 6         # 70 for propprob, 6 for RG and abssin, 250 for ER, 80 for pa, 60 spgr
# graph_name: base name of the result files under Results/ and of saved figures.
graph_name = "NF_RG_ep015_1218"
###### ---------------------------- ######

## Load Stuff
graph_name_0 = "$(graph_name)"
graph_name_local = "$(graph_name)_local"




# Disabled alternative: read previously saved bottleneck distances from a .mat
# file instead of recomputing them.
#if pick == "old"
#    dict5 = matread("Results/$(graph_name_local)_bottleneckDistances.mat")
#    dmat_local = dict5["distanceMat"]
#else
# Load the barcodes and compute distances to each block's original graph.
# computeBNDistances_local hard-codes 20 blocks of 2416 rows (48320 in total)
# and prints the first row index of every block as it goes.
z = load("Results/$(graph_name_local)_pis.jld")
barcode_array = z["barcode_array"]
dmat_local = computeBNDistances_local(barcode_array)
#end

println("loaded :)")

1
2417
4833
7249
9665
12081
14497
16913
19329
21745
24161
26577
28993
31409
33825
36241
38657
41073
43489
45905
loaded :)


In [4]:
## Local reordering analyses
## Processing output -- Bottleneck Distances

######## ----- ######
# Colour triples (one colour per dimension index) for original / reordered data.
colors_orig = [RGB(0.1,0.1,1) RGB(0.1,1,0.1) RGB(1,0.1,0.1)]
colors_r = [RGB(0.3,0.5,1) RGB(0.23,0.9,0.4) RGB(0.9,0.3,0.5)]
######## ----- ######

# Node orderings and experiment sizes saved alongside the graphs.
dict6 = matread("Results/$(graph_name_local).mat")
s_0_array_local = dict6["s_0_array"]
nNodes = Int(dict6["nNodes"])
nGraphs = Int(dict6["nGraphs"])
# Rows per graph block. The swap loop further down consumes one row per block
# plus one per node pair (i0 < j0), i.e. 1 + 70*69/2 = 2416 for 70 nodes.
nReps = 2416
dict6 = nothing      # release the MAT dictionary before loading the barcodes

# Barcodes of every graph in the experiment (one row per graph).
z1 = load("Results/$(graph_name_local)_pis.jld")
barcode_array_local = z1["barcode_array"]
z1 = nothing
GC.gc()

# `println` (not `print`) so the message is not glued to the number that follows.
println("loaded relevant data")
println(nGraphs)
println(nReps)


# Turn each ordering into per-node positions, then measure how far every
# ordering is (infinity norm) from the ordering in the first row.
s_wei_array_local = orderToWeights(s_0_array_local,nNodes)
# Size the result from the data instead of the literal 48320, so a different
# number of rows can neither leave unfilled entries nor overflow the vector.
theo_dist_local = calculateTheoreticaMaxDistance(s_wei_array_local, size(s_wei_array_local, 1))


# Row indices 1, 1+nReps, 1+2nReps, ... are the first row of each block of
# nReps rows (treated as the original graphs); all other rows are reorderings.
inds1 = collect(1:nReps:(nReps*nGraphs))
barcode_array_orig = barcode_array_local[inds1,:]
indsr = setdiff(1:(nReps*nGraphs),inds1)
indsr = Int.(indsr)
barcode_array_r = barcode_array_local[indsr,:]

## Swap thing
# Rearrange the flat per-row results into symmetric node-by-node matrices:
# entry (i0, j0) of graph r holds the value stored for the row that this loop
# associates with the node pair (i0, j0).
theo_dist_swap = zeros(nNodes, nNodes, nGraphs)
bdist_swap = zeros(nNodes, nNodes, nGraphs, maxDim)


# `iter` walks through the rows of theo_dist_local / dmat_local. One row is
# skipped at the start of every graph (presumably the unswapped original --
# this lines up only while each block holds 1 + nNodes*(nNodes-1)/2 rows).
iter = 1
for r in 1:nGraphs
    iter += 1

    for i0 in 1:nNodes, j0 in (i0 + 1):nNodes
        theo_dist_swap[i0, j0, r] = theo_dist_local[iter]
        theo_dist_swap[j0, i0, r] = theo_dist_swap[i0, j0, r]
        for d0 in 1:maxDim
            bdist_swap[i0, j0, r, d0] = dmat_local[iter, d0]
            bdist_swap[j0, i0, r, d0] = bdist_swap[i0, j0, r, d0]
        end
        iter += 1
    end
end

println(size(bdist_swap))
# Average over dimension indices first, then over graphs.
bdist_swap_mean1 = dropdims(mean(bdist_swap; dims = 4); dims = 4)
println(size(bdist_swap_mean1))
bdist_swap_mean = dropdims(mean(bdist_swap_mean1; dims = 3); dims = 3)
# Similarity = 1 - (observed distance / theoretical maximum of graph 1).
# 0/0 entries (e.g. the diagonal) become NaN and are set to full similarity.
simRatio = 1 .- bdist_swap_mean ./ theo_dist_swap[:, :, 1]
simRatio[isnan.(simRatio)] .= 1
println(size(bdist_swap_mean))


#### Compute topological overlap
# Reload the MAT file (its dictionary was released earlier) and keep only the
# adjacency matrix at the first row of every block of nReps.
dict6 = matread("Results/$(graph_name_local).mat")
badj_array_all = dict6["badj_array"]
origs = collect(1:nReps:size(badj_array_all,3))
badj_array = badj_array_all[:,:,origs]
badj_array_all = nothing
dict6 = nothing
GC.gc()

R"source('~/Dropbox/Top Sim and Homog/Scripts/local_network_functions.R')"
## Compute topological overlap on everything
# Look the R function up once; it is the same object on every iteration.
R_tolap = R"calculate_top_overlap"
tolap_all = zeros(nNodes, nNodes, nGraphs)
for i0 in 1:nGraphs
    # rcopy already returns a fresh Julia array, so no extra copy is needed.
    tolap_all[:,:,i0] = rcopy(R_tolap(badj_array[:,:,i0]))
end

println("Finished this block :)")

loaded relevant data20
2416
(70, 70, 20, 3)
(70, 70, 20)
(70, 70)
Finished this block :)


In [5]:
# Plot first heatmaps
gr()

# Panel a: bottleneck distance (dimension index 1) against the theoretical
# maximum distance, drawn over a grey identity line.
p2a = plot(0:nNodes, 0:nNodes; c = RGB(0.64,0.64,0.64), title = "Distance plots")
scatter!(
    p2a, theo_dist_local, dmat_local[:,1];
    title = graph_name,
    xlabel = "Theoretical max distance",
    ylabel = "Bottleneck Distance",
    aspect_ratio = :equal,
)

# Panels b-d: theoretical distance of graph 1, mean bottleneck distance and
# similarity ratio as node-by-node heatmaps.
# NOTE(review): bdist_swap_mean and simRatio are averaged over dimension
# indices and graphs, although the titles say "Dim1" -- confirm the labels.
p2b = heatmap(theo_dist_swap[:,:,1]; yflip = true, aspect_ratio = :equal,
    title = "Theo Distance", color = :blues)
p2c = heatmap(bdist_swap_mean; yflip = true, aspect_ratio = :equal,
    title = "$(graph_name) BN Distance Dim1", color = :Greys_r)
p2d = heatmap(simRatio; yflip = true, aspect_ratio = :equal,
    title = "Similarity Ratio Dim1", color = :tempo)

# Panels e-f: topological overlap of the first two graphs.
p2e = heatmap(tolap_all[:,:,1]; yflip = true, aspect_ratio = :equal, title = "Tolap ex 1")
p2f = heatmap(tolap_all[:,:,2]; yflip = true, aspect_ratio = :equal, title = "Tolap ex 2")

p2all = plot(p2a, p2b, p2c, p2d, p2e, p2f; layout = (3,2), margin = 10mm, size = (900,800))
#display(p2all)

savefig(p2all, "$(graph_name)_local.pdf")

println("finished saving")

finished saving


In [6]:
## Run through and fit linear models to distance data
# For every graph and dimension index, regress bottleneck distance on the
# theoretical distance (no intercept), using only node pairs whose theoretical
# distance is at most 14.

record_slopes_mat = zeros(nGraphs, maxDim)
Tdist = strictLTvector(theo_dist_swap[:,:,1])

inds_of_interest = findall(Tdist .<= 14)
X = reshape(Tdist[inds_of_interest], length(inds_of_interest), 1)

for r0 in 1:Int(nGraphs), d0 in 1:maxDim
    pairDistances = strictLTvector(bdist_swap[:,:,r0,d0])
    Ycol = reshape(pairDistances[inds_of_interest], length(inds_of_interest), 1)

    # For the near neighbor fits we can assume b=0 in Y=mX+b
    slopeFit = llsq(X, Ycol; bias = false)
    record_slopes_mat[r0,d0] = slopeFit[1]
end

# Now we have fits in record_slopes_mat which is nGraphs x maxDim
# Make boxplots

println(mean(record_slopes_mat; dims = 1))

[0.115723 0.0104598 0.0]


In [None]:
### Scatter and box plots

# First calculate degrees of original graph and strengths of similarity graph
# Recall bdist_swap is the nNodes x nNodes x nGraphs x maxDim array of distances
sim_array = 1 .- bdist_swap./theo_dist_swap
sim_array[isnan.(sim_array)] .= 0
sim_weighted_degree_array = dropdims(sum(sim_array,dims = 1), dims = 1)
badj_degree_array = dropdims(sum(badj_array, dims = 1), dims = 1)

# Pairwise (strictly lower-triangular) values, graph by graph.
tolap_all_vec = strictLTvector3D(tolap_all)
sim_matrices = 1 .- bdist_swap_mean1./theo_dist_swap
sim_matrices_vec = strictLTvector3D(sim_matrices)


# Panel a: similarity (mean over dimension indices) against topological overlap.
p3a = scatter(reshape(sim_matrices_vec,length(sim_matrices_vec),1),reshape(tolap_all_vec,length(tolap_all_vec),1),
    markeralpha = 0.2,xlabel = "Top Similarity mean", ylabel = "Tolap", legend = false)

# Panel b: distribution of the fitted local slopes, one box per dimension index.
p3b = boxplot([1.0],record_slopes_mat[:,1], c = colors_orig[1],markersize = 2)
boxplot!([2.0],record_slopes_mat[:,2], c = colors_orig[2],markersize = 2)
boxplot!([3.0],record_slopes_mat[:,3], title = "Local Slope Fits", xlabel = "Dimension",
    ylabel = "Slope", legend = false,c = colors_orig[3], markersize = 2, xlim = (0,4), size = (300,600), framestyle = :box)

# Panels c-e: bottleneck distance per theoretical distance 1..nNodes-1, one
# panel per dimension index. Each boxplot! call draws on the plot created just
# before its loop, so the statement order matters.
p3c = plot(0:nNodes,0:nNodes,c = RGB(0.64,0.64,0.64),title = "Distance plots dim1", legend = false, aspect_ratio = :equal)
for i0 in 1:(nNodes - 1)
    keep1 = dmat_local[theo_dist_local.==i0,1]
    boxplot!([i0],keep1,linewidth = 0.5, markersize = 0.5, c = RGB(0.1,0.2,0.3))
end

p3d = plot(0:nNodes,0:nNodes,c = RGB(0.64,0.64,0.64),title = "Distance plots dim2", legend = false, aspect_ratio = :equal)
for i0 in 1:(nNodes - 1)
    keep1 = dmat_local[theo_dist_local.==i0,2]
    boxplot!([i0],keep1,linewidth = 0.5, markersize = 0.5, c = RGB(0.1,0.2,0.3))
end

p3e = plot(0:nNodes,0:nNodes,c = RGB(0.64,0.64,0.64),title = "Distance plots dim3", legend = false, aspect_ratio = :equal)
for i0 in 1:(nNodes - 1)
    keep1 = dmat_local[theo_dist_local.==i0,3]
    boxplot!([i0],keep1,linewidth = 0.5, markersize = 0.5, c = RGB(0.1,0.2,0.3))
end


# Panel f: the same, for the distance averaged over the three dimension indices.
p3f = plot(0:nNodes,0:nNodes,c = RGB(0.64,0.64,0.64),title = "Distance plots avg", legend = false, framestyle = :box)
dmat_local_meanDim = mean(dmat_local,dims = 2)
for i0 in 1:(nNodes - 1)
    keep1 = dmat_local_meanDim[theo_dist_local.==i0,1]
    boxplot!([i0],keep1,linewidth = 0.5, markersize = 0.5, c = RGB(0.1,0.2,0.3), ylim = (0,20),frame = true,
        ylabel = "BN Distance", framestyle = :box)
end


# `layout` (not `grid`, which styles grid lines) arranges the six panels 3 x 2.
p3all = plot(p3a,p3b,p3c,p3d,p3e,p3f, layout = (3,2))
#display(p3all)
savefig("$(graph_name)_local_boxplots.pdf")

# Save ggplot file
R_plotGGplot = R"plotGGplot"
#R_plotGGplot(sim_matrices_vec,tolap_all_vec,"$(graph_name)_local_ggscatter")

println("Done saving plots")

In [None]:
# Community structure of the similarity-ratio matrix, computed by R helpers.
# Both R functions are looked up by name in the R session (presumably defined
# in the sourced local_network_functions.R -- confirm).
R_communities_wu =  R"calculate_communities_wu"
R_calculate_connected_components = R"calculate_connected_components"

# Index of the example graph whose adjacency matrix is exported by a later cell.
ex1 = 1
# calculate on just one instance of sim ratio
simRatio_comms_listR = R_communities_wu(simRatio)   # simRatio for averaged or simRatio_1 for one instance
# The R result is indexed as a list: element 1 is stored as the community
# assignment, element 2 as Q (presumably the modularity score -- confirm).
simRatio_comms = rcopy(simRatio_comms_listR[1])
Q = rcopy(simRatio_comms_listR[2])

# Row sums of the similarity matrix: total similarity attached to each node.
node_strength = sum(simRatio,dims = 2)

# Cell output: the largest value in the community assignment.
maximum(simRatio_comms)

In [None]:


# Export the example adjacency matrix together with the community results to
# "<graph_name>_simratiocomms.mat" in the current working directory.
matwrite("$(graph_name)_simratiocomms.mat", Dict(
	"badj" => badj_array[:,:,ex1],
	"simRatio_comms" => simRatio_comms,
    "node_sim_strength" => node_strength,
    "Q" => Q,
    "simRatio" => simRatio
))


# NOTE(review): `G` is not assigned anywhere else in the visible code; this
# only binds it to `nothing` before the garbage-collection pass.
G = nothing
GC.gc()
println("done!")

In [None]:
## Only show the subplots we use in the main figures
# Combines four panels built by earlier cells (p2c, p2d, p3f, p3b) into one
# figure and writes it to "<graph_name>_local_all.pdf".


# Three rows: `a` and `b` each take a full row; the third row holds `c` (60% of
# the width) next to `d` (20% of the width). Panels are presumably assigned to
# the slots in the order they are passed to `plot`.
l = @layout [ a; b; c{0.6w} d{0.2w}]
pall = plot(p2c,p2d,p3f,p3b, layout = l,size = (1300,1600),tickfont=font(28))


# No plot is passed to savefig, so the current plot is saved (here `pall`,
# the plot created just above).
savefig("$(graph_name)_local_all.pdf")