## Analyses for pairwise swaps

In [1]:

# Import packages and load in helper functions
include("helper_functions.jl")


│ Attaching package: ‘igraph’
│ 
│ The following objects are masked from ‘package:stats’:
│ 
│     decompose, spectrum
│ 
│ The following object is masked from ‘package:base’:
│ 
│     union
│ 
└ @ RCall /Users/annsizemore/.julia/packages/RCall/ffM0W/src/io.jl:113


computeBettiDistances_glob (generic function with 1 method)

## Load in the data

In [2]:

####### ---- parameters to define ------ #####
# maxDim : number of dimensions (columns of the distance matrix) analysed below
# yLim   : 70 for propprob, 6 for RG and abssin, 250 for ER, 80 for pa, 60 spgr
# NOTE(review): yLim is not referenced anywhere in the visible notebook, and the
# value 6 does not match the "250 for ER" hint although graph_name is an ER graph.
maxDim = 3
yLim = 6
graph_name = "NF_ER_p04_1218"
###### ---------------------------- ######

# File-name stems derived from the graph name.
graph_name_0 = "$(graph_name)"
graph_name_local = "$(graph_name)_local"

# Read the stored barcodes and turn them into distances with the helper
# computeBNDistances_local (defined outside this notebook).
z = load("Results/$(graph_name_local)_pis.jld")
barcode_array = z["barcode_array"]
dmat_local = computeBNDistances_local(barcode_array)

println("loaded :)")

1
2417
4833
7249
9665
12081
14497
16913
19329
21745
24161
26577
28993
31409
33825
36241
38657
41073
43489
45905
loaded :)


In [3]:
# Load in reordering data
#
# Reads the node orderings and graph metadata from the .mat results file and the
# barcodes from the .jld file, then releases the intermediate dictionaries.

# Per-dimension colours for original and reordered data.
colors_orig = [RGB(0.1,0.1,1) RGB(0.1,1,0.1) RGB(1,0.1,0.1)]
colors_r = [RGB(0.3,0.5,1) RGB(0.23,0.9,0.4) RGB(0.9,0.3,0.5)]


# Alternative: read precomputed bottleneck distances instead of recomputing them.
#dict5 = matread("Results/$(graph_name_local)_bottleneckDistances.mat")
#dmat_local = dict5["distanceMat"]
dict6 = matread("Results/$(graph_name_local).mat")
s_0_array_local = dict6["s_0_array"]
# The .mat scalars are converted to Int so they can be used as sizes and indices.
nNodes = Int(dict6["nNodes"])
nGraphs = Int(dict6["nGraphs"])
# Each graph contributes one original ordering plus one reordering per unordered
# node pair; this is exactly how the reshaping loop further down consumes the rows.
# For nNodes = 70 this gives 2416, the value that used to be hard-coded here.
nReps = binomial(nNodes, 2) + 1
dict6 = nothing
z1 = load("Results/$(graph_name_local)_pis.jld")
barcode_array_local = z1["barcode_array"]

# Drop references to the large containers so the GC can reclaim them.
# NOTE(review): dict7 is never assigned in the visible notebook; kept in case it
# is left over from an earlier session.
dict7 = nothing
z1 = nothing
GC.gc()
# println (not print) so the message is not fused with the nGraphs value below.
println("loaded relevant data")
println(nGraphs)
println(nReps)


# Create a weight function on the nodes induced by the ordering
s_wei_array_local = orderToWeights(s_0_array_local, nNodes)

# Total number of rows: nReps entries for each of the nGraphs graphs
# (48320 for the 20 x 2416 data set this notebook was written for).
# NOTE(review): assumes the second argument of calculateTheoreticaMaxDistance is
# this total count - confirm against the helper's definition.
nTotal = nReps * nGraphs
theo_dist_local = calculateTheoreticaMaxDistance(s_wei_array_local, nTotal)


# Rows 1, 1+nReps, 1+2nReps, ... are treated as the original orderings;
# every other row is treated as a reordering.
inds1 = collect(1:nReps:nTotal)
barcode_array_orig = barcode_array_local[inds1, :]
indsr = setdiff(1:nTotal, inds1)
barcode_array_r = barcode_array_local[indsr, :]

# Calculate and record the maximum bottleneck distance possible
# Reshape bottleneck distance data
#
# theo_dist_swap[i, j, g]   : theoretical distance for the (i, j) pair in graph g
# bdist_swap[i, j, g, d]    : bottleneck distance for the same pair, column d of dmat_local
# Both arrays are filled symmetrically; the diagonal is never written and stays 0.
theo_dist_swap = zeros(nNodes, nNodes, nGraphs)
bdist_swap = zeros(nNodes, nNodes, nGraphs, maxDim)


# `iter` walks the rows of theo_dist_local / dmat_local in storage order.
iter = 1
for r in 1:nGraphs
    # The first row of every graph's chunk is skipped (it is the row selected as
    # the "original" ordering above), so no pair is read from it.
    iter += 1

    for i0 in 1:nNodes, j0 in (i0 + 1):nNodes
        theo_dist_swap[i0, j0, r] = theo_dist_local[iter]
        theo_dist_swap[j0, i0, r] = theo_dist_swap[i0, j0, r]
        for d0 in 1:maxDim
            bdist_swap[i0, j0, r, d0] = dmat_local[iter, d0]
            bdist_swap[j0, i0, r, d0] = bdist_swap[i0, j0, r, d0]
        end
        iter += 1
    end
end

println(size(bdist_swap))
# Average over the dimension axis first, then over graphs.
bdist_swap_mean1 = dropdims(mean(bdist_swap; dims = 4); dims = 4)
println(size(bdist_swap_mean1))
bdist_swap_mean = dropdims(mean(bdist_swap_mean1; dims = 3); dims = 3)
# NOTE(review): the graph-averaged distance is normalised by the theoretical
# distances of graph 1 only - confirm these are identical across graphs.
simRatio = 1 .- bdist_swap_mean ./ theo_dist_swap[:, :, 1]
# 0/0 entries (e.g. the untouched diagonal) produce NaN; treat them as similarity 1.
simRatio[isnan.(simRatio)] .= 1
println(size(bdist_swap_mean))


#### Compute topological overlap
# Re-read the .mat file to get the adjacency matrices, keep only every nReps-th
# slice (the ones at the positions of the original orderings), and free the rest.
dict6 = matread("Results/$(graph_name_local).mat")
badj_array_all = dict6["badj_array"]
origs = 1:nReps:size(badj_array_all, 3)
badj_array = badj_array_all[:, :, origs]
badj_array_all = nothing
dict6 = nothing
GC.gc()

## Compute topological overlap on everything
# Look the R function up once instead of on every iteration.
R_tolap = R"calculate_top_overlap"
tolap_all = zeros(nNodes, nNodes, nGraphs)
for i0 in 1:nGraphs
    # Slice assignment already copies element-wise, so no deepcopy is needed.
    tolap_all[:, :, i0] = rcopy(R_tolap(badj_array[:, :, i0]))
end

println("Finished this block :)")

loaded relevant data20
2416
(70, 70, 20, 3)
(70, 70, 20)
(70, 70)
Finished this block :)


In [4]:
# Plot first heatmaps
gr()

# Every heatmap in this figure shares the flipped y axis and square aspect ratio.
square_heatmap(M; kwargs...) = heatmap(M; yflip = true, aspect_ratio = :equal, kwargs...)

# Panel a: identity guide line plus bottleneck distance (first column of
# dmat_local) against the theoretical distance.
p2a = plot(0:nNodes, 0:nNodes; c = RGB(0.64,0.64,0.64), title = "Distance plots")
scatter!(p2a, theo_dist_local, dmat_local[:, 1];
    title = graph_name,
    xlabel = "Theoretical max distance",
    ylabel = "Bottleneck Distance",
    aspect_ratio = :equal)

p2b = square_heatmap(theo_dist_swap[:, :, 1]; title = "Theo Distance", color = :blues)
# NOTE(review): bdist_swap_mean and simRatio are averaged over all dimensions and
# graphs, yet the titles below say "Dim1" - confirm the intended label.
p2c = square_heatmap(bdist_swap_mean; title = "$(graph_name) BN Distance Dim1", color = :Greys_r)
p2d = square_heatmap(simRatio; title = "Similarity Ratio Dim1", color = :tempo)
p2e = square_heatmap(tolap_all[:, :, 1]; title = "Tolap ex 1")
p2f = square_heatmap(tolap_all[:, :, 2]; title = "Tolap ex 2")

p2all = plot(p2a, p2b, p2c, p2d, p2e, p2f; layout = (3,2), margin = 10mm, size = (900,800))
#display(p2all)

# p2all is the plot that was just created, i.e. the one savefig would pick up anyway.
savefig(p2all, "$(graph_name)_local.pdf")

println("finished saving $(graph_name)_local.pdf")

finished saving NF_ER_p04_1218_local.pdf


In [5]:
## Run through and fit linear models to distance data

# Predictor: theoretical distances of graph 1 (as vectorised by strictLTvector),
# restricted to the entries with distance <= 14.
Tdist = strictLTvector(theo_dist_swap[:, :, 1])
inds_of_interest = findall(Tdist .<= 14)
X = reshape(Tdist[inds_of_interest], :, 1)

# One slope per (graph, dimension).
record_slopes_mat = zeros(nGraphs, maxDim)
for r0 in 1:nGraphs, d0 in 1:maxDim
    observed = strictLTvector(bdist_swap[:, :, r0, d0])
    Y = reshape(observed[inds_of_interest], :, 1)

    # For the near neighbor fits we can assume b=0 in Y=mX+b
    record_slopes_mat[r0, d0] = first(llsq(X, Y; bias = false))
end

# Now we have fits in record_slopes_mat which is nGraphs x maxDim


println(mean(record_slopes_mat, dims = 1))

[0.195396 0.379079 0.121824]


In [6]:
### Scatter and box plots

# First calculate degrees of original graph and strengths of similarity graph
# Recall bdist_swap is the nNodes x nNodes x nGraphs x maxDim array of distances;
# theo_dist_swap (nNodes x nNodes x nGraphs) broadcasts along the last axis.
sim_array = 1 .- bdist_swap ./ theo_dist_swap
# 0/0 entries (e.g. the diagonal, which is zero in both arrays) give NaN.
sim_array[isnan.(sim_array)] .= 0
sim_weighted_degree_array = dropdims(sum(sim_array, dims = 1), dims = 1)
badj_degree_array = dropdims(sum(badj_array, dims = 1), dims = 1)

tolap_all_vec = strictLTvector3D(tolap_all)
# NOTE(review): NaNs are not replaced here; presumably strictLTvector3D skips the
# diagonal where they occur - confirm against the helper.
sim_matrices = 1 .- bdist_swap_mean1 ./ theo_dist_swap
sim_matrices_vec = strictLTvector3D(sim_matrices)


# Panel a: per-pair similarity against topological overlap.
p3a = scatter(reshape(sim_matrices_vec, length(sim_matrices_vec), 1),
    reshape(tolap_all_vec, length(tolap_all_vec), 1),
    markeralpha = 0.2, xlabel = "Top Similarity mean", ylabel = "Tolap", legend = false)
# Alternative panel a (uses the degree arrays computed above):
#p3a = scatter([badj_degree_array...], [sim_weighted_degree_array...], xlabel = "Degree in badj",
#    ylabel= "Strength in TS", aspect_ratio = :equal, markeralpha = 0.5, xlim = (0,70))

# Panel b: distribution of the fitted slopes, one box per dimension.
p3b = boxplot([1.0], record_slopes_mat[:, 1], c = colors_orig[1], markersize = 2)
boxplot!(p3b, [2.0], record_slopes_mat[:, 2], c = colors_orig[2], markersize = 2)
boxplot!(p3b, [3.0], record_slopes_mat[:, 3], title = "Local Slope Fits", xlabel = "Dimension",
    ylabel = "Slope", legend = false, c = colors_orig[3], markersize = 2, xlim = (0,4),
    size = (300,600), framestyle = :box)

# Shared settings for the distance panels.
guide_color = RGB(0.64,0.64,0.64)
box_color = RGB(0.1,0.2,0.3)
# One box per theoretical distance value 1 .. nNodes-1 (previously hard-coded as 1:69).
theo_values = 1:(nNodes - 1)

# Panels c-e: for each of the three distance columns, the spread of bottleneck
# distances at every theoretical distance, drawn over the identity guide line.
p3c, p3d, p3e = map(1:3) do d
    panel = plot(0:nNodes, 0:nNodes, c = guide_color, title = "Distance plots dim$(d)",
        legend = false, aspect_ratio = :equal)
    for t in theo_values
        keep1 = dmat_local[theo_dist_local .== t, d]
        boxplot!(panel, [t], keep1, linewidth = 0.5, markersize = 0.5, c = box_color)
    end
    return panel
end


# Panel f: same as c-e but for the mean over the distance columns.
p3f = plot(0:nNodes, 0:nNodes, c = guide_color, title = "Distance plots avg", legend = false,
    framestyle = :box)
dmat_local_meanDim = mean(dmat_local, dims = 2)
for t in theo_values
    keep1 = dmat_local_meanDim[theo_dist_local .== t, 1]
    boxplot!(p3f, [t], keep1, linewidth = 0.5, markersize = 0.5, c = box_color, ylim = (0,20),
        frame = true, ylabel = "BN Distance", framestyle = :box)
end


# `layout` arranges the subplots; `grid` (used here before) only styles grid lines.
p3all = plot(p3a, p3b, p3c, p3d, p3e, p3f, layout = (3,2))

savefig(p3all, "$(graph_name)_local_boxplots.pdf")

println("Done saving $(graph_name)_local_boxplots.pdf")

Done saving NF_ER_p04_1218_local_boxplots.pdf


In [7]:
# Handles to the R helper functions (evaluated in the embedded R session).
R_communities_wu = R"calculate_communities_wu"
R_calculate_connected_components = R"calculate_connected_components"

# Total similarity of each node to all others (row sums of simRatio).
node_strength = sum(simRatio; dims = 2)

# simRatio for averaged or simRatio_1 for one instance
simRatio_comms_listR = R_communities_wu(simRatio)
# The R result is indexed positionally: element 1 is stored as the community
# assignment, element 2 as Q.
simRatio_comms, Q = rcopy(simRatio_comms_listR[1]), rcopy(simRatio_comms_listR[2])

# Cell display value: the largest entry of simRatio_comms.
maximum(simRatio_comms)

[1] "Modularity = 0.0222840827090667for the weighted graph"


17.0

In [8]:
# Save to matlab file for further plotting

# Index of the example graph whose adjacency matrix is exported.
ex1 = 1

# Variables written to the .mat file, keyed by their MATLAB variable names.
simratio_export = Dict(
    "badj" => badj_array[:, :, ex1],
    "simRatio_comms" => simRatio_comms,
    "node_sim_strength" => node_strength,
    "Q" => Q,
    "simRatio" => simRatio,
)
matwrite("$(graph_name)_simratiocomms.mat", simratio_export)


# NOTE(review): G is not assigned anywhere in the visible notebook; kept as-is.
G = nothing
GC.gc()
println("done!")

done!


In [9]:
## Only show the subplots we use in the main figures

# Rows are separated by `;`: two single-panel rows, then a row shared by two
# panels with relative widths 0.6 and 0.2.
l = @layout [ a; b; c{0.6w} d{0.2w}]

# Panels in layout order: mean BN distance, similarity ratio, averaged distance
# box plots, slope box plots.
pall = plot(p2c, p2d, p3f, p3b;
    layout = l,
    size = (1300,1600),
    tickfont = font(28))

# pall is the plot that was just created, i.e. the one savefig would pick up anyway.
savefig(pall, "$(graph_name)_local_all.pdf")