In [27]:
using Graphs, Graphs.Experimental.ShortestPaths
using GraphIO

In [28]:
using GraphIO.NET
using Statistics
using DataFrames

file_path = "A1-networks/toy/wheel.net"
graph = loadgraph(file_path, "graph_key", NETFormat())

{9, 16} undirected simple Int64 graph

In [29]:
function read_weighted_net(file_path)
    graph = loadgraph(file_path, "graph_key", NETFormat())
    
    edge_weights = Dict{Tuple{Int,Int}, Float64}()
    open(file_path, "r") do file
        reading_edges = false
        for line in eachline(file)
            if occursin("*Edges", line)
                reading_edges = true
            elseif reading_edges
                edge_data = split(line)
                if length(edge_data) >= 3
                    src, dst, weight = parse(Int, edge_data[1]), parse(Int, edge_data[2]), parse(Float64, edge_data[3])
                    edge_weights[(src, dst)] = weight
                    # Assuming you add edges to your graph here
                    add_edge!(graph, src, dst)  # Add the edge to the graph if not already added
                end
            end
        end
    end
    return graph, edge_weights
end

read_weighted_net (generic function with 1 method)

In [30]:
graph, edge_weights = read_weighted_net("A1-networks/toy/wheel.net")

(SimpleGraph{Int64}(16, [[2, 3, 4, 5, 6, 7, 8, 9], [1, 3, 9], [1, 2, 4], [1, 3, 5], [1, 4, 6], [1, 5, 7], [1, 6, 8], [1, 7, 9], [1, 2, 8]]), Dict((4, 5) => 1.0, (1, 2) => 1.0, (1, 3) => 1.0, (1, 4) => 1.0, (9, 2) => 1.0, (7, 8) => 1.0, (8, 9) => 1.0, (1, 9) => 1.0, (3, 4) => 1.0, (5, 6) => 1.0…))

In [44]:
function network_num_descriptors(graph, verbose = true)
    # Number of nodes and edges
    num_nodes = nv(graph)
    num_edges = ne(graph)

    # Degrees
    degrees = degree(graph)
    min_degree = minimum(degrees)
    max_degree = maximum(degrees)
    avg_degree = mean(degrees)

    # Average clustering coefficient 
    avg_clustering_coefficient = sum(local_clustering_coefficient(graph, vertices(graph)))/nv(graph)

    # Assortativity 
    assort = assortativity(graph)

    # Average path length and Diameter
    # For a large graph, this might be computationally expensive
    avg_path_length = sum(shortest_paths(graph).dists)/(num_nodes*(num_nodes-1))
    diam = Graphs.diameter(graph)  # Same adjustment as above
    
    if verbose
        # Printing the results
        println("Number of nodes: $num_nodes")
        println("Number of edges: $num_edges")
        println("Degree -- Min: $min_degree, Max: $max_degree, Avg: $avg_degree")
        println("Average Clustering Coefficient: $avg_clustering_coefficient")
        println("Assortativity: $assort")
        println("Average Path Length: $avg_path_length")
        println("Diameter: $diam")
    end
    
    return [isa(i, AbstractFloat) ? round(i, digits=4) : i for i in [num_nodes, num_edges, min_degree, max_degree, avg_degree, avg_clustering_coefficient, assort, avg_path_length, diam]]
end

network_num_descriptors (generic function with 2 methods)

In [46]:
network_num_descriptors(graph)

Number of nodes: 9
Number of edges: 16
Degree -- Min: 3, Max: 8, Avg: 3.5555555555555554
Average Clustering Coefficient: 0.6243386243386244
Assortativity: -0.3333333333333333
Average Path Length: 1.5555555555555556
Diameter: 2


9-element Vector{Float64}:
  9.0
 16.0
  3.0
  8.0
  3.5556
  0.6243
 -0.3333
  1.5556
  2.0

In [13]:
function construct_df(data_dict, col_names = [])
    # Initialize an empty DataFrame with appropriate column names
    if isempty(col_names)
        col_names = ["Col$(i)" for i in 1:length(first(values(data_dict)))]
    end
    
    col_names_with_key = ["Index"; col_names]
    
    # Initialize an empty DataFrame with the updated column names
    df = DataFrame(; (Symbol(col_name) => Any[] for col_name in col_names_with_key)...)
    
    # Sort the dictionary by keys to maintain the order
    sorted_keys = sort(collect(keys(data_dict)))

    # Populate the DataFrame, including the key as the first column in each row
    for key in sorted_keys
        row_data = [key; data_dict[key]]
        push!(df, row_data)
    end
    
    return df
end

construct_df (generic function with 2 methods)

In [36]:
function compute_strength(edge_weights, mode="undirected")
    weights_count = Dict{Int, AbstractFloat}()
    if mode == "undirected"
        for (key,value) in edge_weights
            for v in key
                if haskey(weights_count, v)
                    weights_count[v] += value
                else
                    weights_count[v] = value
                end
            end
        end
    elseif mode == "directed"
        for (key,value) in edge_weights
            v = key[1]
            if haskey(weights_count, v)
                weights_count[v] += value
            else
                weights_count[v] = value
            end
        end
    end
    return weights_count
end

compute_strength (generic function with 2 methods)

In [4]:
function nodes_num_descriptors(graph, edge_weights, verbose = false)
    num_nodes = nv(graph)
    node_descriptors = Dict{Int, Vector}()
    degrees = degree(graph)
    strenghts = compute_strength(edge_weights)
    aspls = shortest_paths(graph).dists
    lspls = maximum(collect(adjacency_matrix(graph)); dims = 2)
    b_centralities = betweenness_centrality(graph) 
    e_centralities = eigenvector_centrality(graph)
    pageranks = pagerank(graph)
    for i in vertices(graph)
        deg = degrees[i]
        stren = strenghts[i]
        aspl = sum(aspls[i,:])/(num_nodes-1)
        lspl = lspls[i]
        cl_cf = local_clustering_coefficient(graph, i)
        bc = b_centralities[i] 
        ec = e_centralities[i]
        pr = pageranks[i]
        
        node_descriptors[i] = [deg,stren, aspl, lspl, cl_cf, bc, ec, pr]
    end
    
    df_nodes_descriptors = construct_df(node_descriptors, ["Degree", "Strength", "ASPL", "LSPL", "Clust Coeff", "Betweeness", "Eigenvector", "PageRank"])
    
    if verbose
        println(df_nodes_descriptors)
    end
    
    return df_modes_descriptors
end

nodes_num_descriptors (generic function with 1 method)

In [176]:
nodes_num_descriptors(graph)

[1m9×8 DataFrame[0m
[1m Row [0m│[1m Degree  [0m[1m Strength [0m[1m ASPL    [0m[1m LSPL    [0m[1m Clust Coeff [0m[1m Betweeness [0m[1m Eigenvector [0m[1m PageRank  [0m
     │[90m Float64 [0m[90m Float64  [0m[90m Float64 [0m[90m Float64 [0m[90m Float64     [0m[90m Float64    [0m[90m Float64     [0m[90m Float64   [0m
─────┼──────────────────────────────────────────────────────────────────────────────────────
   1 │     8.0       8.0    1.0        1.0     0.285714   0.571429      0.57735   0.233766
   2 │     3.0       3.0    1.625      1.0     0.666667   0.0178571     0.288675  0.0957793
   3 │     3.0       3.0    1.625      1.0     0.666667   0.0178571     0.288675  0.0957793
   4 │     3.0       3.0    1.625      1.0     0.666667   0.0178571     0.288675  0.0957793
   5 │     3.0       3.0    1.625      1.0     0.666667   0.0178571     0.288675  0.0957793
   6 │     3.0       3.0    1.625      1.0     0.666667   0.0178571     0.288675  0.0957793
   7

## Final Tests

In [1]:
include("NetworkProcessing.jl")

Main.NetworkProcessing

In [2]:
graph, edge_weights = NetworkProcessing.read_network("A1-networks/real/airports_UW.net")
NetworkProcessing.network_num_descriptors(graph)

Number of nodes: 3618
Number of edges: 14142
Degree -- Min: 1, Max: 250, Avg: 7.817578772802653
Average Clustering Coefficient: 0.4957489312349576
Assortativity: 0.04622413053190826
Average Path Length: 4.439594641910406
Diameter: 17


9-element Vector{Float64}:
  3618.0
 14142.0
     1.0
   250.0
     7.8176
     0.4957
     0.0462
     4.4396
    17.0

In [17]:
NetworkProcessing.nodes_num_descriptors(graph, edge_weights)

[1m9×9 DataFrame[0m
[1m Row [0m│[1m Index [0m[1m Degree [0m[1m Strength [0m[1m ASPL  [0m[1m LSPL [0m[1m Clust Coeff [0m[1m Betweeness [0m[1m Eigenvector [0m[1m PageRank  [0m
     │[90m Any   [0m[90m Any    [0m[90m Any      [0m[90m Any   [0m[90m Any  [0m[90m Any         [0m[90m Any        [0m[90m Any         [0m[90m Any       [0m
─────┼───────────────────────────────────────────────────────────────────────────────────────
   1 │ 1.0    8.0     8.0       1.0    1.0   0.285714     0.571429    0.57735      0.233766
   2 │ 2.0    3.0     3.0       1.625  1.0   0.666667     0.0178571   0.288675     0.0957793
   3 │ 3.0    3.0     3.0       1.625  1.0   0.666667     0.0178571   0.288675     0.0957793
   4 │ 4.0    3.0     3.0       1.625  1.0   0.666667     0.0178571   0.288675     0.0957793
   5 │ 5.0    3.0     3.0       1.625  1.0   0.666667     0.0178571   0.288675     0.0957793
   6 │ 6.0    3.0     3.0       1.625  1.0   0.666667     0.0178571   0

In [18]:
using CSV

net_descriptors = Dict{String, Vector}()
for dir in readdir("A1-Networks")
    for file in readdir("A1-networks/$dir")
        graph, edge_weights = NetworkProcessing.read_weighted_net("A1-networks/$dir/$file")
       net_descriptors[file] = NetworkProcessing.network_num_descriptors(graph, false)
    end
end

In [19]:
df = NetworkProcessing.construct_df(net_descriptors, ["# Nodes", "# Edged", "Degree (min)", "Degree (max)", "Degree (avg)", "ACC", "Assort.", "APL", "Diameter"])
CSV.write("ExerciseA_Results.csv", df)
df

Row,Index,# Nodes,# Edged,Degree (min),Degree (max),Degree (avg),ACC,Assort.,APL,Diameter
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,20x2+5x2.net,50.0,404.0,4.0,22.0,16.16,0.9716,0.9186,2.3878,4.0
2,256_4_4_2_15_18_p.net,256.0,2274.0,15.0,23.0,17.7656,0.7331,0.0286,2.7821,5.0
3,256_4_4_4_13_18_p.net,256.0,2299.0,10.0,25.0,17.9609,0.5113,0.0007,2.6511,4.0
4,BA1000.net,1000.0,3990.0,4.0,115.0,7.98,0.0354,-0.0542,3.1833,5.0
5,ER1000k8.net,1000.0,3956.0,1.0,17.0,7.912,0.008,-0.0168,3.5698,6.0
6,ER5000k8.net,5000.0,19980.0,4.0,17.0,7.9918,0.0014,-0.0555,4.3797,6.0
7,PGP.net,10680.0,24316.0,1.0,205.0,4.5536,0.2659,0.2382,7.4855,24.0
8,SF_1000_g2.5.net,1000.0,1905.0,2.0,30.0,3.81,0.0096,0.02,4.6149,10.0
9,SF_1000_g2.7.net,1000.0,1668.0,2.0,24.0,3.336,0.0067,-0.002,5.4688,12.0
10,SF_1000_g3.0.net,1000.0,1517.0,2.0,26.0,3.034,0.0052,-0.0085,5.9651,13.0


In [3]:
graph, edge_weights, vertex_labels = NetworkProcessing.read_network("A1-networks/real/airports_UW.net")
NetworkProcessing.nodes_num_descriptors(graph, edge_weights, vertex_labels)

Row,Index,Degree,Strength,ASPL,LSPL,Clust Coeff,Betweeness,Eigenvector,PageRank
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any,Any,Any,Any
1,AAA,2.0,92.0,4.517,1.0,1.0,0.0,8.96615e-5,0.000146141
2,AAE,6.0,7284.0,3.67763,1.0,0.733333,6.3893e-5,0.00591853,0.000183379
3,AAL,3.0,13071.0,3.91042,1.0,1.0,0.0,0.00332087,0.000104625
4,AAN,11.0,3224.5,3.92784,1.0,0.672727,1.01215e-5,0.0069863,0.000277637
5,AAQ,10.0,1690.0,3.81006,1.0,0.333333,2.00262e-5,0.00656959,0.000274695
6,AAR,6.0,11653.0,3.60077,1.0,0.933333,1.69907e-8,0.00911095,0.000166138
7,AAT,1.0,28.5,4.68261,1.0,0.0,0.0,0.000124775,6.9491e-5
8,AAU,1.0,38.0,5.63202,1.0,0.0,0.0,8.64069e-7,0.000107245
9,AAY,4.0,646.0,4.51783,1.0,0.833333,6.11664e-8,0.000595804,0.000142846
10,ABA,1.0,164.0,4.87255,1.0,0.0,0.0,6.83965e-5,6.97388e-5
