# Generating Metrics
This code computes a number of evaluation metrics for a graph and gives a value for each of them.

In [19]:
import os
import networkx as nx
import pandas as pd

In [20]:
# Compute global network metrics for graph G
def compute_metrics(G):
    """Return the average clustering coefficient and average path length of G.

    Args:
        G: A networkx graph (this notebook passes an undirected ``nx.Graph``).

    Returns:
        A dict with two entries:

        * ``'Average Clustering Coefficient'``: the value of
          ``nx.average_clustering(G)``, or NaN when G has no nodes.
        * ``'Average Path Length'``: the value of
          ``nx.average_shortest_path_length(G)`` when G is connected, the
          string ``'Graph is not connected'`` when it is not, or the string
          ``'Graph is empty'`` when G has no nodes.
    """
    metrics = {}

    # Neither metric is defined for a graph without nodes, and networkx raises
    # on it (nx.is_connected raises NetworkXPointlessConcept), so report the
    # situation explicitly instead of aborting the caller's whole run.
    if G.number_of_nodes() == 0:
        metrics['Average Clustering Coefficient'] = float('nan')
        metrics['Average Path Length'] = 'Graph is empty'
        return metrics

    # Clustering coefficient (average)
    metrics['Average Clustering Coefficient'] = nx.average_clustering(G)

    # Average path length is only computed for a connected graph; otherwise a
    # sentinel string is stored in its place.
    if nx.is_connected(G):
        metrics['Average Path Length'] = nx.average_shortest_path_length(G)
    else:
        metrics['Average Path Length'] = 'Graph is not connected'

    return metrics

In [21]:
# Directory that holds the node/edge CSV files, and the CSV the summary is written to
data_folder = "networks"
output_file = "network_metrics.csv"

# One row per graph is accumulated here
results = []

# Names of the CSV files in data_folder whose name contains "edge_list", in sorted order
edge_files = sorted(
    entry
    for entry in os.listdir(data_folder)
    if entry.endswith(".csv") and "edge_list" in entry
)

In [22]:
# Show the edge-list file names that were collected
edge_files

["'blues'_edge_list.csv",
 "'classical'_edge_list.csv",
 "'country'_edge_list.csv",
 "'disco'_edge_list.csv",
 "'hiphop'_edge_list.csv",
 "'jazz'_edge_list.csv",
 "'metal'_edge_list.csv",
 "'pop'_edge_list.csv",
 "'reggae'_edge_list.csv",
 "'rock'_edge_list.csv",
 'null_edge_list.csv']

In [23]:
# Build one graph per edge-list file and record its size and global metrics.
# The list is reset here so that re-running this cell does not append a second
# copy of every row to the saved CSV.
results = []

for edge_file in edge_files:
    # Identifier is the part of the file name before the first underscore
    graph_id = edge_file.split("_")[0]
    node_file = f"nodes_{graph_id}.csv"  # Corresponding node file

    # Read the node list from its "Node" column; when the file is absent the
    # graph is built from the edge list alone.
    node_path = os.path.join(data_folder, node_file)
    nodes = pd.read_csv(node_path)["Node"].tolist() if os.path.exists(node_path) else []

    # Read the edge list: one edge per row, endpoints in "Node1" and "Node2"
    edge_path = os.path.join(data_folder, edge_file)
    edges = pd.read_csv(edge_path)[["Node1", "Node2"]].values.tolist()

    # Create an undirected graph; adding the node list first keeps nodes that
    # appear in no edge.
    G = nx.Graph()
    G.add_nodes_from(nodes)
    G.add_edges_from(edges)

    # Compute global metrics
    metrics = compute_metrics(G)
    clustering = metrics['Average Clustering Coefficient']
    path_length = metrics['Average Path Length']

    # Append one result row for this graph
    results.append([graph_id, G.number_of_nodes(), G.number_of_edges(), clustering, path_length])

# Convert to DataFrame and save
df = pd.DataFrame(
    results,
    columns=["Graph_ID", "Num_Nodes", "Num_Edges", "Clustering_Coefficient", "Avg_Path_Length"],
)
df.to_csv(output_file, index=False)

print(f"Saved network metrics to {output_file}")

Saved network metrics to network_metrics.csv
