In [4]:
import pandas as pd

# Load Gephi-ready CSVs. The edge list supplies "source", "target" and
# "weight"; the node table supplies "id" plus the "label" and "artist"
# attributes printed below.
edges = pd.read_csv("song_edges_emotion_similarity.csv")
nodes = pd.read_csv("song_nodes_emotion_space.csv")

# Every edge endpoint, source side first and then target side.
endpoint_ids = pd.concat([edges["source"], edges["target"]])

# The left join further down keeps only ids present in the node table, so an
# endpoint that is missing from it would silently vanish from every per-node
# statistic. Surface that mismatch instead of reporting numbers that do not
# add up against the edge count.
unknown_ids = endpoint_ids[~endpoint_ids.isin(nodes["id"])].unique()
if len(unknown_ids) > 0:
    print(
        f"Warning: {len(unknown_ids)} edge endpoint id(s) are missing from the "
        "node table and are excluded from the per-node statistics."
    )

# Compute (unweighted) degree: count of incident edges for each node.
# Each edge contributes one count to its source and one to its target.
degree_series = endpoint_ids.value_counts().rename("degree")

# Compute weighted degree: sum of weights of incident edges.
# Stack the (source, weight) and (target, weight) pairs under a common "id"
# column so that a single groupby covers both ends of every edge.
weighted_series = (
    pd.concat([
        edges[["source", "weight"]].rename(columns={"source": "id"}),
        edges[["target", "weight"]].rename(columns={"target": "id"}),
    ])
    .groupby("id")["weight"]
    .sum()
    .rename("weighted_degree")
)

# Join with node attributes. Nodes that never appear in the edge list come out
# of the join as NaN and are turned into explicit zeros.
stats = nodes.set_index("id").join([degree_series, weighted_series])
stats["degree"] = stats["degree"].fillna(0).astype(int)
stats["weighted_degree"] = stats["weighted_degree"].fillna(0.0)

num_nodes = len(stats)
num_edges = len(edges)
avg_degree = stats["degree"].mean()

# Most and least connected nodes. Either one is None when there is no row to
# pick: an empty node table for the former, no node with an edge for the
# latter. Ties are resolved by whatever order sort_values produces.
has_edges = stats["degree"] > 0
most_connected = (
    stats.sort_values("degree", ascending=False).iloc[0]
    if num_nodes > 0
    else None
)
least_connected_non_iso = (
    stats[has_edges].sort_values("degree", ascending=True).iloc[0]
    if has_edges.any()
    else None
)
num_isolated = int((stats["degree"] == 0).sum())

print(f"Nodes: {num_nodes}")
print(f"Edges: {num_edges}")
print(f"Average degree: {avg_degree:.2f}")

if most_connected is not None:
    print()
    print("Most connected node (by degree):")
    print(f"  id: {most_connected.name}")
    print(f"  title: {most_connected['label']}")
    print(f"  artist: {most_connected['artist']}")
    print(f"  degree: {most_connected['degree']}")
    print(f"  weighted_degree: {most_connected['weighted_degree']:.3f}")

if least_connected_non_iso is not None:
    print()
    print("Least connected node with at least 1 edge:")
    print(f"  id: {least_connected_non_iso.name}")
    print(f"  title: {least_connected_non_iso['label']}")
    print(f"  artist: {least_connected_non_iso['artist']}")
    print(f"  degree: {least_connected_non_iso['degree']}")
    print(f"  weighted_degree: {least_connected_non_iso['weighted_degree']:.3f}")

print()
print(f"Isolated nodes (degree = 0): {num_isolated}")



Nodes: 1560
Edges: 29042
Average degree: 37.23

Most connected node (by degree):
  id: drake - Under Ground Kings
  title: Under Ground Kings
  artist: drake
  degree: 200
  weighted_degree: 198.802

Least connected node with at least 1 edge:
  id: drake - I Could Never
  title: I Could Never
  artist: drake
  degree: 1
  weighted_degree: 0.991

Isolated nodes (degree = 0): 161


In [5]:
from pathlib import Path

# Slice the per-node table built in the previous cell (`stats`) into the
# groups that the report lists individually.
degree_col = stats["degree"]
zero_degree = stats.loc[degree_col.eq(0)]
one_degree = stats.loc[degree_col.eq(1)]
top3 = stats.sort_values("degree", ascending=False).iloc[:3]

num_nodes, num_edges = len(stats), len(edges)


def _identity(row):
    """Render the id / title / artist prefix shared by every per-node line."""
    return f"  - id: {row.Index} | title: {row.label} | artist: {row.artist}"


out_path = Path("graph_facts.txt")
with out_path.open("w", encoding="utf-8") as f:
    # Headline numbers first.
    f.writelines([
        "Graph statistics\n",
        "=================\n\n",
        f"Total nodes: {num_nodes}\n",
        f"Total edges: {num_edges}\n",
        f"Average degree: {degree_col.mean():.2f}\n",
        f"Isolated nodes (degree = 0): {len(zero_degree)}\n",
        f"Nodes with degree = 1: {len(one_degree)}\n\n",
    ])

    # The three highest-degree nodes, with both degree measures.
    f.write("Top 3 most connected nodes (by degree):\n")
    for row in top3.itertuples():
        f.write(
            f"{_identity(row)} | degree: {row.degree} | "
            f"weighted_degree: {row.weighted_degree:.3f}\n"
        )

    # Full listing of nodes that have no edges at all.
    f.write("\nNodes with degree = 0 (isolated):\n")
    for row in zero_degree.itertuples():
        f.write(f"{_identity(row)}\n")

    # Full listing of nodes that have exactly one edge.
    f.write("\nNodes with degree = 1:\n")
    for row in one_degree.itertuples():
        f.write(f"{_identity(row)} | weighted_degree: {row.weighted_degree:.3f}\n")

print(f"Wrote detailed stats to {out_path.resolve()}")



Wrote detailed stats to /Users/iamwafula/GitHub/kendickLamarVDrake/graph_facts.txt
