In [7]:
import networkx as nx
import pandas as pd
from scipy.stats import spearmanr
from pathlib import Path

# Input/output locations, relative to the notebook's working directory
data_root = Path("../data")
graph_dir = data_root / "networks" / "projected"
results_dir = data_root / "results"
# Make sure the output folder exists before any results are written
results_dir.mkdir(parents=True, exist_ok=True)

# Repositories and time phases; one graph file is expected per (repo, phase)
repos = [
    "scikit-learn",
    "pytorch",
    "kubernetes",
    "apache-spark",
]
phases = [
    "pre",
    "during",
    "post",
]

# Upper bound on the number of sampled nodes (`k`) for approximate betweenness
k_samples = 100


In [8]:
# --- Load all graphs ---
# repo_graphs maps repo -> {phase -> graph}. A phase whose file cannot be read
# is reported and left out, so later cells must check for missing phases.
repo_graphs = {repo: {} for repo in repos}
for repo, loaded in repo_graphs.items():
    for phase in phases:
        file = graph_dir.joinpath(f"{repo}_{phase}_projected.gml")
        try:
            G = nx.read_gml(file)
        except Exception as e:
            # Best-effort load: report the failure and continue with the rest
            print(f"Failed to load {file}: {e}")
        else:
            loaded[phase] = G


In [9]:
# --- Initialize results ---
spearman_results = []
node_retention = []
edge_overlap = []

# Approximate betweenness samples `k` nodes at random. A fixed seed makes the
# sampled set (and therefore every reported correlation) reproducible.
BETWEENNESS_SEED = 42


def _approx_betweenness(graph):
    """Return sampled betweenness centrality for `graph` as {node: score}.

    At most `k_samples` nodes are sampled (all nodes for smaller graphs),
    always with `BETWEENNESS_SEED`, so repeated calls give the same scores.
    """
    return nx.betweenness_centrality(
        graph, k=min(k_samples, len(graph)), seed=BETWEENNESS_SEED
    )


def _spearman_on_shared_nodes(scores1, scores2):
    """Spearman correlation of two {node: score} maps over their shared nodes.

    Returns None when the two maps have no node in common; otherwise returns
    the correlation coefficient reported by `scipy.stats.spearmanr`.
    """
    shared = list(set(scores1).intersection(scores2))
    if not shared:
        return None
    corr, _ = spearmanr(
        [scores1[n] for n in shared],
        [scores2[n] for n in shared],
    )
    return corr


# --- Analysis ---
for repo, graphs in repo_graphs.items():
    # Betweenness is computed once per phase and reused, so the "during" graph
    # contributes the same scores to both phase pairs (and is not recomputed).
    betweenness_by_phase = {}

    for phase1, phase2 in [("pre", "during"), ("during", "post")]:
        # A phase is absent when its graph file failed to load
        if phase1 not in graphs or phase2 not in graphs:
            continue
        G1 = graphs[phase1]
        G2 = graphs[phase2]

        for p in (phase1, phase2):
            if p not in betweenness_by_phase:
                betweenness_by_phase[p] = _approx_betweenness(graphs[p])

        # Spearman rank correlation of each centrality across the two phases
        metric_scores = [
            ("degree", dict(G1.degree()), dict(G2.degree())),
            ("betweenness", betweenness_by_phase[phase1], betweenness_by_phase[phase2]),
        ]
        for metric, scores1, scores2 in metric_scores:
            corr = _spearman_on_shared_nodes(scores1, scores2)
            if corr is not None:
                spearman_results.append({
                    "repo": repo,
                    "metric": metric,
                    "phase_pair": f"{phase1}_{phase2}",
                    "spearman_corr": corr
                })

        # Node retention: share of phase1 nodes still present in phase2
        nodes1 = set(G1.nodes())
        nodes2 = set(G2.nodes())
        if nodes1:
            retention = len(nodes1.intersection(nodes2)) / len(nodes1)
            node_retention.append({
                "repo": repo,
                "from_phase": phase1,
                "to_phase": phase2,
                "retention_rate": retention
            })

        # Edge overlap: share of phase1 edges still present in phase2.
        # frozenset makes (u, v) and (v, u) compare equal, so any edge
        # direction is ignored.
        edges1 = set(map(frozenset, G1.edges()))
        edges2 = set(map(frozenset, G2.edges()))
        if edges1:
            overlap = len(edges1.intersection(edges2)) / len(edges1)
            edge_overlap.append({
                "repo": repo,
                "from_phase": phase1,
                "to_phase": phase2,
                "edge_overlap": overlap
            })

# --- Export or view results ---
# One frame per metric family; each list holds one dict per (repo, phase pair).
df_spearman = pd.DataFrame(spearman_results)
df_retention = pd.DataFrame(node_retention)
df_overlap = pd.DataFrame(edge_overlap)

# Combine all in one display if preferred.
# The three frames have different columns, so the stacked table is sparse:
# each row is NaN in the columns that belong to the other two frames.
combined = pd.concat([df_spearman, df_retention, df_overlap], axis=0, ignore_index=True)

# Save to CSV under the configured results directory, so that changing
# `results_dir` in the config cell also changes where these files go.
df_spearman.to_csv(results_dir / "spearman_centrality.csv", index=False)
df_retention.to_csv(results_dir / "node_retention.csv", index=False)
df_overlap.to_csv(results_dir / "edge_overlap.csv", index=False)
combined.to_csv(results_dir / "temporal_metrics_summary.csv", index=False)

# Optional: view in notebook
combined

Unnamed: 0,repo,metric,phase_pair,spearman_corr,from_phase,to_phase,retention_rate,edge_overlap
0,scikit-learn,degree,pre_during,0.323894,,,,
1,scikit-learn,betweenness,pre_during,0.457687,,,,
2,scikit-learn,degree,during_post,0.515726,,,,
3,scikit-learn,betweenness,during_post,0.377502,,,,
4,pytorch,degree,pre_during,0.44388,,,,
5,pytorch,betweenness,pre_during,0.301184,,,,
6,pytorch,degree,during_post,0.741406,,,,
7,pytorch,betweenness,during_post,0.678722,,,,
8,kubernetes,degree,pre_during,0.552296,,,,
9,kubernetes,betweenness,pre_during,0.561898,,,,
