In [2]:

import networkx as nx
import pickle
import pandas as pd
import itertools
import os

In [2]:

graph_files = ["English_Liguori.gpickle", "French_Liguori.gpickle", "Spanish_Liguori.gpickle", "German_Liguori.gpickle"]
graphs = []

for f in graph_files:
    graph_name = os.path.splitext(os.path.basename(f))[0]  # graph_id = filename without extension
    if f.endswith(".gpickle"):
        G = nx.read_gpickle(f)
    else:
        with open(f, "rb") as file:
            G = pickle.load(file)
    graphs.append((graph_name, G))

In [3]:

combined = nx.MultiDiGraph()

for graph_id, G in graphs:
    # Add nodes with source graph info
    for n, data in G.nodes(data=True):
        if n not in combined:
            combined.add_node(n, **data, source_graph=[graph_id])
        else:
            combined.nodes[n]['source_graph'].append(graph_id)
    # Add edges with source graph info
    for u, v, key, data in G.edges(keys=True, data=True):
        new_key = f"{key}_{graph_id}"
        combined.add_edge(u, v, key=new_key, **data, source_graph=graph_id)


In [4]:
len(combined.nodes())

463

In [5]:
len(combined.edges())

7302

In [6]:

from rapidfuzz import fuzz

In [15]:
print(fuzz.ratio("benziger", "benziger brothers"))

64.0


In [7]:
print(fuzz.token_set_ratio("benziger", "benziger brothers")) 

100.0


In [8]:

import re

def clean_name(name: str) -> str:
    # Lowercase
    n = name.lower()
    # Remove common words and punctuation
    n = re.sub(r'\b(ltd|co|inc|press|publishing|house|editions|edition|books?)\b', '', n)
    n = re.sub(r'[^\w\s]', '', n)
    # Collapse whitespace
    n = re.sub(r'\s+', ' ', n).strip()
    return n

from collections import defaultdict

def ask_and_merge_candidates(G, candidates):
    merged_map = {}  # keep track of which nodes were merged
    skip_all = False

    for n1, n2, score in candidates:
        if skip_all:
            break
        if n1 not in G or n2 not in G:
            continue  # node already merged

        print(f"\n‚ùì Possible duplicate:\n   {n1} ‚Üî {n2}  (similarity: {score})")
        choice = input("Merge these? [y]es / [n]o / [s]kip all: ").strip().lower()

        if choice == 's':
            skip_all = True
            continue
        if choice != 'y':
            continue

        # Merge nodes n2 into n1
        merge_nodes(G, n1, n2)
        merged_map[n2] = n1
        print(f"‚úÖ Merged '{n2}' into '{n1}'")

    return merged_map

In [9]:

def find_candidate_duplicates(G, threshold=80):
    node_names = {n: clean_name(str(n)) for n in G.nodes()}
    candidates = []
    nodes_list = list(G.nodes())

    for i in range(len(nodes_list)):
        for j in range(i+1, len(nodes_list)):
            n1, n2 = nodes_list[i], nodes_list[j]
            sg1 = set(G.nodes[n1].get('source_graph', []))
            sg2 = set(G.nodes[n2].get('source_graph', []))
            # only consider nodes from different graphs
            if sg1.isdisjoint(sg2):
                clean1 = node_names[n1]
                clean2 = node_names[n2]
                score = fuzz.token_set_ratio(clean1, clean2)
                if clean1 == clean2 or score >= threshold:
                    candidates.append((n1, n2, score))
    candidates.sort(key=lambda x: x[2], reverse=True)
    return candidates


In [10]:

def merge_nodes(G, keep, remove):
    # merge attributes
    for key, val in G.nodes[remove].items():
        if key not in G.nodes[keep]:
            G.nodes[keep][key] = val
        else:
            keep_val = G.nodes[keep][key]

            # Handle source_graph specially
            if key == 'source_graph':
                G.nodes[keep][key] = list(set(keep_val + val))

            # Handle publications (list of dicts)
            elif key == 'publications' and isinstance(keep_val, list) and isinstance(val, list):
                # Deduplicate based on title + year combo (or customize)
                seen = set()
                merged = []
                for pub in keep_val + val:
                    # Create a simple signature to avoid duplicates
                    title = pub.get('title')
                    years = tuple(pub.get('years', [])) if isinstance(pub.get('years'), list) else pub.get('years')
                    sig = (title, years)
                    if sig not in seen:
                        seen.add(sig)
                        merged.append(pub)
                G.nodes[keep][key] = merged

            # Handle lists of hashable types (e.g., list of strings)
            elif isinstance(keep_val, list) and isinstance(val, list):
                try:
                    G.nodes[keep][key] = list(set(keep_val + val))
                except TypeError:
                    # Fallback to concatenation if not hashable
                    G.nodes[keep][key] = keep_val + val

            # If one value is missing or not list, just override conservatively
            else:
                # You could also define a more sophisticated merge rule here if needed
                if keep_val != val:
                    # store both in a list if they are different
                    if not isinstance(keep_val, list):
                        keep_val = [keep_val]
                    if val not in keep_val:
                        keep_val.append(val)
                    G.nodes[keep][key] = keep_val

    # redirect all edges from 'remove' to 'keep'
    for u, v, key, data in list(G.in_edges(remove, keys=True, data=True)):
        G.add_edge(u if u != remove else keep,
                   v if v != remove else keep,
                   key=key, **data)
    for u, v, key, data in list(G.out_edges(remove, keys=True, data=True)):
        G.add_edge(u if u != remove else keep,
                   v if v != remove else keep,
                   key=key, **data)

    # finally remove the node
    G.remove_node(remove)


In [11]:
candidates = find_candidate_duplicates(combined, threshold=80)

In [26]:
print(f"Found {len(candidates)} possible duplicate publisher pairs.")

Found 21 possible duplicate publisher pairs.


In [12]:

if candidates:
    merged_map = ask_and_merge_candidates(combined, candidates)
    print(f"\n‚úÖ Finished merging. {len(merged_map)} nodes were merged.")


‚ùì Possible duplicate:
   Thomas Richardson & Son ‚Üî thomas  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  n



‚ùì Possible duplicate:
   Fr. Pustet ‚Üî Pustet  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Pustet' into 'Fr. Pustet'

‚ùì Possible duplicate:
   Thomas Sweeney ‚Üî thomas  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  n



‚ùì Possible duplicate:
   J. Murphy ‚Üî Murphy  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Murphy' into 'J. Murphy'

‚ùì Possible duplicate:
   Benziger Brothers ‚Üî benziger  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'benziger' into 'Benziger Brothers'

‚ùì Possible duplicate:
   Benziger Brothers ‚Üî Benziger  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Benziger' into 'Benziger Brothers'

‚ùì Possible duplicate:
   mame ‚Üî Mame  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Mame' into 'mame'

‚ùì Possible duplicate:
   fischer ‚Üî Fischer  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Fischer' into 'fischer'

‚ùì Possible duplicate:
   larumbe ‚Üî Larumbe  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Larumbe' into 'larumbe'

‚ùì Possible duplicate:
   deckherr ‚Üî Deckherr  (similarity: 100.0)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Deckherr' into 'deckherr'

‚ùì Possible duplicate:
   lefevbre ‚Üî Lefevre  (similarity: 93.33333333333333)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Lefevre' into 'lefevbre'

‚ùì Possible duplicate:
   fischer ‚Üî Fleischer  (similarity: 87.5)


Merge these? [y]es / [n]o / [s]kip all:  n



‚ùì Possible duplicate:
   desclee ‚Üî Descl√©e  (similarity: 85.71428571428571)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Descl√©e' into 'desclee'

‚ùì Possible duplicate:
   vagner ‚Üî Wagner  (similarity: 83.33333333333333)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Wagner' into 'vagner'

‚ùì Possible duplicate:
   pillot ‚Üî Pillet  (similarity: 83.33333333333333)


Merge these? [y]es / [n]o / [s]kip all:  n



‚ùì Possible duplicate:
   Walder ‚Üî Walter  (similarity: 83.33333333333333)


Merge these? [y]es / [n]o / [s]kip all:  n



‚ùì Possible duplicate:
   Pillet ‚Üî Piller  (similarity: 83.33333333333333)


Merge these? [y]es / [n]o / [s]kip all:  y


‚úÖ Merged 'Piller' into 'Pillet'

‚ùì Possible duplicate:
   vial ‚Üî Avrial  (similarity: 80.0)


Merge these? [y]es / [n]o / [s]kip all:  n



‚úÖ Finished merging. 12 nodes were merged.


In [28]:

len(combined.nodes())

451

In [28]:
len(combined.edges())

7302

In [13]:

title_map = pd.read_excel("en_spa_fr_ger_liguori_titles_unified_3.xlsx")  # Columns: title_lang1, title_lang2, etc.
language_cols = ['EN', 'SPA', 'FR', 'GER']

In [14]:

title_to_nodes = {}  # key = title, value = list of nodes containing this title
for n, data in combined.nodes(data=True):
    for pub in data['publications']:
        title = pub['title']
        if title not in title_to_nodes:
            title_to_nodes[title] = []
        title_to_nodes[title].append((n, pub))  # store node + publication info

In [15]:

def clean_years(years):
    """
    Convert a list of years to integers, skip invalid/missing entries.
    """
    clean = []
    for y in years:
        if pd.isna(y):
            continue
        try:
            clean.append(int(y))
        except:
            pass  # skip non-numeric strings
    return sorted(clean)


In [16]:

def clean_editions(ed):
    """
    Convert editions to a single numeric value for weighting.
    If it's a list, take the max.
    """
    if isinstance(ed, list):
        # Filter numeric values
        nums = [int(x) for x in ed if pd.notna(x)]
        return max(nums) if nums else 1
    try:
        return int(ed)
    except:
        return 1


In [17]:

# existing_edges = set()

def add_edge_with_attributes(G, publisher_A, publisher_B, title, source_years, target_years,
                             source_translator, target_translator, source_num_editions,
                             target_num_editions, source_place, target_place, weight, relation, key):
    G.add_edge(publisher_A, publisher_B, title=title, source_years=source_years, target_years=target_years,
               source_translator=source_translator, target_translator=target_translator,
               source_num_editions=source_num_editions, target_num_editions=target_num_editions,
               source_place=source_place, target_place=target_place, weight=weight, relation=relation, key=key)


In [18]:

import itertools

def add_cross_language_edges(G, title_map_df, existing_edges):
    """
    Add directed edges between publishers when their publications correspond
    across languages (based on the Excel title mapping).
    """

    # Extract columns for the relevant language titles
    lang_cols = ["EN", "SPA", "FR", "GER"]
    title_dict = {}

    # Build a lookup from each title to its equivalent titles in other languages
    for _, row in title_map_df.iterrows():
        titles = [row[col] for col in lang_cols if pd.notna(row[col])]
        for t in titles:
            title_dict[t] = [other for other in titles if other != t]

    # Loop over all pairs of publishers
    for pub_A, pub_B in itertools.combinations(G.nodes, 2):
        publications_A = G.nodes[pub_A].get('publications', [])
        publications_B = G.nodes[pub_B].get('publications', [])

        # Compare publications across language
        for publication_A in publications_A:
            title_A = publication_A.get('title')
            if title_A not in title_dict:
                continue

            matching_titles = set(title_dict[title_A])  # translations

            for publication_B in publications_B:
                title_B = publication_B.get('title')
                if title_B not in matching_titles:              
                    continue

                years_A = publication_A.get('years', [])
                years_B = publication_B.get('years', [])

                # Ensure translators are lists
                translators_A = publication_A.get('translator', [])
                translators_B = publication_B.get('translator', [])
                if not isinstance(translators_A, list):
                    translators_A = [] if pd.isna(translators_A) else [translators_A]
                if not isinstance(translators_B, list):
                    translators_B = [] if pd.isna(translators_B) else [translators_B]

                # Handle editions and place
                editions_A = publication_A.get('editions', 1)
                editions_B = publication_B.get('editions', 1)
                place_A = publication_A.get('place')
                place_B = publication_B.get('place')

                years_A = clean_years(years_A) if isinstance(years_A, list) else []
                years_B = clean_years(years_B) if isinstance(years_B, list) else []

                # Determine edge direction and type
                edge_counter = 0

                # Co-publication
                if title_A != title_B and len(years_A)>0 and len(years_B)>0 and years_A[0] == years_B[0]:
                    edge_key = f"sync_{pub_A}_{pub_B}_{title_A}_{title_B}_{len(existing_edges)}" # PUT STH ELSE HERE THAN LEN OF EXISTING EDGES; SAY years_A[0]
                    if edge_key not in existing_edges:
                        weight = max(clean_editions(editions_A), clean_editions(editions_B))
                        add_edge_with_attributes(
                            G, pub_A, pub_B, f"{title_A} ‚Üî {title_B}", years_A, years_B,
                            translators_A, translators_B, editions_A, editions_B,
                            place_A, place_B, weight, 'synchronization', edge_key
                        )
                        existing_edges.add(edge_key)

                # Reprint or retranslation (directed)
                elif years_A and years_B:
                    if years_A[0] < years_B[0]:
                        relation_type = (
                            'cross_reprint' if set(translators_A) == set(translators_B)
                            else 'precedence'
                        )
                        edge_key = f"{relation_type}_{pub_A}_{pub_B}_{title_A}_{title_B}_{len(existing_edges)}"
                        if edge_key not in existing_edges:
                            weight = clean_editions(editions_B)
                            add_edge_with_attributes(
                                G, pub_A, pub_B, f"{title_A} ‚Üí {title_B}", years_A, years_B,
                                translators_A, translators_B, editions_A, editions_B,
                                place_A, place_B, weight, relation_type, edge_key
                            )
                            existing_edges.add(edge_key)
                    elif years_B[0] < years_A[0]:
                        relation_type = (
                            'cross_reprint' if set(translators_A) == set(translators_B)
                            else 'precedence'
                        )
                        edge_key = f"{relation_type}_{pub_B}_{pub_A}_{title_B}_{title_A}_{len(existing_edges)}"
                        if edge_key not in existing_edges:
                            weight = clean_editions(editions_A)
                            add_edge_with_attributes(
                                G, pub_B, pub_A, f"{title_B} ‚Üí {title_A}", years_B, years_A,
                                translators_B, translators_A, editions_B, editions_A,
                                place_B, place_A, weight, relation_type, edge_key
                            )
                            existing_edges.add(edge_key)


In [19]:

existing_edges = set(
    f"{u}_{v}_{k}" for u, v, k in combined.edges(keys=True)
)

add_cross_language_edges(combined, title_map, existing_edges)

In [40]:

len(combined.edges())

30497

In [36]:
# SECOND RUN
len(combined.edges())

30449

In [21]:

sync_edges_key = [
    (u, v, k)
    for u, v, k, data in combined.edges(keys=True, data=True)
    if "sync" in str(k).lower()
]

print(f"üîó Number of sync edges (by edge_key): {len(sync_edges_key)}")


üîó Number of sync edges (by edge_key): 261


In [22]:
sync_edges_key[:2]

[('E. Cummiskey',
  'lefort',
  'sync_E. Cummiskey_lefort_The Glories of Mary_Gloires de Marie_7323'),
 ('E. Cummiskey',
  'Cremer',
  'sync_E. Cummiskey_Cremer_The Glories of Mary_Die Herrlichkeiten Mariens_7350')]

In [24]:

sync_edges_key[10]

('John Coyne',
 'Pons y C√≠a',
 'sync_John Coyne_Pons y C√≠a_The Selva : or, a collection of matter for sermons and instructions for ecclesiastical retreats and for private spiritual lectures_Selva de materias predicables e instructivas_8240')

In [25]:

reprint_edges = [
    (u, v, k)
    for u, v, k, data in G.edges(keys=True, data=True)
    if str(data.get("relation_type", "")).lower().startswith("cross_")
]

print(f"üîó Number of reprint edges (by relation_type): {len(reprint_edges)}")


üîó Number of reprint edges (by relation_type): 0


In [1]:
# PICKLE MIGHT BE BETTER THAN GPICKLE
import pickle

with open("Liguori_19th_Century.pkl", 'rb') as f:
    combined1 = pickle.load(f)

In [None]:

# DEGREES


In [None]:

# ANALYZING THE PUBLISHER NETWORK


In [31]:

def normalize_weight(w):
    if isinstance(w, list):
        return w[0] if len(w) > 0 else 0
    return w


In [40]:

degree_data_pub = []

for node in combined1.nodes:

    # Unweighted degrees (must count edges manually on MultiDiGraph)
    unweighted_in_pub = combined1.in_degree(node)
    unweighted_out_pub = combined1.out_degree(node)
    unweighted_total_pub = unweighted_in_pub + unweighted_out_pub

    # Weighted degrees ‚Äî ensure weight is numeric
    def w(data):
        w_ = data.get("weight", 1)
        if isinstance(w_, list):
            return w_[0] if len(w_) > 0 else 0
        return w_

    weighted_in_pub = sum(w(data) for _, _, data in combined1.in_edges(node, data=True))
    weighted_out_pub = sum(w(data) for _, _, data in combined1.out_edges(node, data=True))
    weighted_total_pub = weighted_in_pub + weighted_out_pub

    degree_data_pub.append({
        "Publisher": node,
        "Unweighted_In": int(unweighted_in_pub),
        "Unweighted_Out": int(unweighted_out_pub),
        "Unweighted_Total": int(unweighted_total_pub),
        "Weighted_In": float(weighted_in_pub),
        "Weighted_Out": float(weighted_out_pub),
        "Weighted_Total": float(weighted_total_pub),
        "Total_Editions": combined1.nodes[node].get("total_editions", 0)
    })


In [41]:

degree_df_pub = pd.DataFrame(degree_data_pub)
print(f"‚úÖ Computed degrees for {len(degree_df_pub)} publishers.")


‚úÖ Computed degrees for 451 publishers.


In [42]:

degree_df_pub_sorted = degree_df_pub.sort_values(by="Weighted_Total", ascending=False)
degree_df_pub_sorted.head(20)

Unnamed: 0,Publisher,Unweighted_In,Unweighted_Out,Unweighted_Total,Weighted_In,Weighted_Out,Weighted_Total,Total_Editions
446,Manz,415,408,823,4557.0,866.0,5423.0,0
126,mame,735,411,1146,3828.0,1015.0,4843.0,0
427,Laumann,243,98,341,4334.0,348.0,4682.0,0
68,casterman,284,301,585,3004.0,1021.0,4025.0,0
116,ardant,343,377,720,2389.0,896.0,3285.0,0
134,barbou,402,249,651,2686.0,496.0,3182.0,0
400,Mechitaristes,113,370,483,1796.0,1359.0,3155.0,0
29,Fr. Pustet,280,133,413,2028.0,341.0,2369.0,0
428,Barth,242,13,255,2178.0,16.0,2194.0,0
354,Garnier,267,197,464,1557.0,506.0,2063.0,0


In [43]:

degree_df_pub_sorted.to_excel("liguori_all_publishers_graph_sorted_weighted_degree_analysis.xlsx", index=False)
print("üìä Saved degree data to liguori_all_publishers_graph_sorted_weighted_degree_analysis.xlsx")


üìä Saved degree data to liguori_all_publishers_graph_sorted_weighted_degree_analysis.xlsx


In [47]:

degree_df_pub.head(10)


Unnamed: 0,Publisher,Unweighted_In,Unweighted_Out,Unweighted_Total,Weighted_In,Weighted_Out,Weighted_Total,Total_Editions
0,J. P. Walsh,0,2,2,0.0,2.0,2.0,0
1,P.F. Cunningham,6,7,13,6.0,10.0,16.0,0
2,M.T. Cozans,1,6,7,1.0,10.0,11.0,0
3,Paulist Press,0,0,0,0.0,0.0,0.0,0
4,E. Cummiskey,7,38,45,7.0,137.0,144.0,0
5,John J. Bodkin,1,0,1,1.0,0.0,1.0,0
6,Uknown,165,106,271,330.0,310.0,640.0,0
7,P. J. Kenedy,0,0,0,0.0,0.0,0.0,0
8,J. and C. Mozley,2,0,2,2.0,0.0,2.0,0
9,H. & C. McGrath,7,7,14,7.0,12.0,19.0,0


In [None]:

# CENTRALITIES


In [None]:

# Unweighted centralities
closeness_unw = nx.closeness_centrality(combined1)

In [49]:
closeness_we = nx.closeness_centrality(combined1, distance=lambda u, v, data: 1 / (data.get("weight", 1) + 1e-9))

In [51]:

combined_simple = nx.DiGraph()

for u, v, data in combined1.edges(data=True):
    w = data.get("weight", 1)
    if isinstance(w, list):
        w = w[0] if len(w) else 0

    if combined_simple.has_edge(u, v):
        combined_simple[u][v]["weight"] += w
    else:
        combined_simple.add_edge(u, v, weight=w)


In [52]:


# Betweenness (unweighted)
betweenness_unw = nx.betweenness_centrality(combined_simple, normalized=True, weight=None)
# Betweenness (weighted)
betweenness_we = nx.betweenness_centrality(combined_simple, weight="weight", normalized=True)


In [53]:


try:
    eigen_unw = nx.eigenvector_centrality_numpy(combined_simple)
except nx.NetworkXException:
    eigen_unw = {n: 0 for n in combined_simple.nodes}
    
try:
    eigen_we = nx.eigenvector_centrality_numpy(combined_simple, weight="weight")
except nx.NetworkXException:
    eigen_we = {n: 0 for n in combined_simple.nodes}


In [54]:


centrality_df_pub = degree_df_pub_sorted.copy()

centrality_df_pub["Betweenness_Unweighted"] = centrality_df_pub["Publisher"].map(betweenness_unw)
centrality_df_pub["Betweenness_Weighted"] = centrality_df_pub["Publisher"].map(betweenness_we)
centrality_df_pub["Closeness_Unweighted"] = centrality_df_pub["Publisher"].map(closeness_unw)
centrality_df_pub["Closeness_Weighted"] = centrality_df_pub["Publisher"].map(closeness_we)
centrality_df_pub["Eigenvector_Unweighted"] = centrality_df_pub["Publisher"].map(eigen_unw)
centrality_df_pub["Eigenvector_Weighted"] = centrality_df_pub["Publisher"].map(eigen_we)

# Sort by overall weighted degree or betweenness
centrality_df_pub = centrality_df_pub.sort_values(by="Betweenness_Weighted", ascending=False)
centrality_df_pub.head(20)


Unnamed: 0,Publisher,Unweighted_In,Unweighted_Out,Unweighted_Total,Weighted_In,Weighted_Out,Weighted_Total,Total_Editions,Betweenness_Unweighted,Betweenness_Weighted,Closeness_Unweighted,Closeness_Weighted,Eigenvector_Unweighted,Eigenvector_Weighted
126,mame,735,411,1146,3828.0,1015.0,4843.0,0,0.155292,0.136543,0.52943,0.52943,0.16623,0.374367
446,Manz,415,408,823,4557.0,866.0,5423.0,0,0.080956,0.077474,0.498175,0.498175,0.078704,0.434127
45,Benziger Brothers,573,109,682,1458.0,207.0,1665.0,0,0.079189,0.069246,0.587511,0.587511,0.151874,0.208414
314,Aguado,120,184,304,216.0,547.0,763.0,0,0.013483,0.055748,0.426462,0.426462,0.035329,0.013842
228,gaume,175,77,252,205.0,178.0,383.0,0,0.012787,0.049047,0.449161,0.449161,0.091981,0.015205
68,casterman,284,301,585,3004.0,1021.0,4025.0,0,0.084477,0.048692,0.427117,0.427117,0.073364,0.353846
329,Bossange,59,197,256,62.0,577.0,639.0,0,0.004081,0.04781,0.342113,0.342113,0.010979,0.00173
91,caron,17,221,238,17.0,661.0,678.0,0,0.013481,0.038647,0.33961,0.33961,0.008541,0.002362
37,James Duffy,209,314,523,778.0,890.0,1668.0,0,0.033313,0.03105,0.433095,0.433095,0.044969,0.100751
279,maitre,213,15,228,213.0,19.0,232.0,0,0.000415,0.029504,0.513801,0.513801,0.157491,0.017275


In [55]:

centrality_df_pub.to_excel("liguori_all_publishers_graph_sorted_weighted_betweenness_and_other_centralities_and_degrees.xlsx", index=False)

In [57]:

# Define all centrality columns to sort by
centrality_measures_pub = [
    "Closeness_Unweighted",
    "Closeness_Weighted",
    "Betweenness_Unweighted",
    "Betweenness_Weighted",
    "Eigenvector_Unweighted",
    "Eigenvector_Weighted"
]

# Sort and display top 10 for each
for metric in centrality_measures_pub:
    print(f"\nüîù Top 20 Publishers by {metric}:")
    display(
        centrality_df_pub[["Publisher", metric]]
        .sort_values(by=metric, ascending=False)
        .head(20)
        .reset_index(drop=True)
    )
    


üîù Top 20 Publishers by Closeness_Unweighted:


Unnamed: 0,Publisher,Closeness_Unweighted
0,Benziger Brothers,0.587511
1,Calleja,0.54179
2,Eberle & Rickenbach,0.537419
3,mame,0.52943
4,Sch√∂ningh,0.528425
5,Avrial,0.519552
6,Ostendorff,0.519552
7,maitre,0.513801
8,desclee,0.510037
9,Librer√≠a Religiosa de Enrique Hern√°ndez,0.506539



üîù Top 20 Publishers by Closeness_Weighted:


Unnamed: 0,Publisher,Closeness_Weighted
0,Benziger Brothers,0.587511
1,Calleja,0.54179
2,Eberle & Rickenbach,0.537419
3,mame,0.52943
4,Sch√∂ningh,0.528425
5,Ostendorff,0.519552
6,Avrial,0.519552
7,maitre,0.513801
8,desclee,0.510037
9,Librer√≠a Religiosa de Enrique Hern√°ndez,0.506539



üîù Top 20 Publishers by Betweenness_Unweighted:


Unnamed: 0,Publisher,Betweenness_Unweighted
0,mame,0.155292
1,casterman,0.084477
2,Manz,0.080956
3,Benziger Brothers,0.079189
4,lefort,0.0405
5,fischer,0.037863
6,James Duffy,0.033313
7,Laumann,0.028817
8,perisse,0.024203
9,J.B. Piet,0.021475



üîù Top 20 Publishers by Betweenness_Weighted:


Unnamed: 0,Publisher,Betweenness_Weighted
0,mame,0.136543
1,Manz,0.077474
2,Benziger Brothers,0.069246
3,Aguado,0.055748
4,gaume,0.049047
5,casterman,0.048692
6,Bossange,0.04781
7,caron,0.038647
8,James Duffy,0.03105
9,maitre,0.029504



üîù Top 20 Publishers by Eigenvector_Unweighted:


Unnamed: 0,Publisher,Eigenvector_Unweighted
0,Eberle & Rickenbach,0.204011
1,Sch√∂ningh,0.183572
2,Ostendorff,0.176079
3,mame,0.16623
4,Librer√≠a Religiosa de Enrique Hern√°ndez,0.164569
5,zech,0.164393
6,maitre,0.157491
7,Baena,0.155724
8,Benziger Brothers,0.151874
9,L√∂wenberg,0.150961



üîù Top 20 Publishers by Eigenvector_Weighted:


Unnamed: 0,Publisher,Eigenvector_Weighted
0,Manz,0.434127
1,mame,0.374367
2,casterman,0.353846
3,Laumann,0.234039
4,barbou,0.228466
5,Barth,0.222591
6,ardant,0.212107
7,Benziger Brothers,0.208414
8,Fr. Pustet,0.176759
9,rittler,0.153145
