Import relevant libraries for network and data handling / transformation

In [27]:
import networkx as nx
from pyvis.network import Network
import pandas as pd

Read the cartel file and print some metrics to get an overview of the data

In [28]:
# Load the cartel table from the Excel export into a dataframe
cartel_df = pd.read_excel("./transformed_data/base/cartels/cartels.xlsx")

# Distinct number of case numbers, cartel names and entity (company) names
number_of_cases, number_of_cartels, number_of_entities = (
    cartel_df[column].nunique() for column in ("case", "cartel", "entity_name")
)

# Short textual summary of the table
print("Number of cases: ", number_of_cases)
print("Number of Cartels: ", number_of_cartels)
print("Number of companies: ", number_of_entities)
print("Number of Records in Table: ", cartel_df.shape[0])

# Show the first rows as a visual overview of the dataframe
cartel_df.head(20)

Number of cases:  76
Number of Cartels:  75
Number of companies:  229
Number of Records in Table:  292


Unnamed: 0,case,cartel,entity_name,key_iustin,key_invented,start,end,duration,number_of_firms,number_cartel_firms,sector,subsector,cartel_classification
0,31865,PVC (II),Elf Aquitaine SA,C000007728,31865C000007728,1981,1994,14,6,12.0,3,3,"1.2, 1.6"
1,31865,PVC (II),BASF SE,C000008351,31865C000008351,1981,1994,14,6,12.0,3,3,"1.2, 1.6"
2,31865,PVC (II),Koninklijke DSM,C000013105,31865C000013105,1981,1994,14,6,12.0,3,3,"1.2, 1.6"
3,31865,PVC (II),ENI,C901505544,31865C901505544,1981,1994,14,6,12.0,3,3,"1.2, 1.6"
4,31865,PVC (II),Wacker Chemie AG,C901711026,31865C901711026,1981,1994,14,6,12.0,3,3,"1.2, 1.6"
5,31865,PVC (II),Royal Dutch Shell,C901842957,31865C901842957,1981,1994,14,6,12.0,3,3,"1.2, 1.6"
6,31906,FLAT GLASS,PPG Industries Inc,C000003476,31906C000003476,1982,1987,6,2,3.0,3,4,"1.2, 1.6, 1.7"
7,31906,FLAT GLASS,Saint Gobain,C000007729,31906C000007729,1982,1987,6,2,3.0,3,4,"1.2, 1.6, 1.7"
8,32800,Quantel International - continuum / Quantel SA,Hoya Corporation,C000087990,32800C000087990,1986,1992,7,1,2.0,3,5,1.6
9,33016,ANSAC,FMC Corp.,C000001711,33016C000001711,1984,1990,7,4,7.0,3,3,1.5


Prepare the different nodes and edges for the graph creation

In [29]:
# Build the node records (firms and cartels) and the firm-cartel edges
# from the rows of `cartel_df`.

# Firm records, plus a set of firm names already added (fast duplicate check)
entity_list = []
entity_set = set()

# Cartel records, plus a set of cartel names already added (fast duplicate check)
cartel_list = []
cartel_set = set()

# One (firm name, cartel name) tuple per table row
edge_list = []

# Iterate over the dataframe read from the Excel file. The frame is named
# `cartel_df`; no variable called `df` exists in this notebook, so looping over
# `df` fails on a fresh kernel (or silently uses a stale leftover variable).
for _, row in cartel_df.iterrows():  # the row index is not needed

    # Entities: keep only the first occurrence of each firm name
    if row["entity_name"] not in entity_set:

        # Store the firm together with its identifier columns
        entity_list.append({
            "entity_name": row["entity_name"],
            "key_iustin": row["key_iustin"],
            "key_invented": row["key_invented"],
        })

        # Remember the name so later rows of the same firm are skipped
        entity_set.add(row["entity_name"])

    # Cartels: keep only the first occurrence of each cartel name.
    # NOTE(review): de-duplication is by cartel name, not by case number. The
    # overview above reports more distinct cases than distinct cartel names, so
    # a name that appears under several cases keeps only the attributes of its
    # first row - confirm this is intended.
    if row["cartel"] not in cartel_set:

        # Store the cartel together with its descriptive columns
        cartel_list.append({
            "cartel": row["cartel"],
            "case": row["case"],
            "start": row["start"],
            "end": row["end"],
            "duration": row["duration"],
            "number_of_firms": row["number_of_firms"],
            "number_of_cartel_firms": row["number_cartel_firms"],
            "sector": row["sector"],
            "subsector": row["subsector"],
            "cartel_classification": row["cartel_classification"]
        })

        # Remember the name so later rows of the same cartel are skipped
        cartel_set.add(row["cartel"])

    # Edges: every row links its firm to its cartel
    edge_list.append((row["entity_name"], row["cartel"]))


print("Number of Firm-Nodes in List: ", len(entity_list))
print("Number of Cartel-Nodes in List: ", len(cartel_list))
print("Number of Edges: ", len(edge_list))

Number of Firm-Nodes in List:  229
Number of Cartel-Nodes in List:  75
Number of Edges:  292


Create the graph with the previously created lists

In [30]:
# Undirected graph holding both firm and cartel nodes
G = nx.Graph()

# Firm nodes: tagged type "Company" and bipartite=0 (first node set)
for entity in entity_list:
    G.add_node(
        entity["entity_name"],
        type="Company",
        bipartite=0,
        key_iustin=entity["key_iustin"],
        key_iustin_long=entity["key_invented"],
    )

# Cartel nodes: tagged type "Cartel" and bipartite=1 (second node set),
# carrying the descriptive cartel columns as node attributes
for cartel in cartel_list:
    G.add_node(
        cartel["cartel"],
        type="Cartel",
        bipartite=1,
        case=cartel["case"],
        startyear=cartel["start"],
        endyear=cartel["end"],
        duration=cartel["duration"],
        number_of_firms=cartel["number_of_firms"],
        number_of_cartel_firms=cartel["number_of_cartel_firms"],
        sector=cartel["sector"],
        subsector=cartel["subsector"],
        classification=cartel["cartel_classification"],
    )

# Connect each firm to the cartel(s) it appears in
G.add_edges_from(edge_list)

Save the graph as a GraphML file for later visualization

In [31]:
# Persist the graph in GraphML format so it can be visualized later
nx.write_graphml(
    G,
    "./transformed_data/cartel_networks/cartel_network.graphml",
    named_key_ids=True,
    infer_numeric_types=True,
)

Calculate centrality metrics for cartel and company nodes

In [32]:
# Compute every centrality measure once for the whole graph;
# each entry maps a node to its score for that measure
centrality_measures = {
    "degree_centrality": nx.degree_centrality(G),
    "betweenness_centrality": nx.betweenness_centrality(G),
    "closeness_centrality": nx.closeness_centrality(G),
    "eigenvector_centrality": nx.eigenvector_centrality(G),
    "pagerank": nx.pagerank(G),
    "harmonic_centrality": nx.harmonic_centrality(G),
    "degree": dict(G.degree())
}

# Separate record lists for company/firm nodes and cartel nodes
company_centrality = []
cartel_centrality = []

# Lookup from node type to the list that collects its records
records_by_type = {
    "Company": company_centrality,
    "Cartel": cartel_centrality,
}

# Walk over all nodes and collect one record per node
for node, attributes in G.nodes(data=True):

    # Node type as set during graph creation ("Cartel" or "Company")
    node_type = attributes["type"]

    # Node name followed by its score for each measure (0 if a measure has no entry)
    centrality_data = {
        "name": node,
        **{
            measure: scores.get(node, 0)
            for measure, scores in centrality_measures.items()
        },
    }

    # Nodes of any other type are ignored
    target_records = records_by_type.get(node_type)
    if target_records is not None:
        target_records.append(centrality_data)

# One dataframe per node type; columns are prefixed to stay distinguishable after merging
df_cartel_centrality = pd.DataFrame(cartel_centrality).add_prefix('cartel_')
df_company_centrality = pd.DataFrame(company_centrality).add_prefix('company_')

df_cartel_centrality.head()



Unnamed: 0,cartel_name,cartel_degree_centrality,cartel_betweenness_centrality,cartel_closeness_centrality,cartel_eigenvector_centrality,cartel_pagerank,cartel_harmonic_centrality,cartel_degree
0,PVC (II),0.019802,0.035391,0.080578,0.0001408994,0.007593,30.213095,6
1,FLAT GLASS,0.016502,0.000229,0.016973,7.140057e-13,0.006984,5.5,5
2,Quantel International - continuum / Quantel SA,0.0033,0.0,0.0033,4.31736e-37,0.003289,1.0,1
3,ANSAC,0.013201,0.004002,0.056937,7.167807e-05,0.005473,21.963492,4
4,Ciment Cement,0.029703,0.001137,0.028524,3.739372e-09,0.01491,9.833333,9


In [33]:
# Attach the cartel-level and company-level centrality columns to every row of
# the cartel table (left joins keep all original rows), then drop the join-key
# columns that merely repeat "cartel" and "entity_name"
df_merged = (
    pd.merge(cartel_df, df_cartel_centrality, left_on="cartel", right_on="cartel_name", how="left")
    .merge(df_company_centrality, left_on="entity_name", right_on="company_name", how="left")
    .drop(columns=["company_name", "cartel_name"])
)

# Write the enriched table to Excel without the dataframe index
df_merged.to_excel("./transformed_data/cartel_networks/cartel_data_with_centrality_measures.xlsx", index=False)