Calculate the Centrality Metrics for the Cartels where we have at least 80% of the investor files

In [39]:
import pandas as pd
import numpy as np
import os
import networkx as nx

Identify cartels in which at least 80% of the companies have a corresponding file

In [40]:
# Minimum share of a cartel's firms that must have a file for the cartel to be kept.
MIN_FILE_COVERAGE = 0.8

# Load the cartel overview table.
cartel_df = pd.read_excel('./transformed_data/cartel_networks/cartel_df_herfindal.xlsx')

# Restrict the table to the cartel identifiers, firm counts, period and name columns.
cartel_df = cartel_df[['case', 'cartel', 'number_of_firms', 'number_cartel_firms', 'number_of_firms_with_file', 'start', 'end', 'duration', 'entity_name', 'file_name']]

# Keep rows where number_of_firms_with_file is at least 80% (MIN_FILE_COVERAGE) of number_of_firms.
cartel_df_filtered = cartel_df[cartel_df['number_of_firms_with_file'] >= MIN_FILE_COVERAGE * cartel_df['number_of_firms']]

cartel_df_filtered.head(10)

Unnamed: 0,case,cartel,number_of_firms,number_cartel_firms,number_of_firms_with_file,start,end,duration,entity_name,file_name
81,37444,SAS Maersk Air and Sun-Air,1,2,1,1999,2001,3,SAS AB,SAS AB
140,38432,Professional videotape,3,8,3,2000,2001,2,Hitachi Maxell Limited,Hitachi Maxell
141,38432,Professional videotape,3,8,3,2000,2001,2,Sony Corporation,Sony
142,38432,Professional videotape,3,8,3,2000,2001,2,Fujifilm Holdings Corp.,Fujifilm
165,38662,Gaz de France/ENEL+ENI,1,2,1,2000,2003,4,ENI,ENI
231,38511,DRAM,9,10,9,1999,2001,3,Micron,Micron
232,38511,DRAM,9,10,9,1999,2001,3,Infineon,Infineon
233,38511,DRAM,9,10,9,1999,2001,3,Samsung,samsung
234,38511,DRAM,9,10,9,2000,2001,2,Elpida,Elpida
235,38511,DRAM,9,10,9,1999,2001,3,NEC Corporation,NEC


Filter the dataframe further to select cartels with more than one investor file, to avoid the star-shaped graph problem

In [41]:
# Drop every row whose 'number_of_firms' is 1, keeping only cartels with several firms.
has_multiple_firms = cartel_df_filtered['number_of_firms'].gt(1)
cartel_df_filtered = cartel_df_filtered.loc[has_multiple_firms]


In [42]:
# Directory holding one shareholder workbook per company, and the expected file suffix.
SHAREHOLDER_DIR = "./transformed_data/base/shareholders"
SHAREHOLDER_SUFFIX = ".xlsx"

# get distinct file_names
file_names = cartel_df_filtered['file_name'].unique()
# Set copy of the names for fast membership tests while scanning the directory.
wanted_file_names = set(file_names)

# Collect the matching workbooks in a list and concatenate once at the end;
# concatenating inside the loop re-copies all previously loaded rows on every iteration.
shareholder_frames = []

# os.listdir returns entries in arbitrary order, so sort (case-insensitively)
# to make the row order of df_all reproducible across runs and machines.
for file in sorted(os.listdir(SHAREHOLDER_DIR), key=str.lower):
    # A file matches when its name without the ".xlsx" suffix is one of the cartel file_names.
    if file.endswith(SHAREHOLDER_SUFFIX) and file[:-len(SHAREHOLDER_SUFFIX)] in wanted_file_names:
        file_path = os.path.join(SHAREHOLDER_DIR, file)
        shareholder_frames.append(pd.read_excel(file_path))

# Fail with an explicit message instead of a KeyError on 'company_name' further down.
if not shareholder_frames:
    raise FileNotFoundError(
        f"No shareholder file in {SHAREHOLDER_DIR} matches the file_name values of the filtered cartels"
    )

# union all the dataframes
df_all = pd.concat(shareholder_frames, ignore_index=True)

df_all_company_names = df_all['company_name'].unique()
print(df_all_company_names)

['Aalberts' 'AU Optronics' 'Chimei' 'Elpida' 'Fujifilm' 'Hannstar Display'
 'Henkel' 'hitachi ltd' 'Hitachi Maxell' 'IMI PLC' 'Infineon' 'LG Display'
 'Micron' 'Mitsubishi' 'Mueller Industries' 'Nanya Tech' 'NEC'
 'procter gamble' 'samsung' 'Sony' 'Toshiba' 'Unilever NV' 'Unilever PLC']


In [None]:
# Left-join the shareholder rows onto the cartel rows: 'file_name' on the cartel side
# is matched against 'company_name' on the shareholder side. Clashing column names
# coming from df_all get the suffix '_y', and the '_merge' indicator column records
# whether a cartel row found a match.
cartel_df_filtered_with_sh = pd.merge(
    cartel_df_filtered,
    df_all,
    how='left',
    left_on='file_name',
    right_on='company_name',
    suffixes=('', '_y'),
    indicator=True,
)

cartel_df_filtered_with_sh.head(10)

In [None]:

# Build one company-investor graph per (cartel, year) and write each one to a GraphML file.

# get all perc_os columns in the dataframe
perc_os_columns = [col for col in cartel_df_filtered_with_sh.columns if 'perc_os' in col]

# get the distinct cartel names
cartel_names = cartel_df_filtered_with_sh['cartel'].unique()
print(cartel_names)

# order dataframe by cartel
cartel_df_filtered_with_sh = cartel_df_filtered_with_sh.sort_values(by=['cartel'])

# split the dataframe into one independent dataframe per cartel name
cartel_dfs = [
    cartel_df_filtered_with_sh[cartel_df_filtered_with_sh['cartel'] == cartel_name].copy()
    for cartel_name in cartel_names
]

# for each column in perc_os_columns
for col in perc_os_columns:

    # Get year from the column name
    # NOTE(review): assumes every perc_os column name ends in a four-digit year - confirm.
    year = col[-4:]

    for single_cartel_df in cartel_dfs:

        # get the cartel name
        # NOTE(review): the name goes into the file name verbatim, including any
        # surrounding whitespace - confirm downstream readers expect that.
        cartel_name = single_cartel_df['cartel'].iloc[0]

        # Define ONE graph per year and cartel. It must be created before the row loop:
        # creating it inside the loop resets it on every row, so the written file
        # would hold at most the edge of the last row.
        G = nx.Graph()

        for index, row in single_cartel_df.iterrows():

            # only rows with a value in this year's column contribute an edge
            if pd.notnull(row[col]):

                # add node with company name
                G.add_node(row['company_name'], type='Company')
                # add investor name
                G.add_node(row['investor_name'], type='Investor')
                # add edge between company and investor, weighted by the column value
                G.add_edge(row['company_name'], row['investor_name'], weight=row[col])

        # save the graph to a file (written even when the graph has no edges)
        nx.write_graphml(G, f"./transformed_data/reduced_connected_shareholder_networks/shareholder_network_{cartel_name}_{year}.graphml", named_key_ids=True, infer_numeric_types=True)



['Professional videotape' 'DRAM' 'LCD panel producers ' 'Fittings'
 'Consumer Detergents']
