# Ownership-Networks

## Data Import

In [1]:
import pandas as pd
import numpy as np

In [2]:
codebook = 'new'

In [3]:
file_name = '3030-own'
df = pd.read_csv(f'Data/{file_name}.csv',
                     encoding='unicode_escape')

df.rename(columns={'Company name Latin alphabet': 'Comp_Name',
                   'Country ISO code': 'Country',
                   'NACE Rev. 2, core code (4 digits)': 'NACE',
                   'BvD ID number': 'BvD',
                   'Operating revenue (Turnover)\r\nth USD Last avail. yr': 'TURN',
                   'Cash flow\r\nth USD Last avail. yr': 'CF',
                   'Total assets\r\nth USD Last avail. yr': 'TASS',
                   'Shareholders funds\r\nth USD Last avail. yr': 'EC',
                   'Number of employees\r\nLast avail. yr': 'EM',
                   'Shareholder - BvD ID number': 'Sha_BvD',
                   'Shareholder - Direct %': 'Sha_%',
                   'Subsidiary - BvD ID number': 'Sub_BvD',
                   'Subsidiary - Direct %': 'Sub_%'
                  },
            inplace=True)

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,Comp_Name,Country,NACE,BvD,TURN,CF,TASS,EC,EM,Sha_BvD,Sha_%,Sub_BvD,Sub_%
0,1.0,AIRBUS SE,NL,3030.0,NL24288945,59 736 738,7 471 764,121 241 465,10 743 847,126 495,DE6070642164,10.9,LULB51757,100
1,,,,,,,,,,,FR318186756,10.9,DEFEB14827,100
2,,,,,,,,,,,US149114345L,9.63,GB02449259,100
3,,,,,,,,,,,ESQ2820015B,4.12,NL28086907,100
4,,,,,,,,,,,YY*4000000169624,3.02,NL63545128,100


In [32]:
# Codebook
# If it has already been generated
if codebook == 'old':   
    cdbk = pd.read_csv(f'Data\\Codebook.csv',index_col=0)
    # Creating dictionary so that it can be used for index in the main dataframe
    di_cdbk = dict([(bvd, index) for index, bvd in zip(cdbk.index, cdbk.BvD)])
elif codebook == 'new':
    # Add Targi companies
    count = 1
    di_cdbk = {}
    df_temp = df[['Comp_Name', 'BvD']].dropna().drop_duplicates(subset=['Comp_Name', 'BvD']).sort_values(by='Comp_Name')
    for company, bvd_code in zip(df_temp['Comp_Name'], df_temp['BvD']):
        index = f'Targi_{count}'
        di_cdbk[index] = [company, bvd_code]
        count += 1
        
    # Add Neigh companies
    count = 1
    df_temp = set(list(df['Sha_BvD'].unique()) + list(df['Sub_BvD'].unique()))
    df_temp2 = [value[1] for value in di_cdbk.values()]
    df_temp = [neigh for neigh in df_temp if neigh not in df_temp2]    

In [34]:
df_temp

[nan,
 'HK0000226591',
 'PL006746410',
 'FR*110330653865',
 'ZZ*J00M*131154',
 'US327897497L',
 'US350015603L',
 'LT*690110907',
 'US169344864L',
 'US336496328L',
 'US143712402L',
 'FR913616413',
 'WW*210312561',
 'ZZ*110292372202',
 'IE523577',
 'IE489499',
 'MA*110337119264',
 'PL250978025',
 'US247300307L',
 'ES2-207054',
 'BG179663043',
 'GB00926916',
 'GB00034871',
 'ESB91864116',
 'GB03135337',
 'FR*81565651',
 'DE2011241833',
 'WW*211354735',
 'US2-131599',
 'IE455484',
 'KY2-88294',
 'SA2-88493',
 'MXIME650311C66',
 'YY*110320526848',
 'CN*110140437947',
 'CHCHE105927761',
 'WW*338732248',
 'PLPP70072112079SPL',
 'IE605126',
 'FR514459676',
 'ITPPGMBMRZ60E31A479G',
 'IT06958541218',
 'FR311079701',
 'IN0007180674',
 'IE621431',
 'UY*110337112528',
 'SG198901923R',
 'US*110187921615',
 'US130921671L',
 'FR2-88331',
 'IN0001487721',
 'US149173823L',
 'IE*110344871599',
 'CA256431908L',
 'US276544025L',
 'HK0000044163',
 'WW*923024267',
 'US247029848L',
 'US132999902L',
 'IE144820

In [7]:
len(di_cdbk.keys())

1169

## Creating separate dataframe with economic attributes

In [8]:
import copy
df_econ = copy.copy(df[['Comp_Name', 'Country', 'NACE', 'BvD', 'TURN', 'CF',
       'TASS', 'EC', 'EM']]).drop_duplicates(subset=['BvD'], keep='first').dropna()

df_econ['index'] = df_econ['BvD'].map(di_cdbk)
df_econ.set_index('index', inplace=True)
df_econ.head()

  return Index(sequences[0], name=names)


Unnamed: 0_level_0,Comp_Name,Country,NACE,BvD,TURN,CF,TASS,EC,EM
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
,AIRBUS SE,NL,3030.0,NL24288945,59 736 738,7 471 764,121 241 465,10 743 847,126 495
,AIRBUS,FR,3030.0,FR383474814,46 503 021,1 450 146,67 241 069,11 035 552,8 944
,BAE SYSTEMS PLC,GB,3030.0,GB01470151,26 837 099,3 328 945,36 453 156,10 301 190,82 000
,SAFRAN,FR,3030.0,FR562082909,18 434 203,1 792 906,47 247 554,15 029 606,77 008
,LEONARDO S.P.A.,IT,3030.0,IT00401990585,16 651 490,1 258 319,32 142 064,7 310 935,50 413


In [9]:
df_econ.replace('n.a.', np.nan, inplace=True)

In [10]:
# MISSING DATA in %
df_econ.isna().sum()/len(df_econ)*100

Comp_Name     0.000000
Country       0.000000
NACE          0.000000
BvD           0.000000
TURN         17.108640
CF           27.373824
TASS          5.645851
EC            5.560308
EM           33.190761
dtype: float64

In [11]:
len(df_econ)

1169

In [12]:
# Treats the NA in column with company name and BvD-code in the main dataframe
df['Comp_Name'].fillna(method='ffill', inplace=True)
df['BvD'].fillna(method='ffill', inplace=True)

## Creating separate dataframes for links with shareholders and subsidiaries

In [13]:
import copy
df_sub = copy.copy(df[['Comp_Name', 'BvD','Sub_BvD', 'Sub_%']].dropna(subset='Sub_BvD')).drop('Comp_Name', axis=1)
df_sha = copy.copy(df[['Comp_Name', 'BvD','Sha_BvD', 'Sha_%',]].dropna(subset='Sha_BvD')).drop('Comp_Name', axis=1)

### Subsidiaries Dataframe

In [14]:
df_sub.head()

Unnamed: 0,BvD,Sub_BvD,Sub_%
0,NL24288945,LULB51757,100
1,NL24288945,DEFEB14827,100
2,NL24288945,GB02449259,100
3,NL24288945,NL28086907,100
4,NL24288945,NL63545128,100


#### Checking for non-numeric values

In [15]:
df_sub[(df_sub['Sub_%'].str.isalpha())]['Sub_%'].value_counts()

WO    54
MO    13
NG     9
VE     3
Name: Sub_%, dtype: int64

#### Changing strings into numeric values

In [16]:
# Converting entire cells
df_sub['Sub_%'].replace({
    'WO': 100,
    'MO': 51,
    'NG': 0.01,
    'VE': 0.01,
    '-': 0
}, 
                        inplace=True)

# Removing special signs from values
import regex
df_sub['Sub_%'] = df_sub['Sub_%'].replace('[<>]+','',regex=True).astype(float)

In [17]:
# Renames columns
df_sub.insert(1, 'source', df_sub['BvD'].map(di_cdbk))
df_sub.insert(2, 'target', df_sub['Sub_BvD'].map(di_cdbk))
df_sub.rename(columns={'Sub_%': 'weight'}, inplace=True)
df_sub.drop(['BvD', 'Sub_BvD'], axis=1, inplace=True)

# Keeps only present links (Drops Nans and weight 0)
df_sub.dropna(inplace=True)

df_sub = df_sub[df_sub['weight'] > 0]
df_sub.head()

Unnamed: 0,source,target,weight


In [18]:
# Extracting EASIN only links
newDict = { key:value for (key,value) in di_cdbk.items() if str(value).startswith('Targi')}

df_sub_EASIN = df_sub[df_sub['target'].isin(newDict.values())]

In [19]:
df_sub_EASIN.head()

Unnamed: 0,source,target,weight


### Shareholders Dataframe

#### Checking for non-numeric values

In [20]:
df_sha[(df_sha['Sha_%'].str.isalpha())]['Sha_%'].value_counts()

WO     34
NG     33
MO     10
FC      9
GP      9
BR      3
T       3
FME     1
Name: Sha_%, dtype: int64

In [21]:
# Converting entire cells
df_sha['Sha_%'].replace({
    'WO': 100,
    'MO': 51,
    'NG': 0.01,
    'VE': 0.01,
    'FC': 0.01,
    'GP': 50,
    'BR': 0.01,
    'T': 0.01,
    'FME': 0.01,
    '-': 0
}, 
                        inplace=True)

# 'WO': Whole ownership,
# 'MO': Major ownership,
# 'NG': Negligable,
# 'VE': ? (Venture Equity?),
# 'FC': ?,
# 'GP': General partnership,
# 'BR': Business Relief,
# 'T': ?,
# 'FME': ? 

In [22]:
# Removing special signs from values
import regex
df_sha['Sha_%'] = df_sha['Sha_%'].replace('[<>]+','',regex=True).astype(float)

In [23]:
# Renames columns
df_sha.insert(1, 'source', df_sha['Sha_BvD'].map(di_cdbk))
df_sha.insert(2, 'target', df_sha['BvD'].map(di_cdbk))
df_sha.rename(columns={'Sha_%': 'weight'}, inplace=True)
df_sha.drop(['BvD', 'Sha_BvD'], axis=1, inplace=True)

# Keeps only present links (Drops Nans and weight 0)
df_sha.dropna(inplace=True)

df_sha = df_sha[df_sha['weight'] > 0]
df_sha.head()

Unnamed: 0,source,target,weight


In [24]:
# Extracting EASIN only links
newDict = { key:value for (key,value) in di_cdbk.items() if str(value).startswith('Targi')}

df_sha_EASIN = df_sha[df_sha['source'].isin(newDict.values())]

In [25]:
df_sha_EASIN.head()

Unnamed: 0,source,target,weight


### All links joined

#### EASIN links

In [26]:
df_EASIN = pd.concat([df_sha_EASIN, df_sub_EASIN]).drop_duplicates(subset=['source','target'])

In [27]:
df_EASIN.head()

Unnamed: 0,source,target,weight


In [28]:
print(f"Sha links: {len(df_sha_EASIN)}")
print(f"Sub links: {len(df_sub_EASIN)}")
print(f"All links: {len(df_EASIN)}")

Sha links: 0
Sub links: 0
All links: 0


##### EASIN links transformed into EC

In [29]:
# def share_into_EC():

IndentationError: expected an indented block (1709571583.py, line 1)

#### EASIN + NEIGH links

In [None]:
df_EandN = pd.concat([df_sha, df_sub]).drop_duplicates(subset=['source','target'])

In [None]:
df_EandN.head()

In [None]:
print(f"Sha links: {len(df_sha)}")
print(f"Sub links: {len(df_sub)}")
print(f"All links: {len(df_EandN)}")