# Ownership-Networks

## Data Import

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the ownership table; `file_name` is the stem of the CSV inside Data/.
file_name = '3030-own'
df = pd.read_csv(f'Data/{file_name}.csv',
                 encoding='unicode_escape')

# Replace the verbose column headers with the short names used in later cells.
# Several headers contain a literal line break ('\n'), so the keys must match exactly.
df = df.rename(columns={'Company name Latin alphabet': 'Comp_Name',
                        'Country ISO code': 'Country',
                        'NACE Rev. 2, core code (4 digits)': 'NACE',
                        'BvD ID number': 'BvD',
                        'Operating revenue (Turnover)\nth USD Last avail. yr': 'TURN',
                        'Cash flow\nth USD Last avail. yr': 'CF',
                        'Total assets\nth USD Last avail. yr': 'TASS',
                        'Shareholders funds\nth USD Last avail. yr': 'EC',
                        'Number of employees\nLast avail. yr': 'EM',
                        'Shareholder - BvD ID number': 'Sha_BvD',
                        'Shareholder - Direct %': 'Sha_%',
                        'Subsidiary - BvD ID number': 'Sub_BvD',
                        'Subsidiary - Direct %': 'Sub_%'
                        })

# Codebook
# A forward slash resolves on Windows, Linux and macOS alike and matches the
# path style used for the main CSV above.
cdbk = pd.read_csv('Data/Codebook.csv', index_col=0)

# Lookup from BvD ID to the codebook's index label, so the label can be used
# as the index in the main dataframe. If a BvD ID occurs more than once in the
# codebook, the last occurrence wins.
di_cdbk = dict(zip(cdbk.BvD, cdbk.index))

## Creating a separate dataframe with economic attributes

In [3]:
# Company-level attributes. Rows without a company name are discarded, and the
# remaining rows are re-indexed by the codebook label looked up from the BvD ID.
econ_cols = ['Comp_Name', 'Country', 'NACE', 'BvD',
             'TURN', 'CF', 'TASS', 'EC', 'EM']

df_econ = (
    df[econ_cols]
    .dropna(subset='Comp_Name')
    .assign(index=lambda frame: frame['BvD'].map(di_cdbk))
    .set_index('index')
)

df_econ.head()

Unnamed: 0_level_0,Comp_Name,Country,NACE,BvD,TURN,CF,TASS,EC,EM
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Targi_117,AIRBUS SE,NL,3030.0,NL24288945,59 736 738,7 471 764,121 241 465,10 743 847,126 495
Targi_95,AIRBUS,FR,3030.0,FR383474814,46 503 021,1 450 146,67 241 069,11 035 552,8 944
Targi_277,BAE SYSTEMS PLC,GB,3030.0,GB01470151,26 837 099,3 328 945,36 453 156,10 301 190,82 000
Targi_927,SAFRAN,FR,3030.0,FR562082909,18 434 203,1 792 906,47 247 554,15 029 606,77 008
Targi_694,LEONARDO S.P.A.,IT,3030.0,IT00401990585,16 651 490,1 258 319,32 142 064,7 310 935,50 413


In [4]:
# Forward-fill the company name and BvD ID in the main dataframe so that every
# row carries the identifiers of the last row above it that had them
# (presumably each company's identifiers appear only on its first row — confirm
# against the raw file).
# The filled columns are assigned back explicitly: calling an in-place method on
# `df[col]` acts on a temporary object and leaves `df` unchanged when pandas
# Copy-on-Write is active.
id_cols = ['Comp_Name', 'BvD']
df[id_cols] = df[id_cols].ffill()

## Creating separate dataframes for links with shareholders and subsidiaries

In [5]:
import copy
# Company -> subsidiary links: only rows that name a subsidiary are kept.
df_sub = df[['BvD', 'Sub_BvD', 'Sub_%']].dropna(subset='Sub_BvD').copy()

# Shareholder -> company links: only rows that name a shareholder are kept.
df_sha = df[['BvD', 'Sha_BvD', 'Sha_%']].dropna(subset='Sha_BvD').copy()

### Subsidiaries Dataframe

In [6]:
# Preview the first rows of the subsidiary links (still keyed by raw BvD IDs).
df_sub.head()

Unnamed: 0,BvD,Sub_BvD,Sub_%
0,NL24288945,LULB51757,100
1,NL24288945,DEFEB14827,100
2,NL24288945,GB02449259,100
3,NL24288945,NL28086907,100
4,NL24288945,NL63545128,100


#### Checking for non-numeric values

In [7]:
# Entries made up solely of letters are codes rather than percentages;
# tally how often each code occurs.
sub_is_code = df_sub['Sub_%'].str.isalpha()
df_sub.loc[sub_is_code, 'Sub_%'].value_counts()

WO    54
MO    13
NG     9
VE     3
Name: Sub_%, dtype: int64

#### Changing strings into numeric values

In [8]:
# Converting entire cells: whole-cell codes are swapped for a numeric stand-in.
# NOTE(review): WO/MO/NG/VE look like the data provider's ownership-category
# codes and '-' like a missing-value marker; the numeric stand-ins below are
# modelling choices — confirm them against the provider's documentation.
sub_pct_codes = {
    'WO': 100,
    'MO': 51,
    'NG': 0.01,
    'VE': 0.01,
    '-': 0
}

# NOTE(review): `regex` is a third-party package that this cell never calls —
# pandas' `regex=True` option below does not need it. The import is kept only
# in case a later cell relies on the name; otherwise it can be removed.
import regex

# The result is assigned back to the column explicitly: an in-place `replace`
# on `df_sub['Sub_%']` acts on a temporary object and leaves `df_sub` unchanged
# when pandas Copy-on-Write is active, after which `astype(float)` would fail
# on the unconverted codes.
df_sub['Sub_%'] = (
    df_sub['Sub_%']
    .replace(sub_pct_codes)
    # Removing special signs from values: strip '<' / '>' qualifiers.
    .replace('[<>]+', '', regex=True)
    .astype(float)
)

In [9]:
# Rebuild the link table with codebook labels instead of raw BvD IDs:
# owning company -> 'source', subsidiary -> 'target', direct share -> 'weight'.
# IDs that are absent from the codebook map to NaN.
df_sub = pd.DataFrame({
    'source': df_sub['BvD'].map(di_cdbk),
    'target': df_sub['Sub_BvD'].map(di_cdbk),
    'weight': df_sub['Sub_%'],
})

# Keep only links that are actually present: both endpoints resolved and the
# weight known, and the weight strictly positive.
df_sub = df_sub.dropna()
df_sub = df_sub.loc[df_sub['weight'].gt(0)]

df_sub.head()

Unnamed: 0,source,target,weight
2,Targi_117,Neigh_525,100.0
3,Targi_117,Targi_98,100.0
9,Targi_117,Targi_398,100.0
15,Targi_117,Targi_850,100.0
49,Targi_117,Targi_95,95.0


In [10]:
# Extracting EASIN only links: keep a link only when its target also occurs
# somewhere in the 'source' column.
# NOTE(review): a company that never appears as a source in this table is
# dropped as a target as well, even if it belongs to the sample — confirm this
# is intended rather than filtering against the full list of sample companies.
owner_labels = df_sub['source']
target_is_owner = df_sub['target'].isin(owner_labels)
df_sub = df_sub[target_is_owner]

In [11]:
# Preview the subsidiary links that remain after filtering.
df_sub.head()

Unnamed: 0,source,target,weight
3,Targi_117,Targi_98,100.0
15,Targi_117,Targi_850,100.0
49,Targi_117,Targi_95,95.0
189,Targi_117,Targi_379,9.9
214,Targi_95,Targi_112,100.0


### Shareholders Dataframe

#### Checking for non-numeric values

In [12]:
# Entries made up solely of letters are codes rather than percentages;
# tally how often each code occurs among the shareholder links.
sha_is_code = df_sha['Sha_%'].str.isalpha()
df_sha.loc[sha_is_code, 'Sha_%'].value_counts()

WO     34
NG     33
MO     10
FC      9
GP      9
BR      3
T       3
FME     1
Name: Sha_%, dtype: int64