In [1]:
## Loading Libraries
import pandas as pd
import numpy as np
import sys
import jdatetime as jd
import os
import pickle

# import power_index_calculator as px
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
# Cells further down rely on this to show more than one result per cell.
InteractiveShell.ast_node_interactivity = "all"

In [2]:
## Loading functions
from convert_ar_characters import convert_ar_characters
from ReadingBalancesheetData import read_blnc_data
from nthMax import nth_max
from gini import gini
from find_shapley import find_shapley
from find_banzhaf import find_banzhaf
from gameTheoric_concentration import gameTheoric_concentration
from fill_shapley_banzhaf import fill_shapley_banzhaf

# **Loading Shareholder Data**

In [18]:
# Year tag from which every input/output file name below is derived.
year = 1397

# Shareholder and measures files carry the full year; the balance-sheet
# related files carry only the characters of the year after the first two.
file_name_holderdata = "Shareholder{}.csv".format(year)
file_name_measures = "Measures{}.csv".format(year)
file_name_blnc = "{}.txt".format(str(year)[2:])
file_name_measures_blnc = "data_owenership{0}_blnc{0}.csv".format(str(year)[2:])

In [19]:
## Loading DATA
# All relative file names in this notebook are resolved against this directory.
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
SDATA = pd.read_csv(file_name_holderdata, index_col=0)

# Convert the date strings to Timestamps, then build a Gregorian -> jdatetime
# lookup over the distinct dates only, so each date is converted once.
SDATA['True_Date'] = pd.to_datetime(SDATA['True_Date'], format='%Y-%m-%d')
G = SDATA['True_Date'].drop_duplicates()
J = G.apply(lambda ts: jd.date.fromgregorian(day=ts.day, month=ts.month, year=ts.year))
DataOrg_date_GtoJ_dict = dict(zip(G, J))
SDATA['Jalali_Date'] = SDATA['True_Date'].map(DataOrg_date_GtoJ_dict)

# Drop the price/volume and helper columns that the rest of the notebook does not use.
SDATA = SDATA.drop(columns=[
    'High', 'Low', 'Open', 'Last', 'Volume', 'close',
    'True_Date', 'year', 'month', 'day',
    'Fill_Flag', 'Unadjusted_close', 'chnk_id',
])

In [20]:
# Creating Dataframe for saving concentration measures
# One row per Symbol: first Id_tse / Industry / MarketCap seen, the sum of
# 'percent' (renamed sum_over1) and the count of ShareHolder rows (renamed Num_holders).
CMdf = (
    SDATA
    .groupby('Symbol', as_index=False)
    .agg({
        'Id_tse': 'first',
        'Industry': 'first',
        'percent': 'sum',
        'ShareHolder': 'count',
        'MarketCap': 'first',
    })
    .rename(columns={'ShareHolder': 'Num_holders', 'percent': 'sum_over1'})
    .reset_index(drop=True)
)

## Filtering Data

In [21]:
## Filtering Data
# Three successive filters are applied to CMdf. For each one, `filtered` holds the
# rows being removed (only used for the printed count) and CMdf is narrowed to the rest.
print('Number of Observation is: ',len(CMdf),'\n')
theta_sum1 = 90      # symbols with sum_over1 at or above this value are removed
theta_holder = 2     # symbols with this many holders or fewer are removed
MC = 100e10          # symbols with MarketCap at or below this value are removed


# 1) Sum of holdings. The kept set must exclude the threshold itself: the removed
#    set uses >=, so keeping <= would both count and retain rows equal to theta_sum1.
filtered = CMdf[CMdf.sum_over1>=theta_sum1]
CMdf = CMdf[CMdf.sum_over1<theta_sum1]
print(len(filtered),' is deleted becaause their sum_above1% is equalt to or more than ',theta_sum1,'%')

# 2) Number of holders.
filtered = CMdf[CMdf.Num_holders<=theta_holder]
CMdf = CMdf[CMdf.Num_holders>theta_holder]
print(len(filtered),' is deleted becaause their number of holder is less or equal than ',theta_holder)

# 3) Market capitalisation. Rows with a missing MarketCap are deliberately kept:
#    they are neither counted as removed (NaN <= MC is False) nor dropped.
filtered = CMdf[CMdf.MarketCap<=MC]
CMdf = CMdf[np.logical_or(CMdf.MarketCap>MC,pd.isnull(CMdf.MarketCap))]
print(len(filtered),' is deleted becaause their MarketCap is less or equal than ',MC)

print('\nNumber of remained Observation is: ',len(CMdf))

Number of Observation is:  605 

174  is deleted becaause their sum_above1% is equalt to or more than  90 %
102  is deleted becaause their number of holder is less or equal than  2
7  is deleted becaause their MarketCap is less or equal than  1000000000000.0

Number of remained Observation is:  322


# **Concentration Measures**

## 1- Largest Owner

In [22]:
# Largest single 'percent' value recorded for each Symbol, attached to CMdf
# with a left join so every CMdf row is retained.
temp = (
    SDATA
    .groupby('Symbol', as_index=False)
    .agg({'percent': 'max'})
    .rename(columns={'percent': 'Largest_Owner'})
)
CMdf = CMdf.merge(temp, on='Symbol', how='left')

## 2- First/Second

In [23]:
# First/Second: per Symbol, the largest 'percent' divided by nth_max(x, nth=2, interval=False)
# (presumably the second-largest holding - confirm against nthMax).
# The aggregator is passed directly rather than wrapped in a set, so `temp` keeps flat
# column labels. Wrapping it produced two-level columns, and merging those into the
# flat CMdf warns on older pandas and is rejected on newer pandas.
temp = SDATA.groupby('Symbol',as_index=False).agg(
    {'percent': lambda x: max(x)/nth_max(x,nth=2,interval=False)}
).rename(columns={'percent':'First_Second'})
CMdf = pd.merge(CMdf,temp,on='Symbol',how='left')

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


## 3- First/Sumtwofour

In [24]:
# First/Sumtwofour: per Symbol, the largest 'percent' divided by the sum of
# nth_max(x, nth=[2,4], interval=True) (presumably the 2nd through 4th largest
# holdings - confirm against nthMax).
# The aggregator is passed directly rather than wrapped in a set, so `temp` keeps flat
# column labels and can be merged into the flat CMdf without a mixed-level merge.
temp = SDATA.groupby('Symbol',as_index=False).agg(
    {'percent': lambda x: max(x)/sum(nth_max(x,nth=[2,4],interval=True))}
).rename(columns={'percent':'First_Sumtwofour'})
CMdf = pd.merge(CMdf,temp,on='Symbol',how='left')

## 4- Sumfive

In [25]:
# Sumfive: per Symbol, the sum of nth_max(x, nth=[1,5], interval=True)
# (presumably the five largest holdings - confirm against nthMax).
# The aggregator is passed directly rather than wrapped in a set, so `temp` keeps flat
# column labels and can be merged into the flat CMdf without a mixed-level merge.
temp = SDATA.groupby('Symbol',as_index=False).agg(
    {'percent': lambda x: sum(nth_max(x,nth=[1,5],interval=True))}
).rename(columns={'percent':'Sumfive'})
CMdf = pd.merge(CMdf,temp,on='Symbol',how='left')

## 5- Gini

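We calculate the Gini coefficient using the formula in Deaton (1997):
$$ \gamma = \frac{N+1}{N-1} - \frac{2}{\mu\times N\times(N-1)}\sum_{i=1}^N{\rho_ix_i}$$
where $N$ is the number of holders, $x_i$ is the holding of holder $i$, $\mu$ is the mean holding, and $\rho_i$ is the rank of holder $i$ when holdings are ordered from largest (rank 1) to smallest.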

In [26]:
# Gini: per Symbol, gini() applied to the list of that symbol's 'percent' values.
# The aggregator is passed directly rather than wrapped in a set, so `temp` keeps flat
# column labels and can be merged into the flat CMdf without a mixed-level merge.
temp = SDATA.groupby('Symbol',as_index=False).agg(
    {'percent': lambda x: gini(list(x))}
).rename(columns={'percent':'Gini'})
CMdf = pd.merge(CMdf,temp,on='Symbol',how='left')

## 6- Herfindahl

In [27]:
# Herfindhal: per Symbol, the sum of squared holdings, with each 'percent'
# value first divided by 100 so it is squared as a fraction.
# The aggregator is passed directly rather than wrapped in a set, so `temp` keeps flat
# column labels and can be merged into the flat CMdf without a mixed-level merge.
temp = SDATA.groupby('Symbol',as_index=False).agg(
    {'percent': lambda x: sum((t/100)**2 for t in x)}
).rename(columns={'percent':'Herfindhal'})
CMdf = pd.merge(CMdf,temp,on='Symbol',how='left')

## 7 & 8-Shapley-Shubik & Banzhaf

In [28]:
# Read the previously saved measures before displaying them, so this cell does not
# depend on a later cell having been executed first. The file name is resolved
# against the working directory set when the shareholder data was loaded.
CMdf_load = pd.read_csv(file_name_measures)
CMdf_load

Unnamed: 0,Symbol,Id_tse,sum_over1,Num_holders,Largest_Owner,First_Second,First_Sumtwofour,Sumfive,Gini,Herfindhal,SSCL,SSCO,SSDL,SSDO,BZCL,BZCO,BZDL,Industry
0,اعتضاد غدیر,34973883374080119,99.89,1,99.89,1.000000,1.000000,99.89,0.000000,0.997801,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.00000,سرمایه گذاریها
1,کاغذ مراغه,21383339313241074,51.53,1,51.53,1.000000,1.000000,51.53,0.000000,0.265534,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.00000,محصولات کاغذی
2,آ س پ,17617474823279712,89.20,5,40.26,1.963902,0.912925,89.20,0.464462,0.236507,0.510949,0.045774,0.514355,0.086037,0.519901,0.106373,0.63636,انبوه سازی، املاک و مستغلات
3,آتیمس,22839330962768817,99.02,1,99.02,1.000000,1.000000,99.02,0.000000,0.980496,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.00000,صندوق سرمایه گذاری قابل معامله
4,آرمان,38738476064699383,92.75,11,19.78,1.163529,0.444295,72.12,0.450652,0.121069,0.215876,0.064402,0.216077,0.064466,0.211058,0.069431,0.22684,بیمه وصندوق بازنشستگی به جزتامین اجتماعی
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
587,کیمیا,27814844870305607,76.80,5,43.40,2.155986,1.341991,76.80,0.671680,0.240432,0.609680,0.241108,0.609142,0.250401,0.951379,0.038195,1.00000,فلزات اساسی
588,گوهران,65018804181564924,95.86,2,94.14,54.732558,54.732558,95.86,0.964114,0.886530,1.000000,0.000000,1.000000,0.000000,1.000000,0.000000,1.00000,سرمایه گذاریها
589,گپارس,59848307608894801,26.93,5,16.46,2.505327,1.741799,26.93,0.671185,0.031932,0.193570,0.702197,0.195451,0.699667,0.292622,0.631065,1.00000,هتل و رستوران
590,گکوثر,66599109405217136,91.13,6,46.00,1.361753,1.075269,90.12,0.708153,0.330048,0.598530,0.059244,0.590751,0.138989,0.736071,0.129164,1.00000,هتل و رستوران


In [29]:
# # Initiating columns
# CMdf['SSCL'] = np.nan
# CMdf['SSCO'] = np.nan
# CMdf['SSDL'] = np.nan
# CMdf['SSDO'] = np.nan
# CMdf['BZCL'] = np.nan
# CMdf['BZCO'] = np.nan
# CMdf['BZDL'] = np.nan

# Re-read the measures file saved by an earlier run of this notebook.
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
CMdf_load = pd.read_csv(file_name_measures)

# Bring the stored SS*/BZ* columns into CMdf. It is a left join on Symbol, so every
# CMdf row is kept and symbols missing from the file get NaN in these columns.
CMdf = CMdf.merge(
    CMdf_load[['Symbol','SSCL', 'SSCO', 'SSDL', 'SSDO', 'BZCL', 'BZCO', 'BZDL']],
    on='Symbol',
    how='left',
)

In [None]:
%%time
data = fill_shapley_banzhaf(data = CMdf,SDATA=SDATA,fast_mode = True,time_pnt=True,major_thr = 10)
CMdf = data['CMdf']

print('len(Errors): ',len(data['Errors']))
data['Errors']
[x for x in data['Errors'] if x[2]!= 'Error: request error!']

The symbol  آریان  mode:  SSDL ,  1  from  8
The symbol  تکالا  mode:  SSDL ,  2  from  8


In [None]:
# Write the current CMdf to the measures file inside the data directory.
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
CMdf.to_csv(path_or_buf=file_name_measures)

# **Loading Balancesheet Data and Merging**

In [13]:
# Read the balance-sheet file, derive book value as total assets minus total
# liabilities, and keep only rows whose book value is strictly positive.
blnc_data = read_blnc_data(file=file_name_blnc)
blnc_data['book_value'] = blnc_data['Tot_asset'] - blnc_data['Tot_lib']
blnc_data = blnc_data.loc[blnc_data['book_value'] > 0]

In [14]:
# Combine the concentration measures with the selected balance-sheet columns.
# It is an outer join on Symbol, so symbols present in only one of the tables are kept.
data_out = pd.merge(
    CMdf,
    blnc_data[['Symbol','Tot_asset', 'Tot_lib', 'Capital', 'Equity_at_year_end',
               'Debt_Equity', 'Debt_Equity_normal', 'book_value']],
    on='Symbol',
    how='outer',
)

# Write the result under the year-derived name from the configuration cell. The
# previous hard-coded '98' name did not follow `year`, so the export was mislabelled.
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
data_out.to_csv(file_name_measures_blnc)