In [1]:
## Loading Libraries
import pandas as pd
import numpy as np
import sys
import jdatetime as jd
import os
import pickle

# import power_index_calculator as px
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
## Loading functions
from convert_ar_characters import convert_ar_characters
from ReadingBalancesheetData import read_blnc_data
from nthMax import nth_max
from gini import gini
from find_shapley import find_shapley
from find_banzhaf import find_banzhaf
from gameTheoric_concentration import gameTheoric_concentration
from fill_shapley_banzhaf import fill_shapley_banzhaf

# **Loading Shareholder Data**

In [4]:
## Loading DATA
# Switch to the thesis data directory and read the shareholder table.
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
SDATA = pd.read_csv("Shareholder97.csv", index_col=0)

# Convert the date strings to timestamps, then derive a Jalali date for each row.
# Only the distinct Gregorian dates are converted; the resulting lookup table is
# mapped back onto every row.
SDATA['True_Date'] = pd.to_datetime(SDATA['True_Date'], format='%Y-%m-%d')
G = SDATA['True_Date'].drop_duplicates()
J = G.apply(
    lambda x: jd.date.fromgregorian(year=x.year, month=x.month, day=x.day)
)
DataOrg_date_GtoJ_dict = dict(zip(G, J))
SDATA['Jalali_Date'] = SDATA['True_Date'].map(DataOrg_date_GtoJ_dict)

# Drop the listed columns (including the Gregorian date, now replaced by Jalali_Date).
SDATA = SDATA.drop(
    columns=[
        'High', 'Low', 'Open', 'Last', 'Volume', 'close',
        'True_Date', 'year', 'month', 'day',
        'Fill_Flag', 'Unadjusted_close', 'chnk_id',
    ]
)

In [6]:
# Create the per-symbol DataFrame that will hold the concentration measures.
# For each symbol it keeps the first Id_tse, the sum of 'percent' (as sum_over1)
# and the count of ShareHolder entries (as Num_holders).
CMdf = (
    SDATA.groupby('Symbol', as_index=False)
    .agg({'Id_tse': 'first', 'percent': 'sum', 'ShareHolder': 'count'})
    .rename(columns={'percent': 'sum_over1', 'ShareHolder': 'Num_holders'})
    .reset_index(drop=True)
)

# **Concentration Measures**

## 1- Largest Owner

In [7]:
# Largest owner: the maximum 'percent' value within each symbol.
temp = (
    SDATA.groupby('Symbol')['percent']
    .max()
    .rename('Largest_Owner')
    .reset_index()
)
CMdf = CMdf.merge(temp, on='Symbol', how='left')

## 2- First/Second

In [8]:
# First/Second: per symbol, the largest 'percent' divided by
# nth_max(x, nth=2, interval=False) (presumably the second-largest holding;
# confirm against nthMax).
#
# The measure is aggregated on the 'percent' column directly so the result is a
# flat Series with one value per symbol. Passing a set of lambdas inside the agg
# dict yields two-level columns (which is why the rename needed the tuple key
# ('First_Second', '<lambda>')); merging such a frame into the single-level CMdf
# is deprecated and is rejected with a MergeError by recent pandas releases.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: max(x)/nth_max(x, nth=2, interval=False))
    .rename('First_Second')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


## 3- First/Sumtwofour

In [9]:
# First/Sumtwofour: per symbol, the largest 'percent' divided by the sum of
# nth_max(x, nth=[2,4], interval=True) (presumably the 2nd through 4th largest
# holdings; confirm against nthMax).
#
# The measure is aggregated on the 'percent' column directly so the result is a
# flat Series with one value per symbol. Passing a set of lambdas inside the agg
# dict yields two-level columns (which is why the rename needed the tuple key
# ('First_Sumtwofour', '<lambda>')); merging such a frame into the single-level
# CMdf is deprecated and is rejected with a MergeError by recent pandas releases.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: max(x)/sum(nth_max(x, nth=[2,4], interval=True)))
    .rename('First_Sumtwofour')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

## 4- Sumfive

In [10]:
# Sumfive: per symbol, the sum of nth_max(x, nth=[1,5], interval=True)
# (presumably the five largest holdings; confirm against nthMax), divided by 100
# to turn the percentage into a fraction.
#
# The measure is aggregated on the 'percent' column directly so the result is a
# flat Series with one value per symbol. Passing a set of lambdas inside the agg
# dict yields two-level columns (which is why the rename needed the tuple key
# ('Sumfive', '<lambda>')); merging such a frame into the single-level CMdf is
# deprecated and is rejected with a MergeError by recent pandas releases.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: sum(nth_max(x, nth=[1,5], interval=True))/100)
    .rename('Sumfive')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

## 5- Gini

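We calculate the Gini coefficient following Deaton (1997):
$$ \gamma = \frac{N+1}{N-1} - \frac{2}{\mu\times N\times(N-1)}\sum_{i=1}^N{\rho_ix_i}$$
where $N$ is the number of observations, $x_i$ is the value of observation $i$, $\mu$ is the mean of the $x_i$, and $\rho_i$ is the rank of observation $i$ (the largest value has rank 1).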

In [11]:
# Gini: per symbol, gini() applied to the list of 'percent' values.
#
# The measure is aggregated on the 'percent' column directly so the result is a
# flat Series with one value per symbol. Passing a set of lambdas inside the agg
# dict yields two-level columns (which is why the rename needed the tuple key
# ('Gini', '<lambda>')); merging such a frame into the single-level CMdf is
# deprecated and is rejected with a MergeError by recent pandas releases.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: gini(list(x)))
    .rename('Gini')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


## 6- Herfindahl

In [12]:
# Herfindahl index: per symbol, the sum of squared ownership fractions
# ('percent' / 100). The output column keeps the existing name 'Herfindhal'.
#
# The measure is aggregated on the 'percent' column directly so the result is a
# flat Series with one value per symbol. Passing a set of lambdas inside the agg
# dict yields two-level columns (which is why the rename needed the tuple key
# ('Herfindhal', '<lambda>')); merging such a frame into the single-level CMdf is
# deprecated and is rejected with a MergeError by recent pandas releases.
# Python's built-in sum is kept on purpose so NaN propagation and summation order
# match the previous implementation.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: sum((t/100)**2 for t in x))
    .rename('Herfindhal')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

## 7 & 8-Shapley-Shubik & Banzhaf

In [13]:
# Attach the Shapley-Shubik (SS*) and Banzhaf (BZ*) columns to CMdf.
#
# Values saved by an earlier run are read back from Measures.csv and merged in
# by Symbol. Measures.csv is only written by the cell that follows this one, so
# on a first run the file does not exist yet; in that case the columns are
# created empty (NaN), which is what the previously commented-out
# initialisation code did by hand.
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
power_index_cols = ['SSCL', 'SSCO', 'SSDL', 'SSDO', 'BZCL', 'BZCO', 'BZDL']

if os.path.exists('Measures.csv'):
    CMdf_load = pd.read_csv('Measures.csv',index_col=0)
    CMdf = pd.merge(CMdf, CMdf_load[['Symbol'] + power_index_cols],
                    left_on='Symbol', right_on='Symbol', how='left')
else:
    # No cache yet: initialise the columns so they can be filled from scratch.
    for col in power_index_cols:
        CMdf[col] = np.nan

In [20]:
%%time
data = fill_shapley_banzhaf(data = CMdf,SDATA=SDATA,fast_mode = True,time_pnt=True,major_thr = 15)
CMdf = data['CMdf']

os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
CMdf.to_csv('Measures.csv')

print('len(Errors): ',len(data['Errors']))
data['Errors']
[x for x in data['Errors'] if x[2]!= 'Error: request error!']

The symbol  آریان  mode:  SSDL ,  1  from  81
The symbol  امین یکم  mode:  SSCL ,  2  from  81
The symbol  بخاور  mode:  SSCL ,  3  from  81
The symbol  بساما  mode:  SSCL ,  4  from  81
The symbol  بپاس  mode:  BZDL ,  5  from  81
It took about  0.12 seconds
The symbol  تراک  mode:  SSCL ,  6  from  81




  parsed_html = BeautifulSoup(response.text.encode('utf8'))


The symbol  توسعه گردشگری   mode:  SSCL ,  7  from  81
The symbol  تکالا  mode:  SSCL ,  8  from  81
The symbol  تکالا  mode:  SSDL ,  9  from  81
The symbol  تکنار  mode:  SSCL ,  10  from  81
The symbol  ثاژن  mode:  SSCL ,  11  from  81
The symbol  ثتوسا  mode:  SSCL ,  12  from  81
The symbol  ثنام  mode:  SSCL ,  13  from  81
The symbol  حکمت  mode:  SSCL ,  14  from  81
The symbol  خاور  mode:  SSCL ,  15  from  81
The symbol  خصدرا  mode:  SSCL ,  16  from  81
The symbol  خفولا  mode:  SSCL ,  17  from  81
The symbol  خودرو  mode:  SSCL ,  18  from  81
The symbol  خکاوه  mode:  SSCL ,  19  from  81
The symbol  دسینا  mode:  SSCL ,  20  from  81
 It took about  0.09 seconds
The symbol  دی  mode:  SSCL ,  21  from  81




  parsed_html = BeautifulSoup(response.text.encode('utf8'))


The symbol  ساذری  mode:  SSCL ,  22  from  81
The symbol  ساراب  mode:  SSCL ,  23  from  81
 It took about  0.79 seconds
The symbol  سامان  mode:  SSCL ,  24  from  81
The symbol  سباقر  mode:  SSCL ,  25  from  81
The symbol  سخرم  mode:  SSCL ,  26  from  81
The symbol  سدور  mode:  SSCL ,  27  from  81
The symbol  سفارود  mode:  SSCL ,  28  from  81
The symbol  سمایه  mode:  SSCL ,  29  from  81
The symbol  سپرده  mode:  SSCL ,  30  from  81
 It took about  0.79 seconds
The symbol  سپرده  mode:  SSDL ,  31  from  81
The symbol  شزنگ  mode:  SSCL ,  32  from  81
The symbol  شستان  mode:  SSCL ,  33  from  81
The symbol  شسم  mode:  SSCL ,  34  from  81
The symbol  شسینا  mode:  SSCL ,  35  from  81
The symbol  فرابورس  mode:  SSDL ,  36  from  81
The symbol  فروس  mode:  SSCL ,  37  from  81
The symbol  فسدید  mode:  SSCL ,  38  from  81
The symbol  فلات  mode:  SSCL ,  39  from  81
The symbol  فملی  mode:  SSCL ,  40  from  81
The symbol  فولای  mode:  SSCL ,  41  from  81
The sym

[['ساراب', 'SSCL', 'Error: creating dataFrame error! '],
 ['سپرده', 'SSCL', 'Error: creating dataFrame error! ']]

# **Loading Balance Sheet Data and Merging**

In [22]:
# Read the balance-sheet table, derive book value as total assets minus total
# liabilities, and keep only rows whose book value is strictly positive.
blnc_data = read_blnc_data(file='98.txt')
blnc_data['book_value'] = blnc_data['Tot_asset'].sub(blnc_data['Tot_lib'])
blnc_data = blnc_data[blnc_data['book_value'].gt(0)]

In [23]:
# Outer-join the concentration measures with the selected balance-sheet columns
# on Symbol, so symbols present in only one of the two tables are kept.
data_out = CMdf.merge(
    blnc_data[[
        'Symbol', 'Tot_asset', 'Tot_lib', 'Capital', 'Equity_at_year_end',
        'Debt_Equity', 'Debt_Equity_normal', 'book_value',
    ]],
    on='Symbol',
    how='outer',
)

# Write the combined table to the data directory.
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
data_out.to_csv('data_owenership97_blnc98.csv')