In [88]:
## Loading Libraries
import pandas as pd
import numpy as np
import sys
import jdatetime as jd
import os
import matplotlib.pyplot as plt 
import pickle
import io
# import power_index_calculator as px
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [124]:
## Changing Arabic Characters to Persian Characters !
## Credit to "https://github.com/rezakamalifard/Persian/blob/master/persian/persian.py"
import re
def convert_ar_characters(input_str):
    """Return ``input_str`` with Arabic letter variants replaced by Persian ones.

    The substitution table maps Arabic kaf and both Arabic yeh forms to their
    Persian counterparts, and replaces a few letter+diacritic sequences with
    the bare letter. The actual replacement is delegated to
    ``_multiple_replace``, which stringifies its input first.
    """
    arabic_to_persian = dict([
        ('ك', 'ک'),
        ('دِ', 'د'),
        ('بِ', 'ب'),
        ('زِ', 'ز'),
        ('ذِ', 'ذ'),
        ('شِ', 'ش'),
        ('سِ', 'س'),
        ('ى', 'ی'),
        ('ي', 'ی'),
    ])
    return _multiple_replace(arabic_to_persian, input_str)

def _multiple_replace(mapping, text):
    pattern = "|".join(map(re.escape, mapping.keys()))
    return re.sub(pattern, lambda m: mapping[m.group()], str(text))

In [2]:
## Loading DATA
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
SDATA = pd.read_csv("Shareholder97.csv", index_col=0)

# Convert the date strings to timestamps, then attach the matching jdatetime date.
# Only the distinct dates are converted; the result is mapped back through a lookup dict.
SDATA['True_Date'] = pd.to_datetime(SDATA['True_Date'], format='%Y-%m-%d')
G = SDATA['True_Date'].drop_duplicates()
J = G.apply(lambda ts: jd.date.fromgregorian(year=ts.year, month=ts.month, day=ts.day))
DataOrg_date_GtoJ_dict = {gregorian: jalali for gregorian, jalali in zip(G, J)}
SDATA['Jalali_Date'] = SDATA['True_Date'].map(DataOrg_date_GtoJ_dict)

# Columns that are dropped before the concentration measures are computed
unused_columns = [
    'High', 'Low', 'Open', 'Last', 'Volume', 'close',
    'True_Date', 'year', 'month', 'day', 'Fill_Flag', 'Unadjusted_close', 'chnk_id',
]
SDATA.drop(columns=unused_columns, inplace=True)

In [3]:
# Creating the DataFrame that collects the concentration measures:
# one row per Symbol with its first Id_tse, the sum of `percent` and the holder count.
CMdf = (
    SDATA.groupby('Symbol', as_index=False)
    .agg({'Id_tse': 'first', 'percent': 'sum', 'ShareHolder': 'count'})
    .rename(columns={'ShareHolder': 'Num_holders', 'percent': 'sum_over1'})
)
CMdf = CMdf.reset_index(drop=True)

# **Concentration Measures**

## 1- Largest Owner

In [4]:
# Largest single `percent` value per symbol, joined onto the measures table.
temp = (
    SDATA.groupby('Symbol', as_index=False)
    .agg({'percent': 'max'})
    .rename(columns={'percent': 'Largest_Owner'})
)
CMdf = CMdf.merge(temp, how='left', left_on='Symbol', right_on='Symbol')

## 2- First/Second

In [5]:
def nth_max(data, nth=1, interval=False):
    """Return the nth largest value (or a slice of the largest values) of a Series.

    Parameters
    ----------
    data : pandas.Series
        Values to rank; sorted in descending order internally.
    nth : int or sequence of two ints
        With ``interval=False`` the 1-based rank to return. With
        ``interval=True`` a ``[first, last]`` pair of 1-based ranks (inclusive).
    interval : bool
        Selects between the scalar and the slice form.

    Returns
    -------
    scalar or pandas.Series
        The selected value(s), rounded to 2 decimals. Ranks beyond the end of
        the data are clamped to the last (smallest) element.
    """
    ranked = data.sort_values(ascending=False)
    size = len(ranked)
    if not interval:
        position = min(nth - 1, size - 1)
        return np.round(ranked.iloc[position], 2)
    start = min(nth[0] - 1, size - 1)
    stop = min(nth[1], size)
    return np.round(ranked.iloc[start:stop], 2)

In [6]:
# Ratio of the largest holding to the second-largest holding, per symbol.
# Aggregating the selected `percent` column yields a flat Series, so `temp` has
# plain columns (Symbol, First_Second). The previous set-of-lambdas spec produced
# two-level columns that had to be merged into the flat CMdf and renamed from a tuple.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: max(x) / nth_max(x, nth=2, interval=False))
    .rename('First_Second')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

  new_axis = axis.drop(labels, errors=errors)


## 3- First/Sumtwofour

In [7]:
# Ratio of the largest holding to the sum of the 2nd..4th largest holdings, per symbol.
# Aggregating the selected `percent` column yields a flat Series, so `temp` has
# plain columns (Symbol, First_Sumtwofour) and merges into CMdf without a
# mixed-level merge or a tuple-column rename.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: max(x) / sum(nth_max(x, nth=[2, 4], interval=True)))
    .rename('First_Sumtwofour')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

## 4- Sumfive

In [8]:
# Sum of the five largest holdings per symbol, divided by 100 (percent -> fraction).
# Aggregating the selected `percent` column yields a flat Series, so `temp` has
# plain columns (Symbol, Sumfive) and merges into CMdf without a mixed-level
# merge or a tuple-column rename.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: sum(nth_max(x, nth=[1, 5], interval=True)) / 100)
    .rename('Sumfive')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

## 5- Gini

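We calculate the Gini coefficient following Deaton (1997), where $x_i$ is the holding of shareholder $i$, $\rho_i$ is its rank when holdings are sorted in descending order (the largest holder has rank 1), $\mu$ is the mean holding and $N$ is the number of holders: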
$$ \gamma = \frac{N+1}{N-1} - \frac{2}{\mu\times N\times(N-1)}\sum_{i=1}^N{\rho_ix_i}$$

In [9]:
def gini(data):
    """Gini coefficient of ``data`` using the rank-based formula of Deaton (1997).

    gamma = (N+1)/(N-1) - 2 / (mu * N * (N-1)) * sum(rank_i * x_i)

    where values are ranked in descending order (largest value has rank 1),
    ``mu`` is their mean and ``N`` their count.

    Parameters
    ----------
    data : iterable of numbers
        Holdings of one firm. Any iterable is accepted and it is NOT modified
        (the previous version sorted the caller's list in place).

    Returns
    -------
    float
        The coefficient; ``0.0`` when there are fewer than two values, because
        the formula divides by ``N - 1``.
    """
    # sorted() builds a new list, leaving the caller's data untouched.
    ordered = sorted(data, reverse=True)
    N = len(ordered)
    # Explicit guard instead of a bare `except:` around the division.
    if N < 2:
        return 0.0
    mu = np.mean(ordered)
    weighted_sum = sum(rank * value for rank, value in enumerate(ordered, start=1))
    return (N + 1) / (N - 1) - (2 * weighted_sum) / (mu * N * (N - 1))

In [10]:
# Gini coefficient of the listed holdings, per symbol.
# Aggregating the selected `percent` column yields a flat Series, so `temp` has
# plain columns (Symbol, Gini) and merges into CMdf without a mixed-level merge
# or a tuple-column rename.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: gini(list(x)))
    .rename('Gini')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

## 6- Herfindhal

In [11]:
# Sum of squared ownership fractions (percent / 100) of the listed holders, per symbol.
# Aggregating the selected `percent` column yields a flat Series, so `temp` has
# plain columns (Symbol, Herfindhal) and merges into CMdf without a mixed-level
# merge or a tuple-column rename.
temp = (
    SDATA.groupby('Symbol')['percent']
    .agg(lambda x: sum((t / 100) ** 2 for t in x))
    .rename('Herfindhal')
    .reset_index()
)
CMdf = pd.merge(CMdf, temp, on='Symbol', how='left')

## 7-Shapley-Shubik

In [79]:
# CMdf[CMdf.Num_holders>20]
# CMdf.sort_values(by='sum_over1').iloc[1:50]

Unnamed: 0,Symbol,Id_tse,sum_over1,Num_holders,Largest_Owner,First_Second,First_Sumtwofour,Sumfive,Gini,Herfindhal
424,واحصا,53647874954005806,6.39,3,3.3,1.633663,1.067961,0.0639,0.348983,0.001612
70,توسعه گردشگری,49703334175145583,6.89,2,4.9,2.462312,2.462312,0.0689,0.422351,0.002797
548,کالا,44549439964296944,7.24,2,5.57,3.335329,3.335329,0.0724,0.538674,0.003381
459,وسالت,23175320865252772,8.66,3,5.0,2.03252,1.36612,0.0866,0.438799,0.003249
530,چابهار,25133129074902751,9.09,1,9.09,1.0,1.0,0.0909,0.0,0.008263
79,تکنو,3654864906585643,10.23,2,5.67,1.243421,1.243421,0.1023,0.108504,0.005294
104,ثنظام,45066064863062755,10.24,5,4.34,1.981735,0.906054,0.1024,0.364258,0.002827
209,سبزوا,611986653700161,11.09,2,6.85,1.615566,1.615566,0.1109,0.235347,0.00649
137,خفناور,58180284328186631,11.75,3,4.96,1.301837,0.730486,0.1175,0.168511,0.0048
95,ثعتما,64707090254488560,13.36,1,13.36,1.0,1.0,0.1336,0.0,0.017849


In [13]:
%%time
import powerindices as px
prc = list(SDATA[SDATA.Id_tse==CMdf[CMdf.Num_holders>40].Id_tse.iloc[0]].percent)
weight = [int(x*100) for x in prc]
# print(px.compute_ssi(5000,[int(x) for x in list(np.ones(20))]))
list(zip(weight,px.compute_ssi(5000,weight)))

Wall time: 11.2 s


[(117, 0.014666041173875889),
 (145, 0.018240179295041552),
 (107, 0.013395737887830305),
 (149, 0.018752896969831526),
 (142, 0.017856006591268805),
 (185, 0.023391291367154534),
 (118, 0.014793236812876266),
 (176, 0.022227578216311633),
 (252, 0.032142737004626463),
 (118, 0.014793236812876266),
 (200, 0.02533694280073188),
 (237, 0.030169699888540423),
 (151, 0.01900943809880782),
 (188, 0.023779793238160808),
 (289, 0.03704456553822153),
 (190, 0.02403896991071311),
 (161, 0.020294189355561564),
 (102, 0.01276179949350819),
 (167, 0.021066618783361135),
 (128, 0.016067110048636674),
 (237, 0.030169699888540423),
 (172, 0.021711271247153188),
 (242, 0.03082647176584244),
 (229, 0.02912068450547725),
 (268, 0.034256281659924136),
 (161, 0.020294189355561564),
 (175, 0.022098456175264545),
 (225, 0.028597040207685373),
 (344, 0.044425963277194884),
 (215, 0.027290368295594645),
 (140, 0.017600060898177852),
 (127, 0.015939578430193266),
 (202, 0.025596943413058938),
 (151, 0.01900943

## 8-Banzhaf

# **Loading Balance Sheet Data**

In [200]:
# A function to read different file and prepare them
def read_blnc_data(file='98.txt',path=r"C:\Users\Mahdi\OneDrive\Master Thesis\Data"):
    """Read one tab-separated balance-sheet file and return a cleaned DataFrame.

    Steps, in order:
      1. read ``file`` (UTF-8, tab separated) from ``path``;
      2. keep a fixed set of balance-sheet columns and rename them to English;
      3. normalise Arabic characters in ``Symbol`` with ``convert_ar_characters``;
      4. drop rows with a missing ``Fin_year`` / ``approve_date`` and turn both
         into ``jdatetime.date`` objects (digits are taken from string positions
         0-3, 5-6 and 8-9, i.e. a ``YYYY?MM?DD`` layout);
      5. for every remaining column, drop rows with a missing value and convert
         the values to ``int`` (thousands separators ``,`` are stripped).

    Parameters
    ----------
    file : str
        File name inside ``path``.
    path : str
        Directory holding the file.

    Returns
    -------
    pandas.DataFrame
        One row per record that has all selected fields present.

    Notes
    -----
    The function changes the process working directory to ``path``.
    """
    # NOTE(review): os.chdir is a process-wide side effect; it is kept because
    # other cells may rely on the working directory it sets.
    os.chdir(path)
    with open(file,encoding="utf8") as f:
        fileobject = io.StringIO(f.read())

    BlncData = pd.read_csv(fileobject, sep='\t',  lineterminator='\n', names=None)

    # Selecting Columns (.copy() so later column assignments act on an
    # independent frame instead of a slice of the parsed file)
    BlncData = BlncData[['نماد', 'سال مالی', 'تاریخ مصوب','جمع دارایی‌های جاری',
           'سرمایه گذاری‌ها و سایر دارایی‌ها', 'خالص دارایی‌های ثابت',
           'جمع دارایی‌های غیر جاری', 'جمع کل دارایی‌ها', 'جمع بدهی‌های جاری',
           'جمع بدهی‌های غیر جاری', 'جمع کل بدهی‌ها', 'سرمایه',
           'سود و زیان انباشته', 'اندوخته قانونی',
           'جمع حقوق صاحبان سهام در پایان سال مالی',
           'جمع کل بدهی‌ها و حقوق صاحبان سهام',
           'جمع حقوق صاحبان سهام مصوب (در مجمع عادی)']].copy()

    # renaming columns (keys that are not among the selected columns are ignored by rename)
    BlncData = BlncData.rename(columns={'نماد':'Symbol','سال مالی':'Fin_year','جمع دارایی‌های جاری':'Tot_current_asset','تاریخ مصوب':'approve_date',
                             'خالص دارایی‌های ثابت':'Net_fixed_assed','سرمایه گذاری‌ها و سایر دارایی‌ها':'other_asset',
                             'جمع بدهی‌های جاری':'Tot_current_lib','جمع کل دارایی‌ها':'Tot_asset','جمع دارایی‌های غیر جاری':'Tot_uncurrent_asset',
                             'سرمایه':'Capital','حقوق عمومی':'Public_rights','جمع کل بدهی‌ها':'Tot_lib','جمع بدهی‌های غیر جاری':'Tot_uncurrent_lib',
                             'سایر اندوخته‌ها':'Other_saving','اندوخته قانونی':'Reserved_saving','سود و زیان انباشته':'Comulated_profit_loss',
                             'جمع حقوق صاحبان سهام در پایان سال مالی':'Equity_at_year_end','جمع کل بدهی‌ها و حقوق صاحبان سهام':'Debt_Equity',
                              'جمع حقوق صاحبان سهام مصوب (در مجمع عادی)':'Debt_Equity_normal'})

    # Convert each distinct symbol once, then map the result back onto the column.
    Names = BlncData.Symbol.drop_duplicates()
    Conv_Names = Names.apply(convert_ar_characters)
    BlncData_Symbol_ArtoFa_dict = dict(zip(Names,Conv_Names))
    BlncData['Symbol'] = BlncData.Symbol.map(BlncData_Symbol_ArtoFa_dict)

    def _to_jalali(text):
        # Builds a jdatetime date from the digits at fixed string positions.
        return jd.date(day=int(text[8:10]), month=int(text[5:7]), year=int(text[0:4]))

    def _to_int(value):
        # Strings carry thousands separators; columns that pandas already
        # parsed as numbers have no .replace() and are converted directly.
        if isinstance(value, str):
            return int(value.replace(',', ''))
        return int(value)

    # Dates: rows without a date are dropped before conversion.
    for date_col in ('Fin_year', 'approve_date'):
        BlncData = BlncData[~pd.isnull(BlncData[date_col])].copy()
        BlncData[date_col] = BlncData[date_col].apply(_to_jalali)

    # changing to int: everything after Symbol, Fin_year, approve_date
    for col in BlncData.columns[3:]:
        BlncData = BlncData[~pd.isnull(BlncData[col])].copy()
        BlncData[col] = BlncData[col].apply(_to_int)

    return(BlncData)

In [201]:
df = read_blnc_data(file='98.txt')
# Number of symbols present in both the balance-sheet data and the shareholder data
balance_symbols = set(df.Symbol)
holder_symbols = set(SDATA.Symbol.drop_duplicates())
len(balance_symbols & holder_symbols)

307