In [61]:
## Loading Libraries
import pandas as pd
import numpy as np
import sys
import jdatetime as jd
import os
import matplotlib.pyplot as plt 
import pickle
import io
import requests
from bs4 import BeautifulSoup

# import power_index_calculator as px
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [62]:
## Changing Arbic Characters to Persian Characters !
## Credit to "https://github.com/rezakamalifard/Persian/blob/master/persian/persian.py"
import re
def convert_ar_characters(input_str):
    mapping = {
        'ك': 'ک',
        'دِ': 'د',
        'بِ': 'ب',
        'زِ': 'ز',
        'ذِ': 'ذ',
        'شِ': 'ش',
        'سِ': 'س',
        'ى': 'ی',
        'ي': 'ی'
    }
    return _multiple_replace(mapping, input_str)

def _multiple_replace(mapping, text):
    pattern = "|".join(map(re.escape, mapping.keys()))
    return re.sub(pattern, lambda m: mapping[m.group()], str(text))

# **Loading Balancesheet Data**

In [63]:
# A function to read different file and prepare them
def read_blnc_data(file='98.txt',path=r"C:\Users\Mahdi\OneDrive\Master Thesis\Data"):

    os.chdir(path)
    with open(file,encoding="utf8") as f:
        fileobject = io.StringIO(f.read())

    BlncData = pd.read_csv(fileobject, sep='\t',  lineterminator='\n', names=None)
    
    # Selecting Columns
    BlncData = BlncData[['نماد', 'سال مالی', 'تاریخ مصوب','جمع دارایی‌های جاری',
           'سرمایه گذاری‌ها و سایر دارایی‌ها', 'خالص دارایی‌های ثابت',
           'جمع دارایی‌های غیر جاری', 'جمع کل دارایی‌ها', 'جمع بدهی‌های جاری',
           'جمع بدهی‌های غیر جاری', 'جمع کل بدهی‌ها', 'سرمایه',
           'سود و زیان انباشته', 'اندوخته قانونی',
           'جمع حقوق صاحبان سهام در پایان سال مالی',
           'جمع کل بدهی‌ها و حقوق صاحبان سهام',
           'جمع حقوق صاحبان سهام مصوب (در مجمع عادی)']]
    
    # renaming columns
    BlncData.rename(columns={'نماد':'Symbol','سال مالی':'Fin_year','جمع دارایی‌های جاری':'Tot_current_asset','تاریخ مصوب':'approve_date',
                             'خالص دارایی‌های ثابت':'Net_fixed_assed','سرمایه گذاری‌ها و سایر دارایی‌ها':'other_asset',
                             'جمع بدهی‌های جاری':'Tot_current_lib','جمع کل دارایی‌ها':'Tot_asset','جمع دارایی‌های غیر جاری':'Tot_uncurrent_asset',
                             'سرمایه':'Capital','حقوق عمومی':'Public_rights','جمع کل بدهی‌ها':'Tot_lib','جمع بدهی‌های غیر جاری':'Tot_uncurrent_lib',
                             'سایر اندوخته‌ها':'Other_saving','اندوخته قانونی':'Reserved_saving','سود و زیان انباشته':'Comulated_profit_loss',
                             'جمع حقوق صاحبان سهام در پایان سال مالی':'Equity_at_year_end','جمع کل بدهی‌ها و حقوق صاحبان سهام':'Debt_Equity',
                              'جمع حقوق صاحبان سهام مصوب (در مجمع عادی)':'Debt_Equity_normal'},inplace=True)

    # DataOrg.Symbol: convert_ar_characters(x)
    Names = BlncData.Symbol.drop_duplicates()
    Conv_Names = Names.apply(lambda x : convert_ar_characters(x))
    BlncData_Symbol_ArtoFa_dict = dict(zip(Names,Conv_Names))
    BlncData['Symbol'] = BlncData.Symbol.map(BlncData_Symbol_ArtoFa_dict)

    # Dates
    BlncData = BlncData[~pd.isnull(BlncData.Fin_year)]
    BlncData.Fin_year = BlncData.Fin_year.apply(lambda x: jd.date(day=int(x[8:10]), month=int(x[5:7]),year=int(x[0:4])))

    BlncData = BlncData[~pd.isnull(BlncData.approve_date)]
    BlncData.approve_date = BlncData.approve_date.apply(lambda x: jd.date(day=int(x[8:10]), month=int(x[5:7]),year=int(x[0:4])))
    
    # changing to int
    for x in BlncData.columns[3:]:
        BlncData = BlncData[~pd.isnull(BlncData[x])]
        BlncData[x] = BlncData[x].apply(lambda x: int(x.replace(',','')))
        
    return(BlncData)

In [64]:
blnc_data = read_blnc_data(file='98.txt')
blnc_data['book_value'] = blnc_data.Tot_asset-blnc_data.Tot_lib
blnc_data = blnc_data[blnc_data.book_value>0]

# **Loading Shareholder Data**

In [65]:
## Loading DATA
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
SDATA = pd.read_csv("Shareholder97.csv",index_col=0)

# Conver date from string to jdatetime
SDATA['True_Date'] = pd.to_datetime(SDATA['True_Date'], format='%Y-%m-%d')
G = SDATA.True_Date.drop_duplicates()
J = G.apply(lambda x: jd.date.fromgregorian(day=x.day,month=x.month,year=x.year))
DataOrg_date_GtoJ_dict = dict(zip(G,J))
SDATA['Jalali_Date']=SDATA.True_Date.map(DataOrg_date_GtoJ_dict)

SDATA.drop(columns=['High', 'Low', 'Open', 'Last', 'Volume', 'close',
       'True_Date', 'year', 'month', 'day', 'Fill_Flag','Unadjusted_close','chnk_id'],inplace=True)

In [66]:
# Filtering and keeping symbols that we have data in both datasets
sym_list = list(set(blnc_data.Symbol).intersection(set(SDATA.Symbol.drop_duplicates())))
SDATA = SDATA[SDATA.Symbol.isin(sym_list)]

In [67]:
# Creating Dataframe for saving concentration mearsurs
CMdf = SDATA.groupby('Symbol',as_index=False).agg({'Id_tse':'first','percent':'sum','ShareHolder':'count'}).rename(columns={'ShareHolder':'Num_holders','percent':'sum_over1'})
CMdf.reset_index(drop=True,inplace=True)

# **Concentration Measures**

## 1- Largest Owner

In [68]:
temp = SDATA.groupby('Symbol',as_index=False).agg({'percent':'max'}).rename(columns={'percent':'Largest_Owner'})
CMdf = pd.merge(CMdf,temp,left_on='Symbol',right_on='Symbol',how='left')

## 2- First/Second

In [69]:
def nth_max(data,nth=1,interval=False):
    data = data.sort_values(ascending=False)
    if interval:
        return(np.round(data.iloc[min(nth[0]-1,len(data)-1):min(nth[1],len(data))],2))
    else:
        return(np.round(data.iloc[min(nth-1,len(data)-1)],2))

In [70]:
temp = SDATA.groupby('Symbol',as_index=False).agg({'percent':{lambda x: max(x)/nth_max(x,nth=2,interval=False)}}).rename(columns={'percent':'First_Second'})
CMdf = pd.merge(CMdf,temp,left_on='Symbol',right_on='Symbol',how='left').rename(columns={('First_Second', '<lambda>'):'First_Second'})

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


## 3- First/Sumtwofour

In [71]:
temp = SDATA.groupby('Symbol',as_index=False).agg({'percent':{lambda x: max(x)/sum(nth_max(x,nth=[2,4],interval=True))}}).rename(columns={'percent':'First_Sumtwofour'})
CMdf = pd.merge(CMdf,temp,left_on='Symbol',right_on='Symbol',how='left').rename(columns={('First_Sumtwofour', '<lambda>'):'First_Sumtwofour'})

## 4- Sumfive

In [72]:
temp = SDATA.groupby('Symbol',as_index=False).agg({'percent':{lambda x: sum(nth_max(x,nth=[1,5],interval=True))/100}}).rename(columns={'percent':'Sumfive'})
CMdf = pd.merge(CMdf,temp,left_on='Symbol',right_on='Symbol',how='left').rename(columns={('Sumfive', '<lambda>'):'Sumfive'})

## 5- Gini

We calculate gini coeficient using Deaton 1997:
$$ \gamma = \frac{N+1}{N-1} - \frac{2}{\mu\times N\times(N-1)}\sum_{i=1}^N{\rho_ix_i}$$

In [73]:
def gini(data):
    data.sort(reverse = True)
    N = len(data)
    mu = np.mean(data)
    ser = [(i+1)*data[i] for i in range(len(data))]
    try:
        gamma = (N+1)/(N-1)-(2*sum(ser))/(mu*N*(N-1))
    except:
        gamma = 0
    return(gamma)

In [74]:
temp = SDATA.groupby('Symbol',as_index=False).agg({'percent':{lambda x: gini(list(x))}}).rename(columns={'percent':'Gini'})
CMdf = pd.merge(CMdf,temp,left_on='Symbol',right_on='Symbol',how='left').rename(columns={('Gini', '<lambda>'):'Gini'})

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


## 6- Herfindhal

In [75]:
temp = SDATA.groupby('Symbol',as_index=False).agg({'percent':{lambda x: sum([(t/100)**2 for t in list(x)])}}).rename(columns={'percent':'Herfindhal'})
CMdf = pd.merge(CMdf,temp,left_on='Symbol',right_on='Symbol',how='left').rename(columns={('Herfindhal', '<lambda>'):'Herfindhal'})

# Shapley-Shubik and Banzhaf Functions

In [76]:
# URL Dict
URL_shapley={'direct':"https://mywebpages.csv.warwick.ac.uk/cgi-vpi/ssdirect.cgi",
             'genf':"https://mywebpages.csv.warwick.ac.uk/cgi-vpi/ssgenf.cgi",
             'mmle':"https://mywebpages.csv.warwick.ac.uk/cgi-vpi/ssmmle.cgi",
             'ocean':"https://mywebpages.csv.warwick.ac.uk/cgi-vpi/ssocean.cgi",
             'concentrated':"https://mywebpages.csv.warwick.ac.uk/cgi-vpi/ssmmle.cgi"}


def find_shapley(percent,how='direct',quota = 50.01,major_mode='number',major_thr=20,concentration_point=0.99,time_pnt=False,fast_mode = True):
    
    """
    A function for finding Shapley-Shbik Index.
    
    This functions uses David Leech website to calculate Shapley-Shubik index.
    
    ...
    
    Parameters
    -----------
    percent: list, voting rights
    
    how: str, 'direct', 'concentrated', 'ocean', 'genf', and 'mmle'
    
    quota: float
    
    major_mode: 'percent' or 'number'
    
    major_thr: if major_mode is 'percnet'--> float
               if major_mode is 'number'--> int
            
    concentration_point: float, [less than 1]
    """
    df = percent
    
    # sorting
    percent.sort(reverse=True)
    
    # Fast mode calculates
    if percent[0]>=quota and fast_mode:
        if how is not 'concentrated' and how is not 'ocean':
            out = pd.DataFrame(data={'Weight':percent,
                             'Shapley-Shubik Index':[1]+[0]*(len(percent)-1)})
        elif how is 'concentrated':
            unassigned = 100 - sum(percent)
            cons_point = concentration_point
            number = int(np.floor(unassigned/cons_point))
            residual = np.round(unassigned - number*cons_point,2)
            percent = percent+[cons_point]*number+[residual]
            out = pd.DataFrame(data={'Weight':percent,
                             'Shapley-Shubik Index':[1]+[0]*(len(percent)-1)})
        elif how is 'ocean':
            out = pd.DataFrame(data={'Weight':percent+['Ocean'],
                             'Shapley-Shubik Index':[1]+[0]*(len(percent))})
        if time_pnt:
            print(' Fast_mode on!')
        return(out)
    
    # Checking size of input
    if len(df)<=1 and how is not 'ocean' and how is not 'concentrated':
        return('Error: Length Error!')
    
    
    ## Preparing website inputs
    if how is 'direct':
        prc = percent
        prc_str = ''
        for x in prc:
            prc_str+=str(x)
            prc_str+=' '
        payload = {'numberofplayers': len(df),
                   'quota': quota,
                   'textarea': prc_str}
        
        
    elif how is 'genf':
        prc = [int(x*100)for x in percent]
        prc_str = ''
        for x in prc:
            prc_str+=str(x)
            prc_str+=' '
        payload = {'numberofplayers': len(df),
                   'quota': int(quota*100),
                   'textarea': prc_str}    
        
        
    elif how is 'mmle':
        prc = [x for x in percent]
        prc_str = ''
        for x in prc:
            prc_str+=str(x)
            prc_str+=' '
        if major_mode=='percent':
            Majors = len([x for x in prc if x>=major_thr])
            Minors = len(df) - Majors
        elif major_mode=='number':
            Majors = len(prc[0:min(major_thr,len(prc))])
            Minors = len(prc) - Majors
        payload = {'numberofplayers': Majors,#majo3
                   'numberofplayers2': Minors,#minor
                   'quota': quota,
                   'textarea': prc_str}        
        
        
    elif how is 'concentrated':
        prc = [x for x in percent]
        unassigned = 100 - sum(prc)
        cons_point = concentration_point
        number = int(np.floor(unassigned/cons_point))
        residual = np.round(unassigned - number*cons_point,2)
        prc = prc+[cons_point]*number+[residual]
        prc_str = ''
        for x in prc:
            prc_str+=str(x)
            prc_str+=' '
        if major_mode=='percent':
            Majors = len([x for x in prc if x>=major_thr])
            Minors = len(prc) - Majors
        elif major_mode=='number':
            Majors = len(prc[0:min(major_thr,len(prc))])
            Minors = len(prc) - Majors
        payload = {'numberofplayers': Majors,#majo3
                   'numberofplayers2': Minors,#minor
                   'quota': quota,
                   'textarea': prc_str}    
    
    
    elif how is 'ocean':
        prc = [x for x in percent]
        prc_str = ''
        for x in prc:
            prc_str+=str(x)
            prc_str+=' '
        total_weight = 100
        payload = {'numberofplayers': len(df),# number of atomic players
                   'totalweight': total_weight,
                   'quota': 50.1,
                   'textarea': prc_str}
    
    
    # website url
    url = URL_shapley[how]
    
    # Making request
    try:
        response = requests.request("POST", url, data = payload)
    except:
        return('Error: request error!')
    if time_pnt:
        print(' It took about ',np.round(response.elapsed.microseconds/1e6,2), 'seconds')
        
    # Parshing output html of wevsite
    parsed_html = BeautifulSoup(response.text.encode('utf8'))
    
     # Finding rows or error message
    if parsed_html('tr'):
        rows = parsed_html('tr')
    else:
        return('Error: '+parsed_html.find('p').text)
    
    # Extracting rows to a list of lists
    data = []
    for row in rows:
        if row.th:
            cols = row.find_all('th')
            cols = [ele.text.strip() for ele in cols]
            if len(cols) is 1:
                data.append([cols[0],''])
            else:
                data.append([ele for ele in cols if ele]) # Get rid of empty valuespty values
        else:
            cols = row.find_all('td')
            cols = [ele.text.strip() for ele in cols]
            data.append([ele for ele in cols if ele]) # Get rid of empty values
    
    # Converting list of lists to a dataframe
    try:
        if how is 'mmle' or how is 'concentrated':
            out = pd.DataFrame(data[1:(len(data)-1)], columns=data[0])
            out.iloc[:,0] = prc
            
        elif how is 'ocean':
            del data[0]
            del data[-2]
            data[-1][0] = 'Ocean'
            out = pd.DataFrame(data[1:len(data)], columns=data[0])
            
        else: 
            out = pd.DataFrame(data[1:], columns=data[0])
            out.iloc[:,0] = prc
        
        return(out)
    
    except:
        return('Error: creating dataFrame error! ')

In [77]:
# URL Dict
URL_banzhaf={'direct':"https://mywebpages.csv.warwick.ac.uk/cgi-vpi/ipdirect.cgi",
             'genf':"https://mywebpages.csv.warwick.ac.uk/cgi-vpi/ipgenf.cgi",
             'mmle':"https://mywebpages.csv.warwick.ac.uk/cgi-vpi/ipmmle.cgi",
             'concentrated':"https://mywebpages.csv.warwick.ac.uk/cgi-vpi/ipmmle.cgi"}


def find_banzhaf(percent,how='direct',quota = 50.01,major_mode='number',major_thr=20,concentration_point=0.99,time_pnt=False,fast_mode = True):
    
    """
    A function for finding banzhaf Index.
    
    This functions uses David Leech website to calculate banzhaf index.
    
    ...
    
    Parameters
    -----------
    percent: list, voting rights
    
    how: str, 'direct', 'concentrated', 'ocean', 'genf', and 'mmle'
    
    quota: float
    
    major_mode: 'percent' or 'number'
    
    major_thr: if major_mode is 'percnet'--> float
               if major_mode is 'number'--> int
            
    concentration_point: float, [less than 1]
    """
    df = percent
    
    # sorting
    percent.sort(reverse=True)
    
    # Fast mode calculates
    if percent[0]>=quota and fast_mode:
        if how is not 'concentrated':
            out = pd.DataFrame(data={'Weight':percent,
                                     'Abs_Banzhaf':[1]+[0]*(len(percent)-1),
                                     'Norm_Banzhaf':[1]+[0]*(len(percent)-1),
                                     'Coleman_Prevent':[1]+[0]*(len(percent)-1),
                                     'Coleman_Initiate':[1]+[0]*(len(percent)-1)})
        elif how is 'concentrated':
            unassigned = 100 - sum(percent)
            cons_point = concentration_point
            number = int(np.floor(unassigned/cons_point))
            residual = np.round(unassigned - number*cons_point,2)
            percent = percent+[cons_point]*number+[residual]
            out = pd.DataFrame(data={'Weight':percent,
                                     'Abs_Banzhaf':[1]+[0]*(len(percent)-1),
                                     'Norm_Banzhaf':[1]+[0]*(len(percent)-1),
                                     'Coleman_Prevent':[1]+[0]*(len(percent)-1),
                                     'Coleman_Initiate':[1]+[0]*(len(percent)-1)})
        if time_pnt:
            print('Fast_mode on!')
        return(out)
    
    # Checking size of input
    if len(df)<=1 and how is not 'concentrated':
        return('Error: Length Error!')
    
    
    ## Preparing website inputs
    if how is 'direct':
        prc = percent
        prc_str = ''
        for x in prc:
            prc_str+=str(x)
            prc_str+=' '
        payload = {'numberofplayers': len(df),
                   'quota': quota,
                   'textarea': prc_str}
        
        
    elif how is 'genf':
        prc = [int(x*100)for x in percent]
        prc_str = ''
        for x in prc:
            prc_str+=str(x)
            prc_str+=' '
        payload = {'numberofplayers': len(df),
                   'quota': int(quota*100),
                   'textarea': prc_str}    
        
        
    elif how is 'mmle':
        prc = [x for x in percent]
        prc_str = ''
        for x in prc:
            prc_str+=str(x)
            prc_str+=' '
        if major_mode=='percent':
            Majors = len([x for x in prc if x>=major_thr])
            Minors = len(prc) - Majors
        elif major_mode=='number':
            Majors = len(prc[0:min(major_thr,len(prc))])
            Minors = len(prc) - Majors
        payload = {'numberofplayers': Majors,#majo3
                   'numberofplayers2': Minors,#minor
                   'quota': quota,
                   'textarea': prc_str}        
        
        
    elif how is 'concentrated':
        prc = [x for x in percent]
        unassigned = 100 - sum(prc)
        cons_point = concentration_point
        number = int(np.floor(unassigned/cons_point))
        residual = np.round(unassigned - number*cons_point,2)
        prc = prc+[cons_point]*number+[residual]
        prc_str = ''
        for x in prc:
            prc_str+=str(x)
            prc_str+=' '
        if major_mode=='percent':
            Majors = len([x for x in prc if x>=major_thr])
            Minors = len(prc) - Majors
        elif major_mode=='number':
            Majors = len(prc[0:min(major_thr,len(prc))])
            Minors = len(prc) - Majors
        payload = {'numberofplayers': Majors,#majo3
                   'numberofplayers2': Minors,#minor
                   'quota': quota,
                   'textarea': prc_str}    
    
    
    # website url
    url = URL_banzhaf[how]
    
    # Making request
    try:
        response = requests.request("POST", url, data = payload)
    except:
        return('Error: request error!')
    if time_pnt:
        print('It took about ',np.round(response.elapsed.microseconds/1000000,2), 'seconds')
        
    # Parshing output html of wevsite
    parsed_html = BeautifulSoup(response.text.encode('utf8'))
    
     # Finding rows or error message
    if parsed_html('tr'):
        rows = parsed_html('tr')
    else:
        return('Error: '+parsed_html.find('p').text)
    
    # Extracting rows to a list of lists
    data = []
    for row in rows:
        if row.th:
            cols = row.find_all('th')
            cols = [ele.text.strip() for ele in cols]
            if len(cols) is 1:
                data.append([cols[0],''])
            else:
                data.append([ele for ele in cols if ele]) # Get rid of empty valuespty values
        else:
            cols = row.find_all('td')
            cols = [ele.text.strip() for ele in cols]
            data.append([ele for ele in cols if ele]) # Get rid of empty values
    
    # Converting list of lists to a dataframe
    try:
        out = pd.DataFrame(data[1:], columns=data[0])
        out.iloc[:,0] = prc        
        out.rename(columns={'Absolute Banzhaf Index \n\n(Penrose Index)':'Abs_Banzhaf',
                            'Normalised Banzhaf Index':'Norm_Banzhaf',
                            'Coleman\'s\nPower to Prevent Action':'Coleman_Prevent',
                            'Coleman\'s Power to Initiate Action':'Coleman_Initiate'},inplace=True)
        return(out)
    
    except:
        return('Error: creating dataFrame error! ')

In [82]:
def gameTheoric_concentration(symbol, index='shapley', how='concentrated',quota = 50.01,major_mode='number',major_thr=20,
                              concentration_point=0.99,out_index = 'Largest',fast_mode = True):
    
    if how not in ['dispersion','concentrated']:
        raise('how must be in [\'dispersion\',\'concentrated\']!')
        
    if index not in ['shapely','banzhaf']:
        raise('how must be in [\'shapley\',\'banzhaf\']!')
        
    
    percent_list = list(SDATA.percent[SDATA.Symbol==sym_list[34]])
    
    # Finding functions
    func = {'shpley':find_shapley,'banzhaf':find_banzhaf}['index']
    
    
    return()

## 7-Shapley-Shubik

## 8-Banzhaf

In [87]:
# find_shapley(list(SDATA.percent[SDATA.Symbol==sym_list[12]]),how='ocean',quota = 50.01,major_mode='number',major_thr=20,time_pnt=True,fast_mode=False)

In [88]:
%%time
# find_banzhaf(list(SDATA.percent[SDATA.Symbol==sym_list[34]]),how='concentrated',quota = 50.01,major_mode='number',major_thr=20,time_pnt=True,fast_mode=False)

Wall time: 0 ns
