In [20]:
## Loading Libraries
import pandas as pd
import numpy as np
import sys
import jdatetime as jd
import os
import matplotlib.pyplot as plt 
# import power_index_calculator as px
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [22]:
## Loading DATA
# Switch to the data folder so the CSV can be opened by its bare file name.
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
SDATA = pd.read_csv("Shareholder97.csv", index_col=0)

# Convert the date strings to timestamps, then attach the matching jdatetime (Jalali) date.
SDATA['True_Date'] = pd.to_datetime(SDATA['True_Date'], format='%Y-%m-%d')
# Each distinct Gregorian date is converted once; rows pick up the result through a lookup dict.
G = SDATA['True_Date'].drop_duplicates()
J = G.apply(lambda ts: jd.date.fromgregorian(year=ts.year, month=ts.month, day=ts.day))
DataOrg_date_GtoJ_dict = dict(zip(G, J))
SDATA['Jalali_Date'] = SDATA['True_Date'].map(DataOrg_date_GtoJ_dict)

# Drop the listed columns in place (including True_Date, now that Jalali_Date exists).
SDATA.drop(
    columns=[
        'High', 'Low', 'Open', 'Last', 'Volume', 'close',
        'True_Date', 'year', 'month', 'day',
        'Fill_Flag', 'Unadjusted_close', 'chnk_id',
    ],
    inplace=True,
)

In [23]:
# Create the DataFrame that stores the concentration measures: one row per Symbol with
# its first Id_tse, the sum of 'percent' and the count of 'ShareHolder' entries.
CMdf = (
    SDATA
    .groupby('Symbol', as_index=False)
    .agg({'Id_tse': 'first', 'percent': 'sum', 'ShareHolder': 'count'})
    .rename(columns={'percent': 'sum_over1', 'ShareHolder': 'Num_holders'})
    .reset_index(drop=True)
)

# **Concentration Measures**

## 1- Largest Owner

In [24]:
# Largest 'percent' value within each Symbol, attached to CMdf as 'Largest_Owner'.
temp = (
    SDATA
    .groupby('Symbol', as_index=False)
    .agg({'percent': 'max'})
    .rename(columns={'percent': 'Largest_Owner'})
)
CMdf = CMdf.merge(temp, on='Symbol', how='left')

## 2- First/Second

In [25]:
def nth_max(data, nth=1, interval=False):
    """Return the nth largest value of ``data``, or a run of ranked values.

    Parameters
    ----------
    data : pandas.Series
        Values to rank; they are sorted in descending order first.
    nth : int or sequence of two ints
        1-based rank to pick. When ``interval`` is true, ``nth[0]`` and
        ``nth[1]`` are the first and last ranks (inclusive) of the run.
    interval : bool
        Select a run of ranks instead of a single rank.

    Returns
    -------
    scalar or pandas.Series
        The selected value(s), rounded to 2 decimals. Ranks past the end of
        ``data`` are clamped to the last (smallest) element, so a
        single-element input always yields that element.
    """
    ranked = data.sort_values(ascending=False)
    count = len(ranked)
    last_pos = count - 1

    if not interval:
        position = min(nth - 1, last_pos)
        return np.round(ranked.iloc[position], 2)

    start = min(nth[0] - 1, last_pos)
    stop = min(nth[1], count)
    return np.round(ranked.iloc[start:stop], 2)

In [26]:
# Ratio of the largest 'percent' to the second-largest 'percent' within each Symbol.
# nth_max clamps the rank to the group size, so for a Symbol with a single holder the
# largest value is divided by itself (rounded to 2 decimals).
#
# The 'percent' Series is aggregated directly so that `temp` has flat column labels.
# Passing a set of lambdas to DataFrame.agg produced MultiIndex columns, and merging
# such a frame into the single-level CMdf is deprecated in pandas 1.x and raises
# MergeError in pandas 2.x.
temp = (
    SDATA
    .groupby('Symbol')['percent']
    .agg(lambda x: x.max() / nth_max(x, nth=2, interval=False))
    .rename('First_Second')
    .reset_index()
)
CMdf = CMdf.merge(temp, on='Symbol', how='left')

  new_axis = axis.drop(labels, errors=errors)


## 3- First/Sumtwofour

In [27]:
# Ratio of the largest 'percent' to the sum of the 2nd..4th largest within each Symbol.
# nth_max clamps the ranks to the group size, so short groups sum fewer values (a
# single-holder group falls back to its only value).
#
# The 'percent' Series is aggregated directly so that `temp` has flat column labels.
# Passing a set of lambdas to DataFrame.agg produced MultiIndex columns, and merging
# such a frame into the single-level CMdf is deprecated in pandas 1.x and raises
# MergeError in pandas 2.x.
temp = (
    SDATA
    .groupby('Symbol')['percent']
    .agg(lambda x: x.max() / nth_max(x, nth=[2, 4], interval=True).sum())
    .rename('First_Sumtwofour')
    .reset_index()
)
CMdf = CMdf.merge(temp, on='Symbol', how='left')

## 4- Sumfive

In [28]:
# Sum of the five largest 'percent' values within each Symbol, divided by 100.
# Groups with fewer than five rows simply sum everything they have.
#
# The 'percent' Series is aggregated directly so that `temp` has flat column labels.
# Passing a set of lambdas to DataFrame.agg produced MultiIndex columns, and merging
# such a frame into the single-level CMdf is deprecated in pandas 1.x and raises
# MergeError in pandas 2.x.
temp = (
    SDATA
    .groupby('Symbol')['percent']
    .agg(lambda x: nth_max(x, nth=[1, 5], interval=True).sum() / 100)
    .rename('Sumfive')
    .reset_index()
)
CMdf = CMdf.merge(temp, on='Symbol', how='left')

## 5- Gini

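We calculate the Gini coefficient following Deaton (1997):
$$ \gamma = \frac{N+1}{N-1} - \frac{2}{\mu N (N-1)}\sum_{i=1}^{N}{\rho_i x_i} $$
where $N$ is the number of holdings, $\mu$ is their mean, and $\rho_i$ is the rank of holding $x_i$ when the holdings are sorted in descending order (the largest holding has rank 1).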

In [29]:
def gini(data):
    """Compute the Gini coefficient of ``data`` with the rank-weighted formula

        gamma = (N + 1) / (N - 1) - 2 * sum(rank_i * x_i) / (mu * N * (N - 1))

    where the values are ranked in descending order (largest value has rank 1),
    ``N`` is the number of values and ``mu`` is their mean.

    Parameters
    ----------
    data : iterable of numbers
        The values to measure. Any iterable is accepted and it is NOT modified
        (the values are copied before sorting).

    Returns
    -------
    float
        The Gini coefficient. Returns 0.0 for the degenerate cases where the
        formula is undefined: fewer than two values, or a mean of zero.
    """
    # sorted() builds a new list, so the caller's container keeps its order.
    ordered = sorted(data, reverse=True)
    N = len(ordered)
    if N < 2:
        # The (N - 1) denominator would be zero (or the input is empty).
        return 0.0

    mu = float(np.mean(ordered))
    if mu == 0:
        # All mass is zero: the ratio is undefined, treat as no concentration.
        return 0.0

    weighted = sum(rank * value for rank, value in enumerate(ordered, start=1))
    return float((N + 1) / (N - 1) - (2 * weighted) / (mu * N * (N - 1)))

In [30]:
# Gini coefficient of the 'percent' values within each Symbol.
# Each group is handed to gini() as a fresh list.
#
# The 'percent' Series is aggregated directly so that `temp` has flat column labels.
# Passing a set of lambdas to DataFrame.agg produced MultiIndex columns, and merging
# such a frame into the single-level CMdf is deprecated in pandas 1.x and raises
# MergeError in pandas 2.x.
temp = (
    SDATA
    .groupby('Symbol')['percent']
    .agg(lambda x: gini(list(x)))
    .rename('Gini')
    .reset_index()
)
CMdf = CMdf.merge(temp, on='Symbol', how='left')

## 6- Herfindahl

In [31]:
# Herfindahl index per Symbol: sum of squared shares, where each share is 'percent' / 100.
# The column keeps the existing 'Herfindhal' spelling so downstream code still finds it.
#
# The 'percent' Series is aggregated directly so that `temp` has flat column labels.
# Passing a set of lambdas to DataFrame.agg produced MultiIndex columns, and merging
# such a frame into the single-level CMdf is deprecated in pandas 1.x and raises
# MergeError in pandas 2.x.
temp = (
    SDATA
    .groupby('Symbol')['percent']
    .agg(lambda x: ((x / 100) ** 2).sum())
    .rename('Herfindhal')
    .reset_index()
)
CMdf = CMdf.merge(temp, on='Symbol', how='left')

## 7-Shapley-Shubik

In [36]:
# Display the concentration measures ordered by the number of holders (fewest first).
CMdf.sort_values('Num_holders', ascending=True)

Unnamed: 0,Symbol,Id_tse,sum_over1,Num_holders,Largest_Owner,First_Second,First_Sumtwofour,Sumfive,Gini,Herfindhal
0,اعتضاد غدیر,34973883374080119,99.89,1,99.89,1.000000,1.000000,0.9989,0.000000,0.997801
304,غالبر,24303422207378456,51.33,1,51.33,1.000000,1.000000,0.5133,0.000000,0.263477
300,صایند,45205530868811305,99.36,1,99.36,1.000000,1.000000,0.9936,0.000000,0.987241
283,شلعاب,39116664428676213,63.64,1,63.64,1.000000,1.000000,0.6364,0.000000,0.405005
270,شزنگ,65490886290565185,42.54,1,42.54,1.000000,1.000000,0.4254,0.000000,0.180965
...,...,...,...,...,...,...,...,...,...,...
494,وپارس,33293588228706998,79.41,24,8.29,1.444251,0.527354,0.2898,0.327924,0.034772
405,نبورس,60095061789823130,90.09,24,6.36,1.000000,0.333333,0.3180,0.320573,0.043968
5,آریان,6506179926371994,77.98,26,10.38,1.119741,0.450521,0.3832,0.401057,0.039486
412,نکالا,10919655792568926,93.96,31,6.34,1.126110,0.391600,0.2693,0.266418,0.034464


In [34]:
%%time
import powerindices as px

# Take the 'percent' values of the first firm in CMdf that has more than 40 holders.
# NOTE(review): .iloc[0] raises IndexError if no firm exceeds 40 holders - confirm the
# threshold against the loaded data.
target_id = CMdf[CMdf.Num_holders > 40].Id_tse.iloc[0]
prc = list(SDATA[SDATA.Id_tse == target_id].percent)

# Scale the percentages to integer weights (hundredths of a percent).
# Round before converting: a bare int() truncates, and products such as 0.29 * 100
# evaluate to 28.999999999999996, which would silently lose one unit of weight.
weight = [int(round(x * 100)) for x in prc]

# NOTE(review): the first argument is presumably the quota; 5000 equals 50% in these
# units - confirm against the powerindices documentation.
list(zip(weight, px.compute_ssi(5000, weight)))

Wall time: 10.6 s


[(117, 0.014666041173875889),
 (145, 0.018240179295041552),
 (107, 0.013395737887830305),
 (149, 0.018752896969831526),
 (142, 0.017856006591268805),
 (185, 0.023391291367154534),
 (118, 0.014793236812876266),
 (176, 0.022227578216311633),
 (252, 0.032142737004626463),
 (118, 0.014793236812876266),
 (200, 0.02533694280073188),
 (237, 0.030169699888540423),
 (151, 0.01900943809880782),
 (188, 0.023779793238160808),
 (289, 0.03704456553822153),
 (190, 0.02403896991071311),
 (161, 0.020294189355561564),
 (102, 0.01276179949350819),
 (167, 0.021066618783361135),
 (128, 0.016067110048636674),
 (237, 0.030169699888540423),
 (172, 0.021711271247153188),
 (242, 0.03082647176584244),
 (229, 0.02912068450547725),
 (268, 0.034256281659924136),
 (161, 0.020294189355561564),
 (175, 0.022098456175264545),
 (225, 0.028597040207685373),
 (344, 0.044425963277194884),
 (215, 0.027290368295594645),
 (140, 0.017600060898177852),
 (127, 0.015939578430193266),
 (202, 0.025596943413058938),
 (151, 0.01900943