In [72]:
## Loading Libraries
import pandas as pd
import numpy as np
import sys
import jdatetime as jd
import os
import pickle

# import power_index_calculator as px
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression statement in a cell, not only
# the last one (IPython's default setting is "last_expr").
InteractiveShell.ast_node_interactivity = "all"

# **Loading OCM Data**

In [169]:
## Loading Data
# Reads the yearly "Shareholder<year>.csv" and "Measures<year>.csv" files and
# keeps, per year, only the symbols that occur in every year's Measures file.
DF_dict = {}            # year (as str) -> full Measures table as read from disk
Shareholder_data = {}   # year (as str) -> Shareholder table as read from disk
Start = 1397            # first year to load (inclusive)
End = 1398              # last year to load (inclusive)

# NOTE(review): machine-specific absolute path; every read below is relative to it.
os.chdir(r"C:\Users\Mahdi\OneDrive\Master Thesis\Data")
for year in range(Start,End+1):
    file_name= "Shareholder"+str(year)+".csv"
    Shareholder_data[str(year)] = pd.read_csv(file_name)

# Symbols ends up as the set of symbols listed in every year's Measures file.
# Starting from an empty set keeps the name defined even if the year range is empty.
Symbols = set()
for year in range(Start,End+1):
    file_name = "Measures"+str(year)+".csv"
    df = pd.read_csv(file_name)
    Symbols = set(df.Symbol) if year==Start else Symbols.intersection(set(df.Symbol))
    DF_dict[str(year)] = df

print('We have ',len(Symbols),' stocks in all year from ',Start,' to ',End)

## Keeping common stocks
# Filter the frames already held in DF_dict instead of parsing each CSV a second time.
# .copy() makes each filtered table independent of its DF_dict source.
DF = {}
for year in range(Start,End+1):
    df = DF_dict[str(year)]
    df = df[df.Symbol.isin(list(Symbols))].copy()
    DF[str(year)] = df

We have  301  stocks in all year from  1397  to  1398


# **Calculating Average for Industries and Filtering Industries**

In [170]:
## Calculating average for industries
# For every year, collapse the firm-level table to one row per industry:
# 'Symbol' becomes the number of firms, every other listed column its mean.
mean_columns = ['Largest_Owner','First_Second','First_Sumtwofour','Sumfive','Gini',
                'Herfindhal','SSCL','SSCO','SSDL','SSDO','BZCL','BZCO','BZDL']
agg_spec = {'Symbol':'count'}
agg_spec.update({col:'mean' for col in mean_columns})

yearly_frames = []
for year in range(Start,End+1):
    data = DF[str(year)]
    temp = data.groupby('Industry',as_index=False).agg(agg_spec)
    temp['year'] = year
    yearly_frames.append(temp)

# DataFrame.append was removed in pandas 2.0; concatenating once also avoids
# re-copying the accumulated frame on every iteration. As before, the per-year
# row labels are kept (not renumbered). An empty year range gives an empty frame.
Ind_OCM = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

In [191]:
# Keep only the industries whose number of firms, averaged over the years,
# is strictly greater than min_sym.
min_sym = 5
temp = Ind_OCM[['Industry','Symbol']].groupby('Industry',as_index=False).agg('mean')
temp = temp[temp.Symbol>min_sym]
Ind_OCM = Ind_OCM[Ind_OCM.Industry.isin(temp.Industry)].reset_index(drop=True)

# 'Symbol' holds a per-year firm count, so the total over all rows divided by the
# number of years is the average number of firms per year.
n_years = End-Start+1
n_firms = Ind_OCM.Symbol.sum()/n_years
# Count distinct industry names. Counting distinct (Symbol, Industry) pairs would
# count an industry once per different yearly firm count.
n_industries = Ind_OCM.Industry.nunique()

print("After filtering by year({}-{}), common stock, and keeping industries with average {} firms in each year:\n"
      "We have {:0.0f} firms, and {} industries!"
      .format(Start,End,min_sym,n_firms,n_industries))

After filtering by year(1397-1398), common stock, and keeping industries with average 5 firms in each year:
We have 253 firms, and 19 industries!


In [192]:
# Dictionary for Industries
# Maps a 0-based integer code to each distinct industry name, in the order the
# names first appear in Ind_OCM.
industry_names = Ind_OCM['Industry'].drop_duplicates()
Ind_dict = dict(enumerate(industry_names))
Ind_dict

{0: 'استخراج کانه های فلزی',
 1: 'انبوه سازی، املاک و مستغلات',
 2: 'بانکها و موسسات اعتباری',
 3: 'بیمه وصندوق بازنشستگی به جزتامین اجتماعی',
 4: 'حمل ونقل، انبارداری و ارتباطات',
 5: 'خودرو و ساخت قطعات',
 6: 'رایانه و فعالیت\u200cهای وابسته به آن',
 7: 'ساخت محصولات فلزی',
 8: 'سایر محصولات کانی غیرفلزی',
 9: 'سرمایه گذاریها',
 10: 'سیمان، آهک و گچ',
 11: 'فراورده های نفتی، کک و سوخت هسته ای',
 12: 'فلزات اساسی',
 13: 'قند و شکر',
 14: 'لاستیک و پلاستیک',
 15: 'ماشین آلات و تجهیزات',
 16: 'محصولات شیمیایی',
 17: 'محصولات غذایی و آشامیدنی به جز قند و شکر',
 18: 'مواد و محصولات دارویی'}

In [193]:
def Industry_Trend(Industry_code,OCM,pnt=False):
    """Return the yearly rows of one industry from the global ``Ind_OCM`` table.

    Parameters
    ----------
    Industry_code : key of the global ``Ind_dict``; it is looked up there to get
        the industry name.
    OCM : list of column names of ``Ind_OCM`` to include after 'Symbol'.
    pnt : when true, print the industry name.

    Returns
    -------
    list
        ``[industry name, DataFrame]`` where the DataFrame holds the 'Symbol'
        column followed by the ``OCM`` columns, indexed by 'year' in ascending
        order.
    """
    industry_name = Ind_dict[Industry_code]
    if pnt:
        print(industry_name)

    wanted_columns = ['year','Symbol']+OCM
    belongs_to_industry = Ind_OCM.Industry==industry_name

    trend = Ind_OCM.loc[belongs_to_industry,wanted_columns]
    trend = trend.sort_values(by='year')
    trend = trend.set_index('year')

    return [industry_name,trend]

In [221]:
# Snapshot of every industry for a single year.
year = 1397

# Columns requested from Industry_Trend for each industry.
requested_measures = ['Largest_Owner','Herfindhal','SSCL','SSCO','Gini','BZCL','Sumfive','First_Second']

# Output label -> column of the per-industry table. 'Symbol' (the firm count) is
# reported as 'Number of Firms' and 'Herfindhal' is relabelled 'Herfindahl'.
# The order here fixes the column order of the table built from `res`.
source_column = {
    'Number of Firms': 'Symbol',
    'Largest_Owner': 'Largest_Owner',
    'Herfindahl': 'Herfindhal',
    'SSCL': 'SSCL',
    'BZCL': 'BZCL',
    'Gini': 'Gini',
    'SSCO': 'SSCO',
    'Sumfive': 'Sumfive',
    'First_Second': 'First_Second',
}

res = {}
for code in range(len(Ind_dict)):
    industry_name, trend = Industry_Trend(code, requested_measures)
    res[industry_name] = {label: trend.loc[year, column]
                          for label, column in source_column.items()}

In [222]:
# One row per industry, one column per measure.
Indust = pd.DataFrame(res).T

# (rank column, measure column, ascending?). Every measure is ranked from
# largest to smallest except SSCO, which is ranked from smallest to largest.
rank_plan = [
    ('LO_rank', 'Largest_Owner', False),
    ('HH_rank', 'Herfindahl', False),
    ('SL_rank', 'SSCL', False),
    ('BL_rank', 'BZCL', False),
    ('SO_rank', 'SSCO', True),
    ('G_rank', 'Gini', False),
    ('SF_rank', 'Sumfive', False),
    ('FS_rank', 'First_Second', False),
]
for rank_column, measure_column, rank_ascending in rank_plan:
    Indust[rank_column] = Indust[measure_column].rank(ascending=rank_ascending)

# NOTE(review): a commented-out column selection used to sit here; it referred to
# 'SS_rank' and 'BZ_rank', which are never created above.
np.round(Indust,3).sort_values(by='LO_rank')

Unnamed: 0,Number of Firms,Largest_Owner,Herfindahl,SSCL,BZCL,Gini,SSCO,Sumfive,First_Second,LO_rank,HH_rank,SL_rank,BL_rank,SO_rank,G_rank,SF_rank,FS_rank
محصولات غذایی و آشامیدنی به جز قند و شکر,7.0,57.749,0.353,0.956,0.997,0.791,0.018,73.576,10.622,1.0,1.0,1.0,1.0,1.0,1.0,6.0,2.0
مواد و محصولات دارویی,16.0,55.084,0.351,0.875,0.913,0.735,0.042,79.406,6.396,2.0,2.0,2.0,3.0,2.0,2.0,1.0,7.0
سیمان، آهک و گچ,15.0,53.223,0.326,0.868,0.918,0.725,0.045,77.156,6.178,3.0,3.0,3.0,2.0,3.0,3.0,2.0,8.0
ساخت محصولات فلزی,7.0,49.19,0.287,0.783,0.899,0.722,0.136,68.524,13.54,4.0,5.0,6.0,4.0,7.0,4.0,9.0,1.0
لاستیک و پلاستیک,7.0,48.053,0.287,0.792,0.844,0.674,0.078,71.291,6.012,5.0,6.0,4.0,6.0,4.0,10.0,8.0,9.0
حمل ونقل، انبارداری و ارتباطات,7.0,47.16,0.306,0.631,0.643,0.704,0.175,72.554,10.355,6.0,4.0,13.0,15.0,12.0,7.0,7.0,3.0
فراورده های نفتی، کک و سوخت هسته ای,7.0,46.504,0.275,0.766,0.825,0.615,0.104,77.003,2.692,7.0,8.0,7.0,7.0,6.0,15.0,3.0,18.0
سایر محصولات کانی غیرفلزی,16.0,46.411,0.269,0.722,0.803,0.711,0.161,65.826,8.636,8.0,9.0,8.0,9.0,9.0,6.0,14.0,4.0
رایانه و فعالیت‌های وابسته به آن,7.0,45.876,0.267,0.789,0.811,0.668,0.082,76.707,3.628,9.0,10.0,5.0,8.0,5.0,12.0,4.0,15.0
محصولات شیمیایی,29.0,44.201,0.278,0.629,0.678,0.621,0.155,74.398,6.011,10.0,7.0,14.0,14.0,8.0,14.0,5.0,10.0


In [27]:
################################################################################

In [28]:
# Names of the measure columns (the same columns that are averaged per industry
# earlier in this notebook).
OC_list = [
    'Largest_Owner',
    'First_Second',
    'First_Sumtwofour',
    'Sumfive',
    'Gini',
    'Herfindhal',
    'SSCL',
    'SSCO',
    'SSDL',
    'SSDO',
    'BZCL',
    'BZCO',
    'BZDL',
]