In [49]:
import OpenDartReader
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from itertools import combinations
import networkx as nx

In [2]:

api_key = 'd0d913cb52aceb226c589884da8be4006d93381c'
dart = OpenDartReader(api_key)

In [3]:
df = pd.DataFrame()

for k in [2020,2021,2022]:
    temp = dart.report('대신증권','임원',k)
    temp['year'] = k
    df = pd.concat([df,temp])
    
df.columns = ['접수번호', '법인구분', '고유번호', '법인명', '이름', '성별', '생년월일', '직위', '등기임원여부', '상근여부', '담당업무', '주요경력', '최대주주와의관계', '재직기간', '임기만료일', '보고서연도']
df.to_csv("./df_daishin_executive.csv", index = False)


In [4]:
df = pd.DataFrame()

for k in [2020,2021,2022]:
    temp = dart.report('대신증권','최대주주',k)
    temp['year'] = k
    df = pd.concat([df,temp])
    
df.columns = ['접수번호', '법인구분', '고유번호', '법인명', '주식의종류', '이름', '관계', '기초주식수', '기초주식지분율', '기말주식수', '기말주식지분율', '비고', '보고서연도']
df.to_csv("./df_daishin_shareholder.csv", index = False)

In [5]:
daishin_executive = pd.read_csv('./df_daishin_executive.csv', sep=",", encoding = 'UTF-8')
daishin_shareholder = pd.read_csv('./df_daishin_shareholder.csv', sep=",", encoding = 'UTF-8')

In [6]:
def getUniversity(x):
    return x.split('\n')[0]

def getMaster(x):
    if x.find('석사')!=-1:
        return 1
    else:
        return 0
    
def getPhD(x):
    if x.find('박사')!=-1:
        return 1
    else:
        return 0
    
daishin_executive['대학교'] = daishin_executive['주요경력'].apply(lambda x : getUniversity(x))
daishin_executive['석사여부'] = daishin_executive['주요경력'].apply(lambda x : getMaster(x))
daishin_executive['박사여부'] = daishin_executive['주요경력'].apply(lambda x : getPhD(x))

# 재직 시작일 추출
daishin_executive['시작일'] = daishin_executive['재직기간'].str.split('~').str[0]

# 재직 시작일을 연도로 변환
daishin_executive['시작일'] = pd.to_datetime(daishin_executive['시작일'], format='%Y.%m.%d')

# 현재 날짜 계산
today = pd.to_datetime('today')

# 경력 계산
daishin_executive['경력'] = (today - daishin_executive['시작일']).dt.days // 365

#임기만료까지 남은 기간
daishin_executive[['연도', '월', '일']] = daishin_executive['임기만료일'].str.extract(r'(\d{4})년 (\d{2})월 (\d{2})일')
expiration_date = pd.to_datetime(daishin_executive['연도'] + daishin_executive['월'] + daishin_executive['일'], format='%Y%m%d')
daishin_executive['임기만료까지기간'] = (expiration_date - today).dt.days

# 생년월일에서 연도와 월 추출
daishin_executive[['생년', '월']] = daishin_executive['생년월일'].str.extract(r'(\d{4})년 (\d{2})월')

# 현재 연도와 월 계산
current_year = today.year
current_month = today.month

# 만나이 계산
daishin_executive['만나이'] = current_year - daishin_executive['생년'].astype(int)
daishin_executive.loc[daishin_executive['월'].astype(int) > current_month, '만나이'] -= 1

In [7]:
daishin_executive.head()

Unnamed: 0,접수번호,법인구분,고유번호,법인명,이름,성별,생년월일,직위,등기임원여부,상근여부,...,석사여부,박사여부,시작일,경력,연도,월,일,임기만료까지기간,생년,만나이
0,20210311001204,Y,110893,대신증권,이어룡,여,1953년 09월,회장,등기임원,상근,...,0,0,2004-09-24,18,2021,9,19,-787,1953,69
1,20210311001204,Y,110893,대신증권,신인식,남,1962년 01월,전무,미등기임원,상근,...,0,0,2013-04-01,10,2021,1,19,-787,1962,61
2,20210311001204,Y,110893,대신증권,길기모,남,1968년 12월,전무,미등기임원,상근,...,0,0,2019-04-01,4,2021,12,31,-775,1968,54
3,20210311001204,Y,110893,대신증권,조경순,남,1964년 01월,전무,미등기임원,상근,...,1,0,2014-01-01,9,2021,1,19,-787,1964,59
4,20210311001204,Y,110893,대신증권,홍대한,남,1963년 05월,전무,미등기임원,상근,...,0,0,2014-01-01,9,2021,5,31,-500,1963,60


In [8]:
drop_col = ['접수번호', '법인구분','고유번호','법인명','재직기간','보고서연도','시작일', '생년월일', '생년', '월','연도','일','임기만료일' ]

In [9]:
daishin_executive = daishin_executive.drop(drop_col, axis=1)6

In [10]:
drop_col2=['주요경력', '최대주주와의관계']
daishin_executive = daishin_executive.drop(drop_col2, axis=1)

In [11]:
def getUniversity_name(x):
    if x.split(' ')[0] == 'University':
        return 'Bath'
    else :
        return x.split(' ')[0]
daishin_executive['대학교'] = daishin_executive['대학교'].apply(lambda x : getUniversity_name(x))

In [12]:
daishin_executive.head()

Unnamed: 0,이름,성별,직위,등기임원여부,상근여부,담당업무,대학교,석사여부,박사여부,경력,임기만료까지기간,만나이
0,이어룡,여,회장,등기임원,상근,-,상명여자사범대,0,0,18,-787,69
1,신인식,남,전무,미등기임원,상근,-,성균관대,0,0,10,-787,61
2,길기모,남,전무,미등기임원,상근,리스크관리부문장,서울대,0,0,4,-775,54
3,조경순,남,전무,미등기임원,상근,대외협력담당,연세대,1,0,9,-787,59
4,홍대한,남,전무,미등기임원,상근,준법감시인\n/준법지원부문장,서강대,0,0,9,-500,60


In [13]:
def commonidx(df, col, uniquevalue):
    col_index_lst=[]
    for i in range(len(uniquevalue)):
        col_i_idx = df[df[col] == uniquevalue[i]].index.tolist()
        col_index_lst.append(col_i_idx)
    return col_index_lst

In [14]:
sex_idx=commonidx(daishin_executive, '성별', ['여','남'])
position_idx = commonidx(daishin_executive, '직위', daishin_executive['직위'].unique().tolist())
registeredOfficer_idx = commonidx(daishin_executive, '등기임원여부', daishin_executive['등기임원여부'].unique().tolist())
fullTime_idx = commonidx(daishin_executive, '상근여부', daishin_executive['상근여부'].unique().tolist())
university_idx = commonidx(daishin_executive, '대학교', daishin_executive['대학교'].unique().tolist())
master_idx = commonidx(daishin_executive, '석사여부', daishin_executive['석사여부'].unique().tolist())
doctor_idx = commonidx(daishin_executive, '박사여부', daishin_executive['박사여부'].unique().tolist())
career_idx = commonidx(daishin_executive, '경력', daishin_executive['경력'].unique().tolist())
expiration_idx = commonidx(daishin_executive, '임기만료까지기간', daishin_executive['임기만료까지기간'].unique().tolist())
age_idx = commonidx(daishin_executive, '만나이', daishin_executive['만나이'].unique().tolist())

In [15]:
sex_idx[0]

[0, 15, 36, 50, 74, 84]

In [16]:
for i in sex_idx[0]:
    print(daishin_executive.loc[i,'이름'])

이어룡
이순남
이어룡
이순남
이어룡
이순남


In [17]:
def idxtoname(df, idxlist):
    name_col_list = []
    for i in idxlist:
        name_list=[]
        for j in i:
            name_list.append(df.loc[j, '이름']) 
        name_col_list.append(name_list)
    return name_col_list

In [18]:
sex_name = idxtoname(daishin_executive, sex_idx)
position_name = idxtoname(daishin_executive, position_idx)
registeredOfficer_name = idxtoname(daishin_executive, registeredOfficer_idx)
fullTime_name = idxtoname(daishin_executive, fullTime_idx)
university_name = idxtoname(daishin_executive, university_idx)
master_name = idxtoname(daishin_executive, master_idx)
doctor_name = idxtoname(daishin_executive, doctor_idx)
career_name = idxtoname(daishin_executive, career_idx)
expiration_name = idxtoname(daishin_executive, expiration_idx)
age_name = idxtoname(daishin_executive, age_idx)

In [19]:
sex_name[0]

['이어룡', '이순남', '이어룡', '이순남', '이어룡', '이순남']

In [24]:
def combinations_name(name_col_list):
    permutations_list=[]
    for i in name_col_list:
        permutations_list.append(list(combinations(i, 2)))
    return permutations_list

In [27]:
sex_2 = combinations_name(sex_name)
position_2 = combinations_name(position_name)
registeredOfficer_2 = combinations_name(registeredOfficer_name)
fullTime_2 = combinations_name(fullTime_name)
university_2 = combinations_name(university_name)
master_2 = combinations_name(master_name)
doctor_2 = combinations_name(doctor_name)
career_2 = combinations_name(career_name)
expiration_2 = combinations_name(expiration_name)
age_2 = combinations_name(age_name)

In [32]:
sex_2

[[('이어룡', '이순남'),
  ('이어룡', '이어룡'),
  ('이어룡', '이순남'),
  ('이어룡', '이어룡'),
  ('이어룡', '이순남'),
  ('이순남', '이어룡'),
  ('이순남', '이순남'),
  ('이순남', '이어룡'),
  ('이순남', '이순남'),
  ('이어룡', '이순남'),
  ('이어룡', '이어룡'),
  ('이어룡', '이순남'),
  ('이순남', '이어룡'),
  ('이순남', '이순남'),
  ('이어룡', '이순남')],
 [('신인식', '길기모'),
  ('신인식', '조경순'),
  ('신인식', '홍대한'),
  ('신인식', '이정화'),
  ('신인식', '진승욱'),
  ('신인식', '권택현'),
  ('신인식', '박성준'),
  ('신인식', '정연규'),
  ('신인식', '정재중'),
  ('신인식', '양홍석'),
  ('신인식', '김호중'),
  ('신인식', '문병식'),
  ('신인식', '정연우'),
  ('신인식', '임민수'),
  ('신인식', '김수창'),
  ('신인식', '이재우'),
  ('신인식', '홍종국'),
  ('신인식', '최근영'),
  ('신인식', '김상원'),
  ('신인식', '오익근'),
  ('신인식', '나유석'),
  ('신인식', '신재범'),
  ('신인식', '강준규'),
  ('신인식', '강윤기'),
  ('신인식', '김성원'),
  ('신인식', '정기동'),
  ('신인식', '박현식'),
  ('신인식', '이지원'),
  ('신인식', '김병철'),
  ('신인식', '이창세'),
  ('신인식', '조홍희'),
  ('신인식', '김범철'),
  ('신인식', '송혁'),
  ('신인식', '송혁'),
  ('신인식', '길기모'),
  ('신인식', '유창범'),
  ('신인식', '박성준'),
  ('신인식', '홍대한'),
  ('신인식', '이정화'),
  ('신인식', '진승욱'),
  ('신인식', '

In [46]:
network_df

Unnamed: 0,source,target
0,신인식,길기모
1,신인식,조경순
2,신인식,홍대한
3,신인식,이정화
4,신인식,진승욱
...,...,...
5990,김창수,김성호
5991,김창수,김범철
5992,원윤희,김성호
5993,원윤희,김범철


In [38]:
col_name = ['source', 'target']
for i in sex_2:
    network_df.append(pd.DataFrame(i, columns=col_name))

  network_df.append(pd.DataFrame(i, columns=col_name))
  network_df.append(pd.DataFrame(i, columns=col_name))


In [39]:
network_df

Unnamed: 0,source,target
0,신인식,길기모
1,신인식,조경순
2,신인식,홍대한
3,신인식,이정화
4,신인식,진승욱
...,...,...
5990,김창수,김성호
5991,김창수,김범철
5992,원윤희,김성호
5993,원윤희,김범철


In [52]:
network_df['weight'] = network_df.groupby(['source', 'target'])['source'].transform('size')
network_df = network_df.dropna()

G = nx.from_pandas_edgelist(network_df, 'source', 'target',
                            create_using=nx.DiGraph(), edge_attr='weight')

network_df

Unnamed: 0,source,target,weight
0,신인식,길기모,3
1,신인식,조경순,1
2,신인식,홍대한,2
3,신인식,이정화,3
4,신인식,진승욱,2
...,...,...,...
5990,김창수,김성호,2
5991,김창수,김범철,3
5992,원윤희,김성호,2
5993,원윤희,김범철,3


In [59]:
from pyvis.network import Network

net = Network(notebook=True)
net.from_nx(G)
net.show_buttons(filter_=['nodes'])
net.show('example3.html')

example3.html


In [57]:
def make_df(network_df,col_name):
    for i in col_name:
        df=pd.DataFrame(i, columns=col_name)
        pd.concat([network_df,df],ignore_index = True)

In [45]:
network_df=make_df(network_df,position_2)

ValueError: all arrays must be same length

In [43]:
position_2 = list(filter(None, position_2))

In [44]:
position_2

[[('이어룡', '이어룡'), ('이어룡', '이어룡'), ('이어룡', '이어룡')],
 [('신인식', '길기모'),
  ('신인식', '조경순'),
  ('신인식', '홍대한'),
  ('신인식', '이정화'),
  ('신인식', '진승욱'),
  ('신인식', '권택현'),
  ('신인식', '박성준'),
  ('신인식', '정연규'),
  ('신인식', '길기모'),
  ('신인식', '유창범'),
  ('신인식', '박성준'),
  ('신인식', '홍대한'),
  ('신인식', '이정화'),
  ('신인식', '진승욱'),
  ('신인식', '권택현'),
  ('신인식', '정연규'),
  ('신인식', '김호중'),
  ('신인식', '정연우'),
  ('신인식', '이재우'),
  ('신인식', '길기모'),
  ('신인식', '박성준'),
  ('신인식', '이정화'),
  ('신인식', '권택현'),
  ('신인식', '정연규'),
  ('신인식', '김호중'),
  ('신인식', '정연우'),
  ('신인식', '이재우'),
  ('길기모', '조경순'),
  ('길기모', '홍대한'),
  ('길기모', '이정화'),
  ('길기모', '진승욱'),
  ('길기모', '권택현'),
  ('길기모', '박성준'),
  ('길기모', '정연규'),
  ('길기모', '길기모'),
  ('길기모', '유창범'),
  ('길기모', '박성준'),
  ('길기모', '홍대한'),
  ('길기모', '이정화'),
  ('길기모', '진승욱'),
  ('길기모', '권택현'),
  ('길기모', '정연규'),
  ('길기모', '김호중'),
  ('길기모', '정연우'),
  ('길기모', '이재우'),
  ('길기모', '길기모'),
  ('길기모', '박성준'),
  ('길기모', '이정화'),
  ('길기모', '권택현'),
  ('길기모', '정연규'),
  ('길기모', '김호중'),
  ('길기모', '정연우'),
  ('길기모', '이재