In [1]:
import requests as rq
from io import BytesIO
import pandas as pd

In [2]:
url = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
body = {
    'mktId': 'STK',
    'trdDd': '20240510',
    'money': 1,
    'csvxls_isNo': 'false',
    'name': 'fileDown',
    'url': 'dbms/MDC/STAT/standard/MDCSTAT03901'
}
headers = {'Referer' : 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader'}
otp = rq.post(url,body,headers = headers).text

url = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
body = {
    'code': otp
}
stock_ticker = rq.post(url,body,headers = headers)
ticker = pd.read_csv(BytesIO(stock_ticker.content) , encoding = 'EUC_KR')

In [3]:
ticker

Unnamed: 0,종목코드,종목명,시장구분,업종명,종가,대비,등락률,시가총액
0,095570,AJ네트웍스,KOSPI,서비스업,4945,135,2.81,223774893255
1,006840,AK홀딩스,KOSPI,기타금융,15320,-60,-0.39,202952634520
2,027410,BGF,KOSPI,기타금융,3770,45,1.21,360852302070
3,282330,BGF리테일,KOSPI,유통업,133600,-1600,-1.18,2309129841600
4,138930,BNK금융지주,KOSPI,기타금융,8640,210,2.49,2782844104320
...,...,...,...,...,...,...,...,...
949,079980,휴비스,KOSPI,화학,3730,20,0.54,128685000000
950,005010,휴스틸,KOSPI,철강금속,4570,0,0.00,256779502750
951,000540,흥국화재,KOSPI,보험,4280,110,2.64,274958520600
952,000545,흥국화재우,KOSPI,보험,6830,-150,-2.15,5245440000


# 코스피, 코스닥 종목코드 가져오기

In [4]:
def getKospi(date):
    url = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
    body = {
        'mktId': 'STK',
        'trdDd': date,
        'money': 1,
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03901'
    }
    headers = {'Referer' : 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader'}
    otp = rq.post(url,body,headers = headers).text

    url = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
    body = {
        'code': otp
    }
    stock_ticker = rq.post(url,body,headers = headers)
    ticker = pd.read_csv(BytesIO(stock_ticker.content) , encoding = 'EUC_KR')
    ticker['종목코드 + 날짜'] = ticker['종목코드']+'/'+date
    ticker = ticker.drop('종목코드',axis = 1)
    ticker = ticker[['종목코드 + 날짜', '종목명', '시장구분', '업종명', '종가', '대비', '등락률', '시가총액']]
    return ticker

In [5]:
def getKosdak(date):
    url = 'http://data.krx.co.kr/comm/fileDn/GenerateOTP/generate.cmd'
    body = {
        'mktId': 'KSQ',
        'segTpCd': 'ALL',
        'trdDd': date,
        'money': 1,
        'csvxls_isNo': 'false',
        'name': 'fileDown',
        'url': 'dbms/MDC/STAT/standard/MDCSTAT03901'
    }
    headers = {'Referer' : 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader'}
    otp = rq.post(url,body,headers = headers).text

    url = 'http://data.krx.co.kr/comm/fileDn/download_csv/download.cmd'
    body = {
        'code': otp
    }
    stock_ticker = rq.post(url,body,headers = headers)
    ticker = pd.read_csv(BytesIO(stock_ticker.content) , encoding = 'EUC_KR')
    ticker['종목코드 + 날짜'] = ticker['종목코드'].astype(str) +'/'+date
    ticker = ticker.drop('종목코드',axis = 1)
    ticker = ticker[['종목코드 + 날짜', '종목명', '시장구분', '업종명', '종가', '대비', '등락률', '시가총액']]
    return ticker

# 새로운 df정의

In [6]:
columns = ['종목코드 + 날짜', '종목명', '시장구분', '업종명', '종가', '대비', '등락률', '시가총액']
stock_ticker = pd.DataFrame(columns = columns)
stock_ticker.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 0 entries
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   종목코드 + 날짜  0 non-null      object
 1   종목명        0 non-null      object
 2   시장구분       0 non-null      object
 3   업종명        0 non-null      object
 4   종가         0 non-null      object
 5   대비         0 non-null      object
 6   등락률        0 non-null      object
 7   시가총액       0 non-null      object
dtypes: object(8)
memory usage: 132.0+ bytes


# 날짜별로 종목정보 가져오기

In [7]:
from datetime import date
from datetime import datetime, timedelta
import holidays
from itertools import chain
from tqdm import tqdm
import time

kr_holidays = holidays.KR()

date_str = "20240331"

date_obj = datetime.strptime(date_str,"%Y%m%d").date()

for year in tqdm(range(15)):

    while (date_obj in kr_holidays or date_obj.weekday() >= 5):
        date_obj -= timedelta(days=1)
    date_possible = date_obj.strftime("%Y%m%d")
    
    df = getKospi(date_possible)
    stock_ticker = pd.concat([stock_ticker,df])        
        
    date_obj = date(2023-year,3,31)
    time.sleep(1)
stock_ticker = stock_ticker.reset_index(drop=True)

  stock_ticker = pd.concat([stock_ticker,df])
100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [00:25<00:00,  1.68s/it]


In [8]:
kr_holidays = holidays.KR()

date_str = "20240331"

date_obj = datetime.strptime(date_str,"%Y%m%d").date()

for year in tqdm(range(15)):

    while (date_obj in kr_holidays or date_obj.weekday() >= 5):
        date_obj -= timedelta(days=1)
    date_possible = date_obj.strftime("%Y%m%d")
    
    df = getKosdak(date_possible)
    stock_ticker = pd.concat([stock_ticker,df])        
        
    date_obj = date(2023-year,3,31)
    time.sleep(1)
stock_ticker = stock_ticker.reset_index(drop=True)

100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [00:23<00:00,  1.56s/it]


In [9]:
stock_ticker
stock_ticker['종목명'] = stock_ticker['종목명'].str.strip()
stock_ticker

Unnamed: 0,종목코드 + 날짜,종목명,시장구분,업종명,종가,대비,등락률,시가총액
0,095570/20240329,AJ네트웍스,KOSPI,서비스업,4865,-255,-4.98,220154672535
1,006840/20240329,AK홀딩스,KOSPI,기타금융,15100,-460,-2.96,200038171100
2,027410/20240329,BGF,KOSPI,기타금융,3740,-55,-1.45,357980798340
3,282330/20240329,BGF리테일,KOSPI,유통업,117700,700,0.60,2034315736200
4,138930/20240329,BNK금융지주,KOSPI,기타금융,7770,-210,-2.63,2502627163260
...,...,...,...,...,...,...,...,...
32769,65510/20100331,휴비츠,KOSDAQ,의료·정밀기기,4980,80,1.63,46402644000
32770,84110/20100331,휴온스,KOSDAQ,제약,7380,0,0.00,64501200000
32771,24060/20100331,흥구석유,KOSDAQ,유통,2700,-215,-7.38,40500000000
32772,10240/20100331,흥국,KOSDAQ,기계·장비,4000,5,0.13,24645392000


In [10]:
stock_ticker.to_csv('날짜별주식정보.csv', index=False)

# 섹터 리스트 가져오기

In [58]:
import json

In [60]:
biz_day = '20240510'
url = f'''https://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt={biz_day}&sec_cd=G15'''
data = rq.get(url).json()

In [63]:
data.keys()

dict_keys(['info', 'list', 'sector', 'size'])

In [81]:
data_pd = pd.json_normalize(data['sector'])

sector_list = data_pd['SEC_CD']
sector_list

0    G25
1    G35
2    G50
3    G40
4    G10
5    G20
6    G55
7    G30
8    G15
9    G45
Name: SEC_CD, dtype: object

# 날짜별 섹터코드 가져오기

In [77]:
def getSector(sector_code, date_possible):
    url = f'''https://www.wiseindex.com/Index/GetIndexComponets?ceil_yn=0&dt={date_possible}&sec_cd={sector_code}'''
    data = rq.get(url).json()
    df = pd.json_normalize(data['list'])

    result = pd.DataFrame()
    result['종목코드 + 날짜'] = df['CMP_CD'].astype(str) +'/'+date_possible
    result['종목명'] = df['CMP_KOR']
    result['섹터코드'] = sector_code
    return result

# 섹터 정보 합치기

In [83]:
columns = ['종목코드 + 날짜','종목명','섹터코드']
sector_ticker = pd.DataFrame(columns = columns)

In [84]:
kr_holidays = holidays.KR()

date_str = "20240331"

date_obj = datetime.strptime(date_str,"%Y%m%d").date()

for year in tqdm(range(15)):

    while (date_obj in kr_holidays or date_obj.weekday() >= 5):
        date_obj -= timedelta(days=1)
    date_possible = date_obj.strftime("%Y%m%d")

    for sector in sector_list:
        df = getSector(sector, date_possible)
        sector_ticker = pd.concat([sector_ticker,df])        
        
    date_obj = date(2023-year,3,31)
    time.sleep(1)
sector_ticker = sector_ticker.reset_index(drop=True)

100%|██████████████████████████████████████████████████████████████████████████████████| 15/15 [01:15<00:00,  5.03s/it]


In [97]:
filter = sector_ticker['종목코드 + 날짜'] == '095660/20150331'
sector_ticker[filter]

Unnamed: 0,종목코드 + 날짜,종목명,섹터코드
20021,095660/20150331,네오위즈,G45


In [100]:
filter = sector_ticker['종목명'] == '네오위즈'
sector_ticker[filter]

Unnamed: 0,종목코드 + 날짜,종목명,섹터코드
724,095660/20240329,네오위즈,G50
3153,095660/20230331,네오위즈,G50
5529,095660/20220331,네오위즈,G50
7744,095660/20210331,네오위즈,G50
9866,095660/20200331,네오위즈,G50
12751,095660/20190329,네오위즈,G45
14671,095660/20180330,네오위즈,G45
16550,095660/20170331,네오위즈,G45
18312,095660/20160331,네오위즈,G45
20021,095660/20150331,네오위즈,G45


In [98]:
filter = stock_ticker['종목코드 + 날짜'] == '095660/20150331'
stock_ticker[filter]

Unnamed: 0,종목코드 + 날짜,종목명,시장구분,업종명,종가,대비,등락률,시가총액


In [99]:
filter = stock_ticker['종목명'] == '네오위즈'
stock_ticker[filter]

Unnamed: 0,종목코드 + 날짜,종목명,시장구분,업종명,종가,대비,등락률,시가총액
13954,095660/20240329,네오위즈,KOSDAQ,디지털컨텐츠,21850,-300,-1.35,481436760150
15662,095660/20230331,네오위즈,KOSDAQ,디지털컨텐츠,40000,-100,-0.25,881348760000
17287,095660/20220331,네오위즈,KOSDAQ,디지털컨텐츠,24900,-200,-0.8,548639603100
18834,095660/20210331,네오위즈,KOSDAQ,디지털컨텐츠,23400,-200,-0.85,515589024600
20323,095660/20200331,네오위즈,KOSDAQ,디지털컨텐츠,17300,1850,11.97,379129638400
21723,095660/20190329,네오위즈,KOSDAQ,디지털컨텐츠,14100,-50,-0.35,309001612800
23049,095660/20180330,네오위즈,KOSDAQ,디지털컨텐츠,19850,-250,-1.24,435012908800
28766,42420/20130329,네오위즈,KOSDAQ,기타서비스,14200,50,0.35,130027497200
29774,42420/20120330,네오위즈,KOSDAQ,기타서비스,18500,350,1.93,169402021000
30812,42420/20110331,네오위즈,KOSDAQ,기타서비스,12750,200,1.59,116750041500


In [90]:
temp = set(stock_ticker['종목코드 + 날짜']).symmetric_difference(set(sector_ticker['종목코드 + 날짜']))

In [93]:
temp

{'58450/20140331',
 '095660/20150331',
 '032685/20170331',
 '019540/20110331',
 '131030/20110331',
 '323230/20200331',
 '225570/20160331',
 '087220/20100331',
 '52330/20130329',
 '060370/20130329',
 '064550/20100331',
 '39560/20130329',
 '18290/20130329',
 '198440/20150331',
 '34810/20110331',
 '058370/20150331',
 '016600/20150331',
 '004367/20140331',
 '087730/20160331',
 '52710/20160331',
 '95340/20110331',
 '45660/20160331',
 '50470/20110331',
 '60570/20100331',
 '095610/20150331',
 '004590/20150331',
 '000835/20140331',
 '74430/20150331',
 '58480/20100331',
 '068790/20140331',
 '066410/20150331',
 '6580/20110331',
 '041140/20160331',
 '32680/20130329',
 '900260/20210331',
 '32040/20100331',
 '039420/20140331',
 '052300/20130329',
 '067310/20160331',
 '044180/20160331',
 '033290/20100331',
 '065560/20120330',
 '49960/20100331',
 '84990/20140331',
 '222810/20160331',
 '000145/20200331',
 '53060/20100331',
 '043710/20160331',
 '65420/20150331',
 '49960/20130329',
 '95610/20120330',
 '

In [103]:
stock_ticker['종목명'].unique().size

4560

In [104]:
sector_ticker['종목명'].unique().size

2972

In [120]:
list = stock_ticker['종목코드 + 날짜'].apply(lambda x : x.split('/'))
set1 = list.apply(lambda x : x[0])

In [121]:
set1

0        095570
1        006840
2        027410
3        282330
4        138930
          ...  
32769     65510
32770     84110
32771     24060
32772     10240
32773     37440
Name: 종목코드 + 날짜, Length: 32774, dtype: object

In [122]:
list = sector_ticker['종목코드 + 날짜'].apply(lambda x : x.split('/'))
set2 = list.apply(lambda x : x[0])

In [123]:
set2

0        005380
1        000270
2        012330
3        161390
4        090430
          ...  
28804    011050
28805    038320
28806    033280
28807    093640
28808    074140
Name: 종목코드 + 날짜, Length: 28809, dtype: object

In [124]:
set1.unique().size

4207

In [125]:
set2.unique().size

2980

In [126]:
sector_ticker

Unnamed: 0,종목코드 + 날짜,종목명,섹터코드
0,005380/20240329,현대차,G25
1,000270/20240329,기아,G25
2,012330/20240329,현대모비스,G25
3,161390/20240329,한국타이어앤테크놀로지,G25
4,090430/20240329,아모레퍼시픽,G25
...,...,...,...
28804,011050/20100331,케드콤,G45
28805,038320/20100331,어울림정보,G45
28806,033280/20100331,어울림엘시스,G45
28807,093640/20100331,케이알엠,G45


In [127]:
stock_ticker

Unnamed: 0,종목코드 + 날짜,종목명,시장구분,업종명,종가,대비,등락률,시가총액
0,095570/20240329,AJ네트웍스,KOSPI,서비스업,4865,-255,-4.98,220154672535
1,006840/20240329,AK홀딩스,KOSPI,기타금융,15100,-460,-2.96,200038171100
2,027410/20240329,BGF,KOSPI,기타금융,3740,-55,-1.45,357980798340
3,282330/20240329,BGF리테일,KOSPI,유통업,117700,700,0.60,2034315736200
4,138930/20240329,BNK금융지주,KOSPI,기타금융,7770,-210,-2.63,2502627163260
...,...,...,...,...,...,...,...,...
32769,65510/20100331,휴비츠,KOSDAQ,의료·정밀기기,4980,80,1.63,46402644000
32770,84110/20100331,휴온스,KOSDAQ,제약,7380,0,0.00,64501200000
32771,24060/20100331,흥구석유,KOSDAQ,유통,2700,-215,-7.38,40500000000
32772,10240/20100331,흥국,KOSDAQ,기계·장비,4000,5,0.13,24645392000


In [128]:
df = pd.merge(stock_ticker, sector_ticker,on = '종목코드 + 날짜',how = 'outer')

In [129]:
df

Unnamed: 0,종목코드 + 날짜,종목명_x,시장구분,업종명,종가,대비,등락률,시가총액,종목명_y,섹터코드
0,095570/20240329,AJ네트웍스,KOSPI,서비스업,4865,-255,-4.98,220154672535,AJ네트웍스,G20
1,006840/20240329,AK홀딩스,KOSPI,기타금융,15100,-460,-2.96,200038171100,AK홀딩스,G15
2,027410/20240329,BGF,KOSPI,기타금융,3740,-55,-1.45,357980798340,BGF,G15
3,282330/20240329,BGF리테일,KOSPI,유통업,117700,700,0.60,2034315736200,BGF리테일,G25
4,138930/20240329,BNK금융지주,KOSPI,기타금융,7770,-210,-2.63,2502627163260,BNK금융지주,G40
...,...,...,...,...,...,...,...,...,...,...
38583,037020/20100331,,,,,,,,한와이어리스,G45
38584,038320/20100331,,,,,,,,어울림정보,G45
38585,033280/20100331,,,,,,,,어울림엘시스,G45
38586,093640/20100331,,,,,,,,케이알엠,G45


In [130]:
data_pd = pd.json_normalize(data['list'])

In [131]:
data_pd

Unnamed: 0,IDX_CD,IDX_NM_KOR,ALL_MKT_VAL,CMP_CD,CMP_KOR,MKT_VAL,WGT,S_WGT,CAL_WGT,SEC_CD,SEC_NM_KOR,SEQ,TOP60,APT_SHR_CNT
0,G15,WICS 소재,113224296,005490,POSCO홀딩스,23400014,20.67,20.67,1.0,G15,소재,1,7,58354149
1,G15,WICS 소재,113224296,051910,LG화학,17936103,15.84,36.51,1.0,G15,소재,2,7,45179100
2,G15,WICS 소재,113224296,086520,에코프로,10720832,9.47,45.98,1.0,G15,소재,3,7,109173439
3,G15,WICS 소재,113224296,003670,포스코퓨처엠,7604952,6.72,52.69,1.0,G15,소재,4,7,27112127
4,G15,WICS 소재,113224296,010130,고려아연,4671501,4.13,56.82,1.0,G15,소재,5,7,9408865
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,G15,WICS 소재,113224296,000760,이화산업,11121,0.01,98.94,1.0,G15,소재,236,7,756000
236,G15,WICS 소재,113224296,026910,광진실업,10748,0.01,98.95,1.0,G15,소재,237,7,3266757
237,G15,WICS 소재,113224296,014970,삼륭물산,10468,0.01,98.96,1.0,G15,소재,238,7,2722500
238,G15,WICS 소재,113224296,001770,SHD,9966,0.01,98.97,1.0,G15,소재,239,7,546695


In [133]:
stock_ticker['업종명'].unique()

array(['서비스업', '기타금융', '유통업', '섬유의복', '운수창고업', '음식료품', '비금속광물', '증권',
       '보험', '전기전자', '화학', '건설업', '철강금속', '기계', '운수장비', '광업', '의약품',
       '통신업', '기타제조업', '전기가스업', '종이목재', '은행', '의료정밀', '농업, 임업 및 어업',
       '기계·장비', '금융', '반도체', '통신장비', '컴퓨터서비스', '운송장비·부품', '기타서비스',
       '방송서비스', '유통', '제약', '건설', '전기·가스·수도', '일반전기전자', '출판·매체복제',
       '섬유·의류', '의료·정밀기기', '오락·문화', '금속', '소프트웨어', 'IT부품', '디지털컨텐츠',
       '기타제조', '비금속', '운송', '인터넷', '정보기기', '음식료·담배', '종이·목재', '통신서비스',
       '숙박·음식'], dtype=object)

In [134]:
list = stock_ticker['종목코드 + 날짜'].apply(lambda x : x.split('/'))
set1 = list.apply(lambda x : x[1])

In [136]:
set1.unique()

array(['20240329', '20230331', '20220331', '20210331', '20200331',
       '20190329', '20180330', '20170331', '20160331', '20150331',
       '20140331', '20130329', '20120330', '20110331', '20100331'],
      dtype=object)