In [1]:
import os
import requests
import zipfile
import xml.etree.ElementTree as ET
from io import BytesIO
import pandas as pd
from datetime import datetime

# 오늘 날짜를 yyyymmdd 형태로 가져오기
class Dart_Client() :
    def __init__(self) : 
        try :
            self.api_key = os.getenv("DART_API_KEY")            
        except : 
            print('error with api key')
        try :
            self.corp_codes = self.load_corp_codes()
        except : 
            self.corp_codes = self.get_corp_codes(save=True)
        self.today = datetime.today().strftime('%Y%m%d')
    
    def get_corp_codes(self, save=False) :
        url = 'https://opendart.fss.or.kr/api/corpCode.xml'
        request_url = f"{url}?crtfc_key={self.api_key}"
        response = requests.get(request_url)
        print('[SYSTEM] 서버로부터 기업코드 다운로드 완료')

        # 응답 확인
        if response.status_code == 200:
            # ZIP 파일을 메모리에 로드
            zip_file = BytesIO(response.content)
            
            # ZIP 파일 열기
            with zipfile.ZipFile(zip_file, 'r') as zip_ref:
                # ZIP 파일 내 XML 파일 읽기
                with zip_ref.open('CORPCODE.xml') as file:
                    xml_content = file.read()
                    root = ET.fromstring(xml_content)

            # corp_name, corp_code, stock_code 추출
            data = []
            for list_element in root.findall('.//list'):
                corp_code = list_element.find('corp_code').text
                corp_name = list_element.find('corp_name').text
                stock_code = list_element.find('stock_code').text
                data.append((corp_name, corp_code, stock_code))
        
        df = pd.DataFrame(data, columns=['stock_name', 'corp_code', 'stock_code'])
        df['corp_code'] = df['corp_code'].apply(lambda x : f'`{str(x)}')

        if save : df.to_csv('dart_corp_codes.csv', encoding='cp949', errors='ignore', index=False)
        return df
    
    def load_corp_codes(self) :
        df = pd.read_csv('dart_corp_codes.csv', encoding='cp949')
        self.corp_codes = df
        print('[SYSTEM] 로컬로부터 기업코드 조회 완료')
        return df

    def get_corp_code_by_name(self, stock_name) :        
        df = self.corp_codes
        return df[df['stock_name'].isin([stock_name])]['corp_code'].values[0].replace('`','')
    
    def get_company_info(self, corp_code) : 
        cols = ['stock_code','corp_name', 'stock_name', 'ceo_nm', 'corp_cls', 'jurir_no', 'bizr_no', 'hm_url', 'ir_url', 'jurir_no', 'bizr_no']
        result = pd.DataFrame()
        url = f"https://opendart.fss.or.kr/api/company.json?crtfc_key={self.api_key}&corp_code={corp_code}"
        response = requests.get(url)
        json_data = response.json()
        for col in cols :
            result[col] = [json_data[col]]
        return result
    
    # def get_availables(self, corp_code, start=20150101, end = None) :       
    #     """조회가능한 정기보고서 연도를 출력합니다."""

    #     if end == None :
    #         end = self.today

    #     url = f"https://opendart.fss.or.kr/api/list.json?crtfc_key={self.api_key}&corp_code={corp_code}&bgn_de={start}&end_de={end}&pblntf_ty=A&pblntf_detail_ty=11011&corp_cls=Y&page_no=1&page_count=99"

    #     response = requests.get(url)
    #     json_data = response.json()

    #     start = json_data['list'][0]['bsns_year']
    #     start = json_data['list'][-1]['bsns_year']

    #     print(f"{start}년에서 {end}년 까지의 데이터 조회가 가능합니다.")
    #     return start, end
        
    def get_single_fs(self, corp_code, bsns_year = '2023', reprt_code = '11011', fs_div = 'CFS', simple=False) :
        """# 1분기보고서 : 11013 | 반기보고서 : 11012 | 3분기보고서 : 11014 | 사업보고서 : 11011
        # OFS:재무제표, CFS:연결재무제표"""
        if simple :
            url = f'https://opendart.fss.or.kr/api/fnlttSinglAcnt.json?crtfc_key={self.api_key}&corp_code={corp_code}&bsns_year={bsns_year}&reprt_code={reprt_code}'
        else :
            url = f'https://opendart.fss.or.kr/api/fnlttSinglAcntAll.json?crtfc_key={self.api_key}&corp_code={corp_code}&bsns_year={bsns_year}&reprt_code={reprt_code}&fs_div={fs_div}'
        response = requests.get(url)
        json_data = response.json()

        accounts = []
        cols = ['fs_nm','account_id','account_nm','sj_div','thstrm_nm','thstrm_dt','thstrm_amount','frmtrm_amount', 'bfefrmtrm_amount','currency']
        for account in json_data['list'] :
            result = []
            for col in cols :
                result.append(account.get(col))
            accounts.append(result)

        df = pd.DataFrame(accounts, columns=cols)

        return df

    def get_single_index(self, corp_code, bsns_year = '2023', reprt_code = '11011', idx_cl_code = 'M210000') :
        """# 1분기보고서 : 11013 | 반기보고서 : 11012 | 3분기보고서 : 11014 | 사업보고서 : 11011
        수익성지표 : M210000 안정성지표 : M220000 성장성지표 : M230000 활동성지표 : M240000 전부 : 
        
        # OFS:재무제표, CFS:연결재무제표"""
        url = f'https://opendart.fss.or.kr/api/fnlttSinglIndx.json?crtfc_key={self.api_key}&corp_code={corp_code}&bsns_year={bsns_year}&reprt_code={reprt_code}&idx_cl_code={idx_cl_code}'
        response = requests.get(url)
        json_data = response.json()

        accounts = []
        cols = ['bsns_year','idx_cl_nm', 'idx_nm', 'idx_val']
        for index in json_data['list'] :
            result = []
            for col in cols :
                result.append(index.get(col))
            accounts.append(result)

        df = pd.DataFrame(accounts, columns=cols)

        return df
dart = Dart_Client()


[SYSTEM] 로컬로부터 기업코드 조회 완료


In [4]:
def get_standard(sj_div) :
    url =  f"https://opendart.fss.or.kr/api/xbrlTaxonomy.json?crtfc_key={os.getenv('DART_API_KEY')}&sj_div={sj_div}"
    response = requests.get(url).json()
    accounts =[]
    for account in response['list'] :
        accounts.append((account['account_id'],account['label_kor']))
    df = pd.DataFrame(accounts)
    return df

def sorting_fs(standard, df) :
    sample_df = df.T.reset_index()
    result = copy.deepcopy(standard)
    for idx, row in result.iterrows() :
        id = row[0]
        name = row[1]
        val = sample_df[(sample_df['index'].isin([name])) | (sample_df['account_id'].isin([id]))]['2023']
        
        if len(val) > 0 :
            val = val.values[0]
            standard.at[idx, '2023'] = val
    return result

sj_div_bs = "BS1"
sj_div_dcis = "DCIS1"



IS = get_standard(sj_div_bs)
BS = get_standard(sj_div_dcis)




dart = Dart_Client()
corp_code = dart.get_corp_code_by_name('삼성전자')

# 단일 재무제표
bsns_years  = ['2023', '2022', '2021']
reprt_code = '11011'
fs_div = 'CFS'

result = pd.DataFrame()
for bsns_year in bsns_years:
    df = dart.get_single_fs(corp_code=corp_code, bsns_year=bsns_year, reprt_code=reprt_code, fs_div=fs_div, simple=False)
    df = df[df['sj_div'].isin( ['BS', 'IS', 'CIS'])]
    df = df[['account_nm', 'account_id','thstrm_amount']]    
    df.columns = ['account_nm', 'account_id',bsns_year]

    for idx, row in IS.iterrows() :
        data = df[(df['account_nm'].isin([row[1]])) | (df['account_id'].isin([row[0]]))]
        if len(data) > 0 :
            IS.at[idx, bsns_year] = data[bsns_year].values[0]
            
    for idx, row in BS.iterrows() :
        data = df[(df['account_nm'].isin([row[1]])) | (df['account_id'].isin([row[0]]))]
        if len(data) > 0 :
            BS.at[idx, bsns_year] = data.values[0]


[SYSTEM] 로컬로부터 기업코드 조회 완료


In [5]:
df

Unnamed: 0,account_nm,account_id,2021
0,유동자산,ifrs-full_CurrentAssets,218163185000000
1,현금및현금성자산,ifrs-full_CashAndCashEquivalents,39031415000000
2,단기금융상품,dart_ShortTermDepositsNotClassifiedAsCashEquiv...,81708986000000
3,단기상각후원가금융자산,-표준계정코드 미사용-,3369034000000
4,단기당기손익-공정가치금융자산,ifrs-full_CurrentFinancialAssetsAtFairValueThr...,40757000000
...,...,...,...
79,해외사업장환산외환차이,ifrs-full_GainsLossesOnExchangeDifferencesOnTr...,7283620000000
80,현금흐름위험회피파생상품평가손익,ifrs-full_GainsLossesOnCashFlowHedgesNetOfTax,50410000000
81,총포괄손익,ifrs-full_ComprehensiveIncome,49909749000000
82,지배기업 소유주지분,ifrs-full_ComprehensiveIncomeAttributableToOwn...,49037912000000


In [8]:
IS.dropna(subset=bsns_years, how='all').to_csv('삼성전자.csv', encoding='cp949')
BS.dropna(subset=bsns_years, how='all').to_csv('삼성전자2.csv', encoding='cp949')


In [40]:
IS = get_standard('BS1')
for item in  IS[1] :
    if '유동성' in item :
        print(item)

유동성장기차입금
유동성장기미지급금
유동성사채
유동성전환사채
유동성신주인수권부사채
유동성전환상환우선주부채
유동성교환사채
유동성금융리스부채


In [16]:
df[df['account_nm'] == '부채와자본총계']

Unnamed: 0,account_nm,account_id,2021
52,부채와자본총계,ifrs-full_EquityAndLiabilities,426621158000000


In [17]:
df.to_csv('삼성전자3.csv', encoding='cp949')

In [161]:
result.dropna().to_csv('삼성전자2.csv', encoding='cp949')

In [74]:
result.T.to_csv('삼성전자.csv', encoding='cp949')

In [52]:
result = result.reset_index()  # Reset the index to make 'account_nm' a column
result = result.drop_duplicates(subset='account_nm')  # Drop duplicates based on 'account_nm'
result = result.set_index('account_nm')  # Set 'account_nm' back as the index
result.to_csv('삼성전자.csv', encoding='cp949')

AttributeError: 'dict' object has no attribute 'reset_index'

In [23]:
result.to_csv('삼성전자.csv', encoding='cp949', errors='ignore')
result = result.set_index('account_nm')
result
result.columns = [2023]
result


In [58]:

df.to_csv('삼성전자.csv', encoding='cp949')

In [59]:
df

Unnamed: 0,fs_nm,account_nm,sj_div,thstrm_nm,thstrm_dt,thstrm_amount,frmtrm_amount,bfefrmtrm_amount,currency
0,,자산총계,BS,제 55 기,,455905980000000,448424507000000,426621158000000,KRW
1,,유동자산,BS,제 55 기,,195936557000000,218470581000000,218163185000000,KRW
2,,미수금,BS,제 55 기,,6633248000000,6149209000000,4497257000000,KRW
3,,선급비용,BS,제 55 기,,3366130000000,2867823000000,2336252000000,KRW
4,,현금및현금성자산,BS,제 55 기,,69080893000000,49680710000000,39031415000000,KRW
...,...,...,...,...,...,...,...,...,...
171,,당기순이익(손실),SCE,제 55 기,,14473401000000,54730018000000,39243791000000,KRW
172,,관계기업 및 공동기업의 기타포괄손익에 대한 지분,SCE,제 55 기,,75112000000,-50510000000,211979000000,KRW
173,,관계기업 및 공동기업의 기타포괄손익에 대한 지분,SCE,제 55 기,,70157000000,-51848000000,225464000000,KRW
174,,관계기업 및 공동기업의 기타포괄손익에 대한 지분,SCE,제 55 기,,4955000000,1338000000,-13485000000,KRW


In [42]:
import pandas as pd

dart = Dart_Client()
corp_code = dart.get_corp_code_by_name('삼성전자')

# 단일 재무제표
bsns_years  = ['2021', '2022', '2023']
reprt_code = '11011'
fs_div = 'CFS'

result = pd.DataFrame()


indexes = []
dfs = {}
for bsns_year in bsns_years:
    df = dart.get_single_fs(corp_code=corp_code, bsns_year=bsns_year, reprt_code=reprt_code, fs_div=fs_div, simple=False)
    df.set_index('account_nm', inplace=True)
    df = df[['thstrm_amount']]
    df.columns = [bsns_year]
    indexes += list(df.index)
    dfs[bsns_year] = df

indexes = list(set(indexes))
result = {}

for key, val in dfs.items() :
    result[key] = {}
    for index in indexes : 
        try :
            result[key][index] = val.loc(index,key)
            print(val)
        except : 
            continue
pd.DataFrame(result)
        



[SYSTEM] 로컬로부터 기업코드 조회 완료


Unnamed: 0,2021,2022,2023


In [47]:
val.loc[indexes[0], key]

account_nm
당기순이익(손실)    15487100000000
당기순이익(손실)    15487100000000
당기순이익(손실)    15487100000000
당기순이익(손실)    15487100000000
당기순이익(손실)     1013699000000
당기순이익(손실)    14473401000000
당기순이익(손실)    14473401000000
Name: 2023, dtype: object

In [38]:
df.to_dict()

{'2023': {'자산총계': '455905980000000',
  '유동자산': '195936557000000',
  '미수금': '6633248000000',
  '선급비용': '3366130000000',
  '현금및현금성자산': '69080893000000',
  '단기상각후원가금융자산': '608281000000',
  '단기당기손익-공정가치금융자산': '27112000000',
  '매출채권': '36647393000000',
  '재고자산': '51625874000000',
  '매각예정분류자산': '217864000000',
  '기타유동자산': '5038838000000',
  '단기금융상품': '22690924000000',
  '비유동자산': '259969423000000',
  '이연법인세자산': '10211797000000',
  '무형자산': '22741862000000',
  '관계종속기업투자자산-지분법': '11767444000000',
  '당기손익-공정가치금융자산': '1431394000000',
  '기타포괄손익-공정가치 측정 비유동금융자산': '7481297000000',
  '순확정급여자산': '4905219000000',
  '기타비유동자산': '14174148000000',
  '유형자산': '187256262000000',
  '자본총계': '10444090000000',
  '지배기업 소유주지분': '17845661000000',
  '기타자본항목': '1280130000000',
  '자본금': '897514000000',
  '보통주자본금': '778047000000',
  '우선주자본금': '119467000000',
  '이익잉여금': '346652238000000',
  '주식발행초과금': '4403893000000',
  '비지배지분': '991750000000',
  '자본과부채총계': '455905980000000',
  '유동부채': '75719452000000',
  '선수금': '14926020

In [111]:
dart = Dart_Client()
# 기업코드
corp_code = dart.get_corp_code_by_name('삼성전자')
corp_code = '00164742'
# 기업정보
# df = dart.get_company_info(corp_code)
# df

# # 단일 재무제표
# print(corp_code)
bsns_year = '2023'
reprt_code = '11014'
fs_div = 'CFS'
# simple=False

# print(bsns_year)
# df = dart.get_single_fs(corp_code=corp_code,bsns_year=bsns_year,reprt_code=reprt_code,fs_div=fs_div,simple=False)
# df

a = ['M210000', 'M220000', 'M230000', 'M240000']

zz = pd.DataFrame()
for _ in a :
    df = dart.get_single_index(corp_code=corp_code,bsns_year=bsns_year,reprt_code=reprt_code, idx_cl_code=_)
    zz = pd.concat([zz,df], ignore_index=True)
zz




[SYSTEM] 로컬로부터 기업코드 조회 완료


Unnamed: 0,bsns_year,idx_cl_nm,idx_nm,idx_val
0,2023,수익성지표,세전계속사업이익률,
1,2023,수익성지표,순이익률,
2,2023,수익성지표,총포괄이익률,9.793
3,2023,수익성지표,매출총이익률,20.719
4,2023,수익성지표,매출원가율,79.281
...,...,...,...,...
61,2023,활동성지표,유형자산회전율,329.781
62,2023,활동성지표,타인자본회전율,70.583
63,2023,활동성지표,자기자본회전율,125.935
64,2023,활동성지표,자본금회전율,8128.383


In [122]:
zz.to_csv('zz.csv', encoding='cp949')

In [94]:

# [SYSTEM] 로컬로부터 기업코드 조회 완료
# [SYSTEM] 로컬로부터 기업코드 조회 완료

url =  "https://opendart.fss.or.kr/api/fnlttSinglIndx.json?crtfc_key=5fba2957a41266c1e12f217d04e869ede7bd3241&corp_code=00164742&bsns_year=2023&reprt_code=11014&idx_cl_code=M210000"


# https://opendart.fss.or.kr/api/fnlttSinglIndx.json?crtfc_key={dart.api_key}&corp_code=00164742&bsns_year=2023&reprt_code=11014&idx_cl_code=M210000'

# requests.get(f'https://opendart.fss.or.kr/api/fnlttSinglIndx.json?crtfc_key={dart.api_key}&corp_code=00164742&bsns_year=2023&reprt_code=11014&idx_cl_code=M210000').json()['list']
requests.get(url).json()

{'status': '000',
 'message': '정상',
 'list': [{'reprt_code': '11014',
   'bsns_year': '2023',
   'corp_code': '00164742',
   'stock_code': '005380',
   'idx_cl_code': 'M210000',
   'idx_cl_nm': '수익성지표',
   'idx_code': 'M211100',
   'idx_nm': '세전계속사업이익률'},
  {'reprt_code': '11014',
   'bsns_year': '2023',
   'corp_code': '00164742',
   'stock_code': '005380',
   'idx_cl_code': 'M210000',
   'idx_cl_nm': '수익성지표',
   'idx_code': 'M211200',
   'idx_nm': '순이익률'},
  {'reprt_code': '11014',
   'bsns_year': '2023',
   'corp_code': '00164742',
   'stock_code': '005380',
   'idx_cl_code': 'M210000',
   'idx_cl_nm': '수익성지표',
   'idx_code': 'M211250',
   'idx_nm': '총포괄이익률',
   'idx_val': '9.793'},
  {'reprt_code': '11014',
   'bsns_year': '2023',
   'corp_code': '00164742',
   'stock_code': '005380',
   'idx_cl_code': 'M210000',
   'idx_cl_nm': '수익성지표',
   'idx_code': 'M211300',
   'idx_nm': '매출총이익률',
   'idx_val': '20.719'},
  {'reprt_code': '11014',
   'bsns_year': '2023',
   'corp_code': '00164