In [None]:
import requests
import time
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import os
from tqdm import tqdm
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from config import (
    url,
    ACCEPT,
    ACCETPS_LANGUAGE,
    CONNECTION,
    CONTENT_TYPE,
    COOKIE,
    ORIGIN,
    REFERER,
    REFERER_ISIF_PREFIX,
    USER_AGENT,
)

# directorys
DATA_PATH = "./data"
IMG_PATH = "./img"
VIDEO_PATH = "./video"
os.makedirs(DATA_PATH, exist_ok=True)
os.makedirs(IMG_PATH, exist_ok=True)
os.makedirs(VIDEO_PATH, exist_ok=True)

In [None]:
headers = {
    "Accept": ACCEPT,
    "Accept-Language": ACCETPS_LANGUAGE,
    "Connection": CONNECTION,
    "Content-Type": CONTENT_TYPE,
    "Cookie": COOKIE,
    "Origin": ORIGIN,
    "Referer": REFERER,
    "User-Agent": USER_AGENT,
}

# 데이터가 시작하는 1995 5월 2일부터 오늘까지의 데이터를 받아오기
# 시작일부터 끝나는 날까지 하루씩 더해준다.
# total_days 는 tqdm의 진행 상태를 표현하기 위해서 일부러 만들었다.
start_date = datetime(1995, 5, 2)
end_date = datetime.now()
delta = timedelta(days=1)
total_days = (end_date - start_date).days + 1
current_date = start_date

# 빈 데이터프레임을 만들어주고 데이터를 계속 머지해서 최종적인 데이터를 구한다.
ranked_companies_df = pd.DataFrame()
for _ in tqdm(range(total_days), desc="Fetching Data"):

    # 요청의 데이터부분
    data = {
        "bld": "dbms/MDC/EASY/ranking/MDCEASY01701",
        "trdDd": current_date.strftime("%Y%m%d"),
        "itmTpCd": "1",
        "mktId": "ALL",
    }

    # 나는 서버를 공격해서 고장내려는게 아니라
    # 교육 목적에서 파이썬을 공부하는 것이기 때문에
    # 요청의 딜레이를 주고 요청을 한다.
    time.sleep(0.1)
    response = requests.post(url, headers=headers, data=data, verify=False)

    # 받아온 데이터를 데이터 프레임에 넣고 유효한 데이터라면 가공한다.
    # 이후 전체 주식 데이터와 합쳐준다.
    df = pd.DataFrame(response.json()["OutBlock_1"])
    if "ISU_ABBRV" in df.columns:
        df = df.iloc[0:9]
        df = df.set_index("ISU_ABBRV")
        df = df[["ISU_CD_FULL", "ISU_CD", "MKT_ID"]]
        ranked_companies_df = pd.concat(
            [ranked_companies_df, df], join="outer", axis=0
        ).drop_duplicates()
    current_date += delta
ranked_companies_df.to_csv(f"./{DATA_PATH}/ranked_companies.csv")
ranked_companies_df

In [7]:
# 한국 역사상 상위 10위 안에 랭크된 기업의 정보로 상장일을 받아오는 코드
ranked_companies_df = pd.read_csv(f"./{DATA_PATH}/ranked_companies.csv")
ranked_companies_df = ranked_companies_df.set_index("ISU_ABBRV")
for ISU_ABBRV in tqdm(ranked_companies_df.index, desc="Fetching Data"):
    data = {
        "isuCd": ranked_companies_df.loc[ISU_ABBRV]["ISU_CD_FULL"],
        "isuSrtCd": ranked_companies_df.loc[ISU_ABBRV]["ISU_CD"],
        "isuTp": ranked_companies_df.loc[ISU_ABBRV]["MKT_ID"],
        "bld": "dbms/MDC/STAT/standard/MDCSTAT02103",
    }

    time.sleep(0.1)
    response = requests.post(url, headers=headers, data=data, verify=False)
    data = response.json().get("LIST_DD", None) or datetime(1995, 5, 2).strftime(
        "%Y/%m/%d"
    )
    ranked_companies_df.at[ISU_ABBRV, "LIST_DD"] = data
ranked_companies_df.to_csv(f"./{DATA_PATH}/companies_list_dd.csv")
ranked_companies_df

Unnamed: 0_level_0,ISU_CD_FULL,ISU_CD,MKT_ID
ISU_ABBRV,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
한국전력공사,KR7015760002,15760,STK
삼성전자,KR7005930003,5930,STK
포항종합제철,KR7005490008,5490,STK
대우중공업,KR7000200006,200,STK
엘지전자,KR7002610004,2610,STK
...,...,...,...
카카오뱅크,KR7323410001,323410,STK
LG에너지솔루션,KR7373220003,373220,STK
포스코퓨처엠,KR7003670007,3670,STK
에코프로비엠,KR7247540008,247540,KSQ


In [9]:
ranked_companies_df = pd.read_csv(f"./{DATA_PATH}/companies_list_dd.csv")
end_date = datetime.strptime(ranked_companies_df.loc[ISU_ABBRV]["LIST_DD"], "%Y/%m/%d")
today = datetime.now()
market_caps_df = pd.DataFrame()
for ISU_ABBRV in tqdm(ranked_companies_df.index, desc="Fetching Data"):
    current_date = today
    market_cap_df = pd.DataFrame()
    while end_date <= current_date:
        if (current_date - relativedelta(years=1)) < end_date:
            strtDd = end_date.strftime("%Y%m%d")
        else:

            strtDd = (

                current_date - relativedelta(years=1) + timedelta(days=1)

            ).strftime("%Y%m%d")
        headers = {
            "Accept": ACCEPT,
            "Accept-Language": ACCETPS_LANGUAGE,
            "Connection": CONNECTION,
            "Content-Type": CONTENT_TYPE,
            "Cookie": COOKIE,
            "Origin": ORIGIN,
            "Referer": f"{REFERER_ISIF_PREFIX}?tabIndex=0&isuCd={ranked_companies_df.loc[ISU_ABBRV]["ISU_CD_FULL"]}&isuSrtCd={ranked_companies_df.loc[ISU_ABBRV]["ISU_CD"]}&isuTp={ranked_companies_df.loc[ISU_ABBRV]["MKT_ID"]}&isuTpDtl=undefined&prodId=undefined",
            "User-Agent": USER_AGENT,
        }

        data = {
            "isuCd": ranked_companies_df.loc[ISU_ABBRV]["ISU_CD_FULL"],
            "isuSrtCd": ranked_companies_df.loc[ISU_ABBRV]["ISU_CD"],
            "isuTp": ranked_companies_df.loc[ISU_ABBRV]["MKT_ID"],
            "isuTpDtl": "undefined",
            "strtDd": strtDd,
            "endDd": current_date.strftime("%Y%m%d"),
            "bld": "dbms/MDC/STAT/standard/MDCSTAT01701",
        }
        time.sleep(0.1)
        response = requests.post(url, headers=headers, data=data, verify=False)
        if response.json()["output"] and response.status_code == 200:
            df = pd.DataFrame(response.json()["output"])
            df = df[["TRD_DD", "MKTCAP"]]
            df["TRD_DD"] = pd.to_datetime(df["TRD_DD"])
            df.set_index("TRD_DD", inplace=True)
            df["MKTCAP"] = df["MKTCAP"].str.replace(",", "").astype(float)
            df = df.rename(columns={"MKTCAP": ISU_ABBRV})
            market_cap_df = pd.concat([market_cap_df, df])
        current_date -= relativedelta(years=1)
    market_caps_df = pd.merge(
        market_caps_df, market_cap_df, how="outer", left_index=True, right_index=True
    )


market_caps_df = market_caps_df.T
market_caps_df.fillna(0, inplace=True)
market_caps_df = market_caps_df.set_index("Unnamed: 0")
market_caps_df.to_csv(f"./{DATA_PATH}/mktcp_data.csv")
market_caps_df

Fetching Data:   0%|          | 0/68 [00:04<?, ?it/s]


KeyboardInterrupt: 

In [5]:
matplotlib.rcParams["font.family"] = "Malgun Gothic"
colors = plt.cm.get_cmap("tab20", len(market_caps_df.index))(
    range(len(market_caps_df.index))
)
for index in tqdm(range(7507, len(market_caps_df.columns))):
    if index % 3 == 0:
        plt.figure(figsize=(18, 12))
        for color, title in zip(colors, market_caps_df.index):
            x = market_caps_df.loc[title].index[:index]
            y = market_caps_df.loc[title].values[:index]
            if y[-1] < 1:
                continue
            plt.plot(x, y, color=color)
            plt.scatter(x[-1], y[-1], color=color)
            plt.text(x[-1], y[-1], title, ha="left", va="center", color=color)
        plt.xticks([])
        plt.yticks([])
        plt.title(x[-1][:7], fontsize=32, weight="bold")
        plt.savefig(f"./img/{index}.png", dpi=300)

  colors = plt.cm.get_cmap("tab20", len(market_caps_df.index))(
0it [00:00, ?it/s]
