In [1]:
import pandas as pd
import requests
import os
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Root directory for output files; getApiData uses it as the default for its `path` argument.
path = '.'

In [2]:
# Host and service root shared by every endpoint (no scheme; the request code prepends it).
baseURL = 'apis.data.go.kr/1160100/service'

# Mapping of service name -> list of operations to download from that service.
# Currently disabled entries, kept for reference:
#   'GetSBProfileInfoService': 'getCsdoStatus'
#   'GetSBBankingInfoService': ['getGrnBalInfo', 'getDpstLoanInfo']
#   'GetSBFinanceInfoService': ['getFnafInfo', 'getSlsInfo', 'getDbtInfo']
getApis = {
    'GetSBProfileInfoService': ['getOtlInfo'],
}

In [None]:
# Alternative for list input (service name included in the URL):
# baseURL = 'apis.data.go.kr/1160100/service/GetSBProfileInfoService'

In [None]:
def _extract_items(payload) -> list:
    """Return the records stored at response -> body -> items -> item of *payload*.

    Any missing level, or a level that is not a dict (for example an empty
    string where an object was expected), yields an empty list. A single
    record delivered as a dict is wrapped in a list so it becomes one row.
    """
    node = payload
    for field in ('response', 'body', 'items', 'item'):
        if not isinstance(node, dict):
            return []
        node = node.get(field)
    if isinstance(node, dict):
        return [node]
    if isinstance(node, list):
        return node
    return []


def getApiData(apis: dict | list, baseURL: str = baseURL, scope: int = 100, path: str = path,
               timeout: float | None = 60):
    """Download paginated API results and save every non-empty page as a CSV file.

    Each page is written to
    ``{path}/dataset/{service}/{operation}/{operation}_{NNN}.csv`` where ``NNN``
    is the zero-based page index (``pageNo - 1``), zero-padded to three digits.

    Args:
        apis: Either a dict mapping service name -> list of operation names,
            or a plain list of operation names. For a list, the service name
            is taken from the last path segment of ``baseURL``.
        baseURL: Host and path prefix without scheme. With dict input it must
            not contain the service name; with list input it must end with it.
        scope: Maximum number of pages requested per operation.
        path: Root directory under which the ``dataset`` tree is created.
        timeout: Seconds to wait for each HTTP request (``None`` waits forever).

    Raises:
        Exception: On a missing ``KEY_DECODE`` environment variable, an HTTP
            failure, an invalid JSON body, or a file-system error. The original
            exception is attached as ``__cause__``.
    """
    try:
        # Read the API key from the environment
        key = os.getenv('KEY_DECODE')
        if not key:
            raise KeyError("API key not found in environment variables.")

        if isinstance(apis, dict):
            services = list(apis.keys())
            operation_lists = list(apis.values())
        else:
            # List input: the last URL segment is the service name. The list is
            # wrapped so that zip() pairs the service with the whole list
            # instead of iterating over the characters of its first element.
            root, _, service = baseURL.rstrip('/').rpartition('/')
            services = [service]
            operation_lists = [apis]
            baseURL = root

        # Retry transient server errors with exponential backoff
        retries = Retry(total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504])

        # The context manager closes the pooled connections when done
        with requests.Session() as session:
            session.mount('http://', HTTPAdapter(max_retries=retries))

            for base_item, api_list in zip(services, operation_lists):
                for api_item in api_list:
                    url = f"http://{baseURL}/{base_item}/{api_item}"
                    save_path = f'{path}/dataset/{base_item}/{api_item}'

                    for page in range(scope):
                        params = {
                            "serviceKey": key,
                            "pageNo": page + 1,
                            "numOfRows": 10000,  # or your desired value
                            "resultType": "json"
                        }

                        response = session.get(url, params=params, timeout=timeout)
                        response.raise_for_status()  # Raise an error for bad status codes

                        try:
                            response_data = _extract_items(response.json())
                        except ValueError as e:
                            raise ValueError(f"Invalid JSON response from {url}: {e}") from e

                        if not response_data:
                            # An empty page means the data is exhausted; stop
                            # paging this operation instead of requesting the
                            # remaining pages up to `scope`.
                            break

                        try:
                            df = pd.DataFrame(response_data)
                        except ValueError as e:
                            raise ValueError(f"Error converting response data to DataFrame: {e}") from e

                        try:
                            os.makedirs(save_path, exist_ok=True)
                        except OSError as e:
                            raise OSError(f"Failed to create directory {save_path}: {e}") from e

                        # File index is the zero-based page number (pageNo - 1)
                        file_name = f'{save_path}/{api_item}_{str(page).zfill(3)}.csv'
                        try:
                            df.to_csv(file_name, index=False)
                        except PermissionError as e:
                            raise PermissionError(f"Permission denied while saving file {file_name}: {e}") from e

    except requests.exceptions.RequestException as e:
        raise Exception(f"HTTP request failed: {e}") from e
    except KeyError as e:
        raise Exception(f"Missing environment variable: {e}") from e
    except OSError as e:
        raise Exception(f"File system error: {e}") from e
    except ValueError as e:
        raise Exception(f"Data processing error: {e}") from e
    except Exception as e:
        raise Exception(f"An unexpected error occurred: {e}") from e

In [5]:
path

'.'

In [6]:
# Download every operation listed in getApis; scope=112 caps the page loop at 112 pages per operation.
getApiData(getApis, scope=112)

In [7]:
# Collect one "<service>/<operation>" relative path per configured operation.
path_d = []
for service, operations in getApis.items():
    path_d.extend(f'{service}/{operation}' for operation in operations)

In [8]:
path_d

['GetSBProfileInfoService/getOtlInfo']

In [None]:
# Merge the per-page CSV files of every service/operation into a single all.csv.
for rel_dir in path_d:
    # Directory holding the page files. It is built from the module-level
    # `path` (the same root getApiData writes under by default) and stored
    # under its own name so that `path` itself is not overwritten.
    dataset_dir = f'{path}/dataset/{rel_dir}'

    # List the page CSVs in name order. A previously written all.csv is
    # skipped so that re-running this cell does not duplicate every row.
    try:
        file_list = sorted(
            f for f in os.listdir(dataset_dir)
            if f.endswith('.csv') and f != 'all.csv'
        )
    except Exception as e:
        raise Exception(f"디렉토리 {dataset_dir}에 문제가 발생했습니다: {e}") from e

    # Load each page into a DataFrame
    all_data = []
    for filename in file_list:
        try:
            file_path = os.path.join(dataset_dir, filename)
            all_data.append(pd.read_csv(file_path))
        except Exception as e:
            raise Exception(f"파일 {filename}을 읽는 도중 오류 발생: {e}") from e

    # Nothing to merge is treated as an error
    if not all_data:
        raise Exception(f"디렉토리 {dataset_dir}에는 유효한 CSV 파일이 없습니다.")

    # Concatenate all pages and write the combined file next to them
    try:
        df = pd.concat(all_data, ignore_index=True)
        df.to_csv(f"{dataset_dir}/all.csv", index=False)
    except Exception as e:
        raise Exception(f"모든 데이터를 합치는 도중 오류 발생: {e}") from e