In [None]:
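# Example request with every optional path filter filled in:
# http://openapi.seoul.go.kr:8088/(AUTH_KEY)/xml/tbLnOpendataRtmsV/1/5/2024/11500/강서구/10300/1/대지/0151/0039/극동빌라/20240903/연립다세대
#
# Path segments after the service name: start index / end index / receipt year /
# district code / district name, followed by further optional filters.
#
# Shortest useful filter prefix, here for Gangnam-gu in 2024
# (receipt year / district code / district name):
# 2024/11680/강남구
#
# Remaining optional filters taken from the example above (presumably legal-dong
# code, lot details, building name, contract date and building use — TODO confirm
# against the API specification):
# /10300/1/대지/0151/0039/극동빌라/20240903/연립다세대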


In [None]:
import os
import requests
import time
import pandas as pd
from dotenv import load_dotenv
from IPython.display import display

# --- Configuration -----------------------------------------------------------
load_dotenv('../.env')
API_KEY = os.getenv('PUBLIC_API_KEY')
if not API_KEY:
    # Fail fast: without a key the f-string below would embed the literal text
    # "None" in every request URL.
    raise RuntimeError('PUBLIC_API_KEY 환경 변수가 설정되지 않았습니다. ../.env 파일을 확인하세요.')

SERVICE_NAME = 'tbLnOpendataRtmsV'
BASE_URL = f'http://openapi.seoul.go.kr:8088/{API_KEY}/json/{SERVICE_NAME}'

years = [2023, 2024, 2025]
districts = {
    '서초구': '11650',
    '강남구': '11680',
    '송파구': '11710'
}

rows_all = []            # every row dict collected across all years/districts
step = 1000              # rows requested per call (presumably the API's per-request cap — TODO confirm)
REQUEST_TIMEOUT_SEC = 30  # give up on a stalled connection instead of hanging forever
REQUEST_DELAY_SEC = 2     # pause between successive page requests

# --- Paginated download ------------------------------------------------------
# For each (year, district) pair, request consecutive index windows
# [start, end] until a page comes back without rows. Any failure abandons only
# the current pair; the remaining pairs are still attempted.
for year in years:
    for gu_nm, gu_cd in districts.items():
        start = 1

        while True:
            end = start + step - 1
            url = f'{BASE_URL}/{start}/{end}/{year}/{gu_cd}'

            try:
                res = requests.get(url, timeout=REQUEST_TIMEOUT_SEC)
            except requests.RequestException as exc:
                # Only the exception type is printed: the exception message
                # can contain the request URL, which embeds the API key.
                print('요청 실패:', year, gu_nm, type(exc).__name__)
                break

            if res.status_code != 200:
                print('요청 실패:', year, gu_nm)
                break

            try:
                payload = res.json()
            except ValueError:
                # The body was not valid JSON (e.g. an HTML/XML error page).
                print('응답 파싱 실패:', year, gu_nm)
                break

            data = payload.get(SERVICE_NAME, {})
            rows = data.get('row', [])

            if not rows:
                # An empty page normally means "past the last record", but it
                # can also hide an API-level error. Assumes the service reports
                # status as a RESULT object with CODE/MESSAGE, where INFO-000
                # is success and INFO-200 is "no matching data" — TODO confirm
                # against the API guide.
                result = payload.get('RESULT') or data.get('RESULT')
                code = result.get('CODE') if isinstance(result, dict) else None
                if code and code not in ('INFO-000', 'INFO-200'):
                    print('API 오류:', year, gu_nm, code, result.get('MESSAGE'))
                break

            rows_all.extend(rows)
            print(f'{year} {gu_nm} {start}~{end} 수집')

            start += step
            time.sleep(REQUEST_DELAY_SEC)

# Build one table from every collected row, keep apartment transactions only,
# trim to the columns used downstream, and persist the result as CSV.
OUTPUT_CSV = '../data/seoul_apartment_2023_2025_gangnam.csv'

KEEP_COLUMNS = [
    'RCPT_YR',    # receipt year
    'CGG_CD',     # district (gu) code
    'CGG_NM',     # district (gu) name
    'STDG_CD',    # legal-dong code
    'STDG_NM',    # legal-dong name
    'BLDG_NM',    # building name
    'CTRT_DAY',   # contract date
    'THING_AMT',  # transaction amount (unit: 10,000 KRW)
    'ARCH_AREA',  # building area (㎡)
    'LAND_AREA',  # land area (㎡)
    'FLR',        # floor
    'ARCH_YR',    # year built
    'BLDG_USG',   # building use
]

df = pd.DataFrame(rows_all)

if df.empty:
    print("데이터가 없습니다.")
else:
    # Row filter ('아파트' = apartment) and column selection in a single .loc.
    df = df.loc[df['BLDG_USG'] == '아파트', KEEP_COLUMNS]

    # Create the target directory if needed so a long download is not followed
    # by a failed write just because ../data does not exist yet.
    os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)

    # utf-8-sig writes a BOM at the start of the file.
    df.to_csv(OUTPUT_CSV, index=False, encoding='utf-8-sig')

    display(df.head())

In [14]:
import pandas as pd

# Reload the exported apartment transactions and take a first look at them.
df = pd.read_csv('../data/seoul_apartment_2023_2025_gangnam.csv', encoding='utf-8-sig')

# Column names, non-null counts and dtypes.
# info() prints its report itself and returns None, so it is called directly;
# wrapping it in display() would only add a stray "None" to the cell output.
df.info()

# First five rows
display(df.head())

# Missing-value check
# display(df.isna().sum())  # full per-column count

# Summary statistics
# NOTE(review): in the saved run, ARCH_YR has a minimum of 0 and LAND_AREA is
# 0.0 in every row — presumably placeholders for missing values; confirm
# before using these columns in analysis.
display(df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29761 entries, 0 to 29760
Data columns (total 13 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   RCPT_YR    29761 non-null  int64  
 1   CGG_CD     29761 non-null  int64  
 2   CGG_NM     29761 non-null  object 
 3   STDG_CD    29761 non-null  int64  
 4   STDG_NM    29761 non-null  object 
 5   BLDG_NM    29761 non-null  object 
 6   CTRT_DAY   29761 non-null  int64  
 7   THING_AMT  29761 non-null  int64  
 8   ARCH_AREA  29761 non-null  float64
 9   LAND_AREA  29761 non-null  float64
 10  FLR        29761 non-null  float64
 11  ARCH_YR    29761 non-null  int64  
 12  BLDG_USG   29761 non-null  object 
dtypes: float64(3), int64(6), object(4)
memory usage: 3.0+ MB


None

Unnamed: 0,RCPT_YR,CGG_CD,CGG_NM,STDG_CD,STDG_NM,BLDG_NM,CTRT_DAY,THING_AMT,ARCH_AREA,LAND_AREA,FLR,ARCH_YR,BLDG_USG
0,2023,11650,서초구,10800,서초동,서초1차e-편한세상,20231228,220000,130.53,0.0,4.0,2004,아파트
1,2023,11650,서초구,10700,반포동,반포파크빌,20231227,175000,110.79,0.0,4.0,2002,아파트
2,2023,11650,서초구,10300,우면동,서초힐스,20231227,123000,74.97,0.0,8.0,2012,아파트
3,2023,11650,서초구,10100,방배동,SK리더스뷰(파스텔시티),20231226,140000,84.95,0.0,13.0,2006,아파트
4,2023,11650,서초구,10100,방배동,롯데캐슬포레스트,20231222,450000,239.33,0.0,1.0,2003,아파트


Unnamed: 0,RCPT_YR,CGG_CD,STDG_CD,CTRT_DAY,THING_AMT,ARCH_AREA,LAND_AREA,FLR,ARCH_YR
count,29761.0,29761.0,29761.0,29761.0,29761.0,29761.0,29761.0,29761.0,29761.0
mean,2024.197708,11685.022009,10689.677766,20242430.0,222999.2,86.647031,0.0,10.456033,1973.104432
std,0.781349,23.863277,454.131608,7870.83,129349.6,34.507907,0.0,7.372261,237.823472
min,2023.0,11650.0,10100.0,20210710.0,11000.0,12.1,0.0,-1.0,0.0
25%,2024.0,11680.0,10300.0,20240200.0,135000.0,59.968,0.0,5.0,1991.0
50%,2024.0,11680.0,10700.0,20240900.0,200000.0,84.81,0.0,9.0,2004.0
75%,2025.0,11710.0,10900.0,20250320.0,279000.0,99.991,0.0,14.0,2012.0
max,2025.0,11710.0,11800.0,20251230.0,1900000.0,301.47,0.0,68.0,2025.0
