In [14]:
import requests
import pandas as pd
from datetime import datetime, timedelta
from xml.etree import ElementTree as ET
import time

# API credentials and endpoint for the officetel pipeline.
# NOTE(review): these keys are committed in plain text. Export KAKAO_API_KEY and
# DATA_GO_KR_SERVICE_KEY in the environment instead; the literals below remain
# only as a backward-compatible fallback and should be rotated, then removed.
import os

# Kakao REST API key (required).
KAKAO_API_KEY = os.environ.get("KAKAO_API_KEY") or "6665946ef83bd1dc889184a24cf212a2"

# Officetel real-transaction-price API endpoint and its service key.
TRADE_API_URL = "https://apis.data.go.kr/1613000/RTMSDataSvcOffiTrade/getRTMSDataSvcOffiTrade"
SERVICE_KEY = os.environ.get("DATA_GO_KR_SERVICE_KEY") or "7vMdnzTpnFnBO5wPN3LkHyPgPNFu3A/w/+RH8EJw3ihZfuhA5UiMx4x/PYl1qjlCx1VAzTL+i2GJXf1c/oHfyg=="

# Most recent n calendar months
def get_recent_months(n=12):
    """Return the last ``n`` calendar months as "YYYYMM" strings, newest first.

    The list starts with the current month and steps back exactly one calendar
    month per entry. Stepping by calendar month instead of by 30-day offsets
    guarantees that no month is repeated or skipped: when run on March 31st,
    a 30-day offset lands on March 1st (March twice) and a 60-day offset lands
    in late January (February never).

    Args:
        n: Number of months to return.

    Returns:
        List of ``n`` distinct "YYYYMM" strings in descending order.
    """
    today = datetime.today()
    # Zero-based month index: "go back i months" becomes plain subtraction.
    current = today.year * 12 + today.month - 1
    months = []
    for i in range(n):
        year, month0 = divmod(current - i, 12)
        months.append(f"{year:04d}{month0 + 1:02d}")
    return months

# District (gu) name -> 5-digit code sent as the LAWD_CD request parameter.
GU_LAWD_MAP = dict(
    [
        ("관악구", "11620"),
        ("강서구", "11500"),
        ("금천구", "11545"),
    ]
)

# 1️⃣ Collect unique addresses
def get_unique_addresses(gu_name):
    """Collect the distinct (gu, dong, jibun) tuples traded in the recent months.

    For each month from ``get_recent_months()`` this requests page 1 (up to
    1000 rows) of the officetel trade API for the district and reads the
    ``umdNm`` and ``jibun`` fields of every ``<item>`` element.
    NOTE(review): only the first page is fetched; a month with more than 1000
    rows would be truncated.

    Args:
        gu_name: District name; must be a key of ``GU_LAWD_MAP``.

    Returns:
        Sorted list of ``(gu_name, dong, jibun)`` tuples. Items missing either
        field are skipped, as are months whose request or parsing failed.

    Raises:
        KeyError: If ``gu_name`` is not in ``GU_LAWD_MAP``.
    """
    lawd_cd = GU_LAWD_MAP[gu_name]
    months = get_recent_months()
    address_set = set()

    for yyyymm in months:
        params = {
            "serviceKey": SERVICE_KEY,
            "LAWD_CD": lawd_cd,
            "DEAL_YMD": yyyymm,
            "numOfRows": "1000",
            "pageNo": "1"
        }

        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(TRADE_API_URL, params=params, timeout=30)
            # Surface HTTP errors instead of parsing an error page as data.
            response.raise_for_status()
            root = ET.fromstring(response.content)

            for item in root.iter("item"):
                dong = item.findtext("umdNm", "").strip()
                jibun = item.findtext("jibun", "").strip()
                if dong and jibun:
                    address_set.add((gu_name, dong, jibun))
        except Exception as e:
            # Still best effort (skip the month), but report the reason and let
            # KeyboardInterrupt through, which the bare ``except`` did not.
            print(f"❌ 오류 ({gu_name} {yyyymm}): {e}")
            continue

    return sorted(address_set)

# 2️⃣ Address text -> full address + coordinates
def query_kakao_address(full_text):
    """Geocode an address string with the Kakao Local address-search API.

    Args:
        full_text: Free-form address text sent as the ``query`` parameter.

    Returns:
        ``(address_name, lat, lon)`` taken from the first returned document,
        where ``lat`` is the document's ``y`` and ``lon`` its ``x`` as floats.
        Returns ``(None, None, None)`` when there is no match or on any
        request/parsing error, so callers can always unpack three values.
    """
    url = "https://dapi.kakao.com/v2/local/search/address.json"
    headers = {"Authorization": f"KakaoAK {KAKAO_API_KEY}"}
    params = {"query": full_text}

    try:
        # A timeout keeps one stalled request from blocking the whole batch.
        res = requests.get(url, headers=headers, params=params, timeout=10)
        res.raise_for_status()
        documents = res.json()['documents']
        if documents:
            doc = documents[0]
            return doc['address']['address_name'], float(doc['y']), float(doc['x'])
    except Exception as e:
        # Best effort: report why the lookup failed (auth, quota, schema, ...)
        # while letting KeyboardInterrupt propagate.
        print(f"❌ 오류 (Kakao {full_text}): {e}")

    # Reached on an empty result as well as on errors; previously the empty
    # case fell off the end and returned a bare None.
    return None, None, None

# 3️⃣ End-to-end run
def build_address_coord_csv():
    """Geocode every officetel address per district and write ``df_office.csv``.

    For each district in ``GU_LAWD_MAP`` the unique addresses are collected,
    each one is geocoded through ``query_kakao_address`` and successful results
    are written to a CSV with the columns 전체주소 (full address), 위도
    (latitude) and 경도 (longitude). Failed lookups are printed and skipped.
    """
    all_results = []

    # Iterate the shared district map rather than a second hard-coded list.
    for gu in GU_LAWD_MAP:
        print(f"\n📍 [{gu}] 주소 변환 중...")
        addresses = get_unique_addresses(gu)
        print(f"  🔎 {len(addresses)}개 주소 처리 시작")

        for gu_name, dong, jibun in addresses:
            query = f"서울특별시 {gu_name} {dong} {jibun}"
            # Defensive: tolerate a geocoder that returns None instead of a
            # 3-tuple, which would otherwise crash the unpacking.
            full_addr, lat, lon = query_kakao_address(query) or (None, None, None)

            # Compare coordinates against None so a 0.0 value is not rejected.
            if full_addr and lat is not None and lon is not None:
                all_results.append({
                    "전체주소": full_addr,
                    "위도": lat,
                    "경도": lon
                })
            else:
                print(f"⚠️ 변환 실패: {query}")

            time.sleep(0.2)  # stay under the Kakao API rate limit

    # Explicit columns keep the CSV header even when nothing was geocoded.
    df = pd.DataFrame(all_results, columns=["전체주소", "위도", "경도"])
    df.to_csv("df_office.csv", index=False, encoding="utf-8-sig")
    print("\n✅ CSV 저장 완료: df_office.csv")

# Run the officetel pipeline defined above.
build_address_coord_csv()


📍 [관악구] 주소 변환 중...
  🔎 68개 주소 처리 시작

📍 [강서구] 주소 변환 중...
  🔎 162개 주소 처리 시작

📍 [금천구] 주소 변환 중...
  🔎 59개 주소 처리 시작

✅ CSV 저장 완료: df_office.csv


In [16]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
from xml.etree import ElementTree as ET

# ✅ API settings for the villa pipeline.
# NOTE(review): these keys are committed in plain text. Export KAKAO_API_KEY and
# DATA_GO_KR_SERVICE_KEY in the environment instead; the literals below remain
# only as a backward-compatible fallback and should be rotated, then removed.
import os

KAKAO_API_KEY = os.environ.get("KAKAO_API_KEY") or "6665946ef83bd1dc889184a24cf212a2"
VILLA_API_URL = "https://apis.data.go.kr/1613000/RTMSDataSvcRHTrade/getRTMSDataSvcRHTrade"
SERVICE_KEY = os.environ.get("DATA_GO_KR_SERVICE_KEY") or "7vMdnzTpnFnBO5wPN3LkHyPgPNFu3A/w/+RH8EJw3ihZfuhA5UiMx4x/PYl1qjlCx1VAzTL+i2GJXf1c/oHfyg=="

# ✅ Period: most recent n calendar months
def get_recent_months(n=12):
    """Return the last ``n`` calendar months as "YYYYMM" strings, newest first.

    Months are derived by calendar arithmetic, not by subtracting multiples of
    30 days: 30-day steps can hit the same month twice and miss another one
    (e.g. from March 31st, February is never produced).

    Args:
        n: Number of months to return.

    Returns:
        List of ``n`` distinct "YYYYMM" strings in descending order.
    """
    today = datetime.today()
    year, month = today.year, today.month
    months = []
    for _ in range(n):
        months.append(f"{year:04d}{month:02d}")
        month -= 1
        if month == 0:
            # Wrapped past January: continue from December of the previous year.
            year, month = year - 1, 12
    return months

# ✅ District (gu) name -> 5-digit code sent as the LAWD_CD request parameter.
_GU_NAMES = ("관악구", "강서구", "금천구")
_GU_CODES = ("11620", "11500", "11545")
GU_LAWD_MAP = dict(zip(_GU_NAMES, _GU_CODES))

# ✅ Collect addresses
def get_villa_addresses(gu_name):
    """Collect the distinct (gu, dong, jibun) tuples from recent villa trades.

    For each month from ``get_recent_months()`` this requests page 1 (up to
    1000 rows) from ``VILLA_API_URL`` for the district and reads the ``umdNm``
    and ``jibun`` fields of every ``<item>`` element.
    NOTE(review): only the first page is fetched; a month with more than 1000
    rows would be truncated.

    Args:
        gu_name: District name; must be a key of ``GU_LAWD_MAP``.

    Returns:
        Sorted list of ``(gu_name, dong, jibun)`` tuples. Items missing either
        field are skipped; months that fail are reported and skipped.

    Raises:
        KeyError: If ``gu_name`` is not in ``GU_LAWD_MAP``.
    """
    lawd_cd = GU_LAWD_MAP[gu_name]
    months = get_recent_months()
    address_set = set()

    for yyyymm in months:
        params = {
            "serviceKey": SERVICE_KEY,
            "LAWD_CD": lawd_cd,
            "DEAL_YMD": yyyymm,
            "numOfRows": "1000",
            "pageNo": "1"
        }

        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            res = requests.get(VILLA_API_URL, params=params, timeout=30)
            # Surface HTTP errors instead of parsing an error page as data.
            res.raise_for_status()
            root = ET.fromstring(res.content)

            for item in root.iter("item"):
                dong = item.findtext("umdNm", "").strip()
                jibun = item.findtext("jibun", "").strip()
                if dong and jibun:
                    address_set.add((gu_name, dong, jibun))
        except Exception as e:
            print(f"❌ 오류 ({gu_name} {yyyymm}): {e}")
            continue

    return sorted(address_set)

# ✅ Kakao address lookup
def query_kakao_address(full_text):
    """Geocode an address string with the Kakao Local address-search API.

    Args:
        full_text: Free-form address text sent as the ``query`` parameter.

    Returns:
        ``(address_name, lat, lon)`` taken from the first returned document,
        where ``lat`` is the document's ``y`` and ``lon`` its ``x`` as floats.
        Returns ``(None, None, None)`` when there is no match or on any
        request/parsing error, so callers can always unpack three values.
    """
    url = "https://dapi.kakao.com/v2/local/search/address.json"
    headers = {"Authorization": f"KakaoAK {KAKAO_API_KEY}"}
    params = {"query": full_text}
    not_found = (None, None, None)

    try:
        # A timeout keeps one stalled request from blocking the whole batch.
        res = requests.get(url, headers=headers, params=params, timeout=10)
        res.raise_for_status()
        documents = res.json()['documents']
        if not documents:
            # Previously this case fell off the end and returned a bare None.
            return not_found
        doc = documents[0]
        return doc['address']['address_name'], float(doc['y']), float(doc['x'])
    except Exception as e:
        # Best effort: report why the lookup failed while letting
        # KeyboardInterrupt propagate (the bare ``except`` swallowed it).
        print(f"❌ 오류 (Kakao {full_text}): {e}")
        return not_found

# ✅ End-to-end run
def build_villa_csv():
    """Geocode every villa address per district and write ``df_villa.csv``.

    For each district in ``GU_LAWD_MAP`` the unique addresses are collected,
    each one is geocoded through ``query_kakao_address`` and successful results
    are written to a CSV with the columns 전체주소 (full address), 위도
    (latitude) and 경도 (longitude). Failed lookups are printed and skipped.
    """
    all_results = []

    for gu in GU_LAWD_MAP.keys():
        print(f"\n📍 [{gu}] 주소 수집 중...")
        addresses = get_villa_addresses(gu)
        print(f"  🔍 총 {len(addresses)}개 주소 변환 시작")

        for gu_name, dong, jibun in addresses:
            query = f"서울특별시 {gu_name} {dong} {jibun}"
            # Defensive: tolerate a geocoder that returns None instead of a
            # 3-tuple, which would otherwise crash the unpacking.
            full_addr, lat, lon = query_kakao_address(query) or (None, None, None)

            # Compare coordinates against None so a 0.0 value is not rejected.
            if full_addr and lat is not None and lon is not None:
                all_results.append({
                    "전체주소": full_addr,
                    "위도": lat,
                    "경도": lon
                })
            else:
                print(f"⚠️ 변환 실패: {query}")

            time.sleep(0.2)  # stay under the Kakao API rate limit

    # Explicit columns keep the CSV header even when nothing was geocoded.
    df = pd.DataFrame(all_results, columns=["전체주소", "위도", "경도"])
    df.to_csv("df_villa.csv", index=False, encoding="utf-8-sig")
    print("\n✅ 저장 완료: df_villa.csv")

# ▶ Run the villa pipeline defined above.
build_villa_csv()


📍 [관악구] 주소 수집 중...
  🔍 총 943개 주소 변환 시작

📍 [강서구] 주소 수집 중...
  🔍 총 1596개 주소 변환 시작

📍 [금천구] 주소 수집 중...
  🔍 총 521개 주소 변환 시작

✅ 저장 완료: df_villa.csv


In [18]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
from xml.etree import ElementTree as ET

# ✅ Settings for the apartment pipeline.
# NOTE(review): these keys are committed in plain text. Export KAKAO_API_KEY and
# DATA_GO_KR_SERVICE_KEY in the environment instead; the literals below remain
# only as a backward-compatible fallback and should be rotated, then removed.
import os

KAKAO_API_KEY = os.environ.get("KAKAO_API_KEY") or "6665946ef83bd1dc889184a24cf212a2"
APT_API_URL = "https://apis.data.go.kr/1613000/RTMSDataSvcAptTrade/getRTMSDataSvcAptTrade"
SERVICE_KEY = os.environ.get("DATA_GO_KR_SERVICE_KEY") or "7vMdnzTpnFnBO5wPN3LkHyPgPNFu3A/w/+RH8EJw3ihZfuhA5UiMx4x/PYl1qjlCx1VAzTL+i2GJXf1c/oHfyg=="

# ✅ Most recent n calendar months
def get_recent_months(n=12):
    """Return the last ``n`` calendar months as "YYYYMM" strings, newest first.

    Each entry is exactly one calendar month before the previous one. Fixed
    30-day offsets are not used: they can produce the same month twice and
    skip another (e.g. from March 31st, February is never reached).

    Args:
        n: Number of months to return.

    Returns:
        List of ``n`` distinct "YYYYMM" strings in descending order.
    """
    today = datetime.today()
    # Zero-based month index: "go back i months" becomes plain subtraction.
    newest = today.year * 12 + (today.month - 1)
    return [
        f"{(newest - i) // 12:04d}{(newest - i) % 12 + 1:02d}"
        for i in range(n)
    ]

# ✅ District (gu) name -> 5-digit code sent as the LAWD_CD request parameter.
GU_LAWD_MAP = {}
GU_LAWD_MAP["관악구"] = "11620"
GU_LAWD_MAP["강서구"] = "11500"
GU_LAWD_MAP["금천구"] = "11545"

# ✅ Collect unique apartment addresses
def get_apartment_addresses(gu_name):
    """Collect the distinct (gu, dong, jibun) tuples from recent apartment trades.

    For each month from ``get_recent_months()`` this requests page 1 (up to
    1000 rows) from ``APT_API_URL`` for the district. For every ``<item>`` the
    dong is read from the ``법정동`` tag, falling back to ``umdNm``, and the lot
    number from ``지번``, falling back to ``jibun``.
    NOTE(review): only the first page is fetched; a month with more than 1000
    rows would be truncated.

    Args:
        gu_name: District name; must be a key of ``GU_LAWD_MAP``.

    Returns:
        Sorted list of ``(gu_name, dong, jibun)`` tuples. Items missing either
        field are skipped; months that fail are reported and skipped.

    Raises:
        KeyError: If ``gu_name`` is not in ``GU_LAWD_MAP``.
    """
    lawd_cd = GU_LAWD_MAP[gu_name]
    months = get_recent_months()
    address_set = set()

    for yyyymm in months:
        params = {
            "serviceKey": SERVICE_KEY,
            "LAWD_CD": lawd_cd,
            "DEAL_YMD": yyyymm,
            "numOfRows": "1000",
            "pageNo": "1"
        }

        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            res = requests.get(APT_API_URL, params=params, timeout=30)
            # Surface HTTP errors instead of parsing an error page as data.
            res.raise_for_status()
            root = ET.fromstring(res.content)

            for item in root.iter("item"):
                # Accept either the Korean or the romanised tag names.
                dong = item.findtext("법정동", "").strip() or item.findtext("umdNm", "").strip()
                jibun = item.findtext("지번", "").strip() or item.findtext("jibun", "").strip()
                if dong and jibun:
                    address_set.add((gu_name, dong, jibun))
        except Exception as e:
            print(f"❌ 오류 ({gu_name} {yyyymm}):", e)
            continue

    return sorted(address_set)

# ✅ Kakao address text -> full address + latitude/longitude
def query_kakao_address(query_text):
    """Geocode an address string with the Kakao Local address-search API.

    Args:
        query_text: Free-form address text sent as the ``query`` parameter.

    Returns:
        ``(address_name, lat, lon)`` taken from the first returned document,
        where ``lat`` is the document's ``y`` and ``lon`` its ``x`` as floats.
        Returns ``(None, None, None)`` when there is no match or on any
        request/parsing error, so callers can always unpack three values.
    """
    url = "https://dapi.kakao.com/v2/local/search/address.json"
    headers = {"Authorization": f"KakaoAK {KAKAO_API_KEY}"}
    params = {"query": query_text}

    try:
        # A timeout keeps one stalled request from blocking the whole batch.
        res = requests.get(url, headers=headers, params=params, timeout=10)
        res.raise_for_status()
        documents = res.json()['documents']
        if documents:
            doc = documents[0]
            return doc['address']['address_name'], float(doc['y']), float(doc['x'])
    except Exception as e:
        # Best effort: report why the lookup failed while letting
        # KeyboardInterrupt propagate (the bare ``except`` swallowed it).
        print(f"❌ 오류 (Kakao {query_text}): {e}")

    # Reached on an empty result as well as on errors; previously the empty
    # case fell off the end and returned a bare None.
    return None, None, None

# ✅ End-to-end run
def build_apartment_csv():
    """Geocode every apartment address per district and write ``df_apartment.csv``.

    For each district in ``GU_LAWD_MAP`` the unique addresses are collected,
    each one is geocoded through ``query_kakao_address`` and successful results
    are written to a CSV with the columns 전체주소 (full address), 위도
    (latitude) and 경도 (longitude). Failed lookups are printed and skipped.
    """
    all_results = []

    for gu in GU_LAWD_MAP.keys():
        print(f"\n📍 [{gu}] 주소 수집 중...")
        addresses = get_apartment_addresses(gu)
        print(f"  🔍 총 {len(addresses)}개 주소 변환 시작")

        for gu_name, dong, jibun in addresses:
            query = f"서울특별시 {gu_name} {dong} {jibun}"
            # Defensive: tolerate a geocoder that returns None instead of a
            # 3-tuple, which would otherwise crash the unpacking.
            full_addr, lat, lon = query_kakao_address(query) or (None, None, None)

            # Compare coordinates against None so a 0.0 value is not rejected.
            if full_addr and lat is not None and lon is not None:
                all_results.append({
                    "전체주소": full_addr,
                    "위도": lat,
                    "경도": lon
                })
            else:
                print(f"⚠️ 변환 실패: {query}")

            time.sleep(0.2)  # stay under the Kakao API rate limit

    # Explicit columns keep the CSV header even when nothing was geocoded.
    df = pd.DataFrame(all_results, columns=["전체주소", "위도", "경도"])
    df.to_csv("df_apartment.csv", index=False, encoding="utf-8-sig")
    print("\n✅ 저장 완료: df_apartment.csv")

# ▶ Run the apartment pipeline defined above.
build_apartment_csv()


📍 [관악구] 주소 수집 중...
  🔍 총 176개 주소 변환 시작

📍 [강서구] 주소 수집 중...
  🔍 총 407개 주소 변환 시작

📍 [금천구] 주소 수집 중...
  🔍 총 82개 주소 변환 시작

✅ 저장 완료: df_apartment.csv


In [3]:
import pandas as pd

# ✅ Load the file. The path is named once so the read, the write and the
# status message cannot drift apart (the message used to name a different file).
# NOTE(review): presumably re-run with df_office.csv / df_apartment.csv as well — confirm.
CSV_PATH = "df_villa.csv"
df = pd.read_csv(CSV_PATH)

# ✅ Replace "서울 " with "서울특별시 " only when it is the very start of the
# address. The vectorised .str accessor leaves missing values untouched, where
# calling x.startswith on a NaN row would raise.
df["전체주소"] = df["전체주소"].str.replace(r"^서울 ", "서울특별시 ", n=1, regex=True)

# ✅ Overwrite the same file in place.
df.to_csv(CSV_PATH, index=False, encoding="utf-8-sig")

print(f"✅ '서울' → '서울특별시' 교체 완료 및 저장됨: {CSV_PATH}")

✅ '서울' → '서울특별시' 교체 완료 및 저장됨: df_apartment_fixed.csv
