# In [None]:
import os
import re

import pandas as pd
import requests
from tqdm import tqdm
# Kakao REST API key used to build the Authorization header below.
# The KAKAO_API_KEY environment variable takes precedence so the credential
# can be supplied without editing this file; the literal is kept only as a
# fallback so existing runs keep working.
# NOTE(review): a key committed to source should be treated as exposed --
# consider rotating it and dropping the fallback.
KAKAO_API_KEY = os.environ.get("KAKAO_API_KEY") or "f32e9311c06e941799af2d38345741de"
HEADERS = {"Authorization": f"KakaoAK {KAKAO_API_KEY}"}

input_file = "FACT_Data.csv"
output_file = "FACT_Cord_5.csv"

# Only one positional slice of the input is processed per run:
# 0-based rows 120001 through 149999 (the end bound of iloc is exclusive).
# NOTE(review): if the neighbouring batch stops at 120000 (exclusive), row
# 120000 is never processed -- confirm the intended batch boundaries.
df = pd.read_csv(input_file, encoding='utf-8').iloc[120001:150000]

# Columns are selected by position, not by header name.
# Presumably: 0 = name, 1 = business-type code, 2 = address -- verify against
# the layout of the input CSV.
name_col = df.iloc[:, 0]
code_col = df.iloc[:, 1]
address_col = df.iloc[:, 2]

# Accumulates [name, code, lat, lon] rows for every successfully geocoded entry.
results = []

def clean_address(address):
    """Normalize a raw address string into a query for the geocoder.

    The following are removed, in this order: the literal "번지", any
    parenthesized segment, everything from the first remaining comma to the
    end of the line, and redundant whitespace.

    Args:
        address: Raw address value. Anything that is not a ``str`` (for
            example ``None`` or a float NaN from pandas) is treated as missing.

    Returns:
        The cleaned address, or ``""`` when the input is not a string.
    """
    if not isinstance(address, str):
        return ""

    address = re.sub(r"번지", "", address)
    # Strip parentheticals BEFORE cutting at the first comma. Otherwise an
    # address such as "... 123 (역삼동, 대우빌딩)" is truncated inside the
    # parentheses and keeps a dangling "(역삼동" that the later pattern can
    # no longer match.
    address = re.sub(r"\(.*?\)", "", address)
    address = re.sub(r",.*", "", address)
    address = re.sub(r"\s+", " ", address).strip()
    return address

def address_to_coordinates(address, timeout=10):
    """Look up coordinates for an address via the Kakao address-search API.

    Args:
        address: Address string sent as the ``query`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a single stalled connection would block the
            whole batch indefinitely.

    Returns:
        A ``(lat, lon)`` tuple taken from the ``y`` and ``x`` fields of the
        first returned document, or ``(None, None)`` when the request fails,
        the API responds with a non-200 status, the body is not valid JSON,
        or no document matches. The values are returned exactly as they
        appear in the JSON payload (no numeric conversion is applied).
    """
    url = "https://dapi.kakao.com/v2/local/search/address.json"
    params = {"query": address}

    # A network failure for one address must not abort the entire run, so it
    # is reported and treated like a missing result.
    try:
        response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"❌ 요청 실패: {address} - {exc}")
        return None, None

    if response.status_code != 200:
        print(f"❌ API 오류: {response.status_code} - {response.text}")
        return None, None

    try:
        data = response.json()
    except ValueError as exc:
        print(f"❌ 응답 파싱 실패: {address} - {exc}")
        return None, None

    documents = data.get('documents')
    if not documents:
        print(f"❌ 좌표를 찾을 수 없는 주소: {address}")
        return None, None

    # Only the first (top-ranked) document is used.
    address_info = documents[0]
    lat = address_info['y']
    lon = address_info['x']
    print(f"🔄 주소: {address} -> 위도: {lat}, 경도: {lon}")
    return lat, lon

def get_similar_address(address, timeout=10):
    """Find the closest match for an address via the Kakao keyword-search API.

    Used as a fallback when the exact address lookup yields no coordinates.

    Args:
        address: Text sent as the ``query`` parameter.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a single stalled connection would block the
            whole batch indefinitely.

    Returns:
        A ``(address_name, lat, lon)`` tuple built from the ``address_name``,
        ``y`` and ``x`` fields of the first returned document, or
        ``(None, None, None)`` when the request fails, the API responds with
        a non-200 status, the body is not valid JSON, or nothing matches.
        The values are returned exactly as they appear in the JSON payload.
    """
    url = "https://dapi.kakao.com/v2/local/search/keyword.json"
    params = {"query": address}

    # A network failure for one address must not abort the entire run, so it
    # is reported and treated like a missing result.
    try:
        response = requests.get(url, headers=HEADERS, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"❌ 요청 실패: {address} - {exc}")
        return None, None, None

    if response.status_code != 200:
        print(f"❌ API 오류: {response.status_code} - {response.text}")
        return None, None, None

    try:
        data = response.json()
    except ValueError as exc:
        print(f"❌ 응답 파싱 실패: {address} - {exc}")
        return None, None, None

    documents = data.get("documents")
    if not documents:
        print(f"❌ 유사 주소를 찾을 수 없음: {address}")
        return None, None, None

    # Only the first (top-ranked) document is used.
    best_match = documents[0]
    print(f"🔄 유사 주소로 대체: {address} -> {best_match['address_name']}")
    return best_match["address_name"], best_match["y"], best_match["x"]

# Geocode every row, then write the rows that resolved to coordinates.
rows = zip(name_col, code_col, address_col)
for name, code, address in tqdm(rows, total=len(address_col)):
    cleaned = clean_address(address)

    # Rows whose address cleans down to an empty string are skipped.
    if not cleaned:
        continue

    # Step 1: try the exact address lookup first.
    lat, lon = address_to_coordinates(cleaned)

    # Step 2: fall back to the keyword search when either coordinate is missing.
    if not (lat and lon):
        _matched_address, lat, lon = get_similar_address(cleaned)

    # Step 3: keep the row only when both coordinates are available.
    if not (lat and lon):
        continue
    results.append([name, code, lat, lon])

# Output columns: name, business-type code, latitude, longitude.
output_columns = ["이름", "업종코드", "위도", "경도"]
result_df = pd.DataFrame(results, columns=output_columns)
# utf-8-sig writes a BOM at the start of the file.
result_df.to_csv(output_file, index=False, encoding="utf-8-sig")
print(f"✅ 완료! 저장된 파일: {output_file}")