In [None]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import numpy as np

# Load KAKAO_API_KEY from the .env file
load_dotenv()
KAKAO_API_KEY = os.getenv('KAKAO_API_KEY')
if not KAKAO_API_KEY:
    # Fail fast: without a key every request below would be sent as "KakaoAK None".
    raise RuntimeError("KAKAO_API_KEY is not set; add it to .env or the environment")

# Reference points, written as (y, x):
# Dongnimmun: 37.575020, 126.957221
# Seongsu JC: 37.542640, 127.029530

# Approximate bounding box of Jung-gu, Seoul (minX, minY, maxX, maxY)
# min_x, min_y = 126.973, 37.550
# max_x, max_y = 127.020, 37.574

# Bounding box spanned by the two reference points above
# min_x, min_y = 126.957221, 37.542640
# max_x, max_y = 127.029530, 37.575020

# Bounding box actually used for the crawl
min_x, min_y = 126.734086, 37.413294
max_x, max_y = 127.269311, 37.715133

# Split the box into roughly 50000 small cells (num_divisions per axis).
# num_divisions = int(np.sqrt(10))
num_divisions = int(np.sqrt(50000))
# N cells per axis need N + 1 edge coordinates; linspace(…, N) would only give N - 1 cells.
x_points = np.linspace(min_x, max_x, num_divisions + 1)
y_points = np.linspace(min_y, max_y, num_divisions + 1)

def search_places(rect):
    """Fetch keyword-search results for one rectangle from the Kakao Local API.

    Args:
        rect: Value sent as the API's ``rect`` parameter. The caller in this
            notebook builds it as ``"x1,y1,x2,y2"``.

    Returns:
        list: The ``documents`` entries of every page fetched, in page order.
        At most ``max_pages`` pages of up to 15 results each are requested.
        Paging stops early on an empty page, when the response marks the end
        of results, or when the server answers with a non-success status
        (a warning is emitted in that case).

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            a request exceeds the timeout.
    """
    import warnings

    url = "https://dapi.kakao.com/v2/local/search/keyword.json"
    headers = {"Authorization": f"KakaoAK {KAKAO_API_KEY}"}
    base_params = {
        "query": "서울",
        "rect": rect,
        "size": 15  # results per page (maximum 15)
    }

    places = []
    max_pages = 3  # maximum number of pages to request
    timeout_seconds = 10  # never let a single stalled request hang the whole crawl

    for page in range(1, max_pages + 1):
        response = requests.get(
            url,
            headers=headers,
            params={**base_params, "page": page},
            timeout=timeout_seconds,
        )

        # An error response has no "documents" and used to look exactly like
        # an empty cell; surface it instead of dropping data silently.
        if not response.ok:
            warnings.warn(
                f"Kakao search failed for rect={rect} page={page}: "
                f"HTTP {response.status_code}"
            )
            break

        data = response.json()
        documents = data.get("documents")
        if not documents:
            break

        places.extend(documents)

        # Treat a missing "meta"/"is_end" as the end rather than raising KeyError.
        if data.get("meta", {}).get("is_end", True):
            break

    return places

def save_to_csv(places, filename):
    """Write the collected place records to a CSV file.

    Args:
        places: Records accepted by ``pd.DataFrame`` (this notebook passes a
            list of dicts returned by the API).
        filename: Destination path or buffer, forwarded to ``DataFrame.to_csv``.
            When it is a filesystem path, missing parent directories are
            created first.

    The file is written without the index column and encoded as ``utf-8-sig``.
    """
    # Create the target folder up front so a long crawl is not lost to a
    # missing directory at write time.
    if isinstance(filename, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filename))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    pd.DataFrame(places).to_csv(filename, index=False, encoding='utf-8-sig')

# Query every grid cell (each pair of neighbouring x edges crossed with each
# pair of neighbouring y edges) and pool the results into one list.
all_places = []
for x_lo, x_hi in zip(x_points[:-1], x_points[1:]):
    for y_lo, y_hi in zip(y_points[:-1], y_points[1:]):
        cell_rect = f"{x_lo},{y_lo},{x_hi},{y_hi}"
        all_places.extend(search_places(cell_rect))

# Persist the raw, still-unfiltered results.
save_to_csv(all_places, "before/places.csv")

In [9]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import numpy as np

# Load the raw crawl, keep one row per `id`, drop the columns that are not
# needed afterwards, and discard rows that still have any missing value.
df = (
    pd.read_csv('before/places.csv')
    .drop_duplicates(subset=['id'])
    .drop(columns=['distance', 'category_group_code', 'id'])
    .dropna()
)
len(df)

109280

In [10]:
# Category groups that feed each subset:
#   tourist attractions: '관광명소' (tourist attraction), '문화시설' (cultural facility)
#   accommodation:       '숙박' (lodging)
#   restaurants:         '음식점' (restaurant), '카페' (cafe)
#
# Each subset is an explicit copy so that later assignments to it act on an
# independent frame and do not raise SettingWithCopyWarning.
category_group = df['category_group_name']

df_touristattraction = df[category_group.isin(['관광명소', '문화시설'])].copy()
df_accommodation = df[category_group == '숙박'].copy()
df_restaurant = df[category_group.isin(['음식점', '카페'])].copy()

In [11]:
# Display the restaurant/cafe subset as the cell output.
df_restaurant

Unnamed: 0,address_name,category_group_name,category_name,phone,place_name,place_url,road_address_name,x,y
143,서울 강서구 개화동 388-1,음식점,음식점 > 분식,010-9598-9919,개화역분식집,http://place.map.kakao.com/782545697,서울 강서구 개화동로8길 38,126.797770,37.578544
166,서울 강서구 개화동 376-57,음식점,"음식점 > 한식 > 육류,고기",02-2664-6163,제일한우촌,http://place.map.kakao.com/10803779,서울 강서구 개화길 4,126.798698,37.583527
170,서울 강서구 개화동 376-58,음식점,"음식점 > 한식 > 찌개,전골",02-2663-3220,명동찌개마을 개화점,http://place.map.kakao.com/441183742,서울 강서구 개화길 2,126.798737,37.583341
194,서울 강서구 공항동 1373,음식점,음식점 > 푸드코트,02-2665-9881,플레이보6 익스프레스(서편) 김포국제공항국내선,http://place.map.kakao.com/1073034191,서울 강서구 하늘길 112,126.801201,37.559943
216,서울 강서구 방화동 886,카페,음식점 > 카페 > 커피전문점 > 엔제리너스,02-2662-1005,엔제리너스 김포공항역사점,http://place.map.kakao.com/1323178106,서울 강서구 하늘길 지하 77,126.801317,37.562586
...,...,...,...,...,...,...,...,...,...
384578,서울 강동구 강일동 695,카페,음식점 > 카페,070-4459-5459,믿음소망사랑,http://place.map.kakao.com/1857991054,서울 강동구 고덕로97길 20,127.179989,37.559202
384579,서울 강동구 강일동 695,음식점,"음식점 > 간식 > 제과,베이커리 > 뚜레쥬르",02-426-4045,뚜레쥬르 리버파크10단지점,http://place.map.kakao.com/19730783,서울 강동구 고덕로97길 20,127.179932,37.559216
384584,서울 강동구 강일동 699,카페,음식점 > 카페 > 테마카페 > 무인카페 > 데이롱카페,0507-1369-5213,데이롱카페 강일역점,http://place.map.kakao.com/326863310,서울 강동구 고덕로97길 29,127.179339,37.559557
384587,서울 강동구 강일동 699,음식점,음식점 > 한식,02-441-7745,경성밥상 강동미사점,http://place.map.kakao.com/1476481151,서울 강동구 고덕로97길 29,127.179299,37.559617


In [12]:
# Imports
import pandas as pd
import numpy as np

# Fold cafes into the restaurant group. The mask is built from the frame being
# modified (not from the parent `df`), so it does not rely on index alignment,
# and the assignment targets an explicit copy.
df_restaurant = df_restaurant.copy()
df_restaurant.loc[df_restaurant['category_group_name'] == '카페', 'category_group_name'] = '음식점'

# NOTE(review): df_touristattraction is filtered to '관광명소'/'문화시설' only, so
# this '음식점' -> '카페' relabel can never match and leaves the frame unchanged.
# It looks like a copy-paste slip (possibly meant '문화시설' -> '관광명소');
# behaviour is kept as-is until the intended mapping is confirmed.
df_touristattraction = df_touristattraction.copy()
df_touristattraction.loc[df_touristattraction['category_group_name'] == '음식점', 'category_group_name'] = '카페'
# Move each column's meaning into the row values (prefix every field with its label)


def df_preprocessing(df):
    """Prefix selected fields with a Korean label so each value is self-describing.

    Args:
        df: DataFrame containing the columns ``place_name``, ``phone``, ``x``,
            ``y``, ``address_name``, ``road_address_name`` and ``place_url``.

    Returns:
        A new DataFrame with the same columns in the same order, where the
        seven columns above hold ``"<label>:<value>"`` strings. ``x`` and ``y``
        are converted to strings first. The input frame is left untouched.

    Raises:
        KeyError: If any of the expected columns is missing.
    """
    labels = {
        'place_name': "장소이름:",
        'phone': "전화번호:",
        'x': "경도:",
        'y': "위도:",
        'address_name': "지번:",
        'road_address_name': "도로명주소:",
        'place_url': "웹링크:",
    }
    # Only the coordinate columns are stringified, as before.
    stringified = ('x', 'y')

    labelled = {}
    for column, prefix in labels.items():
        values = df[column].astype(str) if column in stringified else df[column]
        labelled[column] = prefix + values

    # assign() replaces whole columns on a new frame, so the caller's frame is
    # not mutated and no string is written into a numeric column in place.
    return df.assign(**labelled)

def df_preprocessing2(df):
    """Return only the columns that are exported, in export order.

    Args:
        df: DataFrame that contains at least the seven columns listed below.

    Returns:
        The frame restricted to those columns.
    """
    kept_columns = [
        'place_name',
        'category_group_name',
        'category_name',
        'x',
        'y',
        'road_address_name',
        'place_url',
    ]
    return df[kept_columns]

# The label-prefixing step (df_preprocessing) is currently disabled; only the
# column selection (df_preprocessing2) is applied to each subset.
df_restaurant, df_accommodation, df_touristattraction = (
    df_preprocessing2(subset)
    for subset in (df_restaurant, df_accommodation, df_touristattraction)
)

# Write one CSV per subset, without the index column.
exports = (
    ('after/식당.csv', df_restaurant),
    ('after/숙소.csv', df_accommodation),
    ('after/관광명소.csv', df_touristattraction),
)
for out_path, subset in exports:
    subset.to_csv(out_path, index=False, encoding='utf-8')