In [1]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import numpy as np

# Load KAKAO_API_KEY from the .env file and stop right away when it is absent.
# Without this guard the crawl below would send "Authorization: KakaoAK None"
# for every request and end up writing an empty result file.
load_dotenv()
KAKAO_API_KEY = os.getenv('KAKAO_API_KEY')
if not KAKAO_API_KEY:
    raise RuntimeError(
        "KAKAO_API_KEY is not set; add it to .env or the environment before running this notebook"
    )

# Reference corners of the search area, written as (latitude, longitude):
#   Dongnimmun (Independence Gate): 37.575020, 126.957221
#   Seongsu JC:                     37.542640, 127.029530

# Earlier, tighter approximation of the Jung-gu (Seoul) bounds, kept for reference
# in (minX, minY, maxX, maxY) order:
# min_x, min_y = 126.973, 37.550
# max_x, max_y = 127.020, 37.574

# Bounding box actually used: x is longitude, y is latitude.
min_x, max_x = 126.957221, 127.029530
min_y, max_y = 37.542640, 37.575020

# Grid resolution: int(sqrt(50000)) == 223 evenly spaced points per axis.
# The crawl loop pairs neighbouring points, giving 222 x 222 rectangles.
num_divisions = int(np.sqrt(50000))
x_points = np.linspace(start=min_x, stop=max_x, num=num_divisions)
y_points = np.linspace(start=min_y, stop=max_y, num=num_divisions)

def search_places(rect):
    """Collect Kakao Local keyword-search results for one rectangle.

    Requests up to ``max_pages`` pages of 15 results for the fixed query "중구",
    restricted to ``rect``, and concatenates the ``documents`` of every page.

    Parameters
    ----------
    rect : str
        Value passed through as the API's ``rect`` parameter. The crawl loop in
        this notebook builds it as "x1,y1,x2,y2" from neighbouring grid points.

    Returns
    -------
    list of dict
        The ``documents`` entries gathered so far; empty when the first page
        has none or the API answers with an error status.

    Notes
    -----
    A non-2xx answer does not raise: a ``RuntimeWarning`` is emitted and paging
    for this rectangle stops, so a long crawl keeps going while the failure
    stays visible. Network-level errors, including the request timeout, still
    propagate as ``requests`` exceptions.
    """
    import warnings  # local import: only needed on the error path

    url = "https://dapi.kakao.com/v2/local/search/keyword.json"
    headers = {"Authorization": f"KakaoAK {KAKAO_API_KEY}"}
    page_size = 15  # results per page (15 is the maximum, per the original note)
    max_pages = 3   # upper bound on pages requested per rectangle

    places = []
    for page in range(1, max_pages + 1):
        params = {"query": "중구", "rect": rect, "size": page_size, "page": page}
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, headers=headers, params=params, timeout=10)

        if not response.ok:
            # An error payload has no "documents"; surface it instead of
            # treating it like an empty rectangle.
            warnings.warn(
                f"Kakao keyword search returned HTTP {response.status_code}; "
                "results for this rectangle may be incomplete",
                RuntimeWarning,
                stacklevel=2,
            )
            break

        data = response.json()
        documents = data.get("documents")
        if not documents:
            break

        places.extend(documents)

        # Stop when the API flags the last page; a missing flag also stops.
        if data.get("meta", {}).get("is_end", True):
            break

    return places

def save_to_csv(places, filename):
    """Write a list of place records to a CSV file.

    Parameters
    ----------
    places : list of dict
        Records to store; each dict becomes one row and its keys the columns.
    filename : str or os.PathLike
        Destination path. A missing parent directory is created first, so a
        fresh checkout without e.g. ``before/`` no longer fails with OSError.
        Other targets accepted by ``DataFrame.to_csv`` are passed through as-is.

    The file is written without the index column and encoded as UTF-8 with a
    BOM ('utf-8-sig').
    """
    if isinstance(filename, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(filename))
        if parent:
            os.makedirs(parent, exist_ok=True)

    pd.DataFrame(places).to_csv(filename, index=False, encoding='utf-8-sig')

# Walk the grid column by column: every pair of neighbouring x points and
# neighbouring y points forms one rectangle, which is queried and pooled.
all_places = []
for left, right in zip(x_points[:-1], x_points[1:]):
    for bottom, top in zip(y_points[:-1], y_points[1:]):
        all_places += search_places(f"{left},{bottom},{right},{top}")

# Persist the raw, still-duplicated crawl result.
save_to_csv(all_places, "before/places.csv")

In [20]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import numpy as np

# Load the crawled rows, keep the first row for each `id`, discard the columns
# that are not used afterwards, and drop every row that still has a missing value.
df = (
    pd.read_csv('before/places.csv')
    .drop_duplicates(subset=['id'])
    .drop(columns=['distance', 'category_group_code', 'id'])
    .dropna()
)
len(df)

7094

In [21]:
# Category groups routed to each table:
#   tourist attractions: '관광명소', '문화시설'
#   accommodation:       '숙박'
#   restaurants:         '음식점', '카페'
#
# Each subset is taken as an explicit copy: later cells modify these frames,
# and writing into a plain filtered slice triggers SettingWithCopyWarning.
df_touristattraction = df[df['category_group_name'].isin(['관광명소', '문화시설'])].copy()
df_accommodation = df[df['category_group_name'] == '숙박'].copy()
df_restaurant = df[df['category_group_name'].isin(['음식점', '카페'])].copy()

In [22]:
# Display the restaurant/cafe subset to inspect the filtered rows.
df_restaurant

Unnamed: 0,address_name,category_group_name,category_name,phone,place_name,place_url,road_address_name,x,y
21,서울 중구 중림동 401-4,음식점,음식점 > 한식,010-2769-0195,오공일,http://place.map.kakao.com/550581873,서울 중구 손기정로1길 38,126.962056,37.558846
23,서울 중구 중림동 400-6,음식점,"음식점 > 한식 > 육류,고기 > 곱창,막창",02-913-1641,마포대도소곱창,http://place.map.kakao.com/27119857,서울 중구 손기정로1길 42,126.962266,37.558900
24,서울 중구 중림동 400-7,음식점,"음식점 > 한식 > 해물,생선",02-363-0990,e문어세상 충정로직영점,http://place.map.kakao.com/166104597,서울 중구 손기정로1길 46,126.962434,37.558900
34,서울 중구 만리동2가 273,음식점,음식점 > 간식 > 아이스크림 > 배스킨라빈스,02-364-3331,배스킨라빈스 만리센트럴점,http://place.map.kakao.com/960690898,서울 중구 만리재로 175,126.962507,37.551916
47,서울 중구 중림동 400-15,음식점,"음식점 > 한식 > 육류,고기",02-365-7792,한우정육마당 충정로점,http://place.map.kakao.com/1536966308,서울 중구 손기정로1길 50,126.962675,37.558948
...,...,...,...,...,...,...,...,...,...
44208,서울 중구 신당동 170-29,음식점,"음식점 > 한식 > 육류,고기 > 곱창,막창",02-2232-5652,우왕소곱창,http://place.map.kakao.com/27347015,서울 중구 왕십리로 413,127.024213,37.564919
44210,서울 중구 신당동 170-30,음식점,음식점 > 한식 > 국밥,02-3298-6659,정보인부산돼지국밥 신당점,http://place.map.kakao.com/504454249,서울 중구 왕십리로 415,127.024030,37.564923
44212,서울 중구 신당동 170-29,음식점,음식점 > 치킨 > BHC치킨,02-2253-1092,BHC치킨 왕십리점,http://place.map.kakao.com/26628361,서울 중구 왕십리로 413,127.024233,37.564946
44234,서울 중구 신당동 855,음식점,"음식점 > 일식 > 초밥,롤",02-2234-7730,상무초밥 왕십리점,http://place.map.kakao.com/1354722386,서울 중구 왕십리로 407,127.024916,37.564846


In [23]:
# 불러오기
import pandas as pd
import numpy as np

# Fold cafes into the restaurant category. The replacement is computed from
# df_restaurant's own column (the original mask was built from `df` and only
# worked through index alignment) and produces a new frame instead of writing
# into a filtered slice.
df_restaurant = df_restaurant.assign(
    category_group_name=df_restaurant['category_group_name'].replace('카페', '음식점')
)
# NOTE(review): the original cell also ran
#     df_touristattraction.loc[df['category_group_name'] == '음식점', 'category_group_name'] = '카페'
# but df_touristattraction is filtered to '관광명소' / '문화시설' rows only, so
# that assignment never matched a row. It is removed here as dead code. If the
# goal was to merge '문화시설' into '관광명소' (mirroring the cafe merge above),
# add that replacement explicitly - TODO confirm with the author.

# Next step: move the meaning of each column into the row values themselves.


def df_preprocessing(df):
    """Return a copy of ``df`` whose descriptive columns carry a Korean label prefix.

    Each value is turned into "<label>:<value>" text so that a row stays
    self-describing once the column headers are gone.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'place_name', 'phone', 'x', 'y', 'address_name',
        'road_address_name' and 'place_url'. Other columns are passed through
        untouched.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns in the same order. The input is not
        modified (the original version wrote into its argument).

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # Columns that already hold text: the label is simply prepended.
    text_labels = {
        'place_name': "장소이름:",
        'phone': "전화번호:",
        'address_name': "지번:",
        'road_address_name': "도로명주소:",
        'place_url': "웹링크:",
    }
    # Coordinate columns are numeric and must be stringified first.
    numeric_labels = {
        'x': "경도:",
        'y': "위도:",
    }

    labelled = df.copy()
    # Replace whole columns (labelled[col] = ...) rather than using
    # .loc[:, col] = ..., which tries to keep the old dtype and is deprecated
    # for writing strings into float columns.
    for column, label in text_labels.items():
        labelled[column] = label + df[column]
    for column, label in numeric_labels.items():
        labelled[column] = label + df[column].astype(str)
    return labelled

def df_preprocessing2(df):
    """Return only the columns that are exported, in their export order.

    Keeps 'place_name', 'category_group_name', 'category_name', 'x', 'y',
    'road_address_name' and 'place_url'; every other column is left out.
    A missing column raises KeyError.
    """
    export_columns = (
        'place_name',
        'category_group_name',
        'category_name',
        'x',
        'y',
        'road_address_name',
        'place_url',
    )
    return df.loc[:, list(export_columns)]

# Label the values, then keep only the export columns, for each category table.
df_restaurant = df_preprocessing2(df_preprocessing(df_restaurant))
df_accommodation = df_preprocessing2(df_preprocessing(df_accommodation))
df_touristattraction = df_preprocessing2(df_preprocessing(df_touristattraction))

# Write one UTF-8 CSV per category, without the index column.
for _frame, _csv_path in (
    (df_restaurant, 'after/restaurant_data.csv'),
    (df_accommodation, 'after/accommodation.csv'),
    (df_touristattraction, 'after/touristattraction.csv'),
):
    _frame.to_csv(_csv_path, index=False, encoding='utf-8')