In [1]:
!pip install boto3
!pip install --upgrade urllib3

Collecting boto3
  Downloading boto3-1.28.63-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.8/135.8 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore<1.32.0,>=1.31.63 (from boto3)
  Downloading botocore-1.31.63-py3-none-any.whl (11.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m38.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jmespath<2.0.0,>=0.7.1 (from boto3)
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting s3transfer<0.8.0,>=0.7.0 (from boto3)
  Downloading s3transfer-0.7.0-py3-none-any.whl (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jmespath, botocore, s3transfer, boto3
Successfully installed boto3-1.28.63 botocore-1.31.63 jmespath-1.0.1 s3transfer-0.7.0


# Kakao Map API with Python ⚡

In [2]:
import requests
import pandas as pd
import numpy as np
import folium
from folium.plugins import MiniMap

In [3]:
import os
os.environ["REST_API_KEY"] = ""
REST_API_KEY = os.environ["REST_API_KEY"]

## 1. Keyword로 검색하기
- 카카오 맵 API는 검색 할 때, 최대 45개의 결과 값만 알려줌
- 지도앱에서의 장소 검색과 동일

In [11]:
import pprint

def get_location_with_keyword(region, page_num = 1):
  url = 'https://dapi.kakao.com/v2/local/search/keyword.json'
  params = {'query': region, 'page': page_num}
  headers = {'Authorization':f'KakaoAK {REST_API_KEY}'}

  response = requests.get(url, params=params, headers=headers).json()
  places = response['documents']
  total = response['meta']['total_count']
  pprint.pprint(places)


In [12]:
get_location_with_keyword("강남역 햄버거", 1)

[{'address_name': '서울 서초구 서초동 1305-5',
  'category_group_code': 'FD6',
  'category_group_name': '음식점',
  'category_name': '음식점 > 양식 > 햄버거',
  'distance': '',
  'id': '1725176424',
  'phone': '070-8834-7900',
  'place_name': '파이브가이즈 강남',
  'place_url': 'http://place.map.kakao.com/1725176424',
  'road_address_name': '서울 서초구 강남대로 435',
  'x': '127.02568305264',
  'y': '37.5011674033572'},
 {'address_name': '서울 강남구 역삼동 822-2',
  'category_group_code': 'FD6',
  'category_group_name': '음식점',
  'category_name': '음식점 > 패스트푸드 > 맥도날드',
  'distance': '',
  'id': '22318989',
  'phone': '02-6205-6400',
  'place_name': '맥도날드 강남2호점',
  'place_url': 'http://place.map.kakao.com/22318989',
  'road_address_name': '서울 강남구 테헤란로 107',
  'x': '127.02873329209403',
  'y': '37.49864658673187'},
 {'address_name': '서울 서초구 서초동 1329-8',
  'category_group_code': 'FD6',
  'category_group_name': '음식점',
  'category_name': '음식점 > 양식 > 햄버거',
  'distance': '',
  'id': '772414643',
  'phone': '070-7038-3501',
  'place_nam

## 2. 위치 (위도 + 경도) 기반으로 반경 넓혀서 검색하기

In [22]:
from collections import defaultdict

def get_location(keyword, start_x, start_y, end_x, end_y):

  page_num = 1
  result = [] # [{'서울역': [{...}]}, {'역삼역': [{docs...}]}]

  while(1):
    url = 'https://dapi.kakao.com/v2/local/search/keyword.json'
    params = {'query': keyword,
              'page': page_num,
              'rect':f'{start_x},{start_y},{end_x},{end_y}',
              'category_group_code' : 'FD6'}
    headers = {'Authorization':'KakaoAK YOUR_API_KEY'}

    response = requests.get(url, params=params, headers=headers)
    total_cnt = response.json()['meta']['total_count']
    # print("total count:", total_cnt)

    if total_cnt > 45:
      # 좌표 기준으로 4등분 하기
      mid_x = (start_x + end_x) / 2
      mid_y = (start_y + end_y) / 2
      # left down
      result.extend(get_location(keyword, start_x, start_y, mid_x, mid_y))
      # right down
      result.extend(get_location(keyword, mid_x, start_y, end_x, mid_y))
      # left up
      result.extend(get_location(keyword, start_x, mid_y, mid_x, end_y))
      # right up
      result.extend(get_location(keyword, mid_x, mid_y, end_x, end_y))
      return result
    else:
      if response.json()['meta']['is_end']:
        result.extend(response.json()['documents'])
        return result
      else:
        page_num += 1
        # print('page_num:', page_num)
        result.extend(response.json()['documents'])

In [5]:
import pandas as pd
station_df = pd.read_csv('station_latlen.csv')
station_df

Unnamed: 0,lat,lon,name,no_line
0,37.580059,127.047740,청량리역,1
1,37.578317,127.038799,제기동역,1
2,37.576106,127.024534,신설동역,1
3,37.573604,127.017151,동묘앞역,1
4,37.571762,127.011253,동대문역,1
...,...,...,...,...
249,37.499788,127.112267,송파역,8
250,37.493057,127.118076,가락시장역,8
251,37.487790,127.121335,문정역,8
252,37.477772,127.126530,장지역,8


In [24]:
def get_data(dataframe):
  results = []
  keyword = '음식점'
  next_x, next_y = 0.01, 0.01
  for index, row in station_df.iterrows():
    station, start_x, start_y = row['name'], row['lon'], row['lat']
    print(station)
    result = get_location(keyword, start_x, start_y, start_x + next_x, start_y + next_y)
    results.append({station : result})
  return results

In [None]:
import collections

def make_dataframe(results):
  results = list(map(dict, collections.OrderedDict.fromkeys(tuple(sorted(d.items())) for d in results)))
  X = []
  Y = []
  stores = []
  road_address = []
  place_url = []
  ID = []

  for place in results:
    X.append(float(place['x']))
    Y.append(float(place['y']))
    stores.append(place['place_name'])
    road_address.append(place['road_address_name'])
    place_url.append(place['place_url'])
    ID.append(place['id'])

  arr = np.array([ID, stores, X, Y, road_address, place_url]).T
  df = pd.DataFrame(arr, columns = ['ID','stores', 'X', 'Y','road_address','place_url'])
  return df

1162


In [26]:
results = get_data(station_df)

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
total count: 39
page_num: 3
total count: 39
total count: 52
total count: 12
total count: 17
page_num: 2
total count: 17
total count: 10
total count: 13
군자역 
total count: 430
total count: 93
total count: 31
page_num: 2
total count: 31
page_num: 3
total count: 31
total count: 5
total count: 19
page_num: 2
total count: 19
total count: 38
page_num: 2
total count: 38
page_num: 3
total count: 38
total count: 127
total count: 28
page_num: 2
total count: 28
total count: 62
total count: 22
page_num: 2
total count: 22
total count: 13
total count: 21
page_num: 2
total count: 21
total count: 7
total count: 35
page_num: 2
total count: 35
page_num: 3
total count: 35
total count: 2
total count: 183
total count: 77
total count: 18
page_num: 2
total count: 18
total count: 20
page_num: 2
total count: 20
total count: 26
page_num: 2
total count: 26
total count: 13
total count: 26
page_num: 2
total count: 26
total count: 60
total count: 26
page_num: 2
total

In [54]:
import collections
# results = [{'서울역': [{식당이름: 식당1, x:..., y:... , ... }. {식당이름 : 식당2}]}]
def make_dataframe_from_results(results):
  stations = []
  X = []
  Y = []
  stores = []
  road_address = []
  place_url = []
  ID = []
  category = []

  for i in range(len(results)):
    station = list(results[i].keys())[0]
    docs = list(results[i].values())[0]

    for place in docs:
      stations.append(station)
      X.append(float(place['x']))
      Y.append(float(place['y']))
      stores.append(place['place_name'])
      category.append(place['category_name'])
      road_address.append(place['road_address_name'])
      place_url.append(place['place_url'])
      ID.append(place['id'])


  arr = np.array([ID, stations, stores, category, X, Y, road_address, place_url]).T
  df = pd.DataFrame(arr, columns = ['ID','station', 'store_name', 'category','X', 'Y','road_address','place_url'])
  return df

In [55]:
df = make_dataframe_from_results(results)
df = df.drop_duplicates().reset_index(drop=True)
df

Unnamed: 0,ID,station,store_name,category,X,Y,road_address,place_url
0,214884174,청량리역,매란방 청량리역점,음식점 > 중식 > 중국요리,127.0481397953076,37.58065649866244,서울 동대문구 왕산로 214,http://place.map.kakao.com/214884174
1,945426786,청량리역,홍수계찜닭 청량리역사점,"음식점 > 한식 > 육류,고기 > 닭요리",127.048327694311,37.5806068672322,서울 동대문구 왕산로 214,http://place.map.kakao.com/945426786
2,17135871,청량리역,롯데리아 청량리역사점,음식점 > 패스트푸드 > 롯데리아,127.04832206169196,37.58065011723611,서울 동대문구 왕산로 214,http://place.map.kakao.com/17135871
3,392263498,청량리역,사보텐 청량리역사점,"음식점 > 일식 > 돈까스,우동 > 사보텐",127.04795866389627,37.58066738431477,서울 동대문구 왕산로 214,http://place.map.kakao.com/392263498
4,1573242366,청량리역,스시덴고쿠 청량리점,"음식점 > 일식 > 초밥,롤",127.048178182122,37.5804934031387,서울 동대문구 왕산로 214,http://place.map.kakao.com/1573242366
...,...,...,...,...,...,...,...,...
83529,15221566,복정역,장충동왕족발보쌈365 장지동점,"음식점 > 한식 > 육류,고기 > 족발,보쌈",127.127341317205,37.4774519241136,서울 송파구 충민로2길 26,http://place.map.kakao.com/15221566
83530,15598282,복정역,승일운수 구내식당,음식점 > 구내식당,127.135997444494,37.480275049426,서울 송파구 충민로6길 61-6,http://place.map.kakao.com/15598282
83531,15625514,복정역,떡마루,"음식점 > 간식 > 떡,한과",127.12736840155632,37.477423062633164,서울 송파구 충민로2길 26,http://place.map.kakao.com/15625514
83532,1079346621,복정역,아이스크림살래 파인점,음식점 > 간식 > 아이스크림,127.12752980485,37.4766011682789,서울 송파구 충민로2길 36,http://place.map.kakao.com/1079346621


In [57]:
df.to_csv("231015_seoul_restaurants_category.csv")

## Ad-Hoc Analytics
- 서울 254개 지하철 역 위치를 중심으로 식당 데이터 83534 건 수집
- 3번째 분류를 우선적으로 고려, 없을 시 2번째 분류 사용

In [60]:
category = df['category'].to_list()

In [61]:
parsed = []
for item in category:
    splitted = item.split(">")
    parsed.append(splitted)

In [62]:
depth_list = defaultdict(int)
for item in parsed:
    n = len(item)
    depth_list[f'{n}차 분류'] += 1
depth_list

defaultdict(int,
            {'3차 분류': 42379,
             '4차 분류': 16360,
             '2차 분류': 23316,
             '5차 분류': 973,
             '1차 분류': 506})

In [67]:
# use 3rd-depth first and then 2rd-depth
food_category = []
for item in category:
  splitted = item.split(">")
  if len(splitted) >= 3: # more than 3rd depth
    food_category.append(splitted[-1].strip())
  elif len(splitted) == 2:
    food_category.append(splitted[-1].strip())
len(food_category)

83028

In [68]:
from collections import Counter

food_category_cnt = Counter(food_category)

In [69]:
food_category_cnt.most_common(32)

[('한식', 9103),
 ('호프,요리주점', 5529),
 ('육류,고기', 4960),
 ('분식', 3476),
 ('제과,베이커리', 2998),
 ('중국요리', 2356),
 ('양식', 2205),
 ('해물,생선', 2115),
 ('술집', 2022),
 ('치킨', 1709),
 ('곱창,막창', 1512),
 ('닭요리', 1451),
 ('실내포장마차', 1450),
 ('갈비', 1346),
 ('돈까스,우동', 1276),
 ('일식', 1265),
 ('회', 1232),
 ('족발,보쌈', 1180),
 ('일본식주점', 1148),
 ('와인바', 995),
 ('초밥,롤', 942),
 ('국수', 924),
 ('중식', 922),
 ('순대', 903),
 ('떡,한과', 889),
 ('칼국수', 877),
 ('찌개,전골', 871),
 ('이탈리안', 849),
 ('칵테일바', 821),
 ('떡볶이', 798),
 ('피자', 714),
 ('삼겹살', 703)]

In [70]:
food_category_cnt.most_common(40)

[('한식', 9103),
 ('호프,요리주점', 5529),
 ('육류,고기', 4960),
 ('분식', 3476),
 ('제과,베이커리', 2998),
 ('중국요리', 2356),
 ('양식', 2205),
 ('해물,생선', 2115),
 ('술집', 2022),
 ('치킨', 1709),
 ('곱창,막창', 1512),
 ('닭요리', 1451),
 ('실내포장마차', 1450),
 ('갈비', 1346),
 ('돈까스,우동', 1276),
 ('일식', 1265),
 ('회', 1232),
 ('족발,보쌈', 1180),
 ('일본식주점', 1148),
 ('와인바', 995),
 ('초밥,롤', 942),
 ('국수', 924),
 ('중식', 922),
 ('순대', 903),
 ('떡,한과', 889),
 ('칼국수', 877),
 ('찌개,전골', 871),
 ('이탈리안', 849),
 ('칵테일바', 821),
 ('떡볶이', 798),
 ('피자', 714),
 ('삼겹살', 703),
 ('국밥', 695),
 ('한정식', 533),
 ('일식집', 528),
 ('냉면', 523),
 ('양꼬치', 432),
 ('베트남음식', 422),
 ('감자탕', 417),
 ('참치회', 416)]

In [71]:
food_category_cnt.most_common(50)

[('한식', 9103),
 ('호프,요리주점', 5529),
 ('육류,고기', 4960),
 ('분식', 3476),
 ('제과,베이커리', 2998),
 ('중국요리', 2356),
 ('양식', 2205),
 ('해물,생선', 2115),
 ('술집', 2022),
 ('치킨', 1709),
 ('곱창,막창', 1512),
 ('닭요리', 1451),
 ('실내포장마차', 1450),
 ('갈비', 1346),
 ('돈까스,우동', 1276),
 ('일식', 1265),
 ('회', 1232),
 ('족발,보쌈', 1180),
 ('일본식주점', 1148),
 ('와인바', 995),
 ('초밥,롤', 942),
 ('국수', 924),
 ('중식', 922),
 ('순대', 903),
 ('떡,한과', 889),
 ('칼국수', 877),
 ('찌개,전골', 871),
 ('이탈리안', 849),
 ('칵테일바', 821),
 ('떡볶이', 798),
 ('피자', 714),
 ('삼겹살', 703),
 ('국밥', 695),
 ('한정식', 533),
 ('일식집', 528),
 ('냉면', 523),
 ('양꼬치', 432),
 ('베트남음식', 422),
 ('감자탕', 417),
 ('참치회', 416),
 ('아이스크림', 413),
 ('장어', 398),
 ('간식', 388),
 ('햄버거', 382),
 ('샐러드', 363),
 ('파리바게뜨', 351),
 ('오리', 350),
 ('샤브샤브', 336),
 ('아구', 326),
 ('일본식라면', 306)]

## reference
- https://haries.tistory.com/6
- https://apis.map.kakao.com/web/documentation/#services_Places_keywordSearch
(kakao map api 문서)