In [1]:
import requests
import json
from tqdm import tqdm

In [2]:
def crawling_recipe_list(offset,limit):
    """Fetch one page of the recipe list from the wtable list API.

    Args:
        offset: Value sent as the ``offset`` query parameter
            (presumably the index of the first recipe to return -- confirm
            against the API).
        limit: Value sent as the ``limit`` query parameter
            (presumably the maximum number of recipes to return).

    Returns:
        A ``(total_elements, data_list)`` tuple read from the
        ``total_elements`` and ``data`` fields of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        RuntimeError: If the JSON body does not report a truthy ``success``.
    """
    url = "https://wtable.net/api_v2/recipe/list"
    query = {
        "app_version": 1,
        "platform": "web",
        "order": "publish_desc",
        "offset": offset,
        "limit": limit,
    }
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, params=query, timeout=30)
    response.raise_for_status()
    json_data = json.loads(response.text)
    if not json_data.get('success'):
        # Fail loudly here instead of hitting an unrelated KeyError below.
        raise RuntimeError(
            f"recipe list request failed (offset={offset}, limit={limit}): {json_data!r}"
        )
    total_elements = json_data['total_elements']
    data_list = json_data['data']
    return total_elements, data_list

In [3]:
def parsing_recipe_list(data_list):
    """Keep only the fields needed from each raw recipe-list entry.

    Args:
        data_list: Iterable of dicts, each providing the keys ``title``,
            ``title_desc``, ``token`` and ``profile_img``.

    Returns:
        A new list with one dict per entry holding ``title``, ``title_desc``,
        ``token`` and ``title_img`` (copied from ``profile_img``).
    """
    # (output key, key in the raw entry), in the order the output is built.
    field_map = (
        ('title', 'title'),
        ('title_desc', 'title_desc'),
        ('token', 'token'),
        ('title_img', 'profile_img'),
    )
    return [
        {target: entry[source] for target, source in field_map}
        for entry in data_list
    ]

In [4]:
def crawling_recipe_detail(token, build_id="QyVQmbVsmSdCqTclsMsAZ"):
    """Fetch the detail data of a single recipe.

    Args:
        token: Recipe token, used both in the URL path and as the ``token``
            query parameter.
        build_id: Path segment that follows ``/_next/data/`` in the URL.
            NOTE(review): this looks like a Next.js build id, which would
            change when the site is redeployed -- confirm and pass the
            current value if requests start failing.

    Returns:
        A dict with ``description``, ``theme_names``, ``sequences`` (the raw
        ``recipe_steps``) and ``ingredients`` (the raw ``recipe_igroups``).
        An empty dict is returned when the recipe in the response carries a
        different token than the one requested.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
    """
    url = f"https://wtable.co.kr/_next/data/{build_id}/recipes"\
    f"/{token}.json"\
    "?location=recipe_home"\
    f"&token={token}"
    # Without a timeout a single stalled request would hang a long crawl.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    json_data = json.loads(response.text)
    data = json_data['pageProps']['recipe']
    if data['token'] != token:
        # The response describes a different recipe: report "nothing found".
        return {}
    return {
        'description': data['description'],
        'theme_names': data['theme_names'],  # a list
        'sequences': data['recipe_steps'],
        'ingredients': data['recipe_igroups'],
    }

In [5]:
def parsing_recipe_sequences(steps):
    """Turn raw recipe steps into numbered sequence entries.

    Args:
        steps: Iterable of dicts, each providing a ``content`` key.

    Returns:
        A list of ``{'content': ..., 'sequence': n}`` dicts where ``n`` is the
        1-based position of the step in ``steps``.
    """
    return [
        {'content': step['content'], 'sequence': position}
        for position, step in enumerate(steps, start=1)
    ]

In [6]:
def parsing_recipe_ingredients(ingredients_list):
    """Flatten grouped ingredients into a single list.

    Args:
        ingredients_list: Iterable of group dicts; each group provides an
            ``ingredients`` list whose items have ``name`` and ``value`` keys.

    Returns:
        A flat list of ``{'name': ..., 'quantity': ...}`` dicts, in group
        order and then item order, where ``quantity`` is the item's ``value``.
    """
    return [
        {'name': item['name'], 'quantity': item['value']}
        for group in ingredients_list
        for item in group['ingredients']
    ]

In [7]:
def parsing_recipe(crawled_recipe):
    """Fetch and normalise the detail data for one crawled recipe.

    Args:
        crawled_recipe: Dict providing the recipe's ``token``.

    Returns:
        The dict from ``crawling_recipe_detail`` with ``sequences`` and
        ``ingredients`` replaced by their parsed forms. If
        ``crawling_recipe_detail`` returns an empty dict, that empty dict is
        returned unchanged.
    """
    recipe_detail = crawling_recipe_detail(crawled_recipe['token'])
    if not recipe_detail:
        # Nothing to parse; indexing 'sequences' would raise KeyError.
        return recipe_detail
    recipe_detail['sequences'] = parsing_recipe_sequences(recipe_detail['sequences'])
    recipe_detail['ingredients'] = parsing_recipe_ingredients(recipe_detail['ingredients'])
    # Previously the result was built and then discarded (implicit None).
    return recipe_detail

In [8]:
def making_recipe_detail(recipe_list):
    """Enrich every crawled recipe with its detail data, in place.

    For each recipe the detail is fetched by token, its ``sequences`` and
    ``ingredients`` are parsed, and the result is merged into the recipe
    dict. Recipes for which ``crawling_recipe_detail`` returns an empty dict
    are left untouched.

    Args:
        recipe_list: List of dicts, each providing a ``token`` key. The dicts
            are updated in place.

    Returns:
        The same ``recipe_list`` object that was passed in.
    """
    for crawled_recipe in tqdm(recipe_list):
        recipe_detail = crawling_recipe_detail(crawled_recipe['token'])
        if not recipe_detail:
            # No detail available: skip it rather than abort the whole
            # crawl with a KeyError on 'sequences'.
            continue
        recipe_detail['sequences'] = parsing_recipe_sequences(recipe_detail['sequences'])
        recipe_detail['ingredients'] = parsing_recipe_ingredients(recipe_detail['ingredients'])
        crawled_recipe.update(recipe_detail)
    return recipe_list

In [15]:
# Crawl as many entries of the recipe list as desired.
# offset and limit are passed straight through to crawling_recipe_list.
offset = 0
limit = 10000
total_elements ,data_list = crawling_recipe_list(offset,limit)

In [16]:
# Parse only the needed fields out of the crawled recipe list.
# The result is "incomplete" because the per-recipe details are added later.
incomplete_recipe_list = parsing_recipe_list(data_list)

In [17]:
# Use each recipe's token to fetch its detail information.
# One request is made per recipe, so this is the slow step of the notebook.
recipe_list = making_recipe_detail(incomplete_recipe_list)

100%|███████████████████████████████████████| 1746/1746 [21:05<00:00,  1.38it/s]


In [24]:
# Save the result as JSON.
# ensure_ascii=False writes non-ASCII text as-is instead of \uXXXX escapes.
with open("우리의 식탁.json", 'w', encoding="UTF-8") as f:
    json.dump(recipe_list, f, indent=4, ensure_ascii=False)