# Convert transcript to RecipeCore


In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell execution, so edits to the recipamatic package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import json
from pathlib import Path

from loguru import logger as lg
from rich import print as rprint

from recipamatic.config.recipamatic_config import get_recipamatic_paths
from recipamatic.cook.recipe_core.transcriber import RecipeCoreTranscriber
from recipamatic.langchain_openai_.chat_openai_config import DEFAULT_CHAT_OPENAI_CONFIG
from recipamatic.social.insta.loader import InstaLoader
from recipamatic.social.insta.structures import PostIg, ProfileIg
from recipamatic.utils.pathlib_ import check_create_fol

In [None]:
# Loader used by the processing cell to fetch a post by shortcode
# (il.load_post(...)).
# NOTE(review): the meaning of the empty-string argument is not visible here
# (presumably a user/profile name) - confirm against InstaLoader.
il = InstaLoader("")
# Login is intentionally left disabled.
# il.login()

In [None]:
# Input folder: the processing cell iterates its entries and uses each entry
# name as the post shortcode.
posts_fol = get_recipamatic_paths().ig_fol / "posts"
# Bare expression so the notebook displays the resolved path.
posts_fol

In [None]:
# Output root: the processing cell writes <recipes_fol>/<shortcode>/recipe_core.json.
recipes_fol = get_recipamatic_paths().data_fol / "recipes"
# presumably creates the folder when it does not exist - confirm in utils.pathlib_
check_create_fol(recipes_fol)
# Bare expression so the notebook displays the resolved path.
recipes_fol

In [None]:
# Selects which transcript variant is consumed: the value is embedded in the
# transcript file name read by the processing cell (p_transcript_<model_type>.txt).
# Keep exactly one assignment active.
# NOTE(review): the values look like Whisper model names - confirm.
# model_type = "base.en"
model_type = "medium"
# model_type = "medium.en"
# model_type = "large-v3"

In [None]:
from recipamatic.langchain_openai_.chat_openai_config import ChatOpenAIConfig
from recipamatic.utils.langchain_ import get_secret_from_env


# oai_config = ChatOpenAIConfig(
#     model="gpt-4o",
#     temperature=0.1,
#     api_key=get_secret_from_env("OPENAI_API_KEY"),
# )

In [None]:
# Build a RecipeCore JSON for Instagram posts found under `posts_fol`.
#
# For each post folder the loop:
#   1. looks for the transcript produced with the selected `model_type`,
#   2. skips the post when the transcript is missing or when its
#      recipe_core.json already exists,
#   3. joins the post caption and the transcript into a single text,
#   4. runs that text through RecipeCoreTranscriber and saves the result.
#
# Sorted so every run visits the posts in the same order; Path.iterdir()
# yields entries in arbitrary order.
posts_fol_iter = sorted(posts_fol.iterdir())
# Debug: restrict the run to a single post.
# posts_fol_iter = [Path("CqS6OV4osrG")]

# start=1 so the progress reads 1/N ... N/N instead of 0/N ... (N-1)/N.
for ip, post_fol in enumerate(posts_fol_iter, start=1):
    # The folder name is used as the post shortcode.
    post_shortcode = post_fol.name
    lg.info(f"{ip}/{len(posts_fol_iter)} {post_fol} - {post_shortcode}")

    transcript_fp = post_fol / f"p_transcript_{model_type}.txt"
    if not transcript_fp.exists():
        lg.warning(f"Missing transcript {transcript_fp}")
        continue

    rc_fol = recipes_fol / post_shortcode
    # presumably creates the folder when missing - confirm in utils.pathlib_
    check_create_fol(rc_fol)
    rc_fp = rc_fol / "recipe_core.json"
    if rc_fp.exists():
        # Already processed in a previous run: do not call the model again.
        lg.debug(f"Recipe core already exists {rc_fp}")
        continue

    # load post and transcript
    post = il.load_post(post_shortcode)
    # Explicit encoding: without it the locale default is used, which can
    # mis-decode non-ASCII transcripts on some platforms.
    # NOTE(review): assumes the transcripts were written as UTF-8 - confirm.
    transcript = transcript_fp.read_text(encoding="utf-8").strip()
    transcript_len = len(transcript.split())
    if transcript_len < 20:
        # Fewer than 20 words: drop the transcript and rely on the caption only.
        lg.warning(f"Transcript too short {transcript_len} {transcript_fp}")
        transcript = ""

    # NOTE(review): a post may have no caption (assumption - confirm against
    # PostIg); fall back to "" so the literal text "None" never reaches the model.
    recipe_text = f"\n{post.caption or ''}\n\n{transcript}"

    # transcribe recipe
    rc_transcriber = RecipeCoreTranscriber(DEFAULT_CHAT_OPENAI_CONFIG)
    # rc_transcriber = RecipeCoreTranscriber(oai_config)
    rc = rc_transcriber.invoke(recipe_text)

    # save recipe core
    rc_dict = rc.model_dump()
    rc_fp.write_text(json.dumps(rc_dict, indent=4), encoding="utf-8")

    # NOTE(review): stops after the first post that actually gets transcribed,
    # so each run of this cell produces at most one new recipe. Remove the
    # break to process every remaining post in one go.
    break

In [None]:
# Inspect the input text and the resulting recipe core of the last post
# handled by the loop cell. Both names are only assigned once a post reaches
# the transcription step, so this cell fails with NameError if every post
# was skipped in a fresh kernel.
rprint(recipe_text)
rprint(rc)