# Create the chunks prepared for the training of the model


In [1]:
# Import the necessary libraries
import json
import os
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Document
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
from dotenv import load_dotenv
import google.generativeai as genai
from dotenv import load_dotenv


  from .autonotebook import tqdm as notebook_tqdm


In [16]:
base_path = "data/out"
filename = "all_categories.json"

load_dotenv()

genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


In [3]:
def load_json(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return data


file_path = base_path + "/" + filename
data = load_json(file_path)


In [4]:
def extract_categories(data):
    categories = {}
    for category in data:
        category_name = category.get("name")
        recipes = category.get("recipes", [])
        categories[category_name] = recipes
    return categories


categories = extract_categories(data)


In [5]:
def create_chunks(categories, chunk_size=512):
    chunks = []
    for category, recipes in categories.items():
        for recipe in recipes:
            recipe_name = recipe.get("name", "")
            info = recipe.get("info", {})
            ingredients = recipe.get("ingredients", [])
            preparation = recipe.get("preparation", [])

            text = f"Categoría: {category}\nNombre: {recipe_name}\n"
            text += f"Fuente: {info.get('source', '')}\nTiempo de preparación: {info.get('prep_time', '')}\n"
            text += f"Tiempo de cocción: {info.get('cook_time', '')}\nDificultad: {info.get('difficulty', '')}\n\n"
            text += "Ingredientes:\n" + "\n".join(ingredients) + "\n\n"
            text += "Preparación:\n" + "\n".join(preparation) + "\n"

            for i in range(0, len(text), chunk_size):
                chunks.append(text[i : i + chunk_size])
    return chunks


chunks = create_chunks(categories)


In [70]:
documents = [Document(text=chunk) for chunk in chunks[:1]]

gemini_model = Gemini(model_name="models/gemini-1.5-flash", temperature=0.1)

embedding_model = GeminiEmbedding(
    model_name="models/embedding-001", title="Gemini Embedding"
)


In [76]:
node_parser = SimpleNodeParser.from_defaults()

nodes = node_parser.get_nodes_from_documents(documents)


In [66]:
embedding_model


GeminiEmbedding(model_name='models/gemini-1.5-flash', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7f1490a1bd50>, num_workers=None, title=None, task_type='retrieval_document', api_key=None)

In [77]:
index = VectorStoreIndex(nodes, embed_model=embedding_model)


In [78]:
query = index.as_query_engine(llm=gemini_model)

response = query.query("Como preparo una margarita cristalina? indicame el paso a paso")

print(response)


Primero, combina el tequila, el jugo de limón, la miel de agave y la solución salina en un shaker. Luego, agrega hielo y agita hasta que esté bien frío. Finalmente, sirve en un vaso con hielo y decora con un limón amarillo. 



In [81]:
response = query.query("Como preparo un frapuccino de Mazapan?")

print(response)


The provided context does not contain information about how to prepare a Mazapan Frappuccino. 

