In [None]:
from src.loaders.load import load_documents_from_folder
from src.chunking.chunk import chunk_text
from src.embedding.embedding import CustomHuggingFaceEmbeddings
from src.vector_store_client.vstore import create_vector_store
from qdrant_client import QdrantClient
from dotenv import load_dotenv
# Load environment variables via python-dotenv (typically from a local .env file).
load_dotenv()

# Parameters:
# chunk_size and chunk_overlap are forwarded to chunk_text below; whether they
# count characters or tokens is not visible from this notebook — TODO confirm.
chunk_size = 500
chunk_overlap = 100
# Relative path; presumably resolved against the kernel's working directory — verify.
folder_path = "../practicos-rag/data"


# Document loading:
docs_from_folder = load_documents_from_folder(folder_path)

# Chunking
splits = chunk_text(docs_from_folder, chunk_size, chunk_overlap)
# Show the first two entries of `splits` as the cell output (quick sanity check).
splits[:2]

In [13]:
# Create an instance of the project's custom embedding class
embedding_model = CustomHuggingFaceEmbeddings(mode="sentence")

# Get the vector dimension reported by the embedding model
# (the recorded run of this cell printed 384)
dimension = embedding_model.get_dimension()
print(f"La dimensión del vector es: {dimension}")

# Collection name
collection_name = "demo_collection2"
# NOTE(review): `update` is not read anywhere in the visible cells; it is
# presumably meant to be forwarded to create_vector_store — confirm.
update = True

La dimensión del vector es: 384


In [None]:
# Create the Qdrant client pointing at a server on localhost:6333.
# NOTE(review): the message below is printed right after the client object is
# constructed; it presumably does not prove the server is reachable — confirm.
client = QdrantClient(host="localhost", port=6333)
print('Cliente conectado')

# Build the vector store from the chunks produced earlier. The `update` flag
# defined next to `collection_name` is forwarded here instead of a hard-coded
# True, so changing that single setting actually takes effect.
vector_store = create_vector_store(
    client,
    collection_name,
    embedding_model,
    splits,
    dimension,
    update=update,
)

In [None]:
from langchain_ollama import OllamaLLM
# The "llama3.2" model must be available to Ollama before the LLM is invoked;
# it can be fetched from a notebook cell with: !ollama pull llama3.2
# Wrap the Ollama-served "llama3.2" model as a LangChain LLM.
llm = OllamaLLM(model="llama3.2")

In [None]:
from langchain import hub
# Fetch the "rlm/rag-prompt" prompt through LangChain's hub helper.
prompt = hub.pull("rlm/rag-prompt")
# Display the raw template text of the prompt's first message as the cell output.
prompt.messages[0].prompt.template

In [None]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Expose the vector store through its retriever interface for use in the chain.
retriever = vector_store.as_retriever()
# `prompt` is reused from the earlier cell that already pulled "rlm/rag-prompt";
# pulling it a second time here only repeated the same fetch and redefined the
# same name.

def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents in input order, separated by a blank line
        (two newline characters). An empty iterable yields an empty string.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

# RAG chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Shell escape: pull the llama3.2 model with the Ollama CLI before the chain is invoked.
!ollama pull llama3.2

In [None]:
# Run the RAG chain on a sample question; the result is shown as the cell output.
rag_chain.invoke("Can you tell me what are the regulations for labeling of cacao and chocolate?")

In [None]:
#!pip freeze > requirements.txt