In [2]:
import fitz  # PyMuPDF

def extract_lines_from_pdf_pymupdf(pdf_path):
    """
    Collect every non-blank line of text from a PDF file using PyMuPDF.

    Pages are visited in document order. The text of each page is split
    into lines, each line is stripped of surrounding whitespace, and lines
    that are empty after stripping are discarded.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        list: Stripped, non-empty lines from all pages, in reading order.
    """
    collected = []
    with fitz.open(pdf_path) as document:
        for pdf_page in document:
            # Request the page content in "text" mode and walk it line by line.
            for raw_line in pdf_page.get_text("text").splitlines():
                cleaned = raw_line.strip()
                if cleaned:
                    collected.append(cleaned)
    return collected

# Path of the PDF to index, relative to the current working directory.
# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'frontend'". That error is commonly
# reported when the unrelated PyPI package "fitz" is installed instead of
# PyMuPDF -- verify the environment before re-running.
pdf_path = "./Dialnet-LaViolenciaComoOpcionCreativaEnLaObraDeMartinScors-7009818.pdf"  
# Stripped, non-empty text lines of every page, in document order.
pdf_lines = extract_lines_from_pdf_pymupdf(pdf_path)
# Prints the complete list of lines.
# NOTE(review): for a long PDF this floods the cell output; a slice such as
# pdf_lines[:10] would probably be enough as a preview.
print(pdf_lines)

ModuleNotFoundError: No module named 'frontend'

In [151]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model shared by the vector store cells below.
# NOTE(review): presumably the model weights are fetched from the Hugging Face
# Hub on first use -- confirm that network access is available.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

In [152]:
from langchain_chroma import Chroma


# Build the "pdf-gui" collection from the extracted PDF lines.
# Deterministic, position-based ids make this cell safe to re-run: without
# explicit ids every run inserts the same lines again under fresh ids, so
# similarity searches start returning duplicated hits. With stable ids a
# re-run overwrites the existing entries instead of appending new ones.
vector_store = Chroma.from_texts(
    texts=pdf_lines,
    ids=[f"pdf-line-{idx}" for idx in range(len(pdf_lines))],
    collection_name="pdf-gui",
    embedding=embeddings,
)

In [153]:
from langchain_chroma import Chroma

# Open the "pdf-gui" collection again with the same embedding function and
# rebind vector_store to it.
# NOTE(review): neither this call nor the Chroma.from_texts call above passes a
# persist_directory or client, so this presumably relies on Chroma sharing its
# in-memory store inside the kernel process -- confirm. The vector_store
# returned by Chroma.from_texts could likely be used directly instead.
vector_store = Chroma(
    collection_name="pdf-gui",
    embedding_function=embeddings,
)

In [154]:
# Wrap the vector store as a retriever (no search options overridden) so it
# can be plugged into the chain.
retriever = vector_store.as_retriever()

In [155]:
# Sanity check: query the vector store directly, bypassing the LLM chain.
question = "Como me llamo?"
docs = vector_store.similarity_search(question)
# Number of documents returned for the question (shown as the cell output).
len(docs)

4

In [156]:
# Inspect the first document returned by the similarity search.
docs[0]

Document(metadata={}, page_content='Me llamo Martin, nací el 26 de noviembre de 2004')

In [157]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_ollama.chat_models import ChatOllama
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Prompt: instructs the model to answer only from the supplied context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Local LLM, addressed by its Ollama model name.
ollama_llm = "llama3.2"
model_local = ChatOllama(model=ollama_llm)


def _format_docs(documents):
    """Join the text of the retrieved documents into a single context string."""
    return "\n\n".join(doc.page_content for doc in documents)


# Chain: the incoming question is sent to the retriever and also passed through
# unchanged. The retrieved documents are reduced to their plain text before they
# reach the prompt; passing the raw list would inject the Python repr of each
# Document (metadata included) into the {context} slot.
chain = (
    {
        "context": retriever | RunnableLambda(_format_docs),
        "question": RunnablePassthrough(),
    }
    | prompt
    | model_local
    | StrOutputParser()
)

In [158]:
def search_chroma(input, history):
    """
    Answer a chat message by running it through the module-level chain.

    Args:
        input: The user's message; forwarded unchanged to chain.invoke.
        history: Conversation history supplied by the caller; not used.

    Returns:
        The chain's response, or a Spanish error message containing the
        exception text when the invocation raises.
    """
    try:
        answer = chain.invoke(input)
    except Exception as exc:
        # Report the failure as the reply text instead of raising.
        return f"Error al procesar la consulta: {exc}"
    else:
        return answer

In [159]:
import gradio as gr

Tip:
 Always set type="messages" in gr.ChatInterface. The default value (type="tuples") is deprecated and will be removed in a future version of Gradio.

In [160]:
# Define the chat interface.
with gr.Blocks(theme=gr.themes.Glass()) as demo:
    gr.Markdown("### Chat con llama3.2")

    # Create the ChatInterface that handles the interaction; search_chroma is
    # the handler called for each message.
    # NOTE(review): the second example reads "la violencia es sus películas";
    # "es" looks like a typo for "en" -- confirm with the author before editing
    # the user-facing text.
    chat_interface = gr.ChatInterface(fn=search_chroma,type="messages",
                                       examples=["Quien es Martin Scorsese", "Que relación tiene la violencia es sus películas?", "Háblame de Goodfellas"],
                                       #example_labels=['ej1','ej2','ej3'],
                                       #analytics_enabled=True
                                       )

# Launch the interface.
demo.launch()

* Running on local URL:  http://127.0.0.1:7913

To create a public link, set `share=True` in `launch()`.


