In [18]:
from haystack_integrations.components.generators.ollama import OllamaGenerator
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever

from haystack import Document,Pipeline
from haystack.components.builders.prompt_builder import PromptBuilder


In [19]:
# --- Minimal RAG pipeline: BM25 retrieval -> prompt building -> Ollama LLM ---

# LLM served by a local Ollama instance.
generator = OllamaGenerator(
    model="llama3.1",
    url="http://localhost:11434",
    generation_kwargs={
        "num_predict": 100,
        "temperature": 0.9,
    },
)

# Tiny in-memory corpus used as retrieval context for the demo question.
document_store = InMemoryDocumentStore()
document_store.write_documents(
    [
        Document(content="I like fish"),
        Document(content="My favorite color is blue"),
        Document(content="My favorite sport is F1"),
    ]
)

# Keyword (BM25) retriever over the documents written above.
retriever = InMemoryBM25Retriever(document_store=document_store)

# Jinja template rendered by the PromptBuilder. `documents` comes from the
# retriever and `query` is supplied at run time.
# The query is inserted verbatim: no "?" is appended here, because callers
# already pass a complete question (a hard-coded "?" produced "...sport??").
template = """
Given the following information, answer the question.
Context: 
{% for document in documents %}

    {{ document.content }}

{% endfor %}
Question: {{ query }}
"""

pipe = Pipeline()

pipe.add_component("retriever", retriever)
pipe.add_component("llm", generator)
pipe.add_component("prompt_builder", PromptBuilder(template=template))

# Wire retrieved documents into the prompt, then the rendered prompt into the LLM.
pipe.connect("retriever.documents", "prompt_builder.documents")
# Left as the last expression so the notebook displays the pipeline summary.
pipe.connect("prompt_builder.prompt", "llm.prompt")

<haystack.core.pipeline.pipeline.Pipeline object at 0x0000021AF334DD60>
🚅 Components
  - retriever: InMemoryBM25Retriever
  - llm: OllamaGenerator
  - prompt_builder: PromptBuilder
🛤️ Connections
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [20]:
# Ask one question. The same text is sent to the prompt builder (to fill the
# template) and to the retriever (to look up context documents).
query = "What is my favorite sport?"

pipeline_inputs = {
    "prompt_builder": {"query": query},
    "retriever": {"query": query},
}
result = pipe.run(pipeline_inputs)

# Show only the first reply produced by the "llm" component.
first_reply = result["llm"]["replies"][0]
print(first_reply)

Your favorite sport is Formula 1 (F1).


In [13]:
from haystack.components.preprocessors import DocumentSplitter
from pathlib import Path
from haystack.components.converters import PyPDFToDocument
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.preprocessors import DocumentCleaner
from haystack.components.writers import DocumentWriter
from haystack_integrations.document_stores.chroma import ChromaDocumentStore
from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder
from haystack import Pipeline

# --- Indexing pipeline: PDF -> clean -> split -> embed -> write to Chroma ---

# Imported here so this cell is self-contained for the splitter helper below.
import re
from typing import List

# Zero-width match at the start of a line that begins with a rule identifier
# such as "A 1.2" or "EV 4.1.1" (1-2 capital letters, optional space, dotted number).
# NOTE(review): assumes the rules PDF numbers its subsections this way and that
# each heading starts on its own line in the cleaned text -- confirm against the
# extracted text and adjust the pattern if needed.
_SUBSECTION_START = re.compile(
    r"^(?=[ \t]*[A-Z]{1,2}[ \t\xa0]?\d+(?:\.\d+)+\b)",
    re.MULTILINE,
)


def split_at_subsections(text: str) -> List[str]:
    """Split ``text`` into chunks that each start at a subsection identifier.

    Text before the first identifier is kept as its own chunk. Chunks that
    contain only whitespace are dropped. If no identifier is found, the
    whole text is returned as a single chunk.

    Args:
        text: Full document text to split.

    Returns:
        The non-blank chunks, in their original order.
    """
    chunks = _SUBSECTION_START.split(text)
    return [chunk for chunk in chunks if chunk.strip()]


converter = PyPDFToDocument()
# `split_by="function"` requires a callable; the original cell left
# `splitting_function=` empty, which is a syntax error.
# NOTE(review): split_length=1 is meant to keep one function-produced chunk per
# document on Haystack versions that regroup units by `split_length` -- confirm
# against the installed version.
splitter = DocumentSplitter(
    split_by="function",
    splitting_function=split_at_subsections,
    split_length=1,
)
cleaner = DocumentCleaner()
# Persistent Chroma store under ./db. Note: this rebinds `document_store`,
# which the earlier RAG cell bound to the in-memory store.
document_store = ChromaDocumentStore(persist_path="db")
writer = DocumentWriter(document_store=document_store)

# Default embedder settings; the recorded run reports model 'nomic-embed-text'.
embedder = OllamaDocumentEmbedder()

document_encoder = Pipeline()
document_encoder.add_component("converter", converter)
document_encoder.add_component("cleaner", cleaner)
document_encoder.add_component("splitter", splitter)
document_encoder.add_component("embedder", embedder)
document_encoder.add_component("writer", writer)

# Linear flow: each stage passes its documents to the next one.
document_encoder.connect("converter", "cleaner")
document_encoder.connect("cleaner", "splitter")
document_encoder.connect("splitter", "embedder")
# Left as the last expression so the notebook displays the pipeline summary.
document_encoder.connect("embedder", "writer")

<haystack.core.pipeline.pipeline.Pipeline object at 0x000002318084F830>
🚅 Components
  - converter: PyPDFToDocument
  - cleaner: DocumentCleaner
  - splitter: DocumentSplitter
  - embedder: OllamaDocumentEmbedder
  - writer: DocumentWriter
🛤️ Connections
  - converter.documents -> cleaner.documents (List[Document])
  - cleaner.documents -> splitter.documents (List[Document])
  - splitter.documents -> embedder.documents (List[Document])
  - embedder.documents -> writer.documents (List[Document])

In [14]:
# Run the indexing pipeline on the rules PDF; only the converter needs an input.
rules_pdf = Path("FS-Rules_2025_v1.0.pdf")
document_encoder.run({"converter": {"sources": [rules_pdf]}})

Calculating embeddings: 100%|██████████| 1/1 [00:04<00:00,  4.16s/it]


{'embedder': {'meta': {'model': 'nomic-embed-text'}},
 'writer': {'documents_written': 23}}

# TODO: create a document with the short things
# TODO: prep the regs (regulations)
# TODO: split at subsections?