In [110]:
import os
import warnings

warnings.filterwarnings('ignore')

from haystack import Pipeline
from haystack.utils.auth import Secret
from haystack.components.builders import PromptBuilder
from haystack.components.converters import HTMLToDocument
from haystack.components.fetchers import LinkContentFetcher
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.preprocessors.document_splitter import DocumentSplitter


from haystack_integrations.components.embedders.cohere import CohereDocumentEmbedder, CohereTextEmbedder


from haystack_integrations.components.generators.ollama import OllamaGenerator

from haystack_integrations.components.embedders.ollama import OllamaDocumentEmbedder, OllamaTextEmbedder


In [111]:
document_store = InMemoryDocumentStore()

fetcher = LinkContentFetcher()
converter = HTMLToDocument()
splitter = DocumentSplitter()
embedder = OllamaDocumentEmbedder()
writer = DocumentWriter(document_store=document_store)

indexing = Pipeline()
indexing.add_component("fetcher", fetcher)
indexing.add_component("converter", converter)
indexing.add_component("splitter", splitter)
indexing.add_component("embedder", embedder)
indexing.add_component("writer", writer)

indexing.connect("fetcher.streams", "converter.sources")
indexing.connect("converter.documents", "splitter")
indexing.connect("splitter", "embedder")
indexing.connect("embedder", "writer")

<haystack.core.pipeline.pipeline.Pipeline object at 0x2a4151bd0>
🚅 Components
  - fetcher: LinkContentFetcher
  - converter: HTMLToDocument
  - splitter: DocumentSplitter
  - embedder: OllamaDocumentEmbedder
  - writer: DocumentWriter
🛤️ Connections
  - fetcher.streams -> converter.sources (List[ByteStream])
  - converter.documents -> splitter.documents (List[Document])
  - splitter.documents -> embedder.documents (List[Document])
  - embedder.documents -> writer.documents (List[Document])

In [112]:
embedder

<haystack_integrations.components.embedders.ollama.document_embedder.OllamaDocumentEmbedder object at 0x2a41534f0>
embedder
Inputs:
  - documents: List[Document]
  - generation_kwargs: Optional[Dict[str, Any]]
Outputs:
  - documents: List[Document]
  - meta: Dict[str, Any]

In [116]:
indexing.run(
{
"fetcher": {
"urls": [
"https://www.cyclingweekly.com/racing/this-ones-for-my-grandma-michael-matthews-rounds-off-emotional-week-with-third-gp-quebec-victory",

]
}
}
)

Calculating embeddings: 100%|██████████| 6/6 [00:00<00:00,  8.82it/s]


{'embedder': {'meta': {'model': 'nomic-embed-text'}},
 'writer': {'documents_written': 6}}

In [117]:
prompt = """
Answer the question based on the provided context. You will receive summaries of cycling-races. The shortest time is the best time.
Context:
{% for doc in documents %}
{{ doc.content }} 
{% endfor %}
Question: {{ query }}
"""

query_embedder = CohereTextEmbedder(model="embed-english-v3.0", api_base_url=os.getenv("CO_API_URL"))
query_embedder = OllamaTextEmbedder()

retriever = InMemoryEmbeddingRetriever(document_store=document_store)
prompt_builder = PromptBuilder(template=prompt)

generator = OllamaGenerator(model="llama3.1:latest",
url = "http://localhost:11434",
generation_kwargs={
  #"num_predict": 100,
 # "temperature": 0.9,
  })

rag = Pipeline()
rag.add_component("query_embedder", query_embedder)
rag.add_component("retriever", retriever)
rag.add_component("prompt", prompt_builder)
rag.add_component("generator", generator)

rag.connect("query_embedder.embedding", "retriever.query_embedding")
rag.connect("retriever.documents", "prompt.documents")
rag.connect("prompt", "generator")



<haystack.core.pipeline.pipeline.Pipeline object at 0x2a41fc670>
🚅 Components
  - query_embedder: OllamaTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt: PromptBuilder
  - generator: OllamaGenerator
🛤️ Connections
  - query_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt.documents (List[Document])
  - prompt.prompt -> generator.prompt (str)

In [118]:
# Create a query to search the document store
question = "Describe the stage briefly and mark 3 top performers"

result = rag.run(
    {
        "query_embedder": {"text": question},
        "retriever": {"top_k": 10},
        "prompt": {"query": question},
    }
)

print(result["generator"]["replies"][0])

Here is a brief description of the stage and the 3 top performers:

**Stage Description:** The Grands Prix Cyclistes de Québec (GP Québec) stage saw Tadej Pogačar and Arnaud De Lie pull away from the front of the peloton on the final climb, but their move was eventually reeled back in. Michael Matthews then unleashed a powerful sprint from range to take his third victory at the race.

**Top 3 Performers:**

1. **Michael Matthews**: The Australian rider took his third GP Québec victory, dedicating it to his grandmother who passed away the week before.
2. **Tadej Pogačar**: The Slovenian rider finished seventh on his return to WorldTour action after winning his third Tour de France title in July.
3. **Biniam Girmay**: The Eritrean rider took second place behind Matthews, with Matthews's Jayco teammates celebrating their dominant win together.


In [119]:
result

{'query_embedder': {'meta': {'model': 'nomic-embed-text'}},
 'generator': {'replies': ["Here is a brief description of the stage and the 3 top performers:\n\n**Stage Description:** The Grands Prix Cyclistes de Québec (GP Québec) stage saw Tadej Pogačar and Arnaud De Lie pull away from the front of the peloton on the final climb, but their move was eventually reeled back in. Michael Matthews then unleashed a powerful sprint from range to take his third victory at the race.\n\n**Top 3 Performers:**\n\n1. **Michael Matthews**: The Australian rider took his third GP Québec victory, dedicating it to his grandmother who passed away the week before.\n2. **Tadej Pogačar**: The Slovenian rider finished seventh on his return to WorldTour action after winning his third Tour de France title in July.\n3. **Biniam Girmay**: The Eritrean rider took second place behind Matthews, with Matthews's Jayco teammates celebrating their dominant win together."],
  'meta': [{'model': 'llama3.1:latest',
    'cre

In [21]:
from haystack_integrations.components.embedders.ollama import OllamaTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever

query_embedder = OllamaTextEmbedder()
retriever = InMemoryEmbeddingRetriever(document_store=document_store)

document_search = Pipeline()

document_search.add_component("query_embedder", query_embedder)
document_search.add_component("retriever", retriever)

document_search.connect("query_embedder.embedding", "retriever.query_embedding")

<haystack.core.pipeline.pipeline.Pipeline object at 0x290c71ed0>
🚅 Components
  - query_embedder: OllamaTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
🛤️ Connections
  - query_embedder.embedding -> retriever.query_embedding (List[float])

In [22]:
question = "Who won with the largest margin?"

result = rag.run(
{
"query_embedder": {"text": question},
"retriever": {"top_k": 10},
"prompt": {"query": question},
}
)

print(result["generator"]["replies"][0])

The answer is not explicitly stated in the text you provided, which lists riders and their teams. To find out who won with the largest margin, I would need more information about a specific race or event where these riders participated.

If you provide me with context (e.g., the name of a particular cycling competition), I'll do my best to help you determine who won with the largest margin based on available data.


In [4]:
result

 'generator': {'replies': ['I can’t answer that.'],
  'meta': [{'model': 'llama3.1:latest',
    'created_at': '2024-10-01T14:29:52.031262Z',
    'done': True,
    'done_reason': 'stop',
    'context': [128006,
     882,
     128007,
     1432,
     16533,
     279,
     3488,
     3196,
     389,
     279,
     3984,
     2317,
     627,
     2014,
     1473,
     60704,
     11500,
     560,
     59136,
     3925,
     765,
     220,
     2366,
     18,
     14002,
     765,
     5256,
     1160,
     198,
     19962,
     220,
     16,
     765,
     22891,
     220,
     17,
     765,
     22891,
     220,
     18,
     765,
     22891,
     220,
     19,
     765,
     22891,
     220,
     20,
     765,
     22891,
     220,
     21,
     765,
     22891,
     220,
     22,
     765,
     22891,
     220,
     23,
     198,
     2366,
     19,
     12366,
     11500,
     560,
     6149,
     13,
     220,
     23,
     18094,
     18702,
     220,
     16,
     11,
     8610,
   