In [2]:
# Calls Haystack's telemetry hook with the id 27
# (presumably the number of the tutorial this notebook follows — confirm).
from haystack.telemetry import tutorial_running

tutorial_running(27)

#### Initializing the DocumentStore
- Stores the data in memory (fine for small projects; for larger projects use a vector database such as Qdrant or Weaviate).
- Indexes each document together with its embedding.

In [3]:
from haystack.document_stores.in_memory import InMemoryDocumentStore

# In-memory store: a later cell writes the embedded documents into it and the
# embedding retriever is constructed on top of it.
document_store = InMemoryDocumentStore()

#### Adding documents/dataset

In [4]:
from datasets import load_dataset

# Load the "train" split of the seven-wonders dataset.
dataset = load_dataset(
    path="bilgeyucel/seven-wonders",
    split="train",
)

In [5]:
# Show the first record to see which fields each row carries.
dataset[0]

{'id': 'b3de1a673c1eb2876585405395a10c3d',
 'content': 'The Colossus of Rhodes (Ancient Greek: ὁ Κολοσσὸς Ῥόδιος, romanized:\xa0ho Kolossòs Rhódios Greek: Κολοσσός της Ρόδου, romanized:\xa0Kolossós tes Rhódou)[a] was a statue of the Greek sun-god Helios, erected in the city of Rhodes, on the Greek island of the same name, by Chares of Lindos in 280\xa0BC. One of the Seven Wonders of the Ancient World, it was constructed to celebrate the successful defence of Rhodes city against an attack by Demetrius Poliorcetes, who had besieged it for a year with a large army and navy.\nAccording to most contemporary descriptions, the Colossus stood approximately 70 cubits, or 33 metres (108 feet) high – approximately the height of the modern Statue of Liberty from feet to crown – making it the tallest statue in the ancient world.[2] It collapsed during the earthquake of 226 BC, although parts of it were preserved. In accordance with a certain oracle, the Rhodians did not build it again.[3] John Mala

In [6]:
from haystack import Document

# Wrap every dataset row in a Document, carrying over its text and its metadata.
docs = [
    Document(content=row["content"], meta=row["meta"])
    for row in dataset
]

In [7]:
# Show the first converted Document as a sanity check.
docs[0]

Document(id=75fd8474f2c88337f7e0dad69eba0f24ba293cb06693fb746ec403df01a1c0c5, content: 'The Colossus of Rhodes (Ancient Greek: ὁ Κολοσσὸς Ῥόδιος, romanized: ho Kolossòs Rhódios Greek: Κολο...', meta: {'url': 'https://en.wikipedia.org/wiki/Colossus_of_Rhodes', '_split_id': 0})

#### Initialize a Document Embedder
- Creates an embedding for each document.

In [8]:
from haystack.components.embedders import SentenceTransformersDocumentEmbedder as STDocEmb

# Document-side embedder; the query-side text embedder defined in a later cell
# is configured with the same model name.
doc_embedder = STDocEmb(model="sentence-transformers/all-MiniLM-L6-v2")

# Download/load the model up front so the embedder is ready before it is run.
doc_embedder.warm_up()



#### Embedding documents

In [9]:
# Embed the documents and write them to the document store.
from haystack.document_stores.types import DuplicatePolicy

embedded_docs = doc_embedder.run(documents=docs)

# With the default policy the in-memory store rejects documents whose ids are
# already present, so re-running this cell would raise a duplicate-document
# error. OVERWRITE makes the cell safe to re-run; the call still returns the
# number of documents written.
document_store.write_documents(embedded_docs['documents'], policy=DuplicatePolicy.OVERWRITE)

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

151

#### Text Embedder for Queries

In [10]:
from haystack.components.embedders import SentenceTransformersTextEmbedder as STTextEmb

# Query-side embedder; it is given the same model name as the document embedder
# so that queries and documents are embedded by the same model.
text_embedder = STTextEmb(model="sentence-transformers/all-MiniLM-L6-v2")

#### Initialize Retriever

In [11]:
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever

# Embedding-based retriever that reads from the in-memory document store.
retriever = InMemoryEmbeddingRetriever(document_store=document_store)

#### Define a Template Prompt

In [12]:
from haystack.components.builders import PromptBuilder

# Prompt template: emits the content of every entry in `documents` as context,
# then the `query` as the question, and ends with an open "answer:" line.
template = """
Given the following information, answer the question.

Context: 
{% for doc in documents %}
    {{doc.content}}
{% endfor %}

question: {{query}}
answer:
"""

# `documents` is connected from the retriever in a later cell; `query` is
# supplied when the pipeline is run.
prompt_builder = PromptBuilder(template=template)

In [1]:
# Create the (still empty) pipeline and the LLM generator; components are added
# to the pipeline and connected in the following cells.
from haystack import Pipeline
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
from haystack.utils import Secret
import os
from dotenv import load_dotenv
from haystack.components.generators import HuggingFaceAPIGenerator

from huggingface_hub import HfApi

# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

hf_auth_key = os.environ.get("HF_KEY")
# NOTE: despite its name this constant holds the model endpoint URL, not a key.
# The name is kept so any other cell that references it keeps working.
SERVER_LESS_API_KEY = "https://api-inference.huggingface.co/models/google/gemma-2b-it"

# Never print the token itself: cell output is stored in the notebook file and
# would leak the secret to anyone the notebook is shared with.
if not hf_auth_key:
    raise RuntimeError("HF_KEY is not set; define it in the environment or in a .env file.")
print("HF_KEY loaded")

pipe = Pipeline()
# NOTE(review): 'url' is probably ignored for api_type="serverless_inference_api"
# (only 'model' should be needed) — confirm before removing it.
generator = HuggingFaceAPIGenerator(
    api_type="serverless_inference_api",
    api_params={"model": 'google/gemma-2b-it', 'url': SERVER_LESS_API_KEY},
    token=Secret.from_token(hf_auth_key),
)

KeyboardInterrupt: 

In [14]:
# Register each component under the name used by the connections and run inputs.
pipe.add_component(name='text_embedder', instance=text_embedder)
pipe.add_component(name='retriever', instance=retriever)
pipe.add_component(name='prompt_builder', instance=prompt_builder)
pipe.add_component(name='llm', instance=generator)

In [15]:
# Wire the components together, spelling out both sockets of every connection:
# query embedding -> retriever, retrieved documents -> prompt, prompt -> LLM.
pipe.connect('text_embedder.embedding', 'retriever.query_embedding')
pipe.connect('retriever.documents', 'prompt_builder.documents')
pipe.connect('prompt_builder.prompt', 'llm.prompt')

<haystack.core.pipeline.pipeline.Pipeline object at 0x7f92d0f22790>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: HuggingFaceAPIGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [16]:
def ask_pipeline(question):
    """Run the RAG pipeline for a single question.

    The same string is passed to the text embedder (used for retrieval) and to
    the prompt builder (used in the prompt), so the two inputs cannot drift
    apart the way two separately typed literals can.

    Args:
        question: The natural-language question to answer.

    Returns:
        The dict returned by ``pipe.run``; the generator's result is under
        the ``'llm'`` key.
    """
    return pipe.run({
        'text_embedder': {'text': question},
        'prompt_builder': {'query': question},
    })


output = ask_pipeline('What are the seven wonders of the world?')

print(output['llm'])

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'replies': [' The seven wonders of the world are:\n\n1. The Great Pyramid of Giza\n2. The Hanging Gardens of Babylon\n3. The Lighthouse of Alexandria\n4. The Statue of Zeus by the Alpheus\n5. The Colossus of Helius\n6. The Hanging Gardens of Semiramis\n7. The Mausoleum at Halicarnassus'], 'meta': [{'model': 'google/gemma-2b-it', 'finish_reason': 'eos_token', 'usage': {'completion_tokens': 74}}]}


In [18]:
# Second question: the same string feeds both retrieval and the prompt.
q2 = 'What does Rhodes Statue look like'
output = pipe.run(
    dict(
        text_embedder=dict(text=q2),
        prompt_builder=dict(query=q2),
    )
)

print(output['llm'])

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'replies': [' The statue of Zeus at Rhodes was a colossal statue of bronze and ivory, standing approximately 70 cubits (32 meters) high.'], 'meta': [{'model': 'google/gemma-2b-it', 'finish_reason': 'eos_token', 'usage': {'completion_tokens': 30}}]}
