# 

# install packages

# Load API-key

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()  # loads the variables from the .env file into the process environment
api_key = os.getenv("MISTRAL_API_KEY")
if not api_key:
    # Fail here with a clear message instead of an opaque authentication
    # error in one of the later cells that use `api_key`.
    raise RuntimeError("MISTRAL_API_KEY is not set; add it to your .env file or environment.")

# Example to see if LLM is working


In [24]:
import os
from mistralai import Mistral

# Smoke test: a single chat completion confirms that the key and the SDK work.
client = Mistral(api_key=api_key)

model = "mistral-large-latest"
messages = [
    {
        "role": "user",
        "content": "Erstelle ein Rezept. Die Zutaten für das Rezept sind Kaviar, Udo-Nudeln, Parmesan, Strudelteig, Currypulver. Return the name, the ingredients and die Zubereitung in short JSON object.",
    }
]

# Request a JSON object as the response format; the reply text is printed as-is.
chat_response = client.chat.complete(
    model=model,
    messages=messages,
    response_format={"type": "json_object"},
)
print(chat_response.choices[0].message.content)

{
  "name": "Curry-Kaviar-Strudel mit Udo-Nudeln und Parmesan",
  "ingredients": [
    "Kaviar",
    "Udo-Nudeln",
    "Parmesan",
    "Strudelteig",
    "Currypulver"
  ],
  "preparation": [
    "Den Strudelteig ausrollen und mit Currypulver bestreuen.",
    "Kaviar gleichmäßig auf dem Strudelteig verteilen.",
    "Den Strudelteig aufrollen und in eine gefettete Backform legen.",
    "Im vorgeheizten Ofen bei 180°C für etwa 20-25 Minuten backen, bis er goldbraun ist.",
    "Während der Strudel im Ofen ist, die Udo-Nudeln nach Packungsanweisung kochen.",
    "Die gekochten Udo-Nudeln abgießen und mit Parmesan bestreuen.",
    "Den fertigen Strudel aus dem Ofen nehmen und in Scheiben schneiden.",
    "Die Strudelscheiben auf den Udo-Nudeln anrichten und sofort servieren."
  ]
}


# Example for RAG that is working

In [29]:
# https://docs.mistral.ai/guides/rag/

from mistralai import Mistral
import requests
import numpy as np
import faiss
import os
from getpass import getpass

#api_key= getpass("Type your API Key")
client = Mistral(api_key=api_key)

# Download the sample essay. Raise on HTTP errors so that an error page is never
# chunked and embedded as if it were the essay; the timeout keeps the cell from
# hanging forever on a stalled connection.
response = requests.get(
    'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt',
    timeout=30,
)
response.raise_for_status()
text = response.text

# Keep a local copy. The context manager closes the file even if the write fails,
# and the explicit UTF-8 encoding avoids the locale-dependent platform default.
with open('essay.txt', 'w', encoding='utf-8') as f:
    f.write(text)

# Split the essay into fixed-size character windows (the last one may be shorter).
chunk_size = 2048
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
print("chunk number is", len(chunks))

# def get_text_embedding(input):
#     try:
#         embeddings_batch_response = client.embeddings.create(
#             model="mistral-embed",
#             inputs=input
#         )
#         return embeddings_batch_response.data[0].embedding
#     #claude.ai did the following code
#     except Exception as e:  
#         print(f"Fehler beim Embedding: {e}")
#         return None

# # Prüfen auf None-Werte und filtern
# text_embeddings = [get_text_embedding(chunk) for chunk in chunks]
# text_embeddings = [emb for emb in text_embeddings if emb is not None]
# text_embeddings = np.array(text_embeddings)

# # Optional: Dimensionen prüfen
# print(f"Embedding-Dimensionen: {text_embeddings.shape}")
# The commented-out version above ran into the Mistral API rate limit; the version below waits between requests.

import time

def get_text_embedding(input, retry_delay=1, max_retries=8, max_delay=60):
    """Return the ``mistral-embed`` embedding for ``input``, or ``None`` on failure.

    Rate-limit errors are retried with exponential backoff, but only a bounded
    number of times, so a persistent rate limit can no longer block the
    notebook forever. Any other error is printed and ``None`` is returned.

    Args:
        input: Value forwarded as ``inputs`` to ``client.embeddings.create``.
            (The name shadows the builtin; it is kept so existing keyword
            callers continue to work.)
        retry_delay: Seconds to wait before the first retry. The wait doubles
            after every rate-limited attempt.
        max_retries: Maximum number of retries after the first attempt. Once
            exhausted, the last error is printed and ``None`` is returned.
        max_delay: Upper bound in seconds for the growing wait (never lowers
            the wait below ``retry_delay``).

    Returns:
        ``data[0].embedding`` of the response, or ``None`` if the request
        failed or the retry budget ran out.
    """
    delay = retry_delay
    for attempt in range(max_retries + 1):
        try:
            embeddings_batch_response = client.embeddings.create(
                model="mistral-embed",
                inputs=input
            )
            return embeddings_batch_response.data[0].embedding
        except Exception as e:
            # The message check is the original heuristic. The status-code check
            # assumes SDK errors expose `status_code` -- TODO confirm; `getattr`
            # keeps it harmless for exceptions without that attribute.
            rate_limited = (
                "rate limit exceeded" in str(e).lower()
                or getattr(e, "status_code", None) == 429
            )
            if rate_limited and attempt < max_retries:
                print(f"Rate limit erreicht, warte {delay} Sekunden...")
                time.sleep(delay)
                # Exponential backoff, capped at max_delay.
                delay = max(retry_delay, min(delay * 2, max_delay))
                continue
            print(f"Fehler beim Embedding: {e}")
            return None
    # Only reachable when max_retries is negative (no attempt was made).
    return None

# Embed every chunk, pausing briefly between requests to ease the rate limit.
# Chunks whose embedding failed are skipped in BOTH lists: later code looks a
# chunk up by its embedding's row index, so the two must stay aligned.
embedded_chunks = []
text_embeddings = []
for chunk in chunks:
    embedding = get_text_embedding(chunk)
    if embedding is not None:
        embedded_chunks.append(chunk)
        text_embeddings.append(embedding)
    time.sleep(0.5)  # extra pause between requests

if not text_embeddings:
    raise RuntimeError("No chunk could be embedded; check the API key and rate limits.")

if len(embedded_chunks) != len(chunks):
    print(f"Warning: {len(chunks) - len(embedded_chunks)} chunk(s) could not be embedded and were skipped.")

# Row i of `text_embeddings` now corresponds to `chunks[i]`.
chunks = embedded_chunks
# FAISS operates on 32-bit float matrices, so fix the dtype here.
text_embeddings = np.array(text_embeddings, dtype="float32")

37
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit erreicht, warte 1 Sekunden...
Rate limit er

In [36]:
import faiss

# Build an exact L2 index over the chunk embeddings (FAISS expects float32).
d = text_embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(np.ascontiguousarray(text_embeddings, dtype="float32"))

question = "What were the two main things the author worked on before college?"
question_embedding = get_text_embedding(question)
if question_embedding is None:
    # get_text_embedding already printed the underlying error.
    raise RuntimeError("Could not embed the question; see the error printed above.")
question_embeddings = np.array([question_embedding], dtype="float32")

D, I = index.search(question_embeddings, k=2) # distance, index
# FAISS pads the result with -1 when fewer than k vectors are indexed; skip
# those so they are not misread as "last chunk" by negative indexing.
retrieved_chunk = [chunks[i] for i in I.tolist()[0] if i != -1]

# Join the chunks as plain text; interpolating the list directly would put its
# Python repr (brackets, quotes, escaped newlines) into the prompt.
context = "\n\n".join(retrieved_chunk)

prompt = f"""
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""

def run_mistral(user_message, model="mistral-large-latest"):
    """Send ``user_message`` as a single user turn and return the reply text.

    Args:
        user_message: Content of the one user message in the conversation.
        model: Model name passed to ``client.chat.complete``.

    Returns:
        The ``message.content`` of the first choice in the chat response.
    """
    conversation = [{"role": "user", "content": user_message}]
    completion = client.chat.complete(model=model, messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content

# Bare last expression: the notebook shows the returned answer string as the cell output.
run_mistral(prompt)

'The two main things the author worked on before college were writing (specifically short stories) and programming.'

# Another working RAG example (Haystack)

In [None]:
# Example is from https://docs.mistral.ai/guides/rag/#rag-with-haystack

# changes were made with help of claude.ai
#  1. DynamicChatPromptBuilder replaced by ChatPromptBuilder
#  1.a. ChatPromptBuilder doesn't use runtime_variables or prompt_source parameters
#  1.b. Instead, it takes the messages directly as template
#  1.c. The template variables are passed directly to the run method
#  2.a. Changed the import from DynamicChatPromptBuilder to ChatPromptBuilder
#  2.b. Removed the runtime_variables parameter from the prompt builder initialization
#  2.c. Changed prompt_source to template in the pipeline run parameters
#  2.d. Added documents as a template variable in the run parameters
#  3. ChatPromptBuilder doesn't have a documents input socket like DynamicChatPromptBuilder did, so the documents are passed through the template_variables instead:
#  3.a. Removed the connection between retriever and prompt_builder since ChatPromptBuilder doesn't have a documents input
#  3.b. Added the documents directly in the template_variables
#  3.c. Added retriever query parameter in the pipeline run
#  4. InMemoryEmbeddingRetriever needs the query_embedding, not the raw query:
#  4.a. Removed the direct "retriever" input from the pipeline run
#  4.b. In the template_variables, we now get the documents by:
#  4.b.a. First getting the embedding for the question using text_embedder
#  4.b.b. Then using that embedding to retrieve documents from the retriever
#  4.b.c. Passing those retrieved documents to the template_variables
#  5. After an API change in Haystack, the message content is accessed with .text instead of .content.


from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.dataclasses import ChatMessage
from haystack.utils.auth import Secret

from haystack.components.builders import ChatPromptBuilder
from haystack.components.converters import TextFileToDocument
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack_integrations.components.embedders.mistral import MistralDocumentEmbedder, MistralTextEmbedder
from haystack_integrations.components.generators.mistral import MistralChatGenerator

# --- Indexing: load -> split -> embed -> store (run once, outside the query pipeline) ---
document_store = InMemoryDocumentStore()
docs = TextFileToDocument().run(sources=["essay.txt"])
split_docs = DocumentSplitter(split_by="passage", split_length=2).run(documents=docs["documents"])
embeddings = MistralDocumentEmbedder(api_key=Secret.from_token(api_key)).run(documents=split_docs["documents"])
DocumentWriter(document_store=document_store).run(documents=embeddings["documents"])

# --- Prompt template ---
chat_template = """Answer the following question based on the contents of the documents.\n
                Question: {{query}}\n
                Documents: 
                {% for document in documents %}
                    {{document.content}}
                {% endfor%}
                """
messages = [ChatMessage.from_user(chat_template)]

# --- Query components ---
text_embedder = MistralTextEmbedder(api_key=Secret.from_token(api_key))
retriever = InMemoryEmbeddingRetriever(document_store=document_store)
# NOTE: unlike change-log items 3-4 in this cell's header, the template is handed
# to ChatPromptBuilder at construction time. The builder then derives its inputs
# (`query`, `documents`) from the template variables, so the retriever can be
# connected to it directly. Previously the embedder and retriever were invoked by
# hand to fill `template_variables` AND again inside the pipeline, i.e. every
# question was embedded and retrieved twice.
prompt_builder = ChatPromptBuilder(template=messages)
llm = MistralChatGenerator(api_key=Secret.from_token(api_key), 
                          model='mistral-small')

# --- Pipeline: question -> embedding -> documents -> prompt -> answer ---
rag_pipeline = Pipeline()
rag_pipeline.add_component("text_embedder", text_embedder)
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("llm", llm)
rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder.prompt", "llm.messages")

question = "What were the two main things the author worked on before college?"
result = rag_pipeline.run(
    {
        "text_embedder": {"text": question},
        # `documents` arrives from the retriever through the pipeline connection.
        "prompt_builder": {"query": question},
        "llm": {"generation_kwargs": {"max_tokens": 225}},
    }
)
print(result["llm"]["replies"][0].text)  # Changed from .content to .text

Calculating embeddings: 3it [00:03,  1.22s/it]


The two main things the author worked on before college were writing and programming. He wrote short stories, although he admits they were awful, and he also wrote essays about various topics. On the programming side, he worked on spam filters and began to develop his interest in artificial intelligence.
