# Basic RAG from existing index

## Setup

In [1]:
import os

from openai import AzureOpenAI

import chromadb.utils.embedding_functions as embedding_functions
import chromadb
from chromadb.config import Settings

from llama_index.core import PromptTemplate
from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers import BaseSynthesizer

from IPython.display import Markdown, display

from openai import OpenAI, AzureOpenAI

from dotenv import load_dotenv
load_dotenv()

True

In [None]:
#%pip install --upgrade chromadb

In [2]:
openai_client = AzureOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),  
    api_version="2024-05-01-preview", # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference?WT.mc_id=AZ-MVP-5004796
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
)

openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.getenv("OPENAI_API_KEY"),
    model_name="text-embedding-ada-002",
    api_type="azure",
    api_version="2024-05-01-preview"
)

chroma_client_load = chromadb.PersistentClient(
    path="./landsforsøg/chromadb",
    settings=Settings(allow_reset=True)
)


## Load collection

In [28]:
collection_load = chroma_client_load.get_collection(name="landsforsoeg", embedding_function=openai_ef)

## Prompt

In [19]:
prompt = PromptTemplate(
    """You are a helpful assistant that answers questions about the content of documents and provides detailed expert advice. 
    You must provide your answer in the Danish language.
    If the answer contains multiple steps or points, provide the answer in a bullet format.
    Below the answer, the source of the answer should be provided including file_name and page_label, which are stored as metadata.
    ---------------------
    {context}
    ---------------------
    Given the context information and not prior knowledge, answer the query.
    Query: {query}
    Answer: 
    """,
)

## Queries

In [20]:
query = "hvem udgiver landsforsøgene?"
#query = "hvordan kan jeg bedst bekæmpe væselhale?"
query = "hvordan vælger jeg den bedste vårbygsort?"
#query = "hvad er reglerne for afstande ved etablering af husdyranlæg?"
#query = "Beskriv MT-Klovvask"

In [21]:
result = collection_load.query(query_texts=[query], n_results=5)
context = result["documents"][0]
message = prompt.format(query=query, context="\n\n".join(context))

In [23]:
stream = openai_client.chat.completions.create(
    #messages=[{"role": "user", "content": query}],
    model="gpt4",
    messages=[
        {"role": m["role"], "content": m["content"]}
        for m in [{"role": "user", "content": message}]#st.session_state.messages
    ],
    stream=True)

output = ""
for chunk in stream:
    if chunk.choices:  # Check if the list is not empty
        output += chunk.choices[0].delta.content or ""
    display(Markdown(f"{output}"), clear=True)

- Vælg en vårbygsort, der giver et stort og stabilt udbytte i flere års forsøg. 
- Vælg en sort, der har lav modtagelighed over for sygdommene meldug, bygrust, skoldplet og bygbladplet. 
- Vælg en sort, der har resistens mod havrecystenematoder. 
- Vælg en sort, der har en god stråstivhed, så der ikke er behov for vækstregulering. 
- Vælg en sort, der har en svag tendens til nedknækning af aks og strå. 
- Ved dyrkning af vårbyg til malt, bør der altid vælges en maltbygsort, der er accepteret af handelspartneren.

Kilde: 76 VÅRBYG SORTERVÅRBYG, 87 VÅRBYG SORTER, VÅRBYG, 83 VÅRBYG UKRUDT, 205 ØKOLOGISK DYRKNING VÅRBYG – SORTER OG DYRKNING, 
93 VÅRBYG SORTER

In [34]:
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter

vector_store = ChromaVectorStore(chroma_collection=collection_load)

llm = AzureOpenAI(
    model="gpt-4",
    deployment_name="gpt4",
    api_key=os.getenv("OPENAI_API_KEY"),  
    api_version=os.getenv("OPENAI_API_VERSION"), # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference?WT.mc_id=AZ-MVP-5004796
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
)

# You need to deploy your own embedding model as well as your own chat completion model
embedding = AzureOpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="text-embedding-ada-002",
    api_key=os.getenv("OPENAI_API_KEY"),  
    api_version=os.getenv("OPENAI_API_VERSION"), # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference?WT.mc_id=AZ-MVP-5004796
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
)

# Define the ingestion pipeline to add documents to vector store
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=512, chunk_overlap=20),
        embedding,
    ],
    vector_store=vector_store,
)

# Create index with the vector store and using the embedding model
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store, embed_model=embedding)

TypeError: AzureOpenAI.__init__() got an unexpected keyword argument 'deployment_name'

## Query engine

In [25]:
class RAGQueryEngine(CustomQueryEngine):
    """RAG String Query Engine."""

    retriever: BaseRetriever
    response_synthesizer: BaseSynthesizer
    llm: AzureOpenAI
    prompt: PromptTemplate

    def custom_query(self, query_str: str):
        nodes = self.retriever.retrieve(query_str)
        context_str = "\n\n".join([n.node.get_content(metadata_mode="all") for n in nodes])
        #context = qa_prompt.format(
        #    context_str=context_str, query_str=query_str)
        response = self.llm.complete(
            qprompt.format(context_str=context_str, query_str=query_str)
        )
                    
        return str(response) + "\n\n-------------------------\n\nKontekst:\n\n" + context_str


synthesizer = get_response_synthesizer(response_mode="compact")
query_engine = RAGQueryEngine(
    retriever=index.as_retriever(),
    response_synthesizer=synthesizer,
    llm=llm,
    prompt=prompt,
)

NameError: name 'index' is not defined