In [None]:
from dotenv import load_dotenv
from datetime import datetime
from langchain_openai import AzureChatOpenAI
from langchain.schema.messages import HumanMessage
from langchain.prompts import ChatPromptTemplate
import json
import requests

In [2]:
load_dotenv()

True

# Set up LLM functions

In [94]:
def generate_prompt(question: str, search_results: list[dict]) -> list:
    """Build the chat messages sent to the LLM for a retrieval-augmented answer.

    Args:
        question: The user's question; fills the ``{query}`` slot of the human message.
        search_results: Retrieved FAQ records. Each record must provide the
            ``section``, ``question`` and ``text`` keys; a missing key raises ``KeyError``.

    Returns:
        The messages produced by ``ChatPromptTemplate.format_messages``. The system
        message carries the current local timestamp and the formatted context.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system", """
                You're a course teaching assistant, consider the current date: {current_date}. Answer the QUESTION based on the CONTEXT from the FAQ database
                Use only the facts from the CONTEXT when answering the QUESTION.
                CONTEXT: {context}"""
            ),
            ("human", "{query}"),
            # NOTE(review): no agent_scratchpad value is passed to format_messages below;
            # presumably the placeholder is optional -- confirm against the LangChain docs.
            ("placeholder", "{agent_scratchpad}"),
        ]
    )
    # One entry per record, each followed by a blank line; an empty result list yields ''.
    context = "".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )
    current_date = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return prompt.format_messages(query=question, context=context, current_date=current_date)

In [5]:
def get_llm_client(deployment_name: str = "gpt-4o-mini",
                   model_name: str = "gpt-4o-mini") -> AzureChatOpenAI:
    """Create the Azure OpenAI chat client used to answer questions.

    Args:
        deployment_name: Azure deployment to call. Defaults to ``"gpt-4o-mini"``.
        model_name: Model name reported to the client. Defaults to ``"gpt-4o-mini"``.

    Returns:
        A new ``AzureChatOpenAI`` instance.

    No endpoint or credentials are passed here; presumably they come from the
    environment populated by ``load_dotenv()`` -- confirm for your setup.
    """
    return AzureChatOpenAI(
        deployment_name=deployment_name,
        model_name=model_name,
    )

In [6]:
def llm_execute(llm_client: AzureChatOpenAI, prompt: list[str]) -> str:
    """Send ``prompt`` to the chat model and return the reply's content.

    Args:
        llm_client: Client whose ``invoke`` method is called with ``prompt``.
        prompt: The messages to send, passed to ``invoke`` unchanged.

    Returns:
        The ``content`` attribute of the object returned by ``invoke``.
    """
    return llm_client.invoke(prompt).content

# Set up vector search functions

In [17]:

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# A timeout keeps the cell from hanging forever, and raise_for_status() stops
# an HTTP error page from being handed to .json() as if it were the dataset.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into one list of FAQ records, tagging each
# record with its course name. Records are copied so documents_raw stays
# untouched and the cell can be re-run safely.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [None]:
from qdrant_client import QdrantClient, models

In [None]:

def get_qdrant_client(url: str = "http://localhost:6333") -> QdrantClient:
    """Create a client for a Qdrant server.

    Args:
        url: Base URL of the Qdrant instance. Defaults to ``"http://localhost:6333"``.

    Returns:
        A new ``QdrantClient`` configured with ``url``.
    """
    return QdrantClient(url=url)

  from .autonotebook import tqdm as notebook_tqdm


In [35]:
def create_qdrant_collection(qdrant_client: QdrantClient,
                             collection_name: str,
                             embedding_dim: int,
                             distance: models.Distance = models.Distance.COSINE,
                             multivector: bool = False) -> None:
    """Create a single-vector Qdrant collection unless it already exists.

    Args:
        qdrant_client: Client used to query and create the collection.
        collection_name: Name of the collection to create.
        embedding_dim: Size of the vectors stored in the collection.
        distance: Distance metric for the collection. Defaults to cosine.
        multivector: Multivector collections are not implemented; when True
            the function prints a notice and creates nothing.

    Returns:
        ``None`` when nothing is created (the collection exists, or
        ``multivector`` is requested); otherwise the value returned by
        ``qdrant_client.create_collection`` is passed through.
    """
    if qdrant_client.collection_exists(collection_name):
        print(f"Collection {collection_name} already exists")
        return None

    if multivector:
        print("Multivector collection is not supported yet")
        return None

    # The keyword must be `distance`; the previous spelling `distnance` meant
    # the metric was never passed to VectorParams.
    vectors_config = models.VectorParams(
        size=embedding_dim,
        distance=distance,
    )
    return qdrant_client.create_collection(
        collection_name=collection_name,
        vectors_config=vectors_config,
    )


In [39]:
def get_points(input_data: list[dict], model_handle: str) -> list[models.PointStruct]:
    """Convert FAQ records into Qdrant points ready for upserting.

    Args:
        input_data: Records to convert. Each record must provide the
            ``question`` and ``text`` keys; the whole record becomes the payload.
        model_handle: Embedding model name attached to each ``models.Document``.

    Returns:
        One ``models.PointStruct`` per record, with the record's position in
        ``input_data`` as its id.
    """
    # Iterate the argument, not the notebook-global `documents`, so the
    # function works for whatever list the caller passes in.
    return [
        models.PointStruct(
            id=i,
            # The text to embed is the question and its answer together.
            vector=models.Document(
                text=doc['question'] + ' ' + doc['text'],
                model=model_handle,
            ),
            payload=doc,
        )
        for i, doc in enumerate(input_data)
    ]


In [73]:
def vector_search(qdrant_client: QdrantClient,
                  collection_name: str,
                  model_handle: str,
                  question: str,
                  course_target: str):
    """Return the payloads of the best-matching points for ``question`` in one course.

    Args:
        qdrant_client: Client used to run the query.
        collection_name: Collection to search.
        model_handle: Embedding model name attached to the query document.
        question: Text to search for.
        course_target: Only points whose ``course`` payload field equals this
            value are considered.

    Returns:
        A list with the payload of each returned point (at most 5).
    """
    print('vector_search is used')

    query_document = models.Document(text=question, model=model_handle)
    # Restrict the search to a single course.
    course_filter = models.Filter(
        must=[
            models.FieldCondition(
                key="course",
                match=models.MatchValue(value=course_target),
            )
        ]
    )

    response = qdrant_client.query_points(
        collection_name=collection_name,
        query=query_document,
        query_filter=course_filter,
        limit=5,
        with_payload=True,
    )

    return [hit.payload for hit in response.points]

# Set up RAG

In [81]:
def rag(llm_client: AzureChatOpenAI, query: str, results: list[dict]) -> str:
    """Answer ``query`` with the LLM, using ``results`` as the context.

    Args:
        llm_client: Chat client handed to ``llm_execute``.
        query: The user's question.
        results: Retrieved records handed to ``generate_prompt``.

    Returns:
        The value returned by ``llm_execute`` for the generated prompt.
    """
    messages = generate_prompt(query, results)
    return llm_execute(llm_client, messages)

# Execute

In [82]:
# Chat client passed to rag() in the cells below.
llm_client = get_llm_client()

In [None]:
# Embedding model name handed to get_points() and vector_search().
model_handle = 'jinaai/jina-embeddings-v2-small-en'
collection_name = 'rag-faq'
# Course value passed to vector_search() as its course filter.
course_match = 'data-engineering-zoomcamp'
# Vector size for the collection; presumably the output dimension of
# model_handle -- TODO confirm against the model card.
EMBEDDING_DIM = 512
qdrant_client = get_qdrant_client()
create_qdrant_collection(qdrant_client, collection_name, EMBEDDING_DIM, models.Distance.COSINE)
# Build the points for the downloaded FAQ documents.
vs_points = get_points(documents, model_handle)

In [None]:
# Send the prepared points to the collection.
qdrant_client.upsert(
    collection_name=collection_name,
    points=vs_points
)

In [None]:
# Index the "course" payload field, the same field vector_search() filters on.
qdrant_client.create_payload_index(
    collection_name=collection_name,
    field_name="course",
    field_schema="keyword" # exact matching on string metadata fields
)

In [83]:
# Alternative question to try:
# question = 'I just discovered the course. Can I still join it?'
question = 'how do I run kafka?'

In [84]:
# Retrieve the payloads of the closest FAQ entries for the selected course.
results = vector_search(qdrant_client, collection_name, model_handle, question, course_match)

vector_search is used


In [92]:
# Display the question that is about to be answered.
question

'how do I run kafka?'

In [95]:
# Generate the answer from the retrieved results.
rag(llm_client, question, results)

'To run Kafka, ensure that your Kafka broker Docker container is working. You can confirm this by using the command `docker ps` to check the status of your containers. If the Kafka broker is not running, navigate to the directory containing your Docker Compose YAML file and execute `docker compose up -d` to start all the instances.\n\nOnce your Kafka broker is running, you can run producer, consumer, or other Java scripts (like JsonProducer.java or JsonConsumer.java) by using the following command in your project directory:\n\n```bash\njava -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n```\n\nReplace `<jar_name>` with the actual name of your jar file. Make sure the configuration for the Kafka server URL and secrets are correctly updated in your scripts to ensure they can connect to the Kafka broker successfully.'

## RAG with Vector Search

In [45]:
# rag() takes the client, the question and the retrieved results, so the
# vector search runs here and its results are passed in explicitly.
kafka_question = 'how do I run kafka?'
rag(
    llm_client,
    kafka_question,
    vector_search(qdrant_client, collection_name, model_handle, kafka_question, course_match),
)

vector_search is used


'To run Kafka, you need to follow these steps based on your scripts:\n\n1. Make sure your Kafka broker is running. You can confirm this by running `docker ps`. If the broker is not active, navigate to the folder with your docker-compose yaml file and run `docker compose up -d` to start all instances.\n\n2. In your project directory, to run the producer, use the following command:\n   ```\n   java -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n   ```\n\n3. Ensure that the `StreamsConfig.BOOTSTRAP_SERVERS_CONFIG` in your Java scripts (e.g., JsonProducer.java, JsonConsumer.java) is set to the correct server URL. Also, verify that the cluster key and secrets in `src/main/java/org/example/Secrets.java` are updated with the correct values.\n\nBy following these steps, you should be able to run Kafka successfully.'