In [None]:
import minsearch
import niquests
import orjson
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

# Fix torch's global random seed so repeated runs start from the same RNG state.
torch.random.manual_seed(42)

## MinSearch (Toy Search)


In [None]:
# Download the FAQ dump: a JSON list of courses, each carrying its own list of documents.
docs_url = "https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json"
docs_response = niquests.get(docs_url)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
assert docs_response.content is not None
documents_raw = orjson.loads(docs_response.content)

# Flatten into a single list of documents, tagging each one with the course it came from.
documents = []

for course in documents_raw:
    course_name = course["course"]

    for doc in course["documents"]:
        doc["course"] = course_name
        documents.append(doc)

# "question", "text" and "section" are registered as text fields; "course" is registered
# as a keyword field so that searches can filter on it.
index = minsearch.Index(
    text_fields=["question", "text", "section"], keyword_fields=["course"]
)
index.fit(documents)

## Load HuggingFace Model


In [None]:
# Single source of truth for the checkpoint so the model and tokenizer cannot drift apart.
MODEL_NAME = "mistralai/Mistral-7B-v0.1"

# Request 4-bit loading of the weights and place the model on the CUDA device.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="cuda",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")

# NOTE(review): llm() sends chat-style messages through this pipeline, which relies on the
# tokenizer providing a chat template -- confirm this checkpoint ships one.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


## RAG Pipeline: Search, Prompt, Generate


In [None]:
def search(query, course="data-engineering-zoomcamp", num_results=5):
    """Look up FAQ entries matching ``query`` in the module-level ``index``.

    Args:
        query: Free-text question to search for.
        course: Value the ``course`` field is filtered on. Defaults to the
            data-engineering zoomcamp, which was previously hard-coded.
        num_results: Maximum number of hits requested from the index.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # Boost matches in "question" (3.0) and damp matches in "section" (0.5).
    boost = {"question": 3.0, "section": 0.5}

    return index.search(
        query=query,
        filter_dict={"course": course},
        boost_dict=boost,
        num_results=num_results,
    )

In [None]:
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of mappings with ``section``, ``question``
            and ``text`` keys; each becomes one entry of the CONTEXT.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.
If the CONTEXT doesn't contain the answer, output NONE

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    # One blank-line-terminated entry per hit; the final strip() drops the last separator.
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]
    return template.format(question=query, context="".join(entries)).strip()

In [None]:
def llm(prompt):
    """Run ``prompt`` through the module-level ``pipe`` and return its reply.

    The prompt is wrapped as a single ``user`` message. The ``generated_text``
    of the first result is returned with surrounding whitespace stripped.
    """
    # NOTE(review): with do_sample=False the temperature setting should have no
    # effect -- confirm against the transformers generation docs.
    generation_args = dict(
        max_new_tokens=500,
        return_full_text=False,
        temperature=1.0,
        do_sample=False,
    )
    conversation = [{"role": "user", "content": prompt}]

    first_result = pipe(conversation, **generation_args)[0]
    return first_result["generated_text"].strip()

In [None]:
def rag(query):
    """Answer ``query`` end to end: search, build the prompt, then call the LLM.

    Returns whatever ``llm`` returns for the prompt built from the search hits.
    """
    return llm(build_prompt(query, search(query)))

In [None]:
# End-to-end check: send a sample question through rag() and print the generated answer.
query = "How do I run kafka?"
answer = rag(query)

print(answer)

To run Kafka:

If you are using Java Kafka and need to run the producer/consumer/kstreams/etc from the terminal, navigate to the project directory and execute the following command:

```bash
java -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java
```

Make sure to replace `<jar_name>` with the appropriate JAR file name from your build.

If you are facing issues running Python Kafka files, consider creating a virtual environment, installing the required dependencies using `requirements.txt`, and running the files within that environment. See the detailed steps provided in the CONTEXT for managing virtual environments.

If your question pertains to another Kafka use case that is not mentioned here, the CONTEXT does not have additional details. 


