In [1]:
import minsearch
import json
import os
from openai import OpenAI

In [4]:
# Do not hardcode the key in the notebook: reuse the value already exported in
# the environment and only prompt for it (without echo) when it is missing.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [5]:
# Read the key back from the environment; the result is None when it is unset.
api_key = os.getenv('OPENAI_API_KEY')

In [12]:
# Load the raw FAQ dump. JSON files are normally UTF-8, so pin the encoding
# instead of relying on the platform default (which is not UTF-8 everywhere).
with open('documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [13]:
# Flatten the per-course structure into a single list of FAQ entries, tagging
# each entry with the course it came from. Every entry is shallow-copied so
# that docs_raw itself is left unmodified by this cell.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

In [6]:
# OpenAI client used by llm(); it is given the key stored in api_key.
client = OpenAI(api_key=api_key)

In [9]:
# Set up the minsearch index: question, text and section are passed as text
# fields, and course as a keyword field (search() filters on it).
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"],
)

In [14]:
# Fit the index on the flattened FAQ entries; the value returned by fit() is
# what the notebook echoes as this cell's output.
index.fit(documents)

<minsearch.Index at 0x79d9db56f220>

In [15]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Query the FAQ index for entries matching ``query``.

    Args:
        query: Question text to search for.
        course: Value the ``course`` keyword field is filtered on. Defaults
            to the course this notebook originally hard-coded.
        num_results: Passed through to the index as ``num_results``.
            Defaults to 5, the value originally hard-coded.

    Returns:
        Whatever ``index.search`` returns for these arguments;
        ``build_prompt`` iterates over it as a sequence of FAQ dicts.
    """
    # Field weights handed to the index: question is boosted, section is
    # damped; the text field is not listed here.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )
    

In [21]:
def build_prompt(query, search_results):
    """Render the LLM prompt for ``query`` from the retrieved FAQ entries.

    Every entry in ``search_results`` must provide the keys ``section``,
    ``question`` and ``text``. The entries are concatenated in order into the
    CONTEXT part of the template, and the final prompt is returned with
    leading and trailing whitespace stripped.
    """
    prompt_template="""
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    # One formatted chunk per retrieved entry, each followed by a blank line.
    entries = [
        f"section: {hit['section']}\nquestion:{hit['question']}\nanswer:{hit['text']}\n\n"
        for hit in search_results
    ]

    filled = prompt_template.format(question=query, context="".join(entries))
    return filled.strip()
    

In [24]:
def llm(prompt, model="gpt-4o"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, e.g. the output of ``build_prompt``.
        model: Name of the chat model to query. Defaults to ``"gpt-4o"``,
            the model this notebook originally hard-coded.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [29]:
# Example question that the rag(query) call in the next cell answers.
query = 'how do i run kafka'
def rag(query):
    """Answer ``query`` via retrieval-augmented generation and print the reply.

    Chains ``search`` -> ``build_prompt`` -> ``llm`` and prints the resulting
    text. Nothing is returned.
    """
    hits = search(query)
    reply = llm(build_prompt(query, hits))
    print(reply)

In [30]:
# Run the whole pipeline for the example question; rag() prints the answer.
rag(query)

To run Kafka, you can follow these steps based on the provided CONTEXT:

**For Java-based Kafka:**

In the project directory, you can run the producer/consumer/kstreams in the terminal using the following command:
```sh
java -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java
```

**For Python-based Kafka:**

1. **Create a virtual environment (only needed once):**
   ```sh
   python -m venv env
   source env/bin/activate
   pip install -r ../requirements.txt
   ```

2. **Activate the virtual environment (needed each time you work with the environment):**
   ```sh
   source env/bin/activate
   ```

3. **Deactivate the virtual environment when done:**
   ```sh
   deactivate
   ```

Note: For Windows, the activation command is slightly different:
```sh
env/Scripts/activate
```

Make sure that Docker images are up and running if your environment setup requires it.


In [None]:
# This cell originally held an unterminated string literal (a SyntaxError);
# call the pipeline with the existing example question instead.
rag(query)

In [28]:
# rag() prints its answer but does not expose it, so no global `answer` exists
# on a fresh kernel. Build it explicitly so this cell runs on its own.
answer = llm(build_prompt(query, search(query)))
print(answer)

To run Kafka components like producer, consumer, or KStreams in the terminal, you can follow these steps based on the language you are using.

### Java Kafka:
In the project directory, run:
```
java -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java
```

### Python Kafka:
1. **Create a virtual environment** (only once):
    ```
    python -m venv env
    source env/bin/activate
    pip install -r ../requirements.txt
    ```
   For subsequent activations (every time you need the virtual env):
    ```
    source env/bin/activate
    ```
   To deactivate:
    ```
    deactivate
    ```
   Note: On Windows, the activation command is slightly different:
    ```
    env\Scripts\activate
    ```
2. **Ensure Docker images are up and running** before running the Python files.

By adhering to these steps, you should be able to successfully run your Kafka components.
