# Prepare Open AI client object

In [1]:
from openai import OpenAI

In [23]:
from dotenv import load_dotenv
import os

load_dotenv()

True

In [3]:
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [4]:
client

<openai.OpenAI at 0x7fb6adc70460>

In [24]:
HF_TOKEN = os.getenv("HF_TOKEN")

# Prepare documents for information retrieval

In [5]:
import json

In [6]:
with open("documents.json", "rt") as f_in:
    docs_raw = json.load(f_in)

In [7]:
documents = []

for course_dict in docs_raw:
    for doc in course_dict["documents"]:
        doc["course"] = course_dict["course"]
        documents.append(doc)

# RAG with Elastic Search

In [8]:
from elasticsearch import Elasticsearch

In [9]:
es_client = Elasticsearch("http://localhost:9200")

In [10]:
# es_client.info()

In [11]:
index_settings = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"},
        }
    },
}

index_name = "course-questions"

# Create an index in elastic search (equivelant to a table in sql database)
es_client.indices.create(index=index_name, body=index_settings)

BadRequestError: BadRequestError(400, 'resource_already_exists_exception', 'index [course-questions/M8CLUZQfR1me1i5xutR_hg] already exists')

In [12]:
for doc in documents:

    es_client.index(index=index_name, document=doc)

In [13]:
def elastic_search(query):

    search_query = {
        "size": 5,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        # a question is 3 times more important than the text or section for the search results
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields",
                    }
                },
                "filter": {  # limit questions about only one part of the documents available
                    "term": {"course": "data-engineering-zoomcamp"}
                },
            }
        },
    }

    search_results = es_client.search(index=index_name, body=search_query)

    result_docs = []

    for hit in search_results["hits"]["hits"]:
        result_docs.append(hit["_source"])

    return result_docs

In [14]:
def build_prompt(query, search_results):

    prompt_template = """
    QUESTION: {question}

    CONTEXT:
    {context}
    
    ANSWER:
    """.strip()

    context_template = """
        section:{section}
        question: {question}
        answer: {text}
        """.strip()

    context_list = []

    for doc in search_results:
        context_list.append(context_template.format(**doc))

    context = "/n/n".join(context_list)

    prompt = prompt_template.format(question=query, context=context).strip()

    return prompt

# Load open source model from Hugging Face AI

Explanation of Parameters:

- max_length: Set this to a higher value if you want longer responses. For example, max_length=300.
num_beams: Increasing this can lead to more thorough exploration of possible sequences. Typical values are between 5 and 10.
- do_sample: Set this to True to use sampling methods. This can produce more diverse responses.
temperature: Lowering this value makes the model more confident and deterministic, while higher values increase diversity. Typical values range from 0.7 to 1.5.
- top_k and top_p: These parameters control nucleus sampling. top_k limits the sampling pool to the top k tokens, while top_p uses cumulative probability to cut off the sampling pool. Adjust these based on the desired level of randomness.

In [19]:
from huggingface_hub import login

In [26]:
login(token=HF_TOKEN)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/ec2-user/.cache/huggingface/token
Login successful


## mistralai/Mistral-7B-v0.1

In [27]:
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1", device_map="auto", load_in_4bit=True
)


tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-v0.1", padding_side="left"
)

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [32]:
def llm(prompt):

    model_inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

    generated_ids = model.generate(**model_inputs, max_length=600)
    result = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    return result

In [33]:
def elastic_rag(query):
    search_results = elastic_search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return search_results, prompt, answer

In [34]:
query = "i just found out about this course, can i still join?"

In [35]:
search_results, prompt, answer = elastic_rag(query)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [36]:
answer

"QUESTION: i just found out about this course, can i still join?\n\n    CONTEXT:\n    section:General course-related questions\n        question: Course - Can I still join the course after the start date?\n        answer: Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute./n/nsection:General course-related questions\n        question: Course - Can I still join the course after the start date?\n        answer: Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute./n/nsection:General course-related questions\n        question: Course - What can I do before the course starts?\n        answer: You can start by installing and setting up all the dependencies and requirements:\nGoogl