In [1]:
from IPython.display import clear_output, Markdown, display

In [2]:
# No need to run this on colab. These libraries come pre-installed on colab
# %pip install torch torchvision torchaudio nltk tqdm transformers

# Content:

In this demo, we will build a very small but functional RAG-based QA system.

A RAG-based (retrieval-augmented generation) QA system answers questions using a defined knowledge base. This knowledge base doesn't have to be part of the model's training data; in other words, we expect the QA model to have no prior knowledge of it. This is the case, for example, when building a system that answers questions about a company's policies, which are usually kept private from outsiders.

We will use a language model (Llama-2) to build the system. We can assume that the knowledge base we will be answering questions from is not something Llama-2 has existing knowledge of.

Here is a summary of how a RAG system works:

1. Documents are divided into overlapping chunks, converted to embeddings, and stored in a vector database (in this demo we skip the vector database and keep the embeddings in memory).
2. When a user asks a question, that question is converted to an embedding.
3. We compute the cosine similarity (or another similarity metric) between the question embedding and the document embeddings and pick the top n most similar document chunks (n being a hyperparameter).
4. We feed the text of those document chunks, along with the user's question, to a language model and ask it to answer the question using the information provided.
5. The model scans through the information provided and answers the question. If the model can't find the answer in the retrieved chunks, we assume that the answer to that question doesn't exist in the knowledge base.

## Setup Llama-2

We will not go into the details of setting up Llama-2 in this demo.



In [3]:
!wget https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q5_K_M.gguf

clear_output()

In [4]:
# Install llama-cpp-python at a pinned version, with CMAKE_ARGS set in the environment
# of the pip process.
# NOTE(review): -DLLAMA_CUBLAS=on presumably enables the CUDA (cuBLAS) backend for this
# version — confirm against the llama-cpp-python installation docs.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python==0.2.74  # This takes a few mins when building wheel. Be patient.

# Clear the install/build log from the cell output.
clear_output()

In [81]:
import json
import nltk
from nltk.tokenize import regexp_tokenize
from tqdm import tqdm

from llama_cpp import Llama

from transformers import BertModel, BertTokenizer, AutoModel, AutoTokenizer
import torch

from torch.nn.functional import cosine_similarity


# Download NLTK's 'punkt' package.
# NOTE(review): the only NLTK function this notebook calls is regexp_tokenize —
# confirm whether 'punkt' is actually needed before keeping this download.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [9]:
# Load the quantized Llama-2 chat weights from the GGUF file in the working directory.
model = Llama(
    model_path="llama-2-7b-chat.Q5_K_M.gguf",
    chat_format="llama-2",
    n_ctx=2048,       # context window size, in tokens
    n_gpu_layers=-1,  # NOTE(review): -1 presumably offloads every layer to the GPU — confirm
)

# Clear the model-loading log from the cell output.
clear_output()

## Setting up the knowledge base:

We're going to use some simple rules for Hogwarts (from Harry Potter), rewritten by ChatGPT, as our knowledge base.

In [10]:
def create_overlapping_chunks(documents, chunk_size, overlap_size):
    """
    Split a sequence into consecutive chunks that share `overlap_size` items.

    Args:
        documents: A sliceable sequence with a length (e.g. a list of tokens).
        chunk_size: Maximum number of items per chunk. Must be positive.
        overlap_size: Number of trailing items of one chunk that are repeated at
            the start of the next. Must satisfy 0 <= overlap_size < chunk_size.

    Returns:
        A list of slices of `documents`. Every chunk has `chunk_size` items except
        possibly the last, which holds whatever remains. An empty input yields
        an empty list.

    Raises:
        ValueError: If `chunk_size` is not positive, or `overlap_size` is outside
            the range [0, chunk_size).
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap_size < chunk_size:
        # overlap == chunk_size would make the step zero (range() rejects it) and a
        # larger overlap would make it negative (range() silently yields nothing).
        raise ValueError(
            f"overlap_size must satisfy 0 <= overlap_size < chunk_size, "
            f"got overlap_size={overlap_size}, chunk_size={chunk_size}"
        )

    step = chunk_size - overlap_size
    chunks = []
    for start in range(0, len(documents), step):
        chunks.append(documents[start:start + chunk_size])
        # Once a chunk reaches the end of the input, any later window would contain
        # only items already present in this chunk's tail, so stop here instead of
        # emitting a redundant fragment.
        if start + chunk_size >= len(documents):
            break
    return chunks

In [None]:
# Prefer the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [124]:
# Load the pre-trained DistilBERT checkpoint and its matching tokenizer; these are
# used to embed both the knowledge-base chunks and the user's question.
embedding_model_name = 'distilbert-base-uncased'
embedding_tokenizer = AutoTokenizer.from_pretrained(embedding_model_name)
embedding_model = AutoModel.from_pretrained(embedding_model_name)
embedding_model = embedding_model.to(device)  # place the weights on the selected device

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

In [125]:
def get_embeddings(texts):
    """
    Embed a batch of texts with the notebook's embedding model.

    Each text is represented by the last-layer hidden state at the first token
    position (the [CLS] position for BERT-style tokenizers).

    Args:
        texts: A list of strings to embed in a single batch.

    Returns:
        A tensor with one row per input text, located on `device`.
    """
    encoded = embedding_tokenizer(
        texts,
        return_tensors='pt',
        padding=True,     # pad to the longest text so the batch is rectangular
        truncation=True,  # clip over-long texts to the model's maximum input length
                          # instead of letting the forward pass fail on them
    )
    model_inp = {name: tensor.to(device) for name, tensor in encoded.items()}
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        embeddings = embedding_model(**model_inp).last_hidden_state[:, 0, :]
    return embeddings

In [126]:
re_written_rules = """
**Hogwarts School of Witchcraft and Wizardry Rules and Regulations**

Welcome to Hogwarts School of Witchcraft and Wizardry! As you embark on your magical journey, it's important to understand and adhere to the rules and regulations that govern our esteemed institution. These rules are designed to ensure the safety, well-being, and academic success of all students, faculty, and staff. Failure to comply with these rules may result in disciplinary action, up to and including expulsion. Without further ado, let us delve into the intricacies of Hogwarts' rules:

**1. General Conduct:**

   a. All students are expected to conduct themselves with respect, courtesy, and consideration towards fellow students, faculty, and staff.

   b. Disruptive behavior, including but not limited to fighting, bullying, or using magic in a manner that endangers others, is strictly prohibited.

   c. Respect for personal and school property is paramount. Vandalism, theft, or unauthorized use of magical artifacts will not be tolerated.

   d. Students must adhere to curfew hours and remain within designated areas of the school premises unless given express permission by a faculty member or the Headmaster.

**2. Academic Integrity:**

   a. Plagiarism, cheating, or any form of academic dishonesty is unacceptable. All work submitted must be original and properly attributed to its sources.

   b. Unauthorized use of magical aids during examinations or assignments is strictly prohibited.

   c. Collaboration on assignments is encouraged, but each student is ultimately responsible for their own work.

**3. Use of Magic:**

   a. The use of magic outside of designated areas or without supervision is restricted.

   b. First-year students are not permitted to use magic outside of classroom settings, except in cases of emergency or under the direct supervision of a faculty member.

   c. The use of dark magic or spells that cause harm to others is strictly forbidden and will result in severe disciplinary action.

   d. Unauthorized enchantments or spells on school property are prohibited.

**4. Safety and Security:**

   a. Students must adhere to safety guidelines when handling magical creatures, potions, or other potentially hazardous materials.

   b. Forbidden Forest and certain areas of the school, such as the Chamber of Secrets, are strictly off-limits without express permission from a faculty member.

   c. The use of time-turners or other magical artifacts capable of altering time is prohibited except under extraordinary circumstances and with approval from the Headmaster.

**5. House Rules:**

   a. Each house has its own set of rules and traditions. Students are expected to uphold the values of their respective houses and support their housemates.

   b. Inter-house rivalry is encouraged but must remain friendly and respectful.

   c. Points will be awarded or deducted based on students' behavior, academic performance, and contributions to the school community.

**6. Dress Code:**

   a. Students are required to wear their house robes and adhere to the school uniform guidelines during formal occasions and classes.

   b. Casual attire is permitted during leisure time, but clothing must be modest and in accordance with school standards.

**7. Communication:**

   a. Owls, enchanted objects, and the Floo Network are the approved methods of communication with the outside world. The use of Muggle technology, such as telephones or computers, is prohibited.

   b. Sending Howlers or other forms of magical communication with the intent to harass or intimidate others is strictly prohibited.

**8. Attendance and Punctuality:**

   a. Regular attendance is mandatory for all classes, unless excused by a faculty member or the school nurse.

   b. Tardiness without valid reason is unacceptable and may result in detention.

**9. Extracurricular Activities:**

   a. Participation in extracurricular activities, such as Quidditch, the Duelling Club, or the Frog Choir, is encouraged but not mandatory.

   b. Students must obtain permission from a faculty advisor before forming new clubs or organizations.

**10. Prefects and Head Boy/Head Girl Responsibilities:**

   a. Prefects and Head Boy/Head Girl are appointed to uphold school rules, assist faculty members, and serve as role models for their peers.

   b. They are authorized to enforce school regulations and report any violations to the appropriate authorities.

**11. Disciplinary Action:**

   a. Violations of school rules may result in disciplinary action, including but not limited to detention, loss of house points, or temporary suspension.

   b. Repeat offenders or those engaged in serious misconduct may face expulsion from Hogwarts.

**12. Amendments and Interpretations:**

   a. These rules are subject to periodic review and amendment by the Hogwarts Board of Governors and the Headmaster.

   b. In cases where the interpretation of a rule is unclear, the decision of the Headmaster shall be final.

As members of the Hogwarts community, it is our collective responsibility to uphold the values and traditions of our school. By following these rules and regulations, we can ensure a safe, welcoming, and enriching environment for all. Welcome to Hogwarts, where the magic of learning and friendship awaits!

**Magic Duel Rules and Regulations**

Magic duels are an esteemed tradition at Hogwarts School of Witchcraft and Wizardry, providing students with an opportunity to hone their magical skills and engage in friendly competition. However, dueling can also be dangerous if not conducted with proper care and respect for the rules. To ensure the safety and fairness of all participants, the following rules and regulations govern magic duels at Hogwarts:

**1. Dueling Eligibility:**

   a. Only students who have reached their third year at Hogwarts or above are eligible to participate in magic duels.

   b. Dueling may only take place under the supervision of a qualified instructor or Dueling Club supervisor.

**2. Consent and Agreement:**

   a. Participation in a duel is voluntary, and all participants must consent to engage in the duel willingly.

   b. Participants must agree to abide by the rules and regulations outlined for dueling and must understand the potential risks involved.

**3. Dueling Grounds:**

   a. Dueling may take place in designated areas approved by the Dueling Club supervisor or a qualified instructor.

   b. The dueling grounds must be free from obstacles or hazards that could pose a risk to participants.

**4. Preparation and Safety Measures:**

   a. Participants must ensure that their wands are in good condition and free from defects that could cause harm during the duel.

   b. Protective enchantments and shields may be cast by participants for their own safety, but offensive spells must not be used until the duel begins.

**5. Dueling Etiquette:**

   a. Duelists must address each other with respect and refrain from using offensive language or gestures.

   b. The use of dark magic or spells intended to cause serious harm is strictly prohibited and will result in immediate disqualification and disciplinary action.

**6. Dueling Format:**

   a. Duels may be conducted in various formats, including one-on-one duels, team duels, or round-robin tournaments.

   b. Duelists may agree upon specific rules or conditions for the duel, such as the use of specific spells or magical limitations.

**7. Dueling Protocol:**

   a. The duel begins when both participants acknowledge their readiness and the Dueling Club supervisor or instructor gives the signal to commence.

   b. Duelists must remain within the designated dueling area and may not leave the premises until the duel is concluded or permission is granted by the supervisor.

**8. Victory and Surrender:**

   a. The duel ends when one participant concedes defeat or is unable to continue due to incapacitation or exhaustion.

   b. Participants must accept the outcome of the duel with good grace and refrain from seeking revenge or retaliation outside of the dueling context.

**9. Post-Duel Protocol:**

   a. After the duel, participants must shake hands or otherwise acknowledge each other with sportsmanship and respect.

   b. Any injuries sustained during the duel must be reported to the Dueling Club supervisor or instructor for evaluation and treatment.

**10. Enforcement and Discipline:**

   a. Violations of dueling rules or regulations will result in disciplinary action, including but not limited to loss of dueling privileges, detention, or temporary suspension.

   b. Repeat offenders or those engaged in serious misconduct may face expulsion from the Dueling Club or Hogwarts School of Witchcraft and Wizardry.

By adhering to these rules and regulations, participants can enjoy the thrill of magical dueling while ensuring a safe and fair environment for all. Let the magic flow and may the best witch or wizard prevail!

**Visits from Family and Excursions Outside Hogwarts during the School Year**

Hogwarts School of Witchcraft and Wizardry recognizes the importance of maintaining connections with family and experiencing the magical world beyond the castle walls. However, the safety and well-being of our students are paramount, and thus certain rules and regulations must be followed when leaving Hogwarts or hosting family visits during the school year.

**1. Visits from Family:**

   a. Students may receive visits from family members during designated visiting hours, which are typically scheduled on weekends or holidays.

   b. Family members must obtain prior approval from the Headmaster or Deputy Headmaster before visiting Hogwarts.

   c. Visitors must adhere to all Hogwarts rules and regulations during their stay and must conduct themselves with respect towards students, faculty, and staff.

**2. Excursions to Hogsmeade:**

   a. Third-year students and above may be granted permission to visit Hogsmeade village on selected weekends throughout the school year.

   b. Students must obtain a signed permission form from their parent or guardian and present it to the appropriate authority for approval.

   c. Students are expected to conduct themselves responsibly and adhere to all rules and curfews while in Hogsmeade.

   d. Students must return to Hogwarts by the designated curfew time, and failure to do so may result in disciplinary action.

**3. Transportation and Safety:**

   a. Students traveling outside Hogwarts must use approved modes of transportation, such as the Hogwarts Express or the school's enchanted carriages.

   b. Apparition and other forms of magical transportation are not permitted for student excursions without explicit permission from the Headmaster.

   c. Students must travel in groups and remain together at all times to ensure safety and accountability.

**4. Supervision and Chaperones:**

   a. Student excursions outside Hogwarts must be supervised by qualified faculty members or staff.

   b. Chaperones will ensure that students follow all rules and regulations and will intervene if any issues or emergencies arise.

**5. Curfews and Return Times:**

   a. Students participating in excursions outside Hogwarts must return to the castle by the designated curfew time, as specified by the Headmaster or designated authority.

   b. Failure to return to Hogwarts by the curfew time may result in disciplinary action and loss of privileges for future excursions.

**6. Conduct and Responsibility:**

   a. Students are expected to conduct themselves with maturity, responsibility, and respect for others during excursions and family visits.

   b. Any violations of Hogwarts rules or regulations during excursions or family visits will result in disciplinary action.

**7. Emergency Procedures:**

   a. In the event of an emergency or unforeseen circumstance during an excursion or family visit, students must follow the instructions of faculty members or designated authorities.

   b. Students should be familiar with emergency procedures and evacuation routes for both Hogwarts and any external locations they visit.

**8. Approval Process:**

   a. Requests for family visits or student excursions must be submitted to the appropriate authority well in advance of the planned date.

   b. The Headmaster or designated authority will review and approve requests based on factors such as safety, academic responsibilities, and availability of supervision.

**9. Communication and Updates:**

   a. Parents or guardians will be informed of any excursions or family visits in advance and will receive updates on their child's participation and well-being.

   b. Students are responsible for keeping their families informed of their plans and whereabouts during excursions or family visits.

**10. Enforcement and Discipline:**

   a. Violations of rules or regulations during excursions or family visits will result in disciplinary action, including but not limited to detention, loss of privileges, or temporary suspension.

   b. Repeat offenders may face more severe consequences, including expulsion from Hogwarts.

By adhering to these rules and regulations, students can enjoy meaningful connections with family and enriching experiences beyond the castle walls while ensuring their safety and well-being at all times. Let us embrace the magic of exploration and connection while upholding the values of Hogwarts School of Witchcraft and Wizardry.
"""

In [127]:
# Token pattern: a run of word characters, OR a single character that is neither a
# word character nor whitespace (i.e. punctuation), OR a newline. Other whitespace
# matches none of the alternatives and is therefore dropped.
tokenize_re_pattern = r'\w+|[^\w\s]|\n'  # we want to keep new lines.
tokenized_rules = regexp_tokenize(re_written_rules, tokenize_re_pattern)
len(tokenized_rules)  # shown as the cell output: total number of tokens

2711

In [128]:
# Window the token list into chunks of 120 tokens that overlap by 20, then turn each
# chunk back into a single space-separated string.
token_chunks = create_overlapping_chunks(tokenized_rules, chunk_size=120, overlap_size=20)
rules_chunks = [' '.join(tokens) for tokens in token_chunks]

## Calculating embeddings for knowledge base

In [129]:
# Embed every chunk in a single call and move the result to the CPU, where it is
# kept in memory for the similarity search.
chunk_embeddings = get_embeddings(rules_chunks).cpu()

## Question Answering

In [130]:
# How many of the most similar chunks are placed in the prompt as context.
top_n_docs_to_pick = 10

In [153]:
# The question to answer from the knowledge base.
user_question = "When can my family visit me?"

In [154]:
# Embed the question with the same model as the chunks, score every chunk by cosine
# similarity to it, and keep the text of the highest-scoring chunks (best first).
user_question_embeddings = get_embeddings([user_question]).cpu()
similarities = cosine_similarity(chunk_embeddings, user_question_embeddings)
top_n_chunk_idxs = torch.argsort(similarities, descending=True)[:top_n_docs_to_pick].tolist()
top_chunks = [rules_chunks[chunk_idx].strip() for chunk_idx in top_n_chunk_idxs]

In [155]:
# Number the retrieved chunks from 1, one per line, for inclusion in the prompt.
top_chunks_prompt_part = '\n'.join(
    f'{position}. {rule}' for position, rule in enumerate(top_chunks, start=1)
)

# Role description for the model.
system_msg = "You are a chat bot who's job is to read the provided rules and regulations and use them to answer the question of a user."

# User turn: the retrieved context, the question, and instructions to answer only from the context.
user_prompt = f"""
These are the rules, regulations and instructions:

{top_chunks_prompt_part}

Keeping these in mind, answer this question:

{user_question}

When answering the question, only use the knowledge provided in the rules and regulations.
If the answer does not exist in them or the information is insufficient, just say so.
Do NOT misguide the user by making up answers on your own or giving false information.

Answer:
"""

In [156]:
# Wrap the system message and the user prompt in Llama-2 chat markers
# ([INST] ... [/INST] with a <<SYS>> ... <</SYS>> section) by hand.
prompt = f"""
[INST] <<SYS>>
{system_msg}
<</SYS>>
{user_prompt}[/INST]
"""

# NOTE(review): this call passes `prompt` as a plain string; confirm whether the
# chat_format given when the model was constructed is applied on this path, or
# whether create_chat_completion is the intended entry point for chat formatting.
# NOTE(review): max_tokens=None sets no explicit cap here — presumably generation is
# then bounded by the context window; confirm.
user_answer = model(prompt, max_tokens=None)

Llama.generate: prefix-match hit

llama_print_timings:        load time =     679.09 ms
llama_print_timings:      sample time =      56.67 ms /    94 runs   (    0.60 ms per token,  1658.67 tokens per second)
llama_print_timings: prompt eval time =    1376.02 ms /  1069 tokens (    1.29 ms per token,   776.88 tokens per second)
llama_print_timings:        eval time =    2936.85 ms /    93 runs   (   31.58 ms per token,    31.67 tokens per second)
llama_print_timings:       total time =    4686.48 ms /  1162 tokens


In [157]:
# Show the generated text of the first completion choice as the cell output.
user_answer['choices'][0]['text']

'According to the rules and regulations provided, family visits are allowed during designated visiting hours, which are typically scheduled on weekends or holidays. However, prior approval from the Headmaster or Deputy Headmaster is required before any family member can visit you at Hogwarts. Additionally, visitors must adhere to all Hogwarts rules and regulations during their stay and conduct themselves with respect towards students, faculty, and staff.'