In [None]:
# Install the notebook's dependencies. The explicit %pip magic runs pip for the
# current kernel and does not depend on IPython's automagic setting.
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [15]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer

# Embedding
Uses Hugging Face embeddings backed by a sentence-transformers model (`all-MiniLM-L12-v2` by default).

In [16]:
# Embedding wrappers already built in this kernel session, keyed by model name.
_EMBEDDINGS_CACHE = {}


def embeddings(modelPath="sentence-transformers/all-MiniLM-L12-v2"):
    """Return the HuggingFaceEmbeddings wrapper for ``modelPath``.

    The wrapper is built on the first call for a given model name and reused
    afterwards, so callers that invoke this once per query do not rebuild the
    embedding model every time.

    Args:
        modelPath: Model name or path, passed to HuggingFaceEmbeddings as
            ``model_name``.

    Returns:
        A HuggingFaceEmbeddings instance configured with the device reported by
        ``Accelerator().device`` and ``normalize_embeddings=False``.
    """
    if modelPath not in _EMBEDDINGS_CACHE:
        model_kwargs = {'device': Accelerator().device}
        encode_kwargs = {'normalize_embeddings': False}
        _EMBEDDINGS_CACHE[modelPath] = HuggingFaceEmbeddings(
            model_name=modelPath,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )
    return _EMBEDDINGS_CACHE[modelPath]

# RAG Model
Uses Qwen2-0.5B-Instruct (about 500M parameters) as the generator model in the RAG pipeline.

In [20]:
# Loaded (model, tokenizer) pairs keyed by model path, so each checkpoint is
# loaded only once per kernel session instead of once per question.
_RAG_MODEL_CACHE = {}


def RAG_model(model_path="Qwen/Qwen2-0.5B-Instruct"):
    """Load the causal language model and tokenizer used for generation.

    The pair is loaded on the first call for a given ``model_path`` and the
    same objects are returned on later calls.

    Args:
        model_path: Model name or path passed to ``from_pretrained`` for both
            the model and the tokenizer.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is loaded in bfloat16 with
        ``device_map`` set to ``Accelerator().device``.
    """
    if model_path not in _RAG_MODEL_CACHE:
        device = Accelerator().device  # the device to load the model onto

        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            torch_dtype=torch.bfloat16,
            device_map=device,
            # NOTE(review): 0 is hard-coded; confirm it is the intended pad
            # token id for this checkpoint.
            pad_token_id=0
        )
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        _RAG_MODEL_CACHE[model_path] = (model, tokenizer)
    return _RAG_MODEL_CACHE[model_path]

# Retrieval
Retrieve the indexed passages most similar to the question, to be used as context for the answer.

In [None]:
def retrieve_context(query, top_k=3, index_path="/workspaces/RAG-ChatBot/faiss_index"):
    """Retrieve the most relevant documents for a given query.

    Args:
        query: Question text used for the similarity search.
        top_k: Number of documents to retrieve.
        index_path: Directory of the saved FAISS index. The default is the
            location this notebook originally hard-coded; pass another path
            when running outside that workspace.

    Returns:
        The ``page_content`` of the retrieved documents joined by single
        spaces (an empty string when no documents are returned).
    """
    # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle
    # data stored with the index, and unpickling can execute arbitrary code.
    # Only point index_path at an index you created yourself.
    vector_store = FAISS.load_local(
        index_path,
        embeddings=embeddings(),
        allow_dangerous_deserialization=True,
    )
    docs = vector_store.similarity_search(query, k=top_k)
    return " ".join(doc.page_content for doc in docs)

In [None]:
# System prompt: ask_question sends this text as the "system" message of every
# request.
# NOTE(review): the wording asks the model to "retrieve" from the dataset, but
# retrieval is done in code by retrieve_context and the result is passed in the
# user message after "Context:" -- confirm the wording is intended.
prompt_template = """
Your main role is to answer questions from the user. You are an assistant specializing in computer science principles and coding.
Retrieve relevant information from the dataset and utilize inference and suggestions for the following tasks:
- Responses should cover fundamental principles of computer science.
- Inferences are allowed to provide comprehensive answers.
- Use the provided context to list down relevant information and explanations.
- Ensure all responses are accurate and aligned with computer science topics.
Ensure responses are derived from the dataset, use inference and suggestions to provide comprehensive answers.
"""

In [18]:
 def ask_question(user_query):
     """Answer ``user_query`` using retrieved context and the generator model.

     Steps: fetch context with ``retrieve_context``, build a system + user chat,
     render it with the tokenizer's chat template, generate up to 512 new
     tokens, and decode only the newly generated tokens.

     Args:
         user_query: The question typed by the user.

     Returns:
         The model's reply as a string. Text up to and including the first
         "Answer:" marker is dropped when the marker is present, and paragraphs
         are stripped and re-joined with blank lines.
     """
     # Retrieve relevant context
     context = retrieve_context(user_query)
     model, tokenizer = RAG_model()

     # Prepare the prompt with context
     messages = [
         {"role": "system", "content": prompt_template},
         {"role": "user", "content": f"Context: {context}\n\n{user_query}"}
     ]

     # Render the chat in the format the instruct model expects, ending with
     # the assistant turn opener so the model writes the reply next.
     text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True
     )

     # Put the inputs on the model's own device; a fixed "cpu" would not match
     # a model that RAG_model placed on an accelerator.
     model_inputs = tokenizer(text, return_tensors="pt").to(model.device)
     generated_ids = model.generate(
         **model_inputs,
         max_new_tokens=512,
         pad_token_id=tokenizer.eos_token_id  # To avoid potential padding issues
     )

     # generate() returns the prompt tokens followed by the new tokens; drop
     # the prompt so the system prompt and context are not echoed to the user.
     prompt_length = model_inputs["input_ids"].shape[1]
     new_token_ids = generated_ids[:, prompt_length:]

     # Decode the generated response
     generated_text = tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]

     # Keep only what follows an "Answer:" marker, if the model produced one
     response_start = generated_text.find("Answer:")
     if response_start != -1:
         cleaned_response = generated_text[response_start + len("Answer:"):].strip()
     else:
         cleaned_response = generated_text.strip()

     # Normalize paragraph spacing: strip each paragraph and drop empty ones
     cleaned_response = "\n\n".join(
         part.strip() for part in cleaned_response.split("\n\n") if part.strip()
     )

     return cleaned_response

In [22]:
import gradio as gr
import random
import time

# Gradio UI: a chat window, a text box for the question, and a clear button.
with gr.Blocks() as RAG_Chatbot:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        """Handle a submitted question and update the chat.

        Args:
            message: Text submitted from the textbox.
            chat_history: Current chatbot history as (user, bot) pairs.

        Returns:
            A tuple of an empty string (clears the textbox) and the updated
            history.
        """
        chat_history = chat_history or []
        # Skip blank submissions instead of running retrieval and generation.
        if not message or not message.strip():
            return "", chat_history
        bot_message = ask_question(message)
        chat_history.append((message, bot_message))
        # No artificial delay: the answer is already computed at this point.
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    RAG_Chatbot.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
