In [8]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

#LLM

In [9]:
def RAG_model(model_path="Qwen/Qwen2-0.5B-Instruct"):
    """Load the causal language model and its matching tokenizer.

    Args:
        model_path: Hugging Face Hub id or local directory handed to both
            ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # ``device_map="auto"`` lets the library decide where the weights live, so
    # the model is not guaranteed to sit on the CPU. Code that feeds tensors to
    # the model should move them to ``model.device`` instead of a fixed device.
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        # NOTE(review): this id is hard-coded rather than read from the
        # tokenizer -- confirm that 0 really is the intended padding token.
        pad_token_id=0
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer

In [10]:
def embeddings(modelPath="sentence-transformers/all-MiniLM-L12-v2"):
    """Create the sentence-embedding wrapper used for vector search.

    Args:
        modelPath: Name or path of the pre-trained sentence-transformers model.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured to run on the CPU with
        ``normalize_embeddings`` switched off.
    """
    return HuggingFaceEmbeddings(
        model_name=modelPath,
        # Model configuration: run all computations on the CPU.
        model_kwargs=dict(device="cpu"),
        # Encoding options: return the raw, un-normalized vectors.
        encode_kwargs=dict(normalize_embeddings=False),
    )


In [11]:
# Step 2: Retrieve Relevant Information
def retrieve_context(query, top_k=3, index_path="faiss_index"):
    """Retrieve the most relevant documents for a given query.

    Args:
        query: Text to search the vector store with.
        top_k: Number of documents requested from the similarity search.
        index_path: Directory holding the saved FAISS index. Defaults to
            ``"faiss_index"``.

    Returns:
        The ``page_content`` of the retrieved documents joined by single
        spaces; an empty string when the search returns no documents.
    """
    # SECURITY: ``allow_dangerous_deserialization=True`` opts in to loading
    # pickled data from the index directory. Only point ``index_path`` at an
    # index you created yourself or otherwise trust.
    # The index and the embedding model are loaded anew on every call.
    vector_store = FAISS.load_local(
        index_path,
        embeddings=embeddings(),
        allow_dangerous_deserialization=True,
    )
    docs = vector_store.similarity_search(query, k=top_k)
    return " ".join(doc.page_content for doc in docs)

In [12]:
# System prompt: the fixed instruction text that ask_question sends as the
# "system" message ahead of the retrieved context and the user's question.
# The text is passed to the model verbatim, including the leading and trailing
# newlines produced by the triple-quoted literal.
prompt_template = """
Your main role is to answer questions from the user. You are an assistant specializing in computer science principles and coding.
Retrieve relevant information from the dataset and utilize inference and suggestions for the following tasks:
- Responses should cover fundamental principles of computer science.
- Inferences are allowed to provide comprehensive answers.
- Use the provided context to list down relevant information and explanations.
- Ensure all responses are accurate and aligned with computer science topics.
Ensure responses are derived from the dataset, use inference and suggestions to provide comprehensive answers.
"""

In [13]:
def ask_question(user_query, model=None, tokenizer=None):
    """Answer ``user_query`` with retrieved context and print the answer.

    Args:
        user_query: The question to answer.
        model: Optional, already-loaded causal language model. Supplying it
            (together with ``tokenizer``) avoids reloading the weights on
            every call.
        tokenizer: Optional tokenizer matching ``model``. When either
            ``model`` or ``tokenizer`` is omitted, both are loaded with
            ``RAG_model()``.

    Returns:
        None. The cleaned answer is printed, prefixed with ``"Response: "``.
    """
    # Retrieve relevant context
    context = retrieve_context(user_query)
    if model is None or tokenizer is None:
        model, tokenizer = RAG_model()

    # Prepare the prompt with context
    messages = [
        {"role": "system", "content": prompt_template},
        {"role": "user", "content": f"Context: {context}\n\n{user_query}"}
    ]

    # Instruct-tuned models expect their own chat markup, so use the
    # tokenizer's chat template when it has one. Fall back to a plain
    # "role: content" transcript for tokenizers without a template.
    if getattr(tokenizer, "chat_template", None):
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
    else:
        text = "\n".join(
            f"{message['role']}: {message['content']}" for message in messages
        )

    # Put the inputs on whatever device the model was placed on; a hard-coded
    # device breaks as soon as the weights are loaded somewhere else.
    model_inputs = tokenizer(text, return_tensors="pt").to(model.device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id  # To avoid potential padding issues
    )

    # generate() returns prompt + continuation for decoder-only models; keep
    # only the new tokens so the prompt is not echoed back in the answer.
    prompt_length = model_inputs["input_ids"].shape[1]
    new_token_ids = generated_ids[:, prompt_length:]
    generated_text = tokenizer.batch_decode(new_token_ids, skip_special_tokens=True)[0]

    # If the model prefixed its reply with "Answer:", drop everything up to
    # and including that marker.
    marker = "Answer:"
    response_start = generated_text.find(marker)
    if response_start != -1:
        cleaned_response = generated_text[response_start + len(marker):].strip()
    else:
        cleaned_response = generated_text.strip()

    # Normalize paragraph spacing: trim each paragraph and drop empty ones.
    paragraphs = (part.strip() for part in cleaned_response.split("\n\n"))
    cleaned_response = "\n\n".join(part for part in paragraphs if part)

    print("Response: ", cleaned_response)

In [14]:
# Example question: run the pipeline once end to end. ask_question prints the
# answer itself and returns None, so this cell shows no Out[...] value.
user_query = "What is EC2 in AWS?"
ask_question(user_query)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
