In [None]:
pip install transformers torch sentence-transformers faiss-cpu langchain

In [None]:
pip install -U langchain-community

In [None]:
pip install huggingface_hub

In [None]:
pip install sentencepiece

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader
import torch

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face Hub id shared by the tokenizer and the model, so both are
# always loaded from the same checkpoint.
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# `tokenizer` and `model` are module-level globals read by generate_response().
# No dtype or device arguments are passed here, so the library defaults apply.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

In [None]:
import os

# Location of the text file to index. Set the RAG_DATA_PATH environment
# variable to run the notebook on another machine; when it is unset, the
# original local path is used unchanged.
DATA_PATH = os.environ.get(
    "RAG_DATA_PATH",
    r"C:\Users\shubdosh\Desktop\test_projects\Learning RAG\RAG pipeline\test_data.txt",
)

loader = TextLoader(DATA_PATH)
documents = loader.load()

# Split the loaded documents into overlapping chunks (chunk_size=1000,
# chunk_overlap=200) so that text cut at a chunk boundary still appears
# whole in one of the neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(documents)

In [None]:

from langchain.embeddings import HuggingFaceEmbeddings

# Sentence-embedding checkpoint used to vectorise every chunk.
EMBEDDING_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_ID)

# Embed the chunks in `splits` and build the FAISS store that
# retrieve_context() queries.
vectorstore = FAISS.from_documents(splits, embeddings)

In [None]:
def retrieve_context(query, k=3):
    """Return the text of the ``k`` chunks most similar to ``query``.

    Args:
        query: Search text handed to the vector store.
        k: Number of chunks to fetch (defaults to 3).

    Returns:
        The ``page_content`` of every hit, in the order the store returned
        them, joined with single spaces.
    """
    return " ".join(
        hit.page_content
        for hit in vectorstore.similarity_search(query, k=k)
    )

In [None]:
def generate_response(query, context):
    """Generate an answer to ``query`` grounded in ``context``.

    Builds an instruction prompt, samples a completion from the global
    ``model`` and returns only the newly generated text.

    Args:
        query: The user's question.
        context: Retrieved text inserted into the prompt.

    Returns:
        The decoded completion (prompt excluded, special tokens skipped),
        stripped of surrounding whitespace.
    """
    # No literal "<s>" here: the tokenizer call below uses its default
    # special-token handling, which already prepends the BOS token for this
    # checkpoint; spelling it out as well would produce two BOS tokens.
    prompt = f"""[INST] Use this context to answer the question:

Context: {context}

Question: {query}

Answer: [/INST]"""

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
    inputs = inputs.to(model.device)
    # Remember how many tokens belong to the prompt so they can be dropped
    # from the generated sequence afterwards.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # position instead of splitting the decoded text on "Answer:": that marker
    # may also occur in the context, the question or the model's own output,
    # and is missing altogether if the prompt was truncated.
    answer_ids = outputs[0][prompt_length:]
    return tokenizer.decode(answer_ids, skip_special_tokens=True).strip()

In [None]:
def rag_pipeline(query):
    """Answer ``query`` by retrieving context and generating a response.

    Args:
        query: The user's question.

    Returns:
        The generated answer, or a string of the form
        ``"An error occurred: <message>"`` if retrieval or generation raises.
    """
    try:
        return generate_response(query, retrieve_context(query))
    except Exception as err:
        # Best-effort: report the failure as text so callers always get a string.
        return f"An error occurred: {err!s}"

In [None]:
def run_interactive_interface():
    """Run a console loop that answers questions until the user quits.

    Each question is read with ``input()``; non-empty questions are passed to
    ``rag_pipeline`` and the answer is printed. The loop ends when the user
    types ``quit`` (any case, surrounding whitespace ignored), presses
    Ctrl+C, or the input stream reaches end-of-file.
    """
    print("RAG Pipeline initialized. Ask questions about the text (type 'quit' to exit)")
    print("-" * 50)

    while True:
        try:
            query = input("\nYour question: ")
            # Compare on the trimmed text so " quit " is recognised as well.
            command = query.strip()

            if command.lower() == 'quit':
                print("Exiting the program...")
                break

            if not command:
                print("Please enter a valid question.")
                continue

            print("\nGenerating response...")
            response = rag_pipeline(query)
            print("\nAnswer:", response)
            print("-" * 50)

        except KeyboardInterrupt:
            print("\nProgram interrupted by user. Exiting...")
            break
        except EOFError:
            # input() raises EOFError once stdin is closed or exhausted.
            # Retrying would raise it again immediately, so falling through to
            # the generic handler below would spin forever.
            print("\nNo more input available. Exiting...")
            break
        except Exception as e:
            print(f"\nAn error occurred: {str(e)}")
            print("Please try again.")

In [None]:
# Start the interactive question loop only when this code runs as the main
# program (not when it is imported as a module).
if __name__ == "__main__":
    run_interactive_interface()